diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 8bfaf6109f..b8da8d52fb 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -230,7 +230,7 @@ You can read more about relabeling in the following articles: ## Scraping big number of targets A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc. -In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` clustering). +In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` horizontal scaling and clustering). Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values. The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster. The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands @@ -241,6 +241,19 @@ spread scrape targets among a cluster of two `vmagent` instances: /path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ... ``` +By default each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances, +then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands +start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances: + +``` +/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ... 
+/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ... +/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ... +``` + +If each target is scraped by multiple `vmagent` instances, then data deduplication must be enabled at the remote storage pointed to by `-remoteWrite.url`. +See [these docs](https://victoriametrics.github.io/#deduplication) for details. + ## Monitoring diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index bbe6d002de..2372889c86 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -2,6 +2,8 @@ # tip +* FEATURE: vmagent: add ability to replicate scrape targets among `vmagent` instances in the cluster with `-promscrape.cluster.replicationFactor` command-line flag. See [these docs](https://victoriametrics.github.io/vmagent.html#scraping-big-number-of-targets). + # [v1.55.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.55.1) diff --git a/docs/vmagent.md b/docs/vmagent.md index 8bfaf6109f..b8da8d52fb 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -230,7 +230,7 @@ You can read more about relabeling in the following articles: ## Scraping big number of targets A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc. -In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` clustering). +In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` horizontal scaling and clustering). Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values. The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster. 
The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands @@ -241,6 +241,19 @@ spread scrape targets among a cluster of two `vmagent` instances: /path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ... ``` +By default each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances, +then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands +start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances: + +``` +/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ... +/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ... +/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ... +``` + +If each target is scraped by multiple `vmagent` instances, then data deduplication must be enabled at the remote storage pointed to by `-remoteWrite.url`. +See [these docs](https://victoriametrics.github.io/#deduplication) for details. + ## Monitoring diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 77029f7be1..2e520a943c 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -47,6 +47,8 @@ var ( "Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. 
a single scraper scrapes all the targets") clusterMemberNum = flag.Int("promscrape.cluster.memberNum", 0, "The number of number in the cluster of scrapers. "+ "It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster") + clusterReplicationFactor = flag.Int("promscrape.cluster.replicationFactor", 1, "The number of members in the cluster, which scrape the same targets. "+ + "If the replication factor is greater than 2, then the deduplication must be enabled at remote storage side. See https://victoriametrics.github.io/#deduplication") ) // Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/ @@ -721,12 +723,25 @@ func appendScrapeWorkKey(dst []byte, target string, extraLabels, metaLabels map[ return dst } -func needSkipScrapeWork(key string) bool { - if *clusterMembersCount <= 0 { +func needSkipScrapeWork(key string, membersCount, replicasCount, memberNum int) bool { + if membersCount <= 1 { return false } h := int(xxhash.Sum64(bytesutil.ToUnsafeBytes(key))) - return (h % *clusterMembersCount) != *clusterMemberNum + idx := h % membersCount + if replicasCount < 1 { + replicasCount = 1 + } + for i := 0; i < replicasCount; i++ { + if idx == memberNum { + return false + } + idx++ + if idx >= replicasCount { + idx = 0 + } + } + return true } func appendSortedKeyValuePairs(dst []byte, m map[string]string) []byte { @@ -753,7 +768,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel bb := scrapeWorkKeyBufPool.Get() defer scrapeWorkKeyBufPool.Put(bb) bb.B = appendScrapeWorkKey(bb.B[:0], target, extraLabels, metaLabels) - if needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B)) { + if needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B), *clusterMembersCount, *clusterReplicationFactor, *clusterMemberNum) { return nil, nil } diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go index 
6ca1cf993e..ef4d20599f 100644 --- a/lib/promscrape/config_test.go +++ b/lib/promscrape/config_test.go @@ -12,6 +12,31 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" ) +func TestNeedSkipScrapeWork(t *testing.T) { + f := func(key string, membersCount, replicationFactor, memberNum int, needSkipExpected bool) { + t.Helper() + needSkip := needSkipScrapeWork(key, membersCount, replicationFactor, memberNum) + if needSkip != needSkipExpected { + t.Fatalf("unexpected needSkipScrapeWork(key=%q, membersCount=%d, replicationFactor=%d, memberNum=%d)", key, membersCount, replicationFactor, memberNum) + } + } + // Disabled clustering + f("foo", 0, 0, 0, false) + + // A cluster with 2 nodes with disabled replication + f("foo", 2, 0, 0, true) + f("foo", 2, 0, 1, false) + + // A cluster with 2 nodes with replicationFactor=2 + f("foo", 2, 2, 0, false) + f("foo", 2, 2, 1, false) + + // A cluster with 3 nodes with replicationFactor=2 + f("foo", 3, 2, 0, false) + f("foo", 3, 2, 1, true) + f("foo", 3, 2, 2, false) +} + func TestLoadStaticConfigs(t *testing.T) { scs, err := loadStaticConfigs("testdata/file_sd.json") if err != nil {