lib/promscrape: add -promscrape.cluster.replicationFactor command-line flag for replicating scrape targets among vmagent instances in the cluster

This commit is contained in:
Aliaksandr Valialkin 2021-03-04 10:20:15 +02:00
parent 0ce89b85d4
commit 133fb9fc00
5 changed files with 74 additions and 6 deletions

View File

@ -230,7 +230,7 @@ You can read more about relabeling in the following articles:
## Scraping big number of targets ## Scraping big number of targets
A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc. A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc.
In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` clustering). In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` horizontal scaling and clustering).
Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values. Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values.
The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster. The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster.
The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands
@ -241,6 +241,19 @@ spread scrape targets among a cluster of two `vmagent` instances:
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ... /path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
``` ```
By default each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances:
```
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ...
```
If each target is scraped by multiple `vmagent` instances, then data deduplication must be enabled at remote storage pointed by `-remoteWrite.url`.
See [these docs](https://victoriametrics.github.io/#deduplication) for details.
## Monitoring ## Monitoring

View File

@ -2,6 +2,8 @@
# tip # tip
* FEATURE: vmagent: add ability to replicate scrape targets among `vmagent` instances in the cluster with `-promscrape.cluster.replicationFactor` command-line flag. See [these docs](https://victoriametrics.github.io/vmagent.html#scraping-big-number-of-targets).
# [v1.55.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.55.1) # [v1.55.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.55.1)

View File

@ -230,7 +230,7 @@ You can read more about relabeling in the following articles:
## Scraping big number of targets ## Scraping big number of targets
A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc. A single `vmagent` instance can scrape tens of thousands of scrape targets. Sometimes this isn't enough due to limitations on CPU, network, RAM, etc.
In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` clustering). In this case scrape targets can be split among multiple `vmagent` instances (aka `vmagent` horizontal scaling and clustering).
Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values. Each `vmagent` instance in the cluster must use identical `-promscrape.config` files with distinct `-promscrape.cluster.memberNum` values.
The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster. The flag value must be in the range `0 ... N-1`, where `N` is the number of `vmagent` instances in the cluster.
The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands The number of `vmagent` instances in the cluster must be passed to `-promscrape.cluster.membersCount` command-line flag. For example, the following commands
@ -241,6 +241,19 @@ spread scrape targets among a cluster of two `vmagent` instances:
/path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ... /path/to/vmagent -promscrape.cluster.membersCount=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
``` ```
By default each scrape target is scraped only by a single `vmagent` instance in the cluster. If there is a need for replicating scrape targets among multiple `vmagent` instances,
then `-promscrape.cluster.replicationFactor` command-line flag must be set to the desired number of replicas. For example, the following commands
start a cluster of three `vmagent` instances, where each target is scraped by two `vmagent` instances:
```
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=0 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=1 -promscrape.config=/path/to/config.yml ...
/path/to/vmagent -promscrape.cluster.membersCount=3 -promscrape.cluster.replicationFactor=2 -promscrape.cluster.memberNum=2 -promscrape.config=/path/to/config.yml ...
```
If each target is scraped by multiple `vmagent` instances, then data deduplication must be enabled at remote storage pointed by `-remoteWrite.url`.
See [these docs](https://victoriametrics.github.io/#deduplication) for details.
## Monitoring ## Monitoring

View File

@ -47,6 +47,8 @@ var (
"Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets") "Each member then scrapes roughly 1/N of all the targets. By default cluster scraping is disabled, i.e. a single scraper scrapes all the targets")
clusterMemberNum = flag.Int("promscrape.cluster.memberNum", 0, "The number of number in the cluster of scrapers. "+ clusterMemberNum = flag.Int("promscrape.cluster.memberNum", 0, "The number of number in the cluster of scrapers. "+
"It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster") "It must be an unique value in the range 0 ... promscrape.cluster.membersCount-1 across scrapers in the cluster")
clusterReplicationFactor = flag.Int("promscrape.cluster.replicationFactor", 1, "The number of members in the cluster, which scrape the same targets. "+
"If the replication factor is greater than 2, then the deduplication must be enabled at remote storage side. See https://victoriametrics.github.io/#deduplication")
) )
// Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/ // Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/
@ -721,12 +723,25 @@ func appendScrapeWorkKey(dst []byte, target string, extraLabels, metaLabels map[
return dst return dst
} }
func needSkipScrapeWork(key string) bool { func needSkipScrapeWork(key string, membersCount, replicasCount, memberNum int) bool {
if *clusterMembersCount <= 0 { if membersCount <= 1 {
return false return false
} }
h := int(xxhash.Sum64(bytesutil.ToUnsafeBytes(key))) h := int(xxhash.Sum64(bytesutil.ToUnsafeBytes(key)))
return (h % *clusterMembersCount) != *clusterMemberNum idx := h % membersCount
if replicasCount < 1 {
replicasCount = 1
}
for i := 0; i < replicasCount; i++ {
if idx == memberNum {
return false
}
idx++
if idx >= replicasCount {
idx = 0
}
}
return true
} }
func appendSortedKeyValuePairs(dst []byte, m map[string]string) []byte { func appendSortedKeyValuePairs(dst []byte, m map[string]string) []byte {
@ -753,7 +768,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
bb := scrapeWorkKeyBufPool.Get() bb := scrapeWorkKeyBufPool.Get()
defer scrapeWorkKeyBufPool.Put(bb) defer scrapeWorkKeyBufPool.Put(bb)
bb.B = appendScrapeWorkKey(bb.B[:0], target, extraLabels, metaLabels) bb.B = appendScrapeWorkKey(bb.B[:0], target, extraLabels, metaLabels)
if needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B)) { if needSkipScrapeWork(bytesutil.ToUnsafeString(bb.B), *clusterMembersCount, *clusterReplicationFactor, *clusterMemberNum) {
return nil, nil return nil, nil
} }

View File

@ -12,6 +12,31 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
) )
func TestNeedSkipScrapeWork(t *testing.T) {
f := func(key string, membersCount, replicationFactor, memberNum int, needSkipExpected bool) {
t.Helper()
needSkip := needSkipScrapeWork(key, membersCount, replicationFactor, memberNum)
if needSkip != needSkipExpected {
t.Fatalf("unexpected needSkipScrapeWork(key=%q, membersCount=%d, replicationFactor=%d, memberNum=%d)", key, membersCount, replicationFactor, memberNum)
}
}
// Disabled clustering
f("foo", 0, 0, 0, false)
// A cluster with 2 nodes with disabled replication
f("foo", 2, 0, 0, true)
f("foo", 2, 0, 1, false)
// A cluster with 2 nodes with replicationFactor=2
f("foo", 2, 2, 0, false)
f("foo", 2, 2, 1, false)
// A cluster with 3 nodes with replicationFactor=2
f("foo", 3, 2, 0, false)
f("foo", 3, 2, 1, true)
f("foo", 3, 2, 2, false)
}
func TestLoadStaticConfigs(t *testing.T) { func TestLoadStaticConfigs(t *testing.T) {
scs, err := loadStaticConfigs("testdata/file_sd.json") scs, err := loadStaticConfigs("testdata/file_sd.json")
if err != nil { if err != nil {