lib/promscrape: add scrape_align_interval config option into scrape config

This option allows aligning scrapes to particular intervals.
Aliaksandr Valialkin 2021-02-18 23:51:29 +02:00
parent bd1d906eee
commit 502d0e2524
6 changed files with 61 additions and 23 deletions

View File

@@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
the url may contain sensitive information such as auth tokens or passwords.
Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
in the corresponding scrape config. For example, the following config aligns hourly scrapes to 10-minute boundaries (a standalone timing sketch follows this excerpt):
```yml
scrape_configs:
- job_name: foo
  scrape_interval: 1h
  scrape_align_interval: 10m
```
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely that these pods listen on multiple ports
or use an init container. These errors can either be fixed or suppressed with the `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
See available options below if you prefer fixing the root cause of the error:

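To make the alignment semantics concrete, here is a minimal standalone Go sketch (not part of this commit; names such as `firstScrape` are illustrative) that computes when the first scrape would fire under the example config above, assuming the documented behavior of delaying the first scrape until the next multiple of `scrape_align_interval`:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	scrapeInterval := time.Hour             // scrape_interval: 1h
	scrapeAlignInterval := 10 * time.Minute // scrape_align_interval: 10m

	now := time.Now()

	// Delay the first scrape until the next multiple of scrape_align_interval
	// on the wall clock (UnixNano), then continue every scrape_interval.
	d := scrapeAlignInterval.Nanoseconds()
	sleep := time.Duration(d - now.UnixNano()%d)
	sleep %= scrapeInterval // the initial delay never exceeds the scrape interval

	firstScrape := now.Add(sleep)
	fmt.Printf("first scrape at %s, then every %s\n", firstScrape.Format(time.RFC3339), scrapeInterval)
}
```

With `scrape_align_interval: 10m` every scrape therefore starts on a 10-minute wall-clock boundary; with `scrape_align_interval: 1h` it would start at the top of the hour.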
View File

@@ -2,6 +2,8 @@
# tip
* FEATURE: vmagent: add `scrape_align_interval` config option, which can be used for aligning scrapes to the beginning of the configured interval. See [these docs](https://victoriametrics.github.io/vmagent.html#troubleshooting) for details.
* BUGFIX: reduce the probability of `duplicate time series` errors when querying Kubernetes metrics.

View File

@@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
the url may contain sensitive information such as auth tokens or passwords.
Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
in the corresponding scrape config. For example, the following config aligns hourly scrapes to 10-minute boundaries:
```yml
scrape_configs:
- job_name: foo
  scrape_interval: 1h
  scrape_align_interval: 10m
```
* If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely that these pods listen on multiple ports
or use an init container. These errors can either be fixed or suppressed with the `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
See available options below if you prefer fixing the root cause of the error:

View File

@@ -89,9 +89,10 @@ type ScrapeConfig struct {
SampleLimit int `yaml:"sample_limit,omitempty"`
// These options are supported only by lib/promscrape.
DisableCompression bool `yaml:"disable_compression,omitempty"`
DisableKeepAlive bool `yaml:"disable_keepalive,omitempty"`
StreamParse bool `yaml:"stream_parse,omitempty"`
DisableCompression bool `yaml:"disable_compression,omitempty"`
DisableKeepAlive bool `yaml:"disable_keepalive,omitempty"`
StreamParse bool `yaml:"stream_parse,omitempty"`
ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"`
// This is set in loadConfig
swc *scrapeWorkConfig
@@ -508,6 +509,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
disableCompression: sc.DisableCompression,
disableKeepAlive: sc.DisableKeepAlive,
streamParse: sc.StreamParse,
scrapeAlignInterval: sc.ScrapeAlignInterval,
}
return swc, nil
}
@@ -530,6 +532,7 @@ type scrapeWorkConfig struct {
disableCompression bool
disableKeepAlive bool
streamParse bool
scrapeAlignInterval time.Duration
}
func appendKubernetesScrapeWork(dst []*ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) ([]*ScrapeWork, bool) {
@@ -761,6 +764,7 @@ func appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, target string, e
DisableCompression: swc.disableCompression,
DisableKeepAlive: swc.disableKeepAlive,
StreamParse: swc.streamParse,
ScrapeAlignInterval: swc.scrapeAlignInterval,
jobNameOriginal: swc.jobName,
})

View File

@@ -1275,6 +1275,7 @@ scrape_configs:
disable_keepalive: true
disable_compression: true
stream_parse: true
scrape_align_interval: 1s
static_configs:
- targets:
- 192.168.1.2 # SNMP device.
@@ -1323,12 +1324,13 @@ scrape_configs:
Value: "snmp",
},
},
AuthConfig: &promauth.Config{},
SampleLimit: 100,
DisableKeepAlive: true,
DisableCompression: true,
StreamParse: true,
jobNameOriginal: "snmp",
AuthConfig: &promauth.Config{},
SampleLimit: 100,
DisableKeepAlive: true,
DisableCompression: true,
StreamParse: true,
ScrapeAlignInterval: time.Second,
jobNameOriginal: "snmp",
},
})
f(`

View File

@@ -90,6 +90,9 @@ type ScrapeWork struct {
// Whether to parse target responses in a streaming manner.
StreamParse bool
// The interval for aligning the first scrape.
ScrapeAlignInterval time.Duration
// The original 'job_name'
jobNameOriginal string
}
@@ -100,9 +103,9 @@ type ScrapeWork struct {
func (sw *ScrapeWork) key() string {
// Do not take into account OriginalLabels.
key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v",
"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, ScrapeAlignInterval=%s",
sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse)
sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, sw.ScrapeAlignInterval)
return key
}
@@ -180,20 +183,27 @@ type scrapeWork struct {
}
func (sw *scrapeWork) run(stopCh <-chan struct{}) {
// Calculate start time for the first scrape from ScrapeURL and labels.
// This should spread load when scraping many targets with different
// scrape urls and labels.
// This also makes consistent scrape times across restarts
// for a target with the same ScrapeURL and labels.
scrapeInterval := sw.Config.ScrapeInterval
key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString())
h := uint32(xxhash.Sum64([]byte(key)))
randSleep := uint64(float64(scrapeInterval) * (float64(h) / (1 << 32)))
sleepOffset := uint64(time.Now().UnixNano()) % uint64(scrapeInterval)
if randSleep < sleepOffset {
randSleep += uint64(scrapeInterval)
var randSleep uint64
if sw.Config.ScrapeAlignInterval <= 0 {
// Calculate start time for the first scrape from ScrapeURL and labels.
// This should spread load when scraping many targets with different
// scrape urls and labels.
// This also makes consistent scrape times across restarts
// for a target with the same ScrapeURL and labels.
key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString())
h := uint32(xxhash.Sum64([]byte(key)))
randSleep = uint64(float64(scrapeInterval) * (float64(h) / (1 << 32)))
sleepOffset := uint64(time.Now().UnixNano()) % uint64(scrapeInterval)
if randSleep < sleepOffset {
randSleep += uint64(scrapeInterval)
}
randSleep -= sleepOffset
} else {
d := uint64(sw.Config.ScrapeAlignInterval)
randSleep = d - uint64(time.Now().UnixNano())%d
randSleep %= uint64(scrapeInterval)
}
randSleep -= sleepOffset
timer := timerpool.Get(time.Duration(randSleep))
var timestamp int64
var ticker *time.Ticker
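For reference, here is a self-contained sketch of the start-delay calculation above. It mirrors the two branches (hash-based spreading when `scrape_align_interval` is not set, alignment to the next interval boundary otherwise), but it is not the vmagent code itself: it uses `hash/fnv` instead of `xxhash` so it runs without external dependencies, and names such as `initialSleep` are illustrative only.

```go
package main

import (
	"fmt"
	"hash/fnv"
	"time"
)

// initialSleep returns how long to wait before the first scrape.
// Without alignment, the delay is derived from a hash of the target identity,
// which spreads targets across the scrape interval and stays stable across restarts.
// With alignment, the delay is the time remaining until the next alignInterval boundary.
func initialSleep(targetKey string, scrapeInterval, alignInterval time.Duration, now time.Time) time.Duration {
	var sleep uint64
	if alignInterval <= 0 {
		h := fnv.New32a()
		h.Write([]byte(targetKey))
		sleep = uint64(float64(scrapeInterval) * (float64(h.Sum32()) / (1 << 32)))
		offset := uint64(now.UnixNano()) % uint64(scrapeInterval)
		if sleep < offset {
			sleep += uint64(scrapeInterval)
		}
		sleep -= offset
	} else {
		// Sleep until the next multiple of alignInterval on the wall clock,
		// capped by the scrape interval.
		d := uint64(alignInterval)
		sleep = d - uint64(now.UnixNano())%d
		sleep %= uint64(scrapeInterval)
	}
	return time.Duration(sleep)
}

func main() {
	now := time.Now()
	fmt.Println("spread: ", initialSleep("ScrapeURL=http://foo/metrics, Labels={job=\"foo\"}", time.Minute, 0, now))
	fmt.Println("aligned:", initialSleep("ScrapeURL=http://foo/metrics, Labels={job=\"foo\"}", time.Hour, 10*time.Minute, now))
}
```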