diff --git a/app/vmagent/README.md b/app/vmagent/README.md index bf40e17ba2..866862761f 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss. the url may contain sensitive information such as auth tokens or passwords. Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls. +* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set `scrape_align_interval` option + in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10 minutes: + + ```yml + scrape_configs: + - job_name: foo + scrape_interval: 1h + scrape_align_interval: 10m + ``` + * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports or they use init container. These errors can be either fixed or suppressed with `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag. See available options below if you prefer fixing the root cause of the error: diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 4780edae11..17674a6e9e 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -2,6 +2,8 @@ # tip +* FEATURE: vmagent: add `scrape_align_interval` config option, which can be used for aligning scrapes to the beginning of the configured interval. See [these docs](https://victoriametrics.github.io/vmagent.html#troubleshooting) for details. + * BUGFIX: reduce the probability of `duplicate time series` errors when querying Kubernetes metrics. diff --git a/docs/vmagent.md b/docs/vmagent.md index bf40e17ba2..866862761f 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss. the url may contain sensitive information such as auth tokens or passwords. 
Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls. +* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set `scrape_align_interval` option + in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10 minutes: + + ```yml + scrape_configs: + - job_name: foo + scrape_interval: 1h + scrape_align_interval: 10m + ``` + * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen multiple ports or they use init container. These errors can be either fixed or suppressed with `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag. See available options below if you prefer fixing the root cause of the error: diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index 31e1ce858d..aade32faac 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -89,9 +89,10 @@ type ScrapeConfig struct { SampleLimit int `yaml:"sample_limit,omitempty"` // These options are supported only by lib/promscrape. 
- DisableCompression bool `yaml:"disable_compression,omitempty"` - DisableKeepAlive bool `yaml:"disable_keepalive,omitempty"` - StreamParse bool `yaml:"stream_parse,omitempty"` + DisableCompression bool `yaml:"disable_compression,omitempty"` + DisableKeepAlive bool `yaml:"disable_keepalive,omitempty"` + StreamParse bool `yaml:"stream_parse,omitempty"` + ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"` // This is set in loadConfig swc *scrapeWorkConfig @@ -508,6 +509,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf disableCompression: sc.DisableCompression, disableKeepAlive: sc.DisableKeepAlive, streamParse: sc.StreamParse, + scrapeAlignInterval: sc.ScrapeAlignInterval, } return swc, nil } @@ -530,6 +532,7 @@ type scrapeWorkConfig struct { disableCompression bool disableKeepAlive bool streamParse bool + scrapeAlignInterval time.Duration } func appendKubernetesScrapeWork(dst []*ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) ([]*ScrapeWork, bool) { @@ -761,6 +764,7 @@ func appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, target string, e DisableCompression: swc.disableCompression, DisableKeepAlive: swc.disableKeepAlive, StreamParse: swc.streamParse, + ScrapeAlignInterval: swc.scrapeAlignInterval, jobNameOriginal: swc.jobName, }) diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go index 9982efb1ae..a1dbc5f571 100644 --- a/lib/promscrape/config_test.go +++ b/lib/promscrape/config_test.go @@ -1275,6 +1275,7 @@ scrape_configs: disable_keepalive: true disable_compression: true stream_parse: true + scrape_align_interval: 1s static_configs: - targets: - 192.168.1.2 # SNMP device. 
@@ -1323,12 +1324,13 @@ scrape_configs: Value: "snmp", }, }, - AuthConfig: &promauth.Config{}, - SampleLimit: 100, - DisableKeepAlive: true, - DisableCompression: true, - StreamParse: true, - jobNameOriginal: "snmp", + AuthConfig: &promauth.Config{}, + SampleLimit: 100, + DisableKeepAlive: true, + DisableCompression: true, + StreamParse: true, + ScrapeAlignInterval: time.Second, + jobNameOriginal: "snmp", }, }) f(` diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 585529e017..82a064094f 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -90,6 +90,9 @@ type ScrapeWork struct { // Whether to parse target responses in a streaming manner. StreamParse bool + // The interval for aligning the first scrape. + ScrapeAlignInterval time.Duration + // The original 'job_name' jobNameOriginal string } @@ -100,9 +103,9 @@ type ScrapeWork struct { func (sw *ScrapeWork) key() string { // Do not take into account OriginalLabels. key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+ - "AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v", + "AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, ScrapeAlignInterval=%s", sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(), - sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse) + sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, sw.ScrapeAlignInterval) return key } @@ -180,20 +183,27 @@ type scrapeWork struct { } func (sw *scrapeWork) run(stopCh <-chan struct{}) { - // Calculate start time for the first scrape from ScrapeURL and labels. 
- // This should spread load when scraping many targets with different - // scrape urls and labels. - // This also makes consistent scrape times across restarts - // for a target with the same ScrapeURL and labels. scrapeInterval := sw.Config.ScrapeInterval - key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString()) - h := uint32(xxhash.Sum64([]byte(key))) - randSleep := uint64(float64(scrapeInterval) * (float64(h) / (1 << 32))) - sleepOffset := uint64(time.Now().UnixNano()) % uint64(scrapeInterval) - if randSleep < sleepOffset { - randSleep += uint64(scrapeInterval) + var randSleep uint64 + if sw.Config.ScrapeAlignInterval <= 0 { + // Calculate start time for the first scrape from ScrapeURL and labels. + // This should spread load when scraping many targets with different + // scrape urls and labels. + // This also makes consistent scrape times across restarts + // for a target with the same ScrapeURL and labels. + key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString()) + h := uint32(xxhash.Sum64([]byte(key))) + randSleep = uint64(float64(scrapeInterval) * (float64(h) / (1 << 32))) + sleepOffset := uint64(time.Now().UnixNano()) % uint64(scrapeInterval) + if randSleep < sleepOffset { + randSleep += uint64(scrapeInterval) + } + randSleep -= sleepOffset + } else { + d := uint64(sw.Config.ScrapeAlignInterval) + randSleep = d - uint64(time.Now().UnixNano())%d + randSleep %= uint64(scrapeInterval) } - randSleep -= sleepOffset timer := timerpool.Get(time.Duration(randSleep)) var timestamp int64 var ticker *time.Ticker