diff --git a/app/vmagent/README.md b/app/vmagent/README.md
index b8da8d52f..22aaa2360 100644
--- a/app/vmagent/README.md
+++ b/app/vmagent/README.md
@@ -326,14 +326,24 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
   the url may contain sensitive information such as auth tokens or passwords. Pass `-remoteWrite.showURL` command-line flag when starting `vmagent`
   in order to see all the valid urls.
 
-* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
-  in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10 minutes:
+* By default `vmagent` evenly spreads the scrape load in time. If a particular scrape target must be scraped at the beginning of an interval,
+  then use the `scrape_align_interval` option. For example, the following config aligns hourly scrapes to the beginning of every hour:
 
   ```yml
   scrape_configs:
   - job_name: foo
     scrape_interval: 1h
-    scrape_align_interval: 10m
+    scrape_align_interval: 1h
+  ```
+
+* By default `vmagent` evenly spreads the scrape load in time. If a particular scrape target must be scraped at a specific offset within the scrape interval, then use the `scrape_offset` option.
+  For example, the following config instructs `vmagent` to scrape the target 10 seconds after the start of every minute:
+
+  ```yml
+  scrape_configs:
+  - job_name: foo
+    scrape_interval: 1m
+    scrape_offset: 10s
   ```
 
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen to multiple ports
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index a4e868465..4abf0a598 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -7,6 +7,7 @@
   - `histogram_stdvar(buckets)` - returns standard variance for the given buckets.
   - `histogram_stddev(buckets)` - returns standard deviation for the given buckets.
 * FEATURE: vmagent: add ability to replicate scrape targets among `vmagent` instances in the cluster with `-promscrape.cluster.replicationFactor` command-line flag. See [these docs](https://victoriametrics.github.io/vmagent.html#scraping-big-number-of-targets).
+* FEATURE: vmagent: accept the `scrape_offset` option in `scrape_config`. This option may be useful when scrapes must start at a specific offset within every scrape interval. See [these docs](https://victoriametrics.github.io/vmagent.html#troubleshooting) for details.
 * FEATURE: vmauth: allow using regexp paths in `url_map`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1112) for details.
 
 * BUGFIX: vmagent: reduce memory usage when Kubernetes service discovery is used in big number of distinct jobs by sharing the cache. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1113
diff --git a/docs/vmagent.md b/docs/vmagent.md
index b8da8d52f..22aaa2360 100644
--- a/docs/vmagent.md
+++ b/docs/vmagent.md
@@ -326,14 +326,24 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
   the url may contain sensitive information such as auth tokens or passwords. Pass `-remoteWrite.showURL` command-line flag when starting `vmagent`
   in order to see all the valid urls.
 
-* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
-  in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10 minutes:
+* By default `vmagent` evenly spreads the scrape load in time. If a particular scrape target must be scraped at the beginning of an interval,
+  then use the `scrape_align_interval` option. For example, the following config aligns hourly scrapes to the beginning of every hour:
 
   ```yml
   scrape_configs:
   - job_name: foo
     scrape_interval: 1h
-    scrape_align_interval: 10m
+    scrape_align_interval: 1h
+  ```
+
+* By default `vmagent` evenly spreads the scrape load in time. If a particular scrape target must be scraped at a specific offset within the scrape interval, then use the `scrape_offset` option.
+  For example, the following config instructs `vmagent` to scrape the target 10 seconds after the start of every minute:
+
+  ```yml
+  scrape_configs:
+  - job_name: foo
+    scrape_interval: 1m
+    scrape_offset: 10s
   ```
 
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen to multiple ports
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go
index 118c5dcc5..8d558862f 100644
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@@ -114,6 +114,7 @@ type ScrapeConfig struct {
 	DisableKeepAlive    bool          `yaml:"disable_keepalive,omitempty"`
 	StreamParse         bool          `yaml:"stream_parse,omitempty"`
 	ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"`
+	ScrapeOffset        time.Duration `yaml:"scrape_offset,omitempty"`
 
 	// This is set in loadConfig
 	swc *scrapeWorkConfig
@@ -569,6 +570,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
 		disableKeepAlive:     sc.DisableKeepAlive,
 		streamParse:          sc.StreamParse,
 		scrapeAlignInterval:  sc.ScrapeAlignInterval,
+		scrapeOffset:         sc.ScrapeOffset,
 	}
 	return swc, nil
 }
@@ -592,6 +594,7 @@ type scrapeWorkConfig struct {
 	disableKeepAlive     bool
 	streamParse          bool
 	scrapeAlignInterval  time.Duration
+	scrapeOffset         time.Duration
 }
 
 type targetLabelsGetter interface {
@@ -853,6 +856,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
 		DisableKeepAlive:     swc.disableKeepAlive,
 		StreamParse:          swc.streamParse,
 		ScrapeAlignInterval:  swc.scrapeAlignInterval,
+		ScrapeOffset:         swc.scrapeOffset,
 
 		jobNameOriginal: swc.jobName,
 	}
diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go
index d79bee3c8..f75dae834 100644
--- a/lib/promscrape/config_test.go
+++ b/lib/promscrape/config_test.go
@@ -1302,6 +1302,7 @@ scrape_configs:
   disable_compression: true
   stream_parse: true
   scrape_align_interval: 1s
+  scrape_offset: 0.5s
   static_configs:
   - targets:
     - 192.168.1.2 # SNMP device.
@@ -1356,6 +1357,7 @@ scrape_configs:
 			DisableCompression:  true,
 			StreamParse:         true,
 			ScrapeAlignInterval: time.Second,
+			ScrapeOffset:        500 * time.Millisecond,
 			jobNameOriginal:     "snmp",
 		},
 	})
diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go
index 558f3e8b5..e5e28ff42 100644
--- a/lib/promscrape/scrapework.go
+++ b/lib/promscrape/scrapework.go
@@ -92,6 +92,9 @@ type ScrapeWork struct {
 	// The interval for aligning the first scrape.
 	ScrapeAlignInterval time.Duration
 
+	// The offset for the first scrape.
+	ScrapeOffset time.Duration
+
 	// The original 'job_name'
 	jobNameOriginal string
 }
@@ -102,9 +105,11 @@ type ScrapeWork struct {
 func (sw *ScrapeWork) key() string {
 	// Do not take into account OriginalLabels.
 	key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
-		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, ScrapeAlignInterval=%s",
+		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, "+
+		"ScrapeAlignInterval=%s, ScrapeOffset=%s",
 		sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
-		sw.AuthConfig.String(), sw.MetricRelabelConfigs.String(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, sw.ScrapeAlignInterval)
+		sw.AuthConfig.String(), sw.MetricRelabelConfigs.String(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse,
+		sw.ScrapeAlignInterval, sw.ScrapeOffset)
 	return key
 }
 
@@ -174,9 +179,14 @@ type scrapeWork struct {
 }
 
 func (sw *scrapeWork) run(stopCh <-chan struct{}) {
-	scrapeInterval := sw.Config.ScrapeInterval
 	var randSleep uint64
-	if sw.Config.ScrapeAlignInterval <= 0 {
+	scrapeInterval := sw.Config.ScrapeInterval
+	scrapeAlignInterval := sw.Config.ScrapeAlignInterval
+	scrapeOffset := sw.Config.ScrapeOffset
+	if scrapeOffset > 0 {
+		scrapeAlignInterval = scrapeInterval
+	}
+	if scrapeAlignInterval <= 0 {
 		// Calculate start time for the first scrape from ScrapeURL and labels.
 		// This should spread load when scraping many targets with different
 		// scrape urls and labels.
@@ -191,8 +201,11 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}) {
 		}
 		randSleep -= sleepOffset
 	} else {
-		d := uint64(sw.Config.ScrapeAlignInterval)
+		d := uint64(scrapeAlignInterval)
 		randSleep = d - uint64(time.Now().UnixNano())%d
+		if scrapeOffset > 0 {
+			randSleep += uint64(scrapeOffset)
+		}
 		randSleep %= uint64(scrapeInterval)
 	}
 	timer := timerpool.Get(time.Duration(randSleep))
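The scheduling change in `scrapeWork.run()` above boils down to this: when `scrape_offset` is set, the first scrape is aligned to `scrape_interval` and then shifted by the offset, so every scrape fires at the same position within the interval. The sketch below is a minimal, standalone illustration of that delay calculation; the function name `firstScrapeDelay` and the fixed half-interval fallback are illustrative assumptions (the real code derives the fallback from a hash of `ScrapeURL` and labels), not part of the vmagent API.

```go
package main

import (
	"fmt"
	"time"
)

// firstScrapeDelay mirrors the delay arithmetic added to scrapeWork.run():
// a non-zero scrapeOffset forces alignment to scrapeInterval and then shifts
// the first scrape by the offset.
func firstScrapeDelay(now time.Time, scrapeInterval, scrapeAlignInterval, scrapeOffset time.Duration) time.Duration {
	if scrapeOffset > 0 {
		scrapeAlignInterval = scrapeInterval
	}
	if scrapeAlignInterval <= 0 {
		// The real code spreads load here with a pseudo-random sleep derived
		// from ScrapeURL and labels; a fixed half-interval stands in for that.
		return scrapeInterval / 2
	}
	d := uint64(scrapeAlignInterval)
	randSleep := d - uint64(now.UnixNano())%d
	if scrapeOffset > 0 {
		randSleep += uint64(scrapeOffset)
	}
	randSleep %= uint64(scrapeInterval)
	return time.Duration(randSleep)
}

func main() {
	// scrape_interval: 1m, scrape_offset: 10s at 12:00:25 UTC -> wait 45s,
	// so the first scrape fires at 12:01:10, i.e. at second 10 of the minute.
	now := time.Date(2021, time.March, 1, 12, 0, 25, 0, time.UTC)
	fmt.Println(firstScrapeDelay(now, time.Minute, 0, 10*time.Second))
}
```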