lib/promscrape: add scrape_offset option to scrape_config

This option can be used to specify the offset within each scrape interval at which the target is scraped
2024-11-23 12:31:07 +01:00 · 2021-03-08 11:58:25 +02:00 · 2021-03-08 11:58:25 +02:00 · 14a399dd06
commit 14a399dd06
parent 345980f78f
6 changed files with 51 additions and 11 deletions
--- a/app/vmagent/README.md
+++ b/app/vmagent/README.md
@ -326,14 +326,24 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
  the url may contain sensitive information such as auth tokens or passwords.
  Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.

-* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
-  in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10 minutes:
+* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at the beginning of some interval,
+  then the `scrape_align_interval` option must be used. For example, the following config aligns hourly scrapes to the beginning of every hour:

  ```yml
  scrape_configs:
  - job_name: foo
    scrape_interval: 1h
-    scrape_align_interval: 10m
+    scrape_align_interval: 1h
+  ```
+
+* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at a specific offset, then the `scrape_offset` option must be used.
+  For example, the following config instructs `vmagent` to scrape the target 10 seconds after the start of every minute:
+
+  ```yml
+  scrape_configs:
+  - job_name: foo
+    scrape_interval: 1m
+    scrape_offset: 10s
  ```

 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen to multiple ports
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -7,6 +7,7 @@
  - `histogram_stdvar(buckets)` - returns standard variance for the given buckets.
  - `histogram_stddev(buckets)` - returns standard deviation for the given buckets.
 * FEATURE: vmagent: add ability to replicate scrape targets among `vmagent` instances in the cluster with `-promscrape.cluster.replicationFactor` command-line flag. See [these docs](https://victoriametrics.github.io/vmagent.html#scraping-big-number-of-targets).
+* FEATURE: vmagent: accept `scrape_offset` option at `scrape_config`. This option may be useful when scrapes must start at the specified offset of every scrape interval. See [these docs](https://victoriametrics.github.io/vmagent.html#troubleshooting) for details.
 * FEATURE: vmauth: allow using regexp paths in `url_map`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1112) for details.

 * BUGFIX: vmagent: reduce memory usage when Kubernetes service discovery is used in big number of distinct jobs by sharing the cache. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1113
--- a/docs/vmagent.md
+++ b/docs/vmagent.md
@ -326,14 +326,24 @@ It may be useful to perform `vmagent` rolling update without any scrape loss.
  the url may contain sensitive information such as auth tokens or passwords.
  Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.

-* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
-  in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10 minutes:
+* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at the beginning of some interval,
+  then the `scrape_align_interval` option must be used. For example, the following config aligns hourly scrapes to the beginning of every hour:

  ```yml
  scrape_configs:
  - job_name: foo
    scrape_interval: 1h
-    scrape_align_interval: 10m
+    scrape_align_interval: 1h
+  ```
+
+* By default `vmagent` evenly spreads scrape load in time. If a particular scrape target must be scraped at a specific offset, then the `scrape_offset` option must be used.
+  For example, the following config instructs `vmagent` to scrape the target 10 seconds after the start of every minute:
+
+  ```yml
+  scrape_configs:
+  - job_name: foo
+    scrape_interval: 1m
+    scrape_offset: 10s
  ```

 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen to multiple ports
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@ -114,6 +114,7 @@ type ScrapeConfig struct {
 	DisableKeepAlive    bool          `yaml:"disable_keepalive,omitempty"`
 	StreamParse         bool          `yaml:"stream_parse,omitempty"`
 	ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"`
+	ScrapeOffset        time.Duration `yaml:"scrape_offset,omitempty"`

 	// This is set in loadConfig
 	swc *scrapeWorkConfig
@ -569,6 +570,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
 		disableKeepAlive:     sc.DisableKeepAlive,
 		streamParse:          sc.StreamParse,
 		scrapeAlignInterval:  sc.ScrapeAlignInterval,
+		scrapeOffset:         sc.ScrapeOffset,
 	}
 	return swc, nil
 }
@ -592,6 +594,7 @@ type scrapeWorkConfig struct {
 	disableKeepAlive     bool
 	streamParse          bool
 	scrapeAlignInterval  time.Duration
+	scrapeOffset         time.Duration
 }

 type targetLabelsGetter interface {
@ -853,6 +856,7 @@ func (swc *scrapeWorkConfig) getScrapeWork(target string, extraLabels, metaLabel
 		DisableKeepAlive:     swc.disableKeepAlive,
 		StreamParse:          swc.streamParse,
 		ScrapeAlignInterval:  swc.scrapeAlignInterval,
+		ScrapeOffset:         swc.scrapeOffset,

 		jobNameOriginal: swc.jobName,
 	}
--- a/lib/promscrape/config_test.go
+++ b/lib/promscrape/config_test.go
@ -1302,6 +1302,7 @@ scrape_configs:
    disable_compression: true
    stream_parse: true
    scrape_align_interval: 1s
+    scrape_offset: 0.5s
    static_configs:
      - targets:
        - 192.168.1.2  # SNMP device.
@ -1356,6 +1357,7 @@ scrape_configs:
 			DisableCompression:  true,
 			StreamParse:         true,
 			ScrapeAlignInterval: time.Second,
+			ScrapeOffset:        500 * time.Millisecond,
 			jobNameOriginal:     "snmp",
 		},
 	})
--- a/lib/promscrape/scrapework.go
+++ b/lib/promscrape/scrapework.go
@ -92,6 +92,9 @@ type ScrapeWork struct {
 	// The interval for aligning the first scrape.
 	ScrapeAlignInterval time.Duration

+	// The offset for the first scrape.
+	ScrapeOffset time.Duration
+
 	// The original 'job_name'
 	jobNameOriginal string
 }
@ -102,9 +105,11 @@ type ScrapeWork struct {
 func (sw *ScrapeWork) key() string {
 	// Do not take into account OriginalLabels.
 	key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
-		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, ScrapeAlignInterval=%s",
+		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, "+
+		"ScrapeAlignInterval=%s, ScrapeOffset=%s",
 		sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
-		sw.AuthConfig.String(), sw.MetricRelabelConfigs.String(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, sw.ScrapeAlignInterval)
+		sw.AuthConfig.String(), sw.MetricRelabelConfigs.String(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse,
+		sw.ScrapeAlignInterval, sw.ScrapeOffset)
 	return key
 }

@ -174,9 +179,14 @@ type scrapeWork struct {
 }

 func (sw *scrapeWork) run(stopCh <-chan struct{}) {
-	scrapeInterval := sw.Config.ScrapeInterval
 	var randSleep uint64
-	if sw.Config.ScrapeAlignInterval <= 0 {
+	scrapeInterval := sw.Config.ScrapeInterval
+	scrapeAlignInterval := sw.Config.ScrapeAlignInterval
+	scrapeOffset := sw.Config.ScrapeOffset
+	if scrapeOffset > 0 {
+		scrapeAlignInterval = scrapeInterval
+	}
+	if scrapeAlignInterval <= 0 {
 		// Calculate start time for the first scrape from ScrapeURL and labels.
 		// This should spread load when scraping many targets with different
 		// scrape urls and labels.
@ -191,8 +201,11 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}) {
 		}
 		randSleep -= sleepOffset
 	} else {
-		d := uint64(sw.Config.ScrapeAlignInterval)
+		d := uint64(scrapeAlignInterval)
 		randSleep = d - uint64(time.Now().UnixNano())%d
+		if scrapeOffset > 0 {
+			randSleep += uint64(scrapeOffset)
+		}
 		randSleep %= uint64(scrapeInterval)
 	}
 	timer := timerpool.Get(time.Duration(randSleep))