mirror of https://github.com/VictoriaMetrics/VictoriaMetrics.git
lib/promscrape: add scrape_align_interval config option into scrape config
This option allows aligning scrapes to a particular interval.
parent bd1d906eee
commit 502d0e2524
@@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
   the url may contain sensitive information such as auth tokens or passwords.
   Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.

+* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
+  in the corresponding scrape config. For example, the following config aligns hourly scrapes to the nearest 10-minute boundary:
+
+```yml
+scrape_configs:
+- job_name: foo
+  scrape_interval: 1h
+  scrape_align_interval: 10m
+```
+
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen on multiple ports
   or use an init container. These errors can be either fixed or suppressed with the `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
   See the available options below if you prefer fixing the root cause of the error:
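To make the documented example concrete: with `scrape_align_interval: 10m`, the first scrape is delayed until the next 10-minute wall-clock boundary, and subsequent scrapes fire every `scrape_interval` after that. Below is a minimal sketch of the arithmetic, not part of this commit; the start time is an assumption chosen for illustration.

```go
// Sketch: delay of the first scrape for scrape_align_interval: 10m
// when vmagent happens to start at 12:34:56 UTC (assumed for illustration).
package main

import (
	"fmt"
	"time"
)

func main() {
	d := uint64(10 * time.Minute)
	start := time.Date(2021, 2, 21, 12, 34, 56, 0, time.UTC)
	// Time remaining until the next 10-minute boundary.
	sleep := time.Duration(d - uint64(start.UnixNano())%d)
	fmt.Println(sleep)            // 5m4s
	fmt.Println(start.Add(sleep)) // first scrape at 12:40:00 UTC
}
```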
@@ -2,6 +2,8 @@

 # tip

+* FEATURE: vmagent: add `scrape_align_interval` config option, which can be used for aligning scrapes to the beginning of the configured interval. See [these docs](https://victoriametrics.github.io/vmagent.html#troubleshooting) for details.
+
 * BUGFIX: reduce the probability of `duplicate time series` errors when querying Kubernetes metrics.

@@ -92,6 +92,7 @@ type ScrapeConfig struct {
 	DisableCompression  bool          `yaml:"disable_compression,omitempty"`
 	DisableKeepAlive    bool          `yaml:"disable_keepalive,omitempty"`
 	StreamParse         bool          `yaml:"stream_parse,omitempty"`
+	ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"`

 	// This is set in loadConfig
 	swc *scrapeWorkConfig
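The new field is a plain `time.Duration` with a yaml tag; `gopkg.in/yaml.v2`, which `lib/promscrape` uses for config parsing, special-cases `time.Duration` and accepts Go duration strings such as `10m`. A minimal standalone sketch, where `scrapeConfig` is a trimmed stand-in rather than the real `ScrapeConfig`:

```go
package main

import (
	"fmt"
	"time"

	"gopkg.in/yaml.v2"
)

// scrapeConfig is a trimmed stand-in for the real ScrapeConfig struct.
type scrapeConfig struct {
	ScrapeInterval      time.Duration `yaml:"scrape_interval,omitempty"`
	ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"`
}

func main() {
	data := []byte("scrape_interval: 1h\nscrape_align_interval: 10m\n")
	var sc scrapeConfig
	if err := yaml.Unmarshal(data, &sc); err != nil {
		panic(err)
	}
	fmt.Println(sc.ScrapeInterval, sc.ScrapeAlignInterval) // 1h0m0s 10m0s
}
```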
@@ -508,6 +509,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
 		disableCompression:  sc.DisableCompression,
 		disableKeepAlive:    sc.DisableKeepAlive,
 		streamParse:         sc.StreamParse,
+		scrapeAlignInterval: sc.ScrapeAlignInterval,
 	}
 	return swc, nil
 }
@@ -530,6 +532,7 @@ type scrapeWorkConfig struct {
 	disableCompression  bool
 	disableKeepAlive    bool
 	streamParse         bool
+	scrapeAlignInterval time.Duration
 }

 func appendKubernetesScrapeWork(dst []*ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) ([]*ScrapeWork, bool) {
@@ -761,6 +764,7 @@ func appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, target string, e
 			DisableCompression:  swc.disableCompression,
 			DisableKeepAlive:    swc.disableKeepAlive,
 			StreamParse:         swc.streamParse,
+			ScrapeAlignInterval: swc.scrapeAlignInterval,

 			jobNameOriginal: swc.jobName,
 		})
@@ -1275,6 +1275,7 @@ scrape_configs:
   disable_keepalive: true
   disable_compression: true
   stream_parse: true
+  scrape_align_interval: 1s
   static_configs:
   - targets:
     - 192.168.1.2 # SNMP device.
@@ -1328,6 +1329,7 @@ scrape_configs:
 			DisableKeepAlive:    true,
 			DisableCompression:  true,
 			StreamParse:         true,
+			ScrapeAlignInterval: time.Second,
 			jobNameOriginal:     "snmp",
 		},
 	})
@@ -90,6 +90,9 @@ type ScrapeWork struct {
 	// Whether to parse target responses in a streaming manner.
 	StreamParse bool

+	// The interval for aligning the first scrape.
+	ScrapeAlignInterval time.Duration
+
 	// The original 'job_name'
 	jobNameOriginal string
 }
@@ -100,9 +103,9 @@ type ScrapeWork struct {
 func (sw *ScrapeWork) key() string {
 	// Do not take into account OriginalLabels.
 	key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
-		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v",
+		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, ScrapeAlignInterval=%s",
 		sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
-		sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse)
+		sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, sw.ScrapeAlignInterval)
 	return key
 }
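`key()` is how vmagent decides whether a reloaded config still describes an identical scrape target; appending the new field means that changing only `scrape_align_interval` now yields a different key, so the scrape loop is recreated with the new alignment. A trimmed illustration using hypothetical types, not vmagent's actual ones:

```go
package main

import (
	"fmt"
	"time"
)

// work is a trimmed stand-in for ScrapeWork, keeping only the fields
// needed to show why ScrapeAlignInterval must be part of the key.
type work struct {
	ScrapeURL           string
	ScrapeInterval      time.Duration
	ScrapeAlignInterval time.Duration
}

func (w *work) key() string {
	return fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeAlignInterval=%s",
		w.ScrapeURL, w.ScrapeInterval, w.ScrapeAlignInterval)
}

func main() {
	a := work{"http://host/metrics", time.Hour, 0}
	b := a
	b.ScrapeAlignInterval = 10 * time.Minute
	// Different keys: the two configs are no longer treated as the same target.
	fmt.Println(a.key() == b.key()) // false
}
```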
@@ -180,12 +183,14 @@ type scrapeWork struct {
 }

 func (sw *scrapeWork) run(stopCh <-chan struct{}) {
+	scrapeInterval := sw.Config.ScrapeInterval
+	var randSleep uint64
+	if sw.Config.ScrapeAlignInterval <= 0 {
 	// Calculate start time for the first scrape from ScrapeURL and labels.
 	// This should spread load when scraping many targets with different
 	// scrape urls and labels.
 	// This also makes consistent scrape times across restarts
 	// for a target with the same ScrapeURL and labels.
-	scrapeInterval := sw.Config.ScrapeInterval
 	key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString())
 	h := uint32(xxhash.Sum64([]byte(key)))
-	randSleep := uint64(float64(scrapeInterval) * (float64(h) / (1 << 32)))
+	randSleep = uint64(float64(scrapeInterval) * (float64(h) / (1 << 32)))
@@ -194,6 +199,11 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}) {
 		randSleep += uint64(scrapeInterval)
 	}
 	randSleep -= sleepOffset
+	} else {
+		d := uint64(sw.Config.ScrapeAlignInterval)
+		randSleep = d - uint64(time.Now().UnixNano())%d
+		randSleep %= uint64(scrapeInterval)
+	}
 	timer := timerpool.Get(time.Duration(randSleep))
 	var timestamp int64
 	var ticker *time.Ticker
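The control flow above condenses to: without `scrape_align_interval`, the first-scrape delay is a stable hash of the target's URL and labels, which spreads targets across the scrape interval and stays consistent across restarts; with it, the delay is the time remaining until the next aligned boundary. A simplified standalone sketch follows; `firstScrapeDelay` is a hypothetical name, and the real code additionally compensates the hash-based delay by the current offset within `scrape_interval`:

```go
package main

import (
	"fmt"
	"time"

	"github.com/cespare/xxhash/v2"
)

// firstScrapeDelay mirrors, in simplified form, the branch added to scrapeWork.run.
func firstScrapeDelay(scrapeURL, labels string, scrapeInterval, alignInterval time.Duration, now time.Time) time.Duration {
	if alignInterval <= 0 {
		// Hash-based spreading: the same target always gets the same offset.
		key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", scrapeURL, labels)
		h := uint32(xxhash.Sum64([]byte(key)))
		return time.Duration(float64(scrapeInterval) * (float64(h) / (1 << 32)))
	}
	// Alignment: sleep until the next alignInterval boundary,
	// wrapped into the scrape interval.
	d := uint64(alignInterval)
	sleep := d - uint64(now.UnixNano())%d
	return time.Duration(sleep % uint64(scrapeInterval))
}

func main() {
	now := time.Date(2021, 2, 21, 12, 34, 56, 0, time.UTC)
	// Aligned: waits until the next 10-minute boundary (5m4s here).
	fmt.Println(firstScrapeDelay("http://host/metrics", `{job="foo"}`, time.Hour, 10*time.Minute, now))
	// Unaligned: a stable pseudo-random offset within the hour.
	fmt.Println(firstScrapeDelay("http://host/metrics", `{job="foo"}`, time.Hour, 0, now))
}
```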