From 502d0e2524441a4c4dc14ad02c2b5b16424c9e5e Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin
Date: Thu, 18 Feb 2021 23:51:29 +0200
Subject: [PATCH] lib/promscrape: add scrape_align_interval config option to
 scrape config

This option allows aligning scrapes to a particular interval.
---
 app/vmagent/README.md         | 10 +++++++++
 docs/CHANGELOG.md             |  2 ++
 docs/vmagent.md               | 10 +++++++++
 lib/promscrape/config.go      | 10 ++++++---
 lib/promscrape/config_test.go | 14 +++++++------
 lib/promscrape/scrapework.go  | 38 ++++++++++++++++++++++-------------
 6 files changed, 61 insertions(+), 23 deletions(-)

diff --git a/app/vmagent/README.md b/app/vmagent/README.md
index bf40e17ba2..866862761f 100644
--- a/app/vmagent/README.md
+++ b/app/vmagent/README.md
@@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
   the url may contain sensitive information such as auth tokens or passwords.
   Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
 
+* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
+  in the corresponding scrape config. For example, the following config aligns the start of hourly scrapes to the nearest 10-minute boundary:
+
+  ```yml
+  scrape_configs:
+  - job_name: foo
+    scrape_interval: 1h
+    scrape_align_interval: 10m
+  ```
+
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen on multiple ports
   or use an init container. These errors can be either fixed or suppressed with the `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
   See available options below if you prefer fixing the root cause of the error:
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 4780edae11..17674a6e9e 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 # tip
 
+* FEATURE: vmagent: add `scrape_align_interval` config option, which can be used for aligning scrapes to the beginning of the configured interval. See [these docs](https://victoriametrics.github.io/vmagent.html#troubleshooting) for details.
+
 * BUGFIX: reduce the probability of `duplicate time series` errors when querying Kubernetes metrics.
diff --git a/docs/vmagent.md b/docs/vmagent.md
index bf40e17ba2..866862761f 100644
--- a/docs/vmagent.md
+++ b/docs/vmagent.md
@@ -298,6 +298,16 @@ It may be useful for performing `vmagent` rolling update without scrape loss.
   the url may contain sensitive information such as auth tokens or passwords.
   Pass `-remoteWrite.showURL` command-line flag when starting `vmagent` in order to see all the valid urls.
 
+* If scrapes must be aligned in time (for instance, if they must be performed at the beginning of every hour), then set the `scrape_align_interval` option
+  in the corresponding scrape config. For example, the following config aligns the start of hourly scrapes to the nearest 10-minute boundary:
+
+  ```yml
+  scrape_configs:
+  - job_name: foo
+    scrape_interval: 1h
+    scrape_align_interval: 10m
+  ```
+
 * If you see `skipping duplicate scrape target with identical labels` errors when scraping Kubernetes pods, then it is likely these pods listen on multiple ports
   or use an init container. These errors can be either fixed or suppressed with the `-promscrape.suppressDuplicateScrapeTargetErrors` command-line flag.
   See available options below if you prefer fixing the root cause of the error:
diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go
index 31e1ce858d..aade32faac 100644
--- a/lib/promscrape/config.go
+++ b/lib/promscrape/config.go
@@ -89,9 +89,10 @@ type ScrapeConfig struct {
 	SampleLimit int `yaml:"sample_limit,omitempty"`
 
 	// These options are supported only by lib/promscrape.
-	DisableCompression bool `yaml:"disable_compression,omitempty"`
-	DisableKeepAlive   bool `yaml:"disable_keepalive,omitempty"`
-	StreamParse        bool `yaml:"stream_parse,omitempty"`
+	DisableCompression  bool          `yaml:"disable_compression,omitempty"`
+	DisableKeepAlive    bool          `yaml:"disable_keepalive,omitempty"`
+	StreamParse         bool          `yaml:"stream_parse,omitempty"`
+	ScrapeAlignInterval time.Duration `yaml:"scrape_align_interval,omitempty"`
 
 	// This is set in loadConfig
 	swc *scrapeWorkConfig
@@ -508,6 +509,7 @@ func getScrapeWorkConfig(sc *ScrapeConfig, baseDir string, globalCfg *GlobalConf
 		disableCompression:  sc.DisableCompression,
 		disableKeepAlive:    sc.DisableKeepAlive,
 		streamParse:         sc.StreamParse,
+		scrapeAlignInterval: sc.ScrapeAlignInterval,
 	}
 	return swc, nil
 }
@@ -530,6 +532,7 @@ type scrapeWorkConfig struct {
 	disableCompression   bool
 	disableKeepAlive     bool
 	streamParse          bool
+	scrapeAlignInterval  time.Duration
 }
 
 func appendKubernetesScrapeWork(dst []*ScrapeWork, sdc *kubernetes.SDConfig, baseDir string, swc *scrapeWorkConfig) ([]*ScrapeWork, bool) {
@@ -761,6 +764,7 @@ func appendScrapeWork(dst []*ScrapeWork, swc *scrapeWorkConfig, target string, e
 		DisableCompression:  swc.disableCompression,
 		DisableKeepAlive:    swc.disableKeepAlive,
 		StreamParse:         swc.streamParse,
+		ScrapeAlignInterval: swc.scrapeAlignInterval,
 
 		jobNameOriginal: swc.jobName,
 	})
diff --git a/lib/promscrape/config_test.go b/lib/promscrape/config_test.go
index 9982efb1ae..a1dbc5f571 100644
--- a/lib/promscrape/config_test.go
+++ b/lib/promscrape/config_test.go
@@ -1275,6 +1275,7 @@ scrape_configs:
   disable_keepalive: true
   disable_compression: true
   stream_parse: true
+  scrape_align_interval: 1s
   static_configs:
   - targets:
     - 192.168.1.2 # SNMP device.
@@ -1323,12 +1324,13 @@ scrape_configs:
 					Value: "snmp",
 				},
 			},
-			AuthConfig:         &promauth.Config{},
-			SampleLimit:        100,
-			DisableKeepAlive:   true,
-			DisableCompression: true,
-			StreamParse:        true,
-			jobNameOriginal:    "snmp",
+			AuthConfig:          &promauth.Config{},
+			SampleLimit:         100,
+			DisableKeepAlive:    true,
+			DisableCompression:  true,
+			StreamParse:         true,
+			ScrapeAlignInterval: time.Second,
+			jobNameOriginal:     "snmp",
 		},
 	})
 	f(`
diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go
index 585529e017..82a064094f 100644
--- a/lib/promscrape/scrapework.go
+++ b/lib/promscrape/scrapework.go
@@ -90,6 +90,9 @@ type ScrapeWork struct {
 	// Whether to parse target responses in a streaming manner.
 	StreamParse bool
 
+	// The interval for aligning the first scrape.
+	ScrapeAlignInterval time.Duration
+
 	// The original 'job_name'
 	jobNameOriginal string
 }
@@ -100,9 +103,9 @@ type ScrapeWork struct {
 func (sw *ScrapeWork) key() string {
 	// Do not take into account OriginalLabels.
 	key := fmt.Sprintf("ScrapeURL=%s, ScrapeInterval=%s, ScrapeTimeout=%s, HonorLabels=%v, HonorTimestamps=%v, Labels=%s, "+
-		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v",
+		"AuthConfig=%s, MetricRelabelConfigs=%s, SampleLimit=%d, DisableCompression=%v, DisableKeepAlive=%v, StreamParse=%v, ScrapeAlignInterval=%s",
 		sw.ScrapeURL, sw.ScrapeInterval, sw.ScrapeTimeout, sw.HonorLabels, sw.HonorTimestamps, sw.LabelsString(),
-		sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse)
+		sw.AuthConfig.String(), sw.metricRelabelConfigsString(), sw.SampleLimit, sw.DisableCompression, sw.DisableKeepAlive, sw.StreamParse, sw.ScrapeAlignInterval)
 	return key
 }
@@ -180,20 +183,27 @@ type scrapeWork struct {
 }
 
 func (sw *scrapeWork) run(stopCh <-chan struct{}) {
-	// Calculate start time for the first scrape from ScrapeURL and labels.
-	// This should spread load when scraping many targets with different
-	// scrape urls and labels.
-	// This also makes consistent scrape times across restarts
-	// for a target with the same ScrapeURL and labels.
 	scrapeInterval := sw.Config.ScrapeInterval
-	key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString())
-	h := uint32(xxhash.Sum64([]byte(key)))
-	randSleep := uint64(float64(scrapeInterval) * (float64(h) / (1 << 32)))
-	sleepOffset := uint64(time.Now().UnixNano()) % uint64(scrapeInterval)
-	if randSleep < sleepOffset {
-		randSleep += uint64(scrapeInterval)
+	var randSleep uint64
+	if sw.Config.ScrapeAlignInterval <= 0 {
+		// Calculate start time for the first scrape from ScrapeURL and labels.
+		// This should spread load when scraping many targets with different
+		// scrape urls and labels.
+		// This also makes consistent scrape times across restarts
+		// for a target with the same ScrapeURL and labels.
+		key := fmt.Sprintf("ScrapeURL=%s, Labels=%s", sw.Config.ScrapeURL, sw.Config.LabelsString())
+		h := uint32(xxhash.Sum64([]byte(key)))
+		randSleep = uint64(float64(scrapeInterval) * (float64(h) / (1 << 32)))
+		sleepOffset := uint64(time.Now().UnixNano()) % uint64(scrapeInterval)
+		if randSleep < sleepOffset {
+			randSleep += uint64(scrapeInterval)
+		}
+		randSleep -= sleepOffset
+	} else {
+		d := uint64(sw.Config.ScrapeAlignInterval)
+		randSleep = d - uint64(time.Now().UnixNano())%d
+		randSleep %= uint64(scrapeInterval)
 	}
-	randSleep -= sleepOffset
 	timer := timerpool.Get(time.Duration(randSleep))
 	var timestamp int64
 	var ticker *time.Ticker
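Note for reviewers: the scheduling behavior above can be reproduced outside the scrape loop. Below is a minimal, self-contained Go sketch of the first-scrape delay computation; the `firstScrapeDelay` helper and the `main` driver are illustrative names that do not exist in the codebase, and `hash/fnv` stands in for the `xxhash` dependency so the sketch runs on its own:

```go
// Standalone sketch of the first-scrape delay logic in (*scrapeWork).run.
package main

import (
	"fmt"
	"hash/fnv" // stand-in for the xxhash package used by the library
	"time"
)

// firstScrapeDelay mirrors the two branches added by this patch:
// with alignInterval <= 0 the delay is a deterministic per-target jitter;
// otherwise it is the time remaining until the next alignInterval boundary,
// reduced modulo scrapeInterval.
func firstScrapeDelay(now time.Time, scrapeInterval, alignInterval time.Duration, targetKey string) time.Duration {
	var randSleep uint64
	if alignInterval <= 0 {
		// Hash the target identity so many targets spread evenly across
		// the scrape interval and keep stable slots across restarts.
		h := fnv.New32a()
		h.Write([]byte(targetKey))
		randSleep = uint64(float64(scrapeInterval) * (float64(h.Sum32()) / (1 << 32)))
		sleepOffset := uint64(now.UnixNano()) % uint64(scrapeInterval)
		if randSleep < sleepOffset {
			randSleep += uint64(scrapeInterval)
		}
		randSleep -= sleepOffset
	} else {
		// Sleep until the next wall-clock multiple of alignInterval.
		d := uint64(alignInterval)
		randSleep = d - uint64(now.UnixNano())%d
		randSleep %= uint64(scrapeInterval)
	}
	return time.Duration(randSleep)
}

func main() {
	now := time.Date(2021, 2, 18, 12, 3, 20, 0, time.UTC)
	// scrape_interval: 1h, scrape_align_interval: 10m -> prints 6m40s,
	// i.e. the first scrape fires at 12:10:00 and then repeats hourly.
	fmt.Println(firstScrapeDelay(now, time.Hour, 10*time.Minute, ""))
	// No alignment -> a stable pseudo-random offset within the hour.
	fmt.Println(firstScrapeDelay(now, time.Hour, 0, `ScrapeURL=http://host/metrics, Labels={job="foo"}`))
}
```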
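Two properties of the new branch follow directly from the code: the boundary is derived from the wall clock (`time.Now().UnixNano()` modulo the align interval), so vmagent instances with synchronized clocks align their scrapes to the same boundaries, and the final `randSleep %= uint64(scrapeInterval)` caps the initial sleep below `scrape_interval`. Worked example from the docs snippet: with `scrape_interval: 1h` and `scrape_align_interval: 10m`, a vmagent started at 12:03:20 sleeps `10m - 3m20s = 6m40s`, performs its first scrape at 12:10:00, and scrapes hourly thereafter.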