From 8ee575dee9f9a10112c9171b467b6e845423517b Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 18 Aug 2021 21:58:40 +0300 Subject: [PATCH] lib/promscrape: send stale markers for the previously scraped metrics on failed scrapes like Prometheus does --- docs/CHANGELOG.md | 1 + lib/promscrape/scrapework.go | 30 ++++++++++++++++++++++++++++-- lib/promscrape/scrapework_test.go | 3 +++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index af84a54f7a..ede7c99d47 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -12,6 +12,7 @@ sort: 15 * FEATURE: vmselect: add `-search.noStaleMarkers` command-line flag for stale markers handling in queries. * BUGFIX: vmagent: stop scrapers for deleted targets before starting scrapers for added targets. This should prevent from possible time series overlap when old targets are substituted by new targets (for example, during new deployment in Kubernetes). The overlap could lead to incorrect query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1509). +* BUGFIX: vmagent: send Prometheus stale markers for the previously scraped metrics on failed scrapes like Prometheus does. See [this article](https://www.robustperception.io/staleness-and-promql). * BUGFIX: upgrade base Docker image from Alpine 3.14.0 to Alpine 3.14.1 . This fixes potential security issues - see [Alpine 3.14.1 release notes](https://www.alpinelinux.org/posts/Alpine-3.14.1-released.html). diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 7e9c7e765d..87bc291a45 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -239,7 +239,7 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}) { timestamp += scrapeInterval.Milliseconds() select { case <-stopCh: - sw.sendStaleMarkers() + sw.sendStaleMarkers(false) return case tt := <-ticker.C: t := tt.UnixNano() / 1e6 @@ -322,6 +322,9 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error sw.addAutoTimeseries(wc, "scrape_samples_scraped", float64(samplesScraped), scrapeTimestamp) sw.addAutoTimeseries(wc, "scrape_samples_post_metric_relabeling", float64(samplesPostRelabeling), scrapeTimestamp) sw.addAutoTimeseries(wc, "scrape_series_added", float64(seriesAdded), scrapeTimestamp) + if up == 0 { + sw.sendStaleMarkers(true) + } sw.updateActiveSeries(wc) sw.pushData(&wc.writeRequest) sw.prevLabelsLen = len(wc.labels) @@ -334,6 +337,19 @@ func (sw *scrapeWork) scrapeInternal(scrapeTimestamp, realTimestamp int64) error return err } +func isAutogenSeries(name string) bool { + switch name { + case "up", + "scrape_duration_seconds", + "scrape_samples_scraped", + "scrape_samples_post_metric_relabeling", + "scrape_series_added": + return true + default: + return false + } +} + func (sw *scrapeWork) pushData(wr *prompbmarshal.WriteRequest) { startTime := time.Now() sw.PushData(wr) @@ -504,7 +520,7 @@ func (sw *scrapeWork) updateActiveSeries(wc *writeRequestCtx) { sw.activeSeries = as } -func (sw *scrapeWork) sendStaleMarkers() { +func (sw *scrapeWork) sendStaleMarkers(skipAutogenSeries bool) { series := make([]prompbmarshal.TimeSeries, 0, len(sw.activeSeries)) staleMarkSamples := []prompbmarshal.Sample{ { @@ -514,6 +530,7 @@ func (sw *scrapeWork) sendStaleMarkers() { } for _, b := range sw.activeSeries { var labels []prompbmarshal.Label + skipSeries := false for len(b) > 0 { tail, name, err := encoding.UnmarshalBytes(b) if err != nil { @@ -525,16 +542,25 @@ func (sw *scrapeWork) sendStaleMarkers() { logger.Panicf("BUG: cannot unmarshal label value from activeSeries: %s", err) } b = tail + if skipAutogenSeries && string(name) == "__name__" && isAutogenSeries(bytesutil.ToUnsafeString(value)) { + skipSeries = true + } labels = append(labels, prompbmarshal.Label{ Name: bytesutil.ToUnsafeString(name), Value: bytesutil.ToUnsafeString(value), }) } + if skipSeries { + continue + } series = append(series, prompbmarshal.TimeSeries{ Labels: labels, Samples: staleMarkSamples, }) } + if len(series) == 0 { + return + } wr := &prompbmarshal.WriteRequest{ Timeseries: series, } diff --git a/lib/promscrape/scrapework_test.go b/lib/promscrape/scrapework_test.go index 7d2eeced66..c98bb0fb9f 100644 --- a/lib/promscrape/scrapework_test.go +++ b/lib/promscrape/scrapework_test.go @@ -128,6 +128,9 @@ func TestScrapeWorkScrapeInternalSuccess(t *testing.T) { if pushDataCalls == 0 { t.Fatalf("missing pushData calls") } + if len(timeseriesExpected) != 0 { + t.Fatalf("%d series weren't pushed", len(timeseriesExpected)) + } } f(``, &ScrapeWork{}, `