From 0a9be5ef9d1f4df5e98ebbfce182d78b25b98db4 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sat, 16 Oct 2021 18:47:13 +0300 Subject: [PATCH] lib/promscrape: expose `promscrape_series_limit_max_series` and `promscrape_series_limit_current_series` metrics per each scrape target with the enabled unique series limiter --- docs/CHANGELOG.md | 1 + lib/promscrape/scrapework.go | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e06d72af93..e0fc6e5cd9 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -12,6 +12,7 @@ sort: 15 * FEATURE: vmagent: automatically switch to [stream parsing mode](https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode) if the response from the given target exceeds the command-line flag value `-promscrape.minResponseSizeForStreamParse`. This should reduce memory usage when `vmagent` scrapes targets with non-uniform response sizes (this is the case in Kubernetes monitoring). * FEATURE: vmagent: send Prometheus-like staleness marks in [stream parsing mode](https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode). Previously staleness marks weren't sent in stream parsing mode. See [these docs](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) for details. * FEATURE: vmagent: properly calculate `scrape_series_added` metric for targets in [stream parsing mode](https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode). Previously it was set to 0 in stream parsing mode. See [more details about this metric](https://prometheus.io/docs/concepts/jobs_instances/#automatically-generated-labels-and-time-series). +* FEATURE: vmagent: expose `promscrape_series_limit_max_series` and `promscrape_series_limit_current_series` metrics at `http://vmagent:8429/metrics` for scrape targets with the [enabled series limiter](https://docs.victoriametrics.com/vmagent.html#cardinality-limiter). 
* FEATURE: vmagent: return error if `sample_limit` or `series_limit` options are set when [stream parsing mode](https://docs.victoriametrics.com/vmagent.html#stream-parsing-mode) is enabled, since these limits cannot be applied in stream parsing mode. * FEATURE: add trigonometric functions, which are going to be added in [Prometheus 2.31](https://github.com/prometheus/prometheus/pull/9239): [acosh](https://docs.victoriametrics.com/MetricsQL.html#acosh), [asinh](https://docs.victoriametrics.com/MetricsQL.html#asinh), [atan](https://docs.victoriametrics.com/MetricsQL.html#atan), [atanh](https://docs.victoriametrics.com/MetricsQL.html#atanh), [cosh](https://docs.victoriametrics.com/MetricsQL.html#cosh), [deg](https://docs.victoriametrics.com/MetricsQL.html#deg), [rad](https://docs.victoriametrics.com/MetricsQL.html#rad), [sinh](https://docs.victoriametrics.com/MetricsQL.html#sinh), [tan](https://docs.victoriametrics.com/MetricsQL.html#tan), [tanh](https://docs.victoriametrics.com/MetricsQL.html#tanh). Also add `atan2` binary operator. See [this pull request](https://github.com/prometheus/prometheus/pull/9248). * FEATURE: consistently return the same set of time series from [limitk](https://docs.victoriametrics.com/MetricsQL.html#limitk) function. This improves the usability of periodically refreshed graphs. diff --git a/lib/promscrape/scrapework.go b/lib/promscrape/scrapework.go index 517fd0b4e1..fc95e8ce96 100644 --- a/lib/promscrape/scrapework.go +++ b/lib/promscrape/scrapework.go @@ -199,6 +199,9 @@ type scrapeWork struct { // Optional limiter on the number of unique series per scrape target. seriesLimiter *bloomfilter.Limiter + // Optional counter on the number of dropped samples if the limit on the number of unique series is set. + seriesLimiterRowsDroppedTotal *metrics.Counter + // prevBodyLen contains the previous response body length for the given scrape work. // It is used as a hint in order to reduce memory usage for body buffers. 
prevBodyLen int @@ -302,6 +305,13 @@ func (sw *scrapeWork) run(stopCh <-chan struct{}) { t := time.Now().UnixNano() / 1e6 sw.sendStaleSeries("", t, true) if sw.seriesLimiter != nil { + job := sw.Config.Job() + metrics.UnregisterMetric(fmt.Sprintf(`promscrape_series_limit_rows_dropped_total{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, + sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL)) + metrics.UnregisterMetric(fmt.Sprintf(`promscrape_series_limit_max_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, + sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL)) + metrics.UnregisterMetric(fmt.Sprintf(`promscrape_series_limit_current_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, + sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL)) sw.seriesLimiter.MustStop() } return @@ -615,22 +625,31 @@ func (sw *scrapeWork) applySeriesLimit(wc *writeRequestCtx) bool { seriesLimit = sw.Config.SeriesLimit } if sw.seriesLimiter == nil && seriesLimit > 0 { + job := sw.Config.Job() sw.seriesLimiter = bloomfilter.NewLimiter(seriesLimit, 24*time.Hour) + sw.seriesLimiterRowsDroppedTotal = metrics.GetOrCreateCounter(fmt.Sprintf(`promscrape_series_limit_rows_dropped_total{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, + sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL)) + _ = metrics.GetOrCreateGauge(fmt.Sprintf(`promscrape_series_limit_max_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, + sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL), func() float64 { + return float64(sw.seriesLimiter.MaxItems()) + }) + _ = metrics.GetOrCreateGauge(fmt.Sprintf(`promscrape_series_limit_current_series{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, + sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL), func() float64 { + return float64(sw.seriesLimiter.CurrentItems()) + }) } hsl := sw.seriesLimiter if hsl == nil { return false } dstSeries := wc.writeRequest.Timeseries[:0] - job := sw.Config.Job() limitExceeded := false 
for _, ts := range wc.writeRequest.Timeseries { h := sw.getLabelsHash(ts.Labels) if !hsl.Add(h) { // The limit on the number of hourly unique series per scrape target has been exceeded. // Drop the metric. - metrics.GetOrCreateCounter(fmt.Sprintf(`promscrape_series_limit_rows_dropped_total{scrape_job_original=%q,scrape_job=%q,scrape_target=%q}`, - sw.Config.jobNameOriginal, job, sw.Config.ScrapeURL)).Inc() + sw.seriesLimiterRowsDroppedTotal.Inc() limitExceeded = true continue }