From d0a9b24c5acf7b7bba69e37109003a34ee63484d Mon Sep 17 00:00:00 2001 From: Nikolay Date: Wed, 4 Nov 2020 18:03:43 +0300 Subject: [PATCH] reduces memory usage for vmagent, (#880) * reduces memory usage for vmagent, limits count of droppedTarget, that can be stored for /api/v1/targets page up to 999 items, https://github.com/VictoriaMetrics/VictoriaMetrics/issues/878 * Update app/vmagent/README.md * Update app/vmagent/README.md Co-authored-by: Aliaksandr Valialkin --- app/vmagent/README.md | 3 +++ docs/vmagent.md | 6 ++++++ lib/promscrape/config.go | 3 +++ lib/promscrape/targetstatus.go | 12 +++++++++--- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 22d2c64030..6d132ad5a5 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -231,6 +231,9 @@ This information may be useful for debugging target relabeling. by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets` and `http://vmagent-host:8429/api/v1/targets`. +* The `/api/v1/targets` page may be useful for debugging the relabeling process for scrape targets. + This page contains the original labels for targets dropped during relabeling (see the "droppedTargets" section in the page output). By default up to `-promscrape.maxDroppedTargets` targets are shown here. If your setup drops more targets during relabeling, then increase the `-promscrape.maxDroppedTargets` command-line flag value in order to see all the dropped targets. Note that tracking each dropped target requires up to 10KB of RAM, so large values for `-promscrape.maxDroppedTargets` may result in increased memory usage if a large number of scrape targets are dropped during relabeling. 
+ * If `vmagent` scrapes targets with millions of metrics per each target (for instance, when scraping [federation endpoints](https://prometheus.io/docs/prometheus/latest/federation/)), then it is recommended enabling `stream parsing mode` in order to reduce memory usage during scraping. This mode may be enabled either globally for all the scrape targets by passing `-promscrape.streamParse` command-line flag or on a per-scrape target basis with `stream_parse: true` option. For example: diff --git a/docs/vmagent.md b/docs/vmagent.md index 22d2c64030..b947820b98 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -231,6 +231,12 @@ This information may be useful for debugging target relabeling. by passing `-promscrape.suppressScrapeErrors` command-line flag to `vmagent`. The most recent scrape error per each target can be observed at `http://vmagent-host:8429/targets` and `http://vmagent-host:8429/api/v1/targets`. +* To debug the relabeling process for scrape targets, you can observe the status of targets at the `/api/v1/targets` page. + You may need to increase the value of the `-promscrape.maxDroppedTargets` command-line flag. + This is needed when `vmagent` scrapes thousands of targets and some targets are dropped during relabeling. + Tracking dropped targets requires additional memory (up to 10KB per target). + With service discovery mechanisms such as Kubernetes, there may be around 10 000 dropped scrape targets per 100 active scrape targets. + * If `vmagent` scrapes targets with millions of metrics per each target (for instance, when scraping [federation endpoints](https://prometheus.io/docs/prometheus/latest/federation/)), then it is recommended enabling `stream parsing mode` in order to reduce memory usage during scraping. This mode may be enabled either globally for all the scrape targets by passing `-promscrape.streamParse` command-line flag or on a per-scrape target basis with `stream_parse: true` option. 
For example: diff --git a/lib/promscrape/config.go b/lib/promscrape/config.go index d8ca914699..f61e3d8cdf 100644 --- a/lib/promscrape/config.go +++ b/lib/promscrape/config.go @@ -32,6 +32,9 @@ var ( dryRun = flag.Bool("promscrape.config.dryRun", false, "Checks -promscrape.config file for errors and unsupported fields and then exits. "+ "Returns non-zero exit code on parsing errors and emits these errors to stderr. "+ "Pass -loggerLevel=ERROR if you don't need to see info messages in the output") + maxDroppedTargets = flag.Int("promscrape.maxDroppedTargets", 1000, "Defines how many targets, dropped during service discovery,"+ + " can be stored for /api/v1/targets page. "+ + "This value may be increased for debugging cause of dropping targets during service discovery relabeling") ) // Config represents essential parts from Prometheus config defined at https://prometheus.io/docs/prometheus/latest/configuration/configuration/ diff --git a/lib/promscrape/targetstatus.go b/lib/promscrape/targetstatus.go index 48240368c5..dd24f2495e 100644 --- a/lib/promscrape/targetstatus.go +++ b/lib/promscrape/targetstatus.go @@ -240,12 +240,18 @@ type droppedTarget struct { } func (dt *droppedTargets) Register(originalLabels []prompbmarshal.Label) { + key := promLabelsString(originalLabels) currentTime := fasttime.UnixTimestamp() dt.mu.Lock() - dt.m[key] = droppedTarget{ - originalLabels: originalLabels, - deadline: currentTime + 10*60, + if k, ok := dt.m[key]; ok { + k.deadline = currentTime + 10*60 + dt.m[key] = k + } else if len(dt.m) < *maxDroppedTargets { + dt.m[key] = droppedTarget{ + originalLabels: originalLabels, + deadline: currentTime + 10*60, + } } if currentTime-dt.lastCleanupTime > 60 { for k, v := range dt.m {