From 4a2d7aec7f8bfd0c65891715e96f274f59c2fcaf Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 25 Aug 2021 13:02:52 +0300 Subject: [PATCH] lib/promscrape: expose promscrape_discovery_http_errors_total metric for tracking errors for each http_sd config --- docs/CHANGELOG.md | 1 + lib/promscrape/discovery/http/api.go | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index b554dc5ce0..bc773bc15c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,7 @@ sort: 15 * FEATURE: take into account failed queries in `vm_request_duration_seconds` summary at `/metrics`. Previously only successful queries were taken into account. This could result in skewed summary. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1537). * FEATURE: vmalert: add `-disableAlertgroupLabel` command-line flag for disabling the label with alert group name. This may be needed for proper deduplication in Alertmanager. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1532). * FEATURE: update Go builder from v1.16.7 to v1.17.0. This improves data ingestion and query performance by up to 5% according to benchmarks. See [the release post for Go1.17](https://go.dev/blog/go1.17). +* FEATURE: vmagent: expose `promscrape_discovery_http_errors_total` metric, which can be used for monitoring the number of failed discovery attempts for each `http_sd` config. * BUGFIX: rename `sign` function to `sgn` in order to be consistent with PromQL. See [this pull request from Prometheus](https://github.com/prometheus/prometheus/pull/8457). * BUGFIX: improve the detection of the needed free space for background merge operation. This should prevent from possible out of disk space crashes during big merges. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1560). 
diff --git a/lib/promscrape/discovery/http/api.go b/lib/promscrape/discovery/http/api.go index e9bf7b08d5..40d9d20f97 100644 --- a/lib/promscrape/discovery/http/api.go +++ b/lib/promscrape/discovery/http/api.go @@ -8,6 +8,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/promscrape/discoveryutils" "github.com/VictoriaMetrics/fasthttp" + "github.com/VictoriaMetrics/metrics" ) var configMap = discoveryutils.NewConfigMap() @@ -15,6 +16,9 @@ var configMap = discoveryutils.NewConfigMap() type apiConfig struct { client *discoveryutils.Client path string + + fetchErrors *metrics.Counter + parseErrors *metrics.Counter } // httpGroupTarget respresent prometheus GroupTarget @@ -44,8 +48,10 @@ func newAPIConfig(sdc *SDConfig, baseDir string) (*apiConfig, error) { return nil, fmt.Errorf("cannot create HTTP client for %q: %w", apiServer, err) } cfg := &apiConfig{ - client: client, - path: parsedURL.RequestURI(), + client: client, + path: parsedURL.RequestURI(), + fetchErrors: metrics.GetOrCreateCounter(fmt.Sprintf(`promscrape_discovery_http_errors_total{type="fetch",url=%q}`, sdc.URL)), + parseErrors: metrics.GetOrCreateCounter(fmt.Sprintf(`promscrape_discovery_http_errors_total{type="parse",url=%q}`, sdc.URL)), } return cfg, nil } @@ -64,9 +70,15 @@ func getHTTPTargets(cfg *apiConfig) ([]httpGroupTarget, error) { request.Header.Set("Accept", "application/json") }) if err != nil { + cfg.fetchErrors.Inc() return nil, fmt.Errorf("cannot read http_sd api response: %w", err) } - return parseAPIResponse(data, cfg.path) + tg, err := parseAPIResponse(data, cfg.path) + if err != nil { + cfg.parseErrors.Inc() + return nil, err + } + return tg, nil } func parseAPIResponse(data []byte, path string) ([]httpGroupTarget, error) {