diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c6eaad9424..5a24263e73 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,9 +11,13 @@ in front of VictoriaMetrics. [Contact us](mailto:sales@victoriametrics.com) if y * FEATURE: vmalert: added ability to query Graphite datasource when evaluating alerting and recording rules. See [these docs](https://victoriametrics.github.io/vmalert.html#graphite) for details. * FEATURE: vmagent: added `-remoteWrite.roundDigits` command-line option for rounding metric values to the given number of decimal digits after the point before sending the metric to the corresponding `-remoteWrite.url`. This option can be used for improving data compression on the remote storage, because values with lower number of decimal digits can be compressed better than values with bigger number of decimal digits. * FEATURE: vmagent: added `-remoteWrite.rateLimit` command-line flag for limiting data transfer rate to `-remoteWrite.url`. This may be useful when big amounts of buffered data is sent after temporarily unavailability of the remote storage. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1035 -* FEATURE: vmagent: export `vm_promscrape_scrapes_failed_per_url_total` and `vm_promscrape_scrapes_skipped_by_sample_limit_per_url_total` counters, which may help identifying improperly working scrape targets. - -* BUGFIX: vmagent: reduce the HTTP reconnection rate to scrape targets. Previously vmagent could errorneusly close HTTP keep-alive connections more frequently than needed. +* FEATURE: vmagent: export the following additional metrics, which may be useful during troubleshooting: + - `vm_promscrape_scrapes_failed_per_url_total` + - `vm_promscrape_scrapes_skipped_by_sample_limit_per_url_total` + - `vm_promscrape_discovery_requests_total` + - `vm_promscrape_discovery_retries_total` + - `vm_promscrape_scrape_retries_total` +* BUGFIX: vmagent: reduce HTTP reconnection rate for scrape targets. 
Previously vmagent could erroneously close HTTP keep-alive connections more frequently than needed. * BUGFIX: vmagent: retry scrape and service discovery requests when the remote server closes HTTP keep-alive connection. Previously `disable_keepalive: true` option could be used under `scrape_configs` section when working with such servers. diff --git a/lib/promscrape/client.go b/lib/promscrape/client.go index 03cbcf1933..05a12f12f6 100644 --- a/lib/promscrape/client.go +++ b/lib/promscrape/client.go @@ -242,6 +242,7 @@ var ( scrapesOK = metrics.NewCounter(`vm_promscrape_scrapes_total{status_code="200"}`) scrapesGunzipped = metrics.NewCounter(`vm_promscrape_scrapes_gunziped_total`) scrapesGunzipFailed = metrics.NewCounter(`vm_promscrape_scrapes_gunzip_failed_total`) + scrapeRetries = metrics.NewCounter(`vm_promscrape_scrape_retries_total`) ) func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error { @@ -259,6 +260,7 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, if time.Since(deadline) >= 0 { return fmt.Errorf("the server closes all the connection attempts: %w", err) } + scrapeRetries.Inc() } } diff --git a/lib/promscrape/discoveryutils/client.go b/lib/promscrape/discoveryutils/client.go index 0acdace38d..65b514d631 100644 --- a/lib/promscrape/discoveryutils/client.go +++ b/lib/promscrape/discoveryutils/client.go @@ -14,6 +14,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/proxy" "github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool" "github.com/VictoriaMetrics/fasthttp" + "github.com/VictoriaMetrics/metrics" ) var ( @@ -192,6 +193,7 @@ func (c *Client) getAPIResponseWithParamsAndClient(client *fasthttp.HostClient, } func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error { + discoveryRequests.Inc() for { // Use DoDeadline instead of Do even if hc.ReadTimeout is 
already set in order to guarantee the given deadline // across multiple retries. @@ -206,5 +208,11 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, if time.Since(deadline) >= 0 { return fmt.Errorf("the server closes all the connection attempts: %w", err) } + discoveryRetries.Inc() } } + +var ( + discoveryRetries = metrics.NewCounter(`vm_promscrape_discovery_retries_total`) + discoveryRequests = metrics.NewCounter(`vm_promscrape_discovery_requests_total`) +)