From d13906bf1f8d1aad81c58cc6c01bb69e6ed2607c Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 13 May 2021 10:38:43 +0300 Subject: [PATCH] lib/promscrape: exponentially increase retry interval on unsuccessful requests to scrape targets or to service discovery services This should reduce CPU load at vmagent and at remote side when the remote side doesn't accept HTTP requests. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1289 --- lib/promscrape/client.go | 9 ++++++++- lib/promscrape/discoveryutils/client.go | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/promscrape/client.go b/lib/promscrape/client.go index 7cc9ce158d..d7d5f24189 100644 --- a/lib/promscrape/client.go +++ b/lib/promscrape/client.go @@ -299,6 +299,7 @@ var ( ) func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error { + sleepTime := time.Second for { // Use DoDeadline instead of Do even if hc.ReadTimeout is already set in order to guarantee the given deadline // across multiple retries. @@ -310,9 +311,15 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, return err } // Retry request if the server closes the keep-alive connection unless deadline exceeds. 
- if time.Since(deadline) >= 0 { + maxSleepTime := time.Until(deadline) + if sleepTime > maxSleepTime { return fmt.Errorf("the server closes all the connection attempts: %w", err) } + sleepTime += sleepTime + if sleepTime > maxSleepTime { + sleepTime = maxSleepTime + } + time.Sleep(sleepTime) scrapeRetries.Inc() } } diff --git a/lib/promscrape/discoveryutils/client.go b/lib/promscrape/discoveryutils/client.go index a26190ee63..053af81179 100644 --- a/lib/promscrape/discoveryutils/client.go +++ b/lib/promscrape/discoveryutils/client.go @@ -222,6 +222,7 @@ func (c *Client) getAPIResponseWithParamsAndClient(client *fasthttp.HostClient, } func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, resp *fasthttp.Response, deadline time.Time) error { + sleepTime := time.Second discoveryRequests.Inc() for { // Use DoDeadline instead of Do even if hc.ReadTimeout is already set in order to guarantee the given deadline @@ -234,9 +235,15 @@ func doRequestWithPossibleRetry(hc *fasthttp.HostClient, req *fasthttp.Request, return err } // Retry request if the server closes the keep-alive connection unless deadline exceeds. - if time.Since(deadline) >= 0 { + maxSleepTime := time.Until(deadline) + if sleepTime > maxSleepTime { return fmt.Errorf("the server closes all the connection attempts: %w", err) } + sleepTime += sleepTime + if sleepTime > maxSleepTime { + sleepTime = maxSleepTime + } + time.Sleep(sleepTime) discoveryRetries.Inc() } }