From 311a81c7b0e79900878db5be2fec0501a037c56a Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Thu, 22 Jun 2023 15:07:32 +0200 Subject: [PATCH] vmalert: properly interrupt remotewrite retries on shutdown (#4505) Signed-off-by: hagen1778 --- app/vmalert/remotewrite/remotewrite.go | 5 ++++- docs/CHANGELOG.md | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/app/vmalert/remotewrite/remotewrite.go b/app/vmalert/remotewrite/remotewrite.go index 6597508a28..0dddc8698e 100644 --- a/app/vmalert/remotewrite/remotewrite.go +++ b/app/vmalert/remotewrite/remotewrite.go @@ -147,6 +147,7 @@ func (c *Client) run(ctx context.Context) { wr.Timeseries = append(wr.Timeseries, ts) } lastCtx, cancel := context.WithTimeout(context.Background(), defaultWriteTimeout) + logger.Infof("shutting down remote write client and flushing remained %d series", len(wr.Timeseries)) c.flush(lastCtx, wr) cancel() } @@ -207,6 +208,7 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) { } b := snappy.Encode(nil, data) +L: for attempts := 0; attempts < retryCount; attempts++ { err := c.send(ctx, b) if err == nil { @@ -226,7 +228,8 @@ func (c *Client) flush(ctx context.Context, wr *prompbmarshal.WriteRequest) { // check if request has been cancelled before backoff select { case <-ctx.Done(): - break + logger.Errorf("interrupting retry attempt %d: context cancelled", attempts+1) + break L default: } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f34dd24c91..5b0150ae11 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -46,6 +46,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: [storage](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html): Properly creates `parts.json` after migration from versions below `v1.90.0. It must fix errors on start-up after unclean shutdown. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4336) for details. 
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a memory leak issue associated with chart updates. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4455). * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): retry all errors except 4XX status codes while pushing via remote-write to the remote storage. Previously, errors like broken connection could prevent vmalert from retrying the request. +* BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): properly interrupt retry attempts on vmalert shutdown. Previously, vmalert could wait for all retries to finish before shutting down. ## [v1.91.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.91.2)