From ee5c50244617f62469b2db5049e6658fb7a2fce7 Mon Sep 17 00:00:00 2001 From: Nikolay Date: Thu, 23 Jun 2022 19:17:24 +0200 Subject: [PATCH] app/vmselect: fixes partial response with replicationFactor (#2777) * app/vmselect: fixes partial response with replicationFactor Allow partial response if it meets replicationFactor configured at vmselect https://t.me/VictoriaMetrics_ru1/38490 * docs/CHANGELOG.md: document this change Co-authored-by: Aliaksandr Valialkin --- app/vmselect/netstorage/netstorage.go | 11 ++++++----- docs/CHANGELOG.md | 1 + 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index aac86f9465..bc414d9006 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -1380,11 +1380,6 @@ func (snr *storageNodesRequest) collectResults(partialResultsCounter *metrics.Co // passed to startStorageNodesRequest must be finished until the deadline. result := <-snr.resultsCh if err := f(result); err != nil { - if snr.denyPartialResponse { - // Immediately return the error to the caller if partial responses are denied. - // There is no need to wait for responses from other vmstorage nodes - they will be processed in background. - return false, err - } var er *errRemote if errors.As(err, &er) { // Immediately return the error reported by vmstorage to the caller, @@ -1393,6 +1388,12 @@ func (snr *storageNodesRequest) collectResults(partialResultsCounter *metrics.Co return false, err } errsPartial = append(errsPartial, err) + if snr.denyPartialResponse && len(errsPartial) >= *replicationFactor { + // Return the error to the caller if partial responses are denied + // and the number of partial responses reaches -replicationFactor, + // since this means that the response is partial. 
+ return false, err + } continue } resultsCollected++ diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 1dc03cc113..9cc2b765c2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -29,6 +29,7 @@ scrape_configs: ``` * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when an unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes. +* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767). ## [v1.78.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.78.0)