mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-14 16:12:15 +01:00
app/vmselect/netstorage: add -replicationFactor
command-line flag for reducing query duration when a part of vmstorage nodes are temporarily slow and/or temporarily unavailable
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
This commit is contained in:
parent
990eb29a9b
commit
85eecf5801
@ -335,8 +335,9 @@ In order to enable application-level replication, `-replicationFactor=N` command
|
||||
This guarantees that all the data remains available for querying if up to `N-1` `vmstorage` nodes are unavailable.
|
||||
For example, when `-replicationFactor=3` is passed to `vminsert`, then it replicates all the ingested data to 3 distinct `vmstorage` nodes.
|
||||
|
||||
When the replication is enabled, `-dedup.minScrapeInterval=1ms` command-line flag must be passed to `vmselect`
|
||||
in order to de-duplicate replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
|
||||
When replication is enabled, the `-replicationFactor=N` and `-dedup.minScrapeInterval=1ms` command-line flags must be passed to `vmselect` nodes.
|
||||
The `-replicationFactor=N` flag improves query performance when some `vmstorage` nodes respond slowly and/or are temporarily unavailable.
|
||||
The `-dedup.minScrapeInterval=1ms` de-duplicates replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
|
||||
when [deduplication](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#deduplication) is used in addition to replication.
|
||||
|
||||
Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883),
|
||||
|
@ -3,6 +3,7 @@ package netstorage
|
||||
import (
|
||||
"container/heap"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
@ -27,6 +28,9 @@ import (
|
||||
"github.com/VictoriaMetrics/metrics"
|
||||
)
|
||||
|
||||
var replicationFactor = flag.Int("replicationFactor", 1, "How many copies of every time series is available on vmstorage nodes. "+
|
||||
"See -replicationFactor command-line flag for vminsert nodes")
|
||||
|
||||
// Result is a single timeseries result.
|
||||
//
|
||||
// ProcessSearchQuery returns Result slice.
|
||||
@ -1249,6 +1253,7 @@ func processSearchQuery(at *auth.Token, denyPartialResponse bool, sq *storage.Se
|
||||
|
||||
func collectResults(denyPartialResponse bool, resultsCh <-chan interface{}, partialResultsCounter *metrics.Counter, f func(result interface{}) error) (bool, error) {
|
||||
var errors []error
|
||||
resultsCollected := 0
|
||||
for i := 0; i < len(storageNodes); i++ {
|
||||
// There is no need in timer here, since all the goroutines executing
|
||||
// the sn.process* function must be finished until the deadline.
|
||||
@ -1258,6 +1263,16 @@ func collectResults(denyPartialResponse bool, resultsCh <-chan interface{}, part
|
||||
errors = append(errors, err)
|
||||
continue
|
||||
}
|
||||
resultsCollected++
|
||||
if resultsCollected > len(storageNodes)-*replicationFactor {
|
||||
// There is no need in waiting for the remaining results,
|
||||
// because the collected results contain all the data according to the given -replicationFactor.
|
||||
// This should speed up responses when a part of vmstorage nodes are slow and/or temporarily unavailable.
|
||||
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
|
||||
//
|
||||
// It is expected that cap(resultsCh) == len(storageNodes), otherwise goroutine leak is possible.
|
||||
return false, nil
|
||||
}
|
||||
}
|
||||
isPartial := false
|
||||
if len(errors) > 0 {
|
||||
|
@ -6,6 +6,8 @@
|
||||
```bash
|
||||
snap install victoriametrics
|
||||
```
|
||||
* FEATURE: vmselect: add `-replicationFactor` command-line flag for reducing query duration when replication is enabled and a part of vmstorage nodes
|
||||
are temporarily slow and/or temporarily unavailable. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
|
||||
* FEATURE: vminsert: export `vm_rpc_vmstorage_is_reachable` metric, which can be used for monitoring reachability of vmstorage nodes from vminsert nodes.
|
||||
* FEATURE: vmagent: add Netflix Eureka service discovery (aka [eureka_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#eureka_sd_config)).
|
||||
See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/851
|
||||
|
@ -335,8 +335,9 @@ In order to enable application-level replication, `-replicationFactor=N` command
|
||||
This guarantees that all the data remains available for querying if up to `N-1` `vmstorage` nodes are unavailable.
|
||||
For example, when `-replicationFactor=3` is passed to `vminsert`, then it replicates all the ingested data to 3 distinct `vmstorage` nodes.
|
||||
|
||||
When the replication is enabled, `-dedup.minScrapeInterval=1ms` command-line flag must be passed to `vmselect`
|
||||
in order to de-duplicate replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
|
||||
When replication is enabled, the `-replicationFactor=N` and `-dedup.minScrapeInterval=1ms` command-line flags must be passed to `vmselect` nodes.
|
||||
The `-replicationFactor=N` flag improves query performance when some `vmstorage` nodes respond slowly and/or are temporarily unavailable.
|
||||
The `-dedup.minScrapeInterval=1ms` de-duplicates replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
|
||||
when [deduplication](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#deduplication) is used in addition to replication.
|
||||
|
||||
Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883),
|
||||
|
Loading…
Reference in New Issue
Block a user