app/vmselect/netstorage: add -replicationFactor command-line flag for reducing query duration when a part of vmstorage nodes are temporarily slow and/or temporarily unavailable

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
parent 990eb29a9b
commit 85eecf5801
@@ -335,8 +335,9 @@ In order to enable application-level replication, `-replicationFactor=N` command
 This guarantees that all the data remains available for querying if up to `N-1` `vmstorage` nodes are unavailable.
 For example, when `-replicationFactor=3` is passed to `vminsert`, then it replicates all the ingested data to 3 distinct `vmstorage` nodes.
 
-When the replication is enabled, `-dedup.minScrapeInterval=1ms` command-line flag must be passed to `vmselect`
-in order to de-duplicate replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
+When the replication is enabled, the `-replicationFactor=N` and `-dedup.minScrapeInterval=1ms` command-line flags must be passed to `vmselect` nodes.
+The `-replicationFactor=N` improves query performance when a part of vmstorage nodes respond slowly and/or are temporarily unavailable.
+The `-dedup.minScrapeInterval=1ms` de-duplicates replicated data during queries. It is OK if `-dedup.minScrapeInterval` exceeds 1ms
 when [deduplication](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/README.md#deduplication) is used additionally to replication.
 
 Note that [replication doesn't save from disaster](https://medium.com/@valyala/speeding-up-backups-for-big-time-series-databases-533c1a927883),
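For illustration only, here is a minimal Go sketch of the replication guarantee described in the docs hunk above: every ingested row is written to `N` distinct storage nodes, so queries can still see all the data when up to `N-1` nodes are unavailable. The node-selection scheme and function names below are hypothetical, not VictoriaMetrics' actual routing code.

```go
package main

import "fmt"

// replicateTargets returns the indexes of replicationFactor distinct storage
// nodes that should receive a row with the given hash. The selection scheme
// (consecutive nodes starting at a hash-based offset) is a simplified,
// hypothetical example of "replicate to N distinct vmstorage nodes".
func replicateTargets(rowHash uint64, nodeCount, replicationFactor int) []int {
	if replicationFactor > nodeCount {
		replicationFactor = nodeCount
	}
	start := int(rowHash % uint64(nodeCount))
	targets := make([]int, 0, replicationFactor)
	for i := 0; i < replicationFactor; i++ {
		targets = append(targets, (start+i)%nodeCount)
	}
	return targets
}

func main() {
	// With 5 storage nodes and -replicationFactor=3, every row lands on 3
	// distinct nodes, so up to 2 nodes may be unavailable without data loss.
	fmt.Println(replicateTargets(12345, 5, 3))
}
```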
@@ -3,6 +3,7 @@ package netstorage
 import (
 	"container/heap"
 	"errors"
+	"flag"
 	"fmt"
 	"io"
 	"net/http"
@@ -27,6 +28,9 @@ import (
 	"github.com/VictoriaMetrics/metrics"
 )
 
+var replicationFactor = flag.Int("replicationFactor", 1, "How many copies of every time series is available on vmstorage nodes. "+
+	"See -replicationFactor command-line flag for vminsert nodes")
+
 // Result is a single timeseries result.
 //
 // ProcessSearchQuery returns Result slice.
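The hunk above registers a package-level flag via Go's standard `flag` package. `flag.Int` returns an `*int` that is filled in when `flag.Parse()` runs, which is why the query path later dereferences it as `*replicationFactor`. A minimal standalone sketch of the same pattern (the binary name in the comment is illustrative, not the actual vmselect wiring):

```go
package main

import (
	"flag"
	"fmt"
)

// Mirrors the pattern from netstorage.go: a package-level *int registered at init time.
var replicationFactor = flag.Int("replicationFactor", 1, "How many copies of every time series is available on vmstorage nodes. "+
	"See -replicationFactor command-line flag for vminsert nodes")

func main() {
	// flag.Parse fills *replicationFactor from os.Args, e.g. `./app -replicationFactor=3`.
	flag.Parse()
	fmt.Printf("replicationFactor=%d\n", *replicationFactor)
}
```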
@@ -1249,6 +1253,7 @@ func processSearchQuery(at *auth.Token, denyPartialResponse bool, sq *storage.Se
 
 func collectResults(denyPartialResponse bool, resultsCh <-chan interface{}, partialResultsCounter *metrics.Counter, f func(result interface{}) error) (bool, error) {
 	var errors []error
+	resultsCollected := 0
 	for i := 0; i < len(storageNodes); i++ {
 		// There is no need in timer here, since all the goroutines executing
 		// the sn.process* function must be finished until the deadline.
@@ -1258,6 +1263,16 @@ func collectResults(denyPartialResponse bool, resultsCh <-chan interface{}, part
 			errors = append(errors, err)
 			continue
 		}
+		resultsCollected++
+		if resultsCollected > len(storageNodes)-*replicationFactor {
+			// There is no need in waiting for the remaining results,
+			// because the collected results contain all the data according to the given -replicationFactor.
+			// This should speed up responses when a part of vmstorage nodes are slow and/or temporarily unavailable.
+			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
+			//
+			// It is expected that cap(resultsCh) == len(storageNodes), otherwise goroutine leak is possible.
+			return false, nil
+		}
 	}
 	isPartial := false
 	if len(errors) > 0 {
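The early-return condition above means vmselect stops waiting once `len(storageNodes)-replicationFactor+1` nodes have answered successfully: with replication factor `N`, the up to `N-1` outstanding responses can only contain duplicate data. Below is a self-contained sketch of that pattern with hypothetical types and a dummy results channel in place of the real `storageNodes`/`resultsCh` machinery; it is not the actual `collectResults` function.

```go
package main

import "fmt"

// collectEnoughResults reads per-node results from resultsCh and returns as
// soon as the number of successful results guarantees full coverage under the
// given replication factor: the remaining nodes can only hold duplicates.
func collectEnoughResults(resultsCh <-chan error, nodeCount, replicationFactor int) (collected int, errs []error) {
	for i := 0; i < nodeCount; i++ {
		err := <-resultsCh
		if err != nil {
			errs = append(errs, err)
			continue
		}
		collected++
		if collected > nodeCount-replicationFactor {
			// Enough nodes answered; skip the stragglers. The channel must be
			// buffered with capacity nodeCount so late senders never block,
			// otherwise their goroutines would leak.
			return collected, errs
		}
	}
	return collected, errs
}

func main() {
	const nodes, rf = 5, 3
	resultsCh := make(chan error, nodes) // buffered: late senders never block
	for i := 0; i < nodes; i++ {
		resultsCh <- nil // pretend every node responded successfully
	}
	collected, errs := collectEnoughResults(resultsCh, nodes, rf)
	fmt.Printf("collected=%d errors=%d\n", collected, len(errs)) // prints collected=3 errors=0
}
```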
@@ -6,6 +6,8 @@
 ```bash
 snap install victoriametrics
 ```
+* FEATURE: vmselect: add `-replicationFactor` command-line flag for reducing query duration when replication is enabled and a part of vmstorage nodes
+  are temporarily slow and/or temporarily unavailable. See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/711
 * FEATURE: vminsert: export `vm_rpc_vmstorage_is_reachable` metric, which can be used for monitoring reachability of vmstorage nodes from vminsert nodes.
 * FEATURE: vmagent: add Netflix Eureka service discovery (aka [eureka_sd_config](https://prometheus.io/docs/prometheus/latest/configuration/configuration/#eureka_sd_config)).
 See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/851