app/vmselect: follow-up for 626073bca8

* Rename -search.maxMetricsPointSearch to -search.maxSamplesPerQuery, so it is more consistent with the existing -search.maxSamplesPerSeries
* Move the -search.maxSamplesPerQuery from vmstorage to vmselect, so it could effectively limit the number of raw samples obtained from all the vmstorage nodes
* Document the -search.maxSamplesPerQuery in docs/CHANGELOG.md
This commit is contained in:
Aliaksandr Valialkin 2021-07-28 17:40:09 +03:00
parent 9ffd70a921
commit 8ee8660ac4
7 changed files with 31 additions and 6 deletions

View File

@ -1785,6 +1785,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
-search.maxQueueDuration duration
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
-search.maxSamplesPerQuery int
The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries (default 1000000000)
-search.maxSamplesPerSeries int
The maximum number of raw samples a single query can scan per each time series. This option allows limiting memory usage (default 30000000)
-search.maxStalenessInterval duration

View File

@ -28,6 +28,7 @@ var (
maxTagValueSuffixesPerSearch = flag.Int("search.maxTagValueSuffixesPerSearch", 100e3, "The maximum number of tag value suffixes returned from /metrics/find")
maxMetricsPerSearch = flag.Int("search.maxUniqueTimeseries", 300e3, "The maximum number of unique time series each search can scan. This option allows limiting memory usage")
maxSamplesPerSeries = flag.Int("search.maxSamplesPerSeries", 30e6, "The maximum number of raw samples a single query can scan per each time series. This option allows limiting memory usage")
maxSamplesPerQuery = flag.Int("search.maxSamplesPerQuery", 1e9, "The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries")
)
// Result is a single timeseries result.
@ -423,7 +424,7 @@ func (pts *packedTimeseries) Unpack(dst *Result, tbf *tmpBlocksFile, tr storage.
for _, sb := range upw.sbs {
samples += len(sb.Timestamps)
}
if samples < *maxSamplesPerSeries {
if *maxSamplesPerSeries <= 0 || samples < *maxSamplesPerSeries {
sbs = append(sbs, upw.sbs...)
} else {
firstErr = fmt.Errorf("cannot process more than %d samples per series; either increase -search.maxSamplesPerSeries "+
@ -1006,6 +1007,7 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline search
m := make(map[string][]blockRef, maxSeriesCount)
orderedMetricNames := make([]string, 0, maxSeriesCount)
blocksRead := 0
samples := 0
tbf := getTmpBlocksFile()
var buf []byte
for sr.NextMetricBlock() {
@ -1015,7 +1017,14 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline search
putStorageSearch(sr)
return nil, fmt.Errorf("timeout exceeded while fetching data block #%d from storage: %s", blocksRead, deadline.String())
}
buf = sr.MetricBlockRef.BlockRef.Marshal(buf[:0])
br := sr.MetricBlockRef.BlockRef
samples += br.RowsCount()
if *maxSamplesPerQuery > 0 && samples > *maxSamplesPerQuery {
putTmpBlocksFile(tbf)
putStorageSearch(sr)
return nil, fmt.Errorf("cannot select more than -search.maxSamplesPerQuery=%d samples; possible solutions: to increase the -search.maxSamplesPerQuery; to reduce time range for the query; to use more specific label filters in order to select lower number of series", *maxSamplesPerQuery)
}
buf = br.Marshal(buf[:0])
addr, err := tbf.WriteBlockRefData(buf)
if err != nil {
putTmpBlocksFile(tbf)
@ -1026,7 +1035,7 @@ func ProcessSearchQuery(sq *storage.SearchQuery, fetchData bool, deadline search
metricNameStrUnsafe := bytesutil.ToUnsafeString(metricName)
brs := m[metricNameStrUnsafe]
brs = append(brs, blockRef{
partRef: sr.MetricBlockRef.BlockRef.PartRef(),
partRef: br.PartRef(),
addr: addr,
})
if len(brs) > 1 {

View File

@ -6,7 +6,8 @@ sort: 15
## tip
* FEATURE: add `-search.maxSamplesPerSeries` command-line flag for limiting the number of raw samples a single query could process per each time series. This option can prevent from out of memory errors when a query processes tens of millions of raw samples per series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1067).
* FEATURE: add `-search.maxSamplesPerSeries` command-line flag for limiting the number of raw samples a single query can process per each time series. This option can protect from out of memory errors when a query processes tens of millions of raw samples per series. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1067).
* FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too big number of raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478).
* BUGFIX: vmbackup: automatically set default `us-east1` S3 region if it is missing. This should simplify using S3-compatible services such as MinIO for backups. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1449).
* BUGFIX: vmselect: prevent from possible deadlock when multiple `target` query args are passed to [Graphite Render API](https://docs.victoriametrics.com/#graphite-render-api-usage).

View File

@ -654,6 +654,10 @@ Below is the output for `/path/to/vmselect -help`:
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
-search.maxQueueDuration duration
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
-search.maxSamplesPerQuery int
The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries (default 1000000000)
-search.maxSamplesPerSeries int
The maximum number of raw samples a single query can scan per each time series. See also -search.maxSamplesPerQuery (default 30000000)
-search.maxStalenessInterval duration
The maximum interval for staleness calculations. By default it is automatically calculated from the median interval between samples. This flag could be useful for tuning Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. See also '-search.maxLookback' flag, which has the same meaning due to historical reasons
-search.maxStatusRequestDuration duration
@ -665,7 +669,7 @@ Below is the output for `/path/to/vmselect -help`:
-search.queryStats.lastQueriesCount int
Query stats for /api/v1/status/top_queries is tracked on this number of last queries. Zero value disables query stats tracking (default 20000)
-search.queryStats.minQueryDuration duration
The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats
The minimum duration for queries to track in query stats at /api/v1/status/top_queries. Queries with lower duration are ignored in query stats (default 1ms)
-search.resetCacheAuthKey string
Optional authKey for resetting rollup cache via /internal/resetRollupResultCache call
-search.treatDotsAsIsInRegexps
@ -760,7 +764,7 @@ Below is the output for `/path/to/vmstorage -help`:
-search.maxTagValues int
The maximum number of tag values returned per search (default 100000)
-search.maxUniqueTimeseries int
The maximum number of unique time series each search can scan (default 300000)
The maximum number of unique time series a single query can process. This allows protecting against heavy queries, which select unexpectedly high number of series. See also -search.maxSamplesPerQuery and -search.maxSamplesPerSeries (default 300000)
-smallMergeConcurrency int
The maximum number of CPU cores to use for small merges. Default value is used if set to 0
-snapshotAuthKey string

View File

@ -1785,6 +1785,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
-search.maxQueueDuration duration
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
-search.maxSamplesPerQuery int
The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries (default 1000000000)
-search.maxSamplesPerSeries int
The maximum number of raw samples a single query can scan per each time series. This option allows limiting memory usage (default 30000000)
-search.maxStalenessInterval duration

View File

@ -1789,6 +1789,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 16384)
-search.maxQueueDuration duration
The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached; see also -search.maxQueryDuration (default 10s)
-search.maxSamplesPerQuery int
The maximum number of raw samples a single query can process across all time series. This protects from heavy queries, which select unexpectedly high number of raw samples. See also -search.maxSamplesPerSeries (default 1000000000)
-search.maxSamplesPerSeries int
The maximum number of raw samples a single query can scan per each time series. This option allows limiting memory usage (default 30000000)
-search.maxStalenessInterval duration

View File

@ -48,6 +48,11 @@ func (br *BlockRef) Marshal(dst []byte) []byte {
return br.bh.Marshal(dst)
}
// RowsCount returns the number of rows in br.
func (br *BlockRef) RowsCount() int {
return int(br.bh.RowsCount)
}
// PartRef returns PartRef from br.
func (br *BlockRef) PartRef() PartRef {
return PartRef{