vmselect/promql: check for deadline in count_values fn (#3806)

* vmselect/promql: check for deadline in `count_values` fn

`count_values` could be very slow during the data processing. Checking for deadline between iterations is supposed to reduce the probability of exceeding `search.maxQueryDuration`.

The change also adds a new trace record, which captures the time spent in the aggregation function. Before that, the trace for aggr funcs could be confusing since it didn't account for all the places where time was spent.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* wip

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2024-11-23 12:31:07 +01:00 · 2023-02-25 01:59:26 +01:00 · 2023-02-25 01:59:26 +01:00 · e1c3267e34
commit e1c3267e34
parent c33cc4322c
6 changed files with 59 additions and 45 deletions
--- a/README.md
+++ b/README.md
@@ -1357,6 +1357,7 @@ By default VictoriaMetrics is tuned for an optimal resource usage under typical
 - `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
 - `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
 - `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
+- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
 - `-search.maxSeries` limits the number of time series, which may be returned from [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers). This endpoint is used mostly by Grafana for auto-completion of metric names, label names and label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxSeries` to quite low value in order limit CPU and memory usage.
 - `-search.maxTagKeys` limits the number of items, which may be returned from [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names). This endpoint is used mostly by Grafana for auto-completion of label names. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagKeys` to quite low value in order to limit CPU and memory usage.
 - `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.
--- a/app/vmselect/promql/aggr.go
+++ b/app/vmselect/promql/aggr.go
@@ -1,6 +1,7 @@
 package promql

 import (
+	"flag"
 	"fmt"
 	"math"
 	"sort"
@@ -15,6 +16,8 @@ import (
 	"github.com/cespare/xxhash/v2"
 )

+var maxSeriesPerAggrFunc = flag.Int("search.maxSeriesPerAggrFunc", 1e6, "The maximum number of time series an aggregate MetricsQL function can generate")
+
 var aggrFuncs = map[string]aggrFunc{
 	"any":            aggrFuncAny,
 	"avg":            newAggrFunc(aggrFuncAvg),
@@ -106,6 +109,16 @@ func removeGroupTags(metricName *storage.MetricName, modifier *metricsql.Modifie

 func aggrFuncExt(afe func(tss []*timeseries, modifier *metricsql.ModifierExpr) []*timeseries, argOrig []*timeseries,
 	modifier *metricsql.ModifierExpr, maxSeries int, keepOriginal bool) ([]*timeseries, error) {
+	m := aggrPrepareSeries(argOrig, modifier, maxSeries, keepOriginal)
+	rvs := make([]*timeseries, 0, len(m))
+	for _, tss := range m {
+		rv := afe(tss, modifier)
+		rvs = append(rvs, rv...)
+	}
+	return rvs, nil
+}
+
+func aggrPrepareSeries(argOrig []*timeseries, modifier *metricsql.ModifierExpr, maxSeries int, keepOriginal bool) map[string][]*timeseries {
 	// Remove empty time series, e.g. series with all NaN samples,
 	// since such series are ignored by aggregate functions.
 	argOrig = removeEmptySeries(argOrig)
@@ -130,21 +143,7 @@ func aggrFuncExt(afe func(tss []*timeseries, modifier *metricsql.ModifierExpr) [
 		m[k] = tss
 	}
 	bbPool.Put(bb)
-
-	srcTssCount := 0
-	dstTssCount := 0
-	rvs := make([]*timeseries, 0, len(m))
-	for _, tss := range m {
-		rv := afe(tss, modifier)
-		rvs = append(rvs, rv...)
-		srcTssCount += len(tss)
-		dstTssCount += len(rv)
-		if dstTssCount > 2000 && dstTssCount > 16*srcTssCount {
-			// This looks like count_values explosion.
-			return nil, fmt.Errorf(`too many timeseries after aggragation; got %d; want less than %d`, dstTssCount, 16*srcTssCount)
-		}
-	}
-	return rvs, nil
+	return m
 }

 func aggrFuncAny(afa *aggrFuncArg) ([]*timeseries, error) {
@@ -588,46 +587,56 @@ func aggrFuncCountValues(afa *aggrFuncArg) ([]*timeseries, error) {
 		// Do nothing
 	}

-	afe := func(tss []*timeseries, modififer *metricsql.ModifierExpr) []*timeseries {
-		m := make(map[float64]bool)
+	afe := func(tss []*timeseries, modififer *metricsql.ModifierExpr) ([]*timeseries, error) {
+		m := make(map[float64]*timeseries)
 		for _, ts := range tss {
-			for _, v := range ts.Values {
+			for i, v := range ts.Values {
 				if math.IsNaN(v) {
 					continue
 				}
-				m[v] = true
-			}
-		}
-		values := make([]float64, 0, len(m))
-		for v := range m {
-			values = append(values, v)
-		}
-		sort.Float64s(values)
-
-		var rvs []*timeseries
-		for _, v := range values {
-			var dst timeseries
-			dst.CopyFromShallowTimestamps(tss[0])
-			dst.MetricName.RemoveTag(dstLabel)
-			dst.MetricName.AddTag(dstLabel, strconv.FormatFloat(v, 'f', -1, 64))
-			for i := range dst.Values {
-				count := 0
-				for _, ts := range tss {
-					if ts.Values[i] == v {
-						count++
+				dst := m[v]
+				if dst == nil {
+					if len(m) >= *maxSeriesPerAggrFunc {
+						return nil, fmt.Errorf("more than -search.maxSeriesPerAggrFunc=%d are generated by count_values()", *maxSeriesPerAggrFunc)
 					}
+					dst = &timeseries{}
+					dst.CopyFromShallowTimestamps(tss[0])
+					dst.MetricName.RemoveTag(dstLabel)
+					dst.MetricName.AddTag(dstLabel, strconv.FormatFloat(v, 'f', -1, 64))
+					values := dst.Values
+					for j := range values {
+						values[j] = nan
+					}
+					m[v] = dst
 				}
-				n := float64(count)
-				if n == 0 {
-					n = nan
+				values := dst.Values
+				if math.IsNaN(values[i]) {
+					values[i] = 1
+				} else {
+					values[i]++
 				}
-				dst.Values[i] = n
 			}
-			rvs = append(rvs, &dst)
 		}
-		return rvs
+		rvs := make([]*timeseries, 0, len(m))
+		for _, ts := range m {
+			rvs = append(rvs, ts)
+		}
+		return rvs, nil
 	}
-	return aggrFuncExt(afe, args[1], &afa.ae.Modifier, afa.ae.Limit, false)
+
+	m := aggrPrepareSeries(args[1], &afa.ae.Modifier, afa.ae.Limit, false)
+	rvs := make([]*timeseries, 0, len(m))
+	for _, tss := range m {
+		rv, err := afe(tss, modifier)
+		if err != nil {
+			return nil, err
+		}
+		rvs = append(rvs, rv...)
+		if len(rvs) > *maxSeriesPerAggrFunc {
+			return nil, fmt.Errorf("more than -search.maxSeriesPerAggrFunc=%d are generated by count_values()", *maxSeriesPerAggrFunc)
+		}
+	}
+	return rvs, nil
 }

 func newAggrFuncTopK(isReverse bool) aggrFunc {
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -35,6 +35,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
 * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_outliers(k, q)` function for dropping outliers located farther than `k*range_mad(q)` from the `range_median(q)`. This should help removing outliers during query time at [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759).
 * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add `range_trim_zscore(z, q)` function for dropping outliers located farther than `z*range_stddev(q)` from `range_avg(q)`. This should help removing outliers during query time at [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3759).
 * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): show `median` instead of `avg` in graph tooltip and line legend, since `median` is more tolerant against spikes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3706).
+* FEATURE: add `-search.maxSeriesPerAggrFunc` command-line flag, which can be used for limiting the number of time series [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) can return in a single query. This flag can be useful for preventing OOMs when [count_values](https://docs.victoriametrics.com/MetricsQL.html#count_values) function is improperly used.

 * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): small UX improvements for mobile view. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3707) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3848).
 * FEATURE: add `-search.logQueryMemoryUsage` command-line flag for logging queries, which need more memory than specified by this command-line flag. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3553). Thanks to @michal-kralik for the idea and the intial implementation.
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@@ -584,6 +584,7 @@ Some workloads may need fine-grained resource usage limits. In these cases the f
  matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
 - `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated
  per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
+- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
 - `-search.maxSeries` at `vmselect` limits the number of time series, which may be returned from
  [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers).
  This endpoint is used mostly by Grafana for auto-completion of metric names, label names and label values.
--- a/docs/README.md
+++ b/docs/README.md
@@ -1358,6 +1358,7 @@ By default VictoriaMetrics is tuned for an optimal resource usage under typical
 - `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
 - `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
 - `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
+- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
 - `-search.maxSeries` limits the number of time series, which may be returned from [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers). This endpoint is used mostly by Grafana for auto-completion of metric names, label names and label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxSeries` to quite low value in order limit CPU and memory usage.
 - `-search.maxTagKeys` limits the number of items, which may be returned from [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names). This endpoint is used mostly by Grafana for auto-completion of label names. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagKeys` to quite low value in order to limit CPU and memory usage.
 - `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.
--- a/docs/Single-server-VictoriaMetrics.md
+++ b/docs/Single-server-VictoriaMetrics.md
@@ -1361,6 +1361,7 @@ By default VictoriaMetrics is tuned for an optimal resource usage under typical
 - `-search.maxSamplesPerQuery` limits the number of raw samples a single query can process. This allows limiting CPU usage for heavy queries.
 - `-search.maxPointsPerTimeseries` limits the number of calculated points, which can be returned per each matching time series from [range query](https://docs.victoriametrics.com/keyConcepts.html#range-query).
 - `-search.maxPointsSubqueryPerTimeseries` limits the number of calculated points, which can be generated per each matching time series during [subquery](https://docs.victoriametrics.com/MetricsQL.html#subqueries) evaluation.
+- `-search.maxSeriesPerAggrFunc` limits the number of time series, which can be generated by [MetricsQL aggregate functions](https://docs.victoriametrics.com/MetricsQL.html#aggregate-functions) in a single query.
 - `-search.maxSeries` limits the number of time series, which may be returned from [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers). This endpoint is used mostly by Grafana for auto-completion of metric names, label names and label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxSeries` to quite low value in order limit CPU and memory usage.
 - `-search.maxTagKeys` limits the number of items, which may be returned from [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names). This endpoint is used mostly by Grafana for auto-completion of label names. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagKeys` to quite low value in order to limit CPU and memory usage.
 - `-search.maxTagValues` limits the number of items, which may be returned from [/api/v1/label/.../values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values). This endpoint is used mostly by Grafana for auto-completion of label values. Queries to this endpoint may take big amounts of CPU time and memory when the database contains big number of unique time series because of [high churn rate](https://docs.victoriametrics.com/FAQ.html#what-is-high-churn-rate). In this case it might be useful to set the `-search.maxTagValues` to quite low value in order to limit CPU and memory usage.