From 273472dc2014cfb13bc81406357e22ede31ac88f Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Fri, 30 Sep 2022 07:20:34 +0200 Subject: [PATCH] app/vmselect: ignore empty series for `limit_offset` (#3178) * app/vmselect: ignore empty series for `limit_offset` VictoriaMetrics doesn't return empty series (with all NaN values) to the user. But such series are filtered after transform functions. It means `limit_offset` will account for empty series as well. For example, let's consider following data set: ``` time series: foo{label="1"} NaN, NaN, NaN, NaN // empty series foo{label="2"} 1, 2, 3, 4 foo{label="3"} 4, 3, 2, 1 ``` When user requests all series for metric `foo` the empty series will be filtered out: ``` /query=foo: foo{label="v2"} 1, 2, 3, 4 foo{label="v3"} 4, 3, 2, 1 ``` But `limit_offset(1, 1, foo)` is applied to original series, not filtered yet. So it will return `foo{label="v2"}` (skips the first in list) ``` /query=limit_offset(1, 1, foo): foo{label="v2"} 1, 2, 3, 4 ``` Expected result would be to apply `limit_offset` to already filtered list, so in result we receive `foo{label="v3"}`: ``` /query=limit_offset(1, 1, foo): foo{label="v3"} 4, 3, 2, 1 ``` The change does exactly that - filters empty series before applying `limit_offset`. Signed-off-by: hagen1778 * app/vmselect: ignore empty series for `limit_offset` Signed-off-by: hagen1778 Signed-off-by: hagen1778 --- app/vmselect/promql/exec_test.go | 21 +++++++++++++++++++++ app/vmselect/promql/transform.go | 4 +++- docs/CHANGELOG.md | 1 + 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index e2795e2a01..29d1883a4c 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -2291,6 +2291,27 @@ func TestExecSuccess(t *testing.T) { resultExpected := []netstorage.Result{r} f(q, resultExpected) }) + t.Run(`limit_offset NaN`, func(t *testing.T) { + t.Parallel() + // q returns 3 time series, where foo=3 contains only NaN values + // limit_offset suppose to apply offset for non-NaN series only + q := `limit_offset(1, 1, sort_by_label_desc(( + label_set(time()*1, "foo", "1"), + label_set(time()*2, "foo", "2"), + label_set(time()*3, "foo", "3"), + ) < 3000, "foo"))` + r := netstorage.Result{ + MetricName: metricNameExpected, + Values: []float64{1000, 1200, 1400, 1600, 1800, 2000}, + Timestamps: timestampsExpected, + } + r.MetricName.Tags = []storage.Tag{{ + Key: []byte("foo"), + Value: []byte("1"), + }} + resultExpected := []netstorage.Result{r} + f(q, resultExpected) + }) t.Run(`sum(label_graphite_group)`, func(t *testing.T) { t.Parallel() q := `sort(sum by (__name__) ( diff --git a/app/vmselect/promql/transform.go b/app/vmselect/promql/transform.go index 739d6e7c40..fe0baec8e4 100644 --- a/app/vmselect/promql/transform.go +++ b/app/vmselect/promql/transform.go @@ -1854,7 +1854,9 @@ func transformLimitOffset(tfa *transformFuncArg) ([]*timeseries, error) { if err != nil { return nil, fmt.Errorf("cannot obtain offset arg: %w", err) } - rvs := args[2] + // removeEmptySeries so offset will be calculated after empty series + // were filtered out. + rvs := removeEmptySeries(args[2]) if len(rvs) >= offset { rvs = rvs[offset:] } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c27f3d3fdb..cf557b57ce 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -45,6 +45,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): re-evaluate annotations per each alert evaluation. Previously, annotations were evaluated only on alert's value change. This could result in stale annotations in some cases described in [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3119). * BUGFIX: prevent from excessive CPU usage when the storage enters [read-only mode](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode). The previous fix in [v1.81.0](https://docs.victoriametrics.com/CHANGELOG.html#v1810) wasn't complete. * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): change default value for command-line flag `-datasource.queryStep` from `0s` to `5m`. Param `step` is added by vmalert to every rule evaluation request sent to datasource. Before this change, `step` was equal to group's evaluation interval by default. Param `step` for instant queries defines how far VM can look back for the last written data point. The change supposed to improve reliability of the rules evaluation when evaluation interval is lower than scraping interval. +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): ignore empty series when applying `limit_offset`. It should improve queries with additional filters by value in expressions like `limit_offset(1,1, foo > 1)`. ## [v1.81.2](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.81.2)