From e086ef16dac5f9f7c17b45e5d25dff7241db9427 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Sun, 21 Jan 2024 08:53:29 +0800 Subject: [PATCH] =?UTF-8?q?app/vmselect/promql:=20properly=20handle=20poss?= =?UTF-8?q?ible=20negative=20results=20caused=E2=80=A6=20(#5608)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * app/vmselect/promql: properly handle possible negative results caused by float operations precision error in rollup functions like rate() or increase() * fix test --- app/vmselect/promql/exec_test.go | 16 ++++++++-------- app/vmselect/promql/rollup.go | 5 +++++ app/vmselect/promql/rollup_test.go | 13 ++++++++++++- docs/CHANGELOG.md | 4 ++-- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go index 19634753fc..a66f8093d4 100644 --- a/app/vmselect/promql/exec_test.go +++ b/app/vmselect/promql/exec_test.go @@ -8060,10 +8060,10 @@ func TestExecSuccess(t *testing.T) { }) t.Run(`rollup_rate()`, func(t *testing.T) { t.Parallel() - q := `rollup_rate((2000-time())[600s])` + q := `rollup_rate((2200-time())[600s])` r1 := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{5, 4, 3, 2, 1, 0}, + Values: []float64{6, 5, 4, 3, 2, 1}, Timestamps: timestampsExpected, } r1.MetricName.Tags = []storage.Tag{{ @@ -8072,7 +8072,7 @@ func TestExecSuccess(t *testing.T) { }} r2 := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{6, 5, 4, 3, 2, 1}, + Values: []float64{7, 6, 5, 4, 3, 2}, Timestamps: timestampsExpected, } r2.MetricName.Tags = []storage.Tag{{ @@ -8081,7 +8081,7 @@ func TestExecSuccess(t *testing.T) { }} r3 := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{4, 3, 2, 1, 0, -1}, + Values: []float64{5, 4, 3, 2, 1, 0}, Timestamps: timestampsExpected, } r3.MetricName.Tags = []storage.Tag{{ @@ -8093,10 +8093,10 @@ func TestExecSuccess(t *testing.T) { }) t.Run(`rollup_rate(q, "max")`, func(t *testing.T) { t.Parallel() - q := `rollup_rate((2000-time())[600s], "max")` + q := `rollup_rate((2200-time())[600s], "max")` r := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{6, 5, 4, 3, 2, 1}, + Values: []float64{7, 6, 5, 4, 3, 2}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r} @@ -8104,10 +8104,10 @@ func TestExecSuccess(t *testing.T) { }) t.Run(`rollup_rate(q, "avg")`, func(t *testing.T) { t.Parallel() - q := `rollup_rate((2000-time())[600s], "avg")` + q := `rollup_rate((2200-time())[600s], "avg")` r := netstorage.Result{ MetricName: metricNameExpected, - Values: []float64{5, 4, 3, 2, 1, 0}, + Values: []float64{6, 5, 4, 3, 2, 1}, Timestamps: timestampsExpected, } resultExpected := []netstorage.Result{r} diff --git a/app/vmselect/promql/rollup.go b/app/vmselect/promql/rollup.go index cd5f2bbfff..14d9c5c08f 100644 --- a/app/vmselect/promql/rollup.go +++ b/app/vmselect/promql/rollup.go @@ -859,6 +859,11 @@ func removeCounterResets(values []float64) { } prevValue = v values[i] = v + correction + // Check again, there could be precision error in float operations, + // see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5571 + if i > 0 && values[i] < values[i-1] { + values[i] = values[i-1] + } } } diff --git a/app/vmselect/promql/rollup_test.go b/app/vmselect/promql/rollup_test.go index 766a33eaea..e0ba2b8f00 100644 --- a/app/vmselect/promql/rollup_test.go +++ b/app/vmselect/promql/rollup_test.go @@ -125,7 +125,7 @@ func TestRemoveCounterResets(t *testing.T) { // removeCounterResets doesn't expect negative values, so it doesn't work properly with them. values = []float64{-100, -200, -300, -400} removeCounterResets(values) - valuesExpected = []float64{-100, -300, -600, -1000} + valuesExpected = []float64{-100, -100, -100, -100} timestampsExpected := []int64{0, 1, 2, 3} testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected) @@ -136,6 +136,17 @@ func TestRemoveCounterResets(t *testing.T) { valuesExpected = []float64{100, 100, 125, 125, 145, 195} timestampsExpected = []int64{0, 1, 2, 3, 4, 5} testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected) + + // verify results always increase monotonically with possible float operations precision error + values = []float64{34.094223, 2.7518, 2.140669, 0.044878, 1.887095, 2.546569, 2.490149, 0.045, 0.035684, 0.062454, 0.058296} + removeCounterResets(values) + var prev float64 + for i, v := range values { + if v < prev { + t.Fatalf("error: unexpected value keep getting bigger %d; cur %v; pre %v\n", i, v, prev) + } + prev = v + } } func TestDeltaValues(t *testing.T) { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 72dcf4d245..29f56196c3 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -65,10 +65,10 @@ The sandbox cluster installation is running under the constant load generated by * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): exit if there is config syntax error in [`scrape_config_files`](https://docs.victoriametrics.com/vmagent.html#loading-scrape-configs-from-multiple-files) when `-promscrape.config.strictParse=true`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5508). * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not store scrape response for target in memory when staleness markers are disabled. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5577) for details. * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): fix a link for the statistic inaccuracy explanation in the cardinality explorer tool. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5460). +* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): send `step` param for instant queries. The change reverts [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896) due to reasons explained in [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896#issuecomment-1896704401). * BUGFIX: all: fix potential panic during components shutdown when [metrics push](https://docs.victoriametrics.com/#push-metrics) is configured. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5548). Thanks to @zhdd99 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5549). * BUGFIX: [vmselect](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly determine time range search for instant queries with too big look-behind window like `foo[100y]`. Previously, such queries could return empty responses even if `foo` is present in database. -* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): send `step` param for instant queries. The change reverts [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896) due to reasons explained in [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3896#issuecomment-1896704401). - +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle possible negative results caused by float operations precision error in rollup functions like rate() or increase(). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5571). ## [v1.96.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.96.0)