From dac24aa342b204860c0e3728fff89fddcdc6bab5 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Thu, 21 Apr 2022 15:33:27 +0300
Subject: [PATCH] app/vmselect/promql: properly handle `scalar default vector`,
 `scalar if vector` and `scalar ifnot vector` queries

Previously `vector` time series could be unexpectedly returned from such queries
---
 app/vmselect/promql/binary_op.go |   2 +-
 app/vmselect/promql/exec_test.go | 115 +++++++++++++++++++++++++++++--
 docs/CHANGELOG.md                |   1 +
 3 files changed, 112 insertions(+), 6 deletions(-)

diff --git a/app/vmselect/promql/binary_op.go b/app/vmselect/promql/binary_op.go
index db1f729ca9..699f54d18f 100644
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@@ -136,7 +136,7 @@ func newBinaryOpFunc(bf func(left, right float64, isBool bool) float64) binaryOp
 
 func adjustBinaryOpTags(be *metricsql.BinaryOpExpr, left, right []*timeseries) ([]*timeseries, []*timeseries, []*timeseries, error) {
 	if len(be.GroupModifier.Op) == 0 && len(be.JoinModifier.Op) == 0 {
-		if isScalar(left) {
+		if isScalar(left) && be.Op != "default" && be.Op != "if" && be.Op != "ifnot" {
 			// Fast path: `scalar op vector`
 			rvsLeft := make([]*timeseries, len(right))
 			tsLeft := left[0]
diff --git a/app/vmselect/promql/exec_test.go b/app/vmselect/promql/exec_test.go
index ed7e0f8f27..7c22dcfbc8 100644
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@@ -1855,10 +1855,10 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`drop_common_labels(multi_series)`, func(t *testing.T) {
 		t.Parallel()
-		q := `drop_common_labels((
+		q := `sort_desc(drop_common_labels((
 			label_set(time(), "foo", "bar", "__name__", "xxx", "q", "we"),
 			label_set(time()/10, "foo", "bar", "__name__", "yyy"),
-		))`
+		)))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{1000, 1200, 1400, 1600, 1800, 2000},
@@ -1880,10 +1880,10 @@ func TestExecSuccess(t *testing.T) {
 	})
 	t.Run(`drop_common_labels(multi_args)`, func(t *testing.T) {
 		t.Parallel()
-		q := `drop_common_labels(
+		q := `sort(drop_common_labels(
 			label_set(time(), "foo", "bar", "__name__", "xxx", "q", "we"),
 			label_set(time()/10, "foo", "bar", "__name__", "xxx"),
-		)`
+		))`
 		r1 := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{100, 120, 140, 160, 180, 200},
@@ -2640,7 +2640,7 @@ func TestExecSuccess(t *testing.T) {
 		q := `1 != nan`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
-			Values: []float64{1, 1, 1, 1, 1, 1},
+			Values:     []float64{1, 1, 1, 1, 1, 1},
 			Timestamps: timestampsExpected,
 		}
 		resultExpected := []netstorage.Result{r}
@@ -2761,6 +2761,37 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`scalar default scalar_from_vector`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() > 1400 default scalar(label_set(123, "foo", "bar"))`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{123, 123, 123, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`scalar default vector1`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() > 1400 default label_set(123, "foo", "bar")`
+		resultExpected := []netstorage.Result{}
+		f(q, resultExpected)
+	})
+	t.Run(`scalar default vector2`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() > 1400 default (
+			label_set(123, "foo", "bar"),
+			label_set(456, "__name__", "xxx"),
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{456, 456, 456, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`scalar default NaN`, func(t *testing.T) {
 		t.Parallel()
 		q := `time() > 1400 default (time() < -100)`
@@ -5923,6 +5954,80 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r1}
 		f(q, resultExpected)
 	})
+	t.Run(`vector2 if vector1`, func(t *testing.T) {
+		t.Parallel()
+		q := `(
+			label_set(time()/10, "x", "y"),
+			label_set(time(), "foo", "bar", "__name__", "x"),
+		) if (
+			label_set(time()>1400, "foo", "bar"),
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("x")
+		r.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`vector2 if vector2`, func(t *testing.T) {
+		t.Parallel()
+		q := `sort((
+			label_set(time()/10, "x", "y"),
+			label_set(time(), "foo", "bar", "__name__", "x"),
+		) if (
+			label_set(time()>1400, "foo", "bar"),
+			label_set(time()<1400, "x", "y"),
+		))`
+		r1 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{100, 120, nan, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		r1.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("x"),
+			Value: []byte("y"),
+		}}
+		r2 := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r2.MetricName.MetricGroup = []byte("x")
+		r2.MetricName.Tags = []storage.Tag{{
+			Key:   []byte("foo"),
+			Value: []byte("bar"),
+		}}
+		resultExpected := []netstorage.Result{r1, r2}
+		f(q, resultExpected)
+	})
+	t.Run(`scalar if vector1`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() if (
+			label_set(123, "foo", "bar"),
+		)`
+		resultExpected := []netstorage.Result{}
+		f(q, resultExpected)
+	})
+	t.Run(`scalar if vector2`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() if (
+			label_set(123, "foo", "bar"),
+			alias(time() > 1400, "xxx"),
+		)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, nan, nan, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`if-default`, func(t *testing.T) {
 		t.Parallel()
 		q := `time() if time() > 1400 default -time()`
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index 2b9bade661..0130a17feb 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -28,6 +28,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
 * BUGFIX: [vmctl](https://docs.victoriametrics.com/vmctl.html): return non-zero exit code on error. This allows handling `vmctl` errors in shell scripts. Previously `vmctl` was returning 0 exit code on error. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2322).
 * BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly show `scrape_timeout` and `scrape_interval` options at `http://vmagent:8429/config` page. Previously these options weren't displayed even if they were set in `-promscrape.config`.
 * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle joins on time series filtered by values. For example, `kube_pod_container_resource_requests{resource="cpu"} * on (namespace,pod) group_left() (kube_pod_status_phase{phase=~"Pending|Running"}==1)`. This query may result in `duplicate time series on the right side` error even if `==1` filter leaves only a single time series per `(namespace,pod)` labels. Now such query is properly executed.
+* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle `scalar default vector`, `scalar if vector` and `scalar ifnot vector` queries. Previously such queries could return unexpected results from the `vector` part.
 
 
 ## [v1.76.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.76.1)