app/vmselect/promql: add support for matching against multiple numeric constants via q == (c1,...,cN) and q != (c1,...,cN) syntax

2024-12-16 17:21:04 +01:00 · 2024-04-19 17:56:29 +02:00 · 2024-04-19 17:56:29 +02:00 · fba3c10ed1
commit fba3c10ed1
parent 5504b6f2bd
5 changed files with 163 additions and 3 deletions
--- a/app/vmselect/promql/binary_op.go
+++ b/app/vmselect/promql/binary_op.go
@ -23,8 +23,8 @@ var binaryOpFuncs = map[string]binaryOpFunc{
 	"atan2": newBinaryOpArithFunc(binaryop.Atan2),

 	// cmp ops
-	"==": newBinaryOpCmpFunc(binaryop.Eq),
-	"!=": newBinaryOpCmpFunc(binaryop.Neq),
+	"==": binaryOpEqFunc,
+	"!=": binaryOpNeqFunc,
 	">":  newBinaryOpCmpFunc(binaryop.Gt),
 	"<":  newBinaryOpCmpFunc(binaryop.Lt),
 	">=": newBinaryOpCmpFunc(binaryop.Gte),
@ -54,6 +54,84 @@ type binaryOpFuncArg struct {

 type binaryOpFunc func(bfa *binaryOpFuncArg) ([]*timeseries, error)

+// binaryOpEqFunc implements `==`, including the list form `q == (c1,...,cN)`.
+func binaryOpEqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
+	leftIsList := isUnionFunc(bfa.be.Left)
+	if !leftIsList && !isUnionFunc(bfa.be.Right) {
+		return binaryOpEqStdFunc(bfa)
+	}
+
+	// series holds the series to filter, while list holds the union operand to match against.
+	series, list := bfa.left, bfa.right
+	if leftIsList {
+		series, list = bfa.right, bfa.left
+	}
+	if len(series) == 0 || len(list) == 0 {
+		return nil, nil
+	}
+	for _, ts := range series {
+		for i, v := range ts.Values {
+			if !containsValueAt(list, v, i) {
+				ts.Values[i] = nan
+			}
+		}
+	}
+	// Series consisting solely of NaNs are kept on purpose, so `(foo op bar) default N`
+	// keeps working when `(foo op bar)` yields NaN-only series.
+	return series, nil
+}
+
+// binaryOpNeqFunc implements `!=`, including the list form `q != (c1,...,cN)`.
+func binaryOpNeqFunc(bfa *binaryOpFuncArg) ([]*timeseries, error) {
+	leftIsList := isUnionFunc(bfa.be.Left)
+	if !leftIsList && !isUnionFunc(bfa.be.Right) {
+		return binaryOpNeqStdFunc(bfa)
+	}
+
+	// series holds the series to filter, while list holds the union operand to exclude.
+	series, list := bfa.left, bfa.right
+	if leftIsList {
+		series, list = bfa.right, bfa.left
+	}
+	switch {
+	case len(series) == 0:
+		return nil, nil
+	case len(list) == 0:
+		// Nothing to exclude - return the series as is.
+		return series, nil
+	}
+	for _, ts := range series {
+		for i, v := range ts.Values {
+			if containsValueAt(list, v, i) {
+				ts.Values[i] = nan
+			}
+		}
+	}
+	// NaN-only series are kept on purpose, so `(foo op bar) default N` keeps working.
+	return series, nil
+}
+
+// isUnionFunc reports whether e is a function call named `union` (any casing) or with an empty name.
+func isUnionFunc(e metricsql.Expr) bool {
+	fe, ok := e.(*metricsql.FuncExpr)
+	// EqualFold compares case-insensitively without building a lowercased copy of the name.
+	return ok && (fe.Name == "" || strings.EqualFold(fe.Name, "union"))
+}
+
+// containsValueAt reports whether any series in tss has a value equal to v at index idx (NaN never matches).
+func containsValueAt(tss []*timeseries, v float64, idx int) bool {
+	found := false
+	for i := 0; i < len(tss) && !found; i++ {
+		found = tss[i].Values[idx] == v
+	}
+	return found
+}
+
+var (
+	binaryOpEqStdFunc  = newBinaryOpCmpFunc(binaryop.Eq)  // regular `==`, used when neither operand is a union
+	binaryOpNeqStdFunc = newBinaryOpCmpFunc(binaryop.Neq) // regular `!=`, used when neither operand is a union
+)
+
 func newBinaryOpCmpFunc(cf func(left, right float64) bool) binaryOpFunc {
 	cfe := func(left, right float64, isBool bool) float64 {
 		if !isBool {
--- a/app/vmselect/promql/exec_test.go
+++ b/app/vmselect/promql/exec_test.go
@ -5213,9 +5213,24 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
-	t.Run(`sum(union-args)`, func(t *testing.T) {
+	t.Run(`sum(union-scalars)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sum((1, 2, 3))`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{6, 6, 6, 6, 6, 6},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`sum(union-vectors)`, func(t *testing.T) {
+		t.Parallel()
+		q := `sum((
+			alias(1, "foo"),
+			alias(2, "foo"),
+			alias(3, "foo"),
+		))`
 		r := netstorage.Result{
 			MetricName: metricNameExpected,
 			Values:     []float64{1, 1, 1, 1, 1, 1},
@ -5773,6 +5788,51 @@ func TestExecSuccess(t *testing.T) {
 		resultExpected := []netstorage.Result{r}
 		f(q, resultExpected)
 	})
+	t.Run(`equal-list`, func(t *testing.T) {
+		t.Parallel()
+		q := `time() == (100, 1000, 1400, 600)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, nan, 1400, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`equal-list-reverse`, func(t *testing.T) {
+		t.Parallel()
+		q := `(100, 1000, 1400, 600) == time()`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{1000, nan, 1400, nan, nan, nan},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`not-equal-list`, func(t *testing.T) {
+		t.Parallel()
+		q := `alias(time(), "foobar") != UNIon(100, 1000, 1400, 600)`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1200, nan, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		r.MetricName.MetricGroup = []byte("foobar")
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
+	t.Run(`not-equal-list-reverse`, func(t *testing.T) {
+		t.Parallel()
+		q := `(100, 1000, 1400, 600) != time()`
+		r := netstorage.Result{
+			MetricName: metricNameExpected,
+			Values:     []float64{nan, 1200, nan, 1600, 1800, 2000},
+			Timestamps: timestampsExpected,
+		}
+		resultExpected := []netstorage.Result{r}
+		f(q, resultExpected)
+	})
 	t.Run(`quantiles_over_time(single_sample)`, func(t *testing.T) {
 		t.Parallel()
 		q := `sort_by_label(
--- a/app/vmselect/promql/transform.go
+++ b/app/vmselect/promql/transform.go
@ -1658,6 +1658,16 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
 		return evalNumber(tfa.ec, nan), nil
 	}

+	if areAllArgsScalar(args) {
+		// Special case for (v1,...,vN) where vX are scalars - return all the scalars as time series.
+		// This is needed for "q == (v1,...,vN)" and "q != (v1,...,vN)" cases, where vX are numeric constants.
+		rvs := make([]*timeseries, len(args))
+		for i, arg := range args {
+			rvs[i] = arg[0]
+		}
+		return rvs, nil
+	}
+
 	rvs := make([]*timeseries, 0, len(args[0]))
 	m := make(map[string]bool, len(args[0]))
 	bb := bbPool.Get()
@ -1676,6 +1686,15 @@ func transformUnion(tfa *transformFuncArg) ([]*timeseries, error) {
 	return rvs, nil
 }

+// areAllArgsScalar reports whether isScalar holds for every element of args.
+func areAllArgsScalar(args [][]*timeseries) bool {
+	allScalar := true
+	for i := 0; allScalar && i < len(args); i++ {
+		allScalar = isScalar(args[i])
+	}
+	return allScalar
+}
+
 func transformLabelKeep(tfa *transformFuncArg) ([]*timeseries, error) {
 	args := tfa.args
 	if len(args) < 1 {
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -30,6 +30,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).

 ## tip

+* FEATURE: [MetricsQL](https://docs.victoriametrics.com/metricsql/): support filtering by multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (200, 201, 300)` returns `status_code` metrics with any of `200`, `201` or `300` values, while `status_code != (400, 404, 500)` returns `status_code` metrics with all the values except for `400`, `404` and `500`.
 * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): add support for fault domain awareness to `vmselect`. It can be configured to return full responses if up to `-globalReplicationFactor - 1` fault domains (aka `vmstorage` groups) are unavailable. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054) and [these docs](https://docs.victoriametrics.com/cluster-victoriametrics/#vmstorage-groups-at-vmselect).
 * FEATURE: all VictoriaMetrics [enterprise](https://docs.victoriametrics.com/enterprise/) components: add support for automatic issuing of TLS certificates for HTTPS server at `-httpListenAddr` via [Let's Encrypt service](https://letsencrypt.org/). See [these docs](https://docs.victoriametrics.com/#automatic-issuing-of-tls-certificates) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5949).
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): support data replication additionally to sharding among remote storage systems if `-remoteWrite.shardByURLReplicas=N` command-line flag is set additionally to `-remoteWrite.shardByURL` command-line flag, where `N` is desired replication factor. This allows setting up data replication among failure domains when the replication factor is smaller than the number of failure domains. See [these docs](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6054).
--- a/docs/MetricsQL.md
+++ b/docs/MetricsQL.md
@ -79,6 +79,8 @@ The list of MetricsQL features on top of PromQL:
 * [Series selectors](https://docs.victoriametrics.com/keyconcepts/#filtering) accept multiple `or` filters. For example, `{env="prod",job="a" or env="dev",job="b"}`
  selects series with `{env="prod",job="a"}` or `{env="dev",job="b"}` labels.
  See [these docs](https://docs.victoriametrics.com/keyconcepts/#filtering-by-multiple-or-filters) for details.
+* Support for matching against multiple numeric constants via `q == (C1, ..., CN)` and `q != (C1, ..., CN)` syntax. For example, `status_code == (300, 301, 304)`
+  returns `status_code` metrics with one of `300`, `301` or `304` values.
 * Support for `group_left(*)` and `group_right(*)` for copying all the labels from time series on the `one` side
  of [many-to-one operations](https://prometheus.io/docs/prometheus/latest/querying/operators/#many-to-one-and-one-to-many-vector-matches).
  The copied label names may clash with the existing label names, so MetricsQL provides an ability to add prefix to the copied metric names