app/vmselect/promql: add range_normalize(q1, ..., qN) function for normalizing query results into [0..1] value range

This may be useful for analyzing correlation between time series with different value ranges
This commit is contained in:
Aliaksandr Valialkin 2022-11-21 23:24:57 +02:00
parent c33bcae457
commit ad548abd6a
No known key found for this signature in database
GPG Key ID: A72BEC6CD3D0DED1
9 changed files with 63 additions and 5 deletions

View File

@ -6912,6 +6912,23 @@ func TestExecSuccess(t *testing.T) {
resultExpected := []netstorage.Result{r}
f(q, resultExpected)
})
// Two series are passed to range_normalize in a single call; the expected
// results below show each of them rescaled into [0..1] on its own.
t.Run(`range_normalize(time(),alias(-time(),"negative"))`, func(t *testing.T) {
t.Parallel()
q := `range_normalize(time(),alias(-time(), "negative"))`
// Expected output for the first argument: a ramp rising from 0 to 1.
r1 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{0, 0.2, 0.4, 0.6, 0.8, 1},
Timestamps: timestampsExpected,
}
// Expected output for the negated argument: the mirrored ramp falling from 1 to 0.
r2 := netstorage.Result{
MetricName: metricNameExpected,
Values: []float64{1, 0.8, 0.6, 0.4, 0.2, 0},
Timestamps: timestampsExpected,
}
// The second series is expected to carry the name assigned by alias().
r2.MetricName.MetricGroup = []byte("negative")
resultExpected := []netstorage.Result{r1, r2}
f(q, resultExpected)
})
t.Run(`range_first(time())`, func(t *testing.T) {
t.Parallel()
q := `range_first(time())`

View File

@ -91,6 +91,7 @@ var transformFuncs = map[string]transformFunc{
"range_linear_regression": transformRangeLinearRegression,
"range_max": newTransformFuncRange(runningMax),
"range_min": newTransformFuncRange(runningMin),
"range_normalize": transformRangeNormalize,
"range_quantile": transformRangeQuantile,
"range_stddev": transformRangeStddev,
"range_stdvar": transformRangeStdvar,
@ -142,6 +143,7 @@ var transformFuncsKeepMetricName = map[string]bool{
"range_linear_regression": true,
"range_max": true,
"range_min": true,
"range_normalize": true,
"range_quantile": true,
"range_stdvar": true,
"range_sddev": true,
@ -1240,6 +1242,38 @@ func newTransformFuncRange(rf func(a, b float64, idx int) float64) transformFunc
}
}
// transformRangeNormalize implements range_normalize(q1, ..., qN).
//
// Every series of every argument is rescaled in place and independently of the
// other series: its smallest non-NaN value maps to 0 and its largest to 1.
// NaN points are ignored when searching for the bounds and stay NaN afterwards.
//
// A series is dropped from the result when max-min is infinite. Assuming the
// package-level inf is +Inf, this also covers series without any non-NaN value.
// A series whose non-NaN values are all equal has a zero span, so the division
// turns its values into NaN.
func transformRangeNormalize(tfa *transformFuncArg) ([]*timeseries, error) {
	var normalized []*timeseries
	for _, series := range tfa.args {
		for _, ts := range series {
			// Find the bounds of the series, skipping NaN gaps.
			lo, hi := inf, -inf
			for _, x := range ts.Values {
				if math.IsNaN(x) {
					continue
				}
				if x < lo {
					lo = x
				}
				if x > hi {
					hi = x
				}
			}

			span := hi - lo
			if math.IsInf(span, 0) {
				// No usable bounds, so the series cannot be normalized.
				continue
			}

			// Rescale the points in place; the input slice is reused for the result.
			for i := range ts.Values {
				ts.Values[i] = (ts.Values[i] - lo) / span
			}
			normalized = append(normalized, ts)
		}
	}
	return normalized, nil
}
func transformRangeLinearRegression(tfa *transformFuncArg) ([]*timeseries, error) {
args := tfa.args
if err := expectTransformArgsNum(args, 1); err != nil {

View File

@ -17,6 +17,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
* FEATURE: [VictoriaMetrics enterprise](https://docs.victoriametrics.com/enterprise.html): add `-storageNode.filter` command-line flag for filtering the [discovered vmstorage nodes](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#automatic-vmstorage-discovery) with arbitrary regular expressions. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3353).
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow using numeric values with `K`, `Ki`, `M`, `Mi`, `G`, `Gi`, `T` and `Ti` suffixes inside MetricsQL queries. For example `8Ki` equals to `8*1024`, while `8.2M` equals to `8.2*1000*1000`.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_normalize](https://docs.victoriametrics.com/MetricsQL.html#range_normalize) function for normalizing multiple time series into `[0...1]` value range. This function is useful for correlation analysis of time series with distinct value ranges.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_linear_regression](https://docs.victoriametrics.com/MetricsQL.html#range_linear_regression) function for calculating [simple linear regression](https://en.wikipedia.org/wiki/Simple_linear_regression) over the input time series on the selected time range. This function is useful for predictions and capacity planning. For example, `range_linear_regression(process_resident_memory_bytes)` can predict future memory usage based on the past memory usage.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [range_stddev](https://docs.victoriametrics.com/MetricsQL.html#range_stddev) and [range_stdvar](https://docs.victoriametrics.com/MetricsQL.html#range_stdvar) functions.
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): optimize `expr1 op expr2` query when `expr1` returns an empty result. In this case there is no sense in executing `expr2` for `op` not equal to `or`, since the end result will be empty according to [PromQL series matching rules](https://prometheus.io/docs/prometheus/latest/querying/operators/#vector-matching). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3349). Thanks to @jianglinjian for pointing to this case.

View File

@ -1221,6 +1221,11 @@ over the selected time range per each time series returned by `q`. This function
`range_min(q)` is a [transform function](#transform-functions), which calculates the min value across points per each time series returned by `q`.
#### range_normalize
`range_normalize(q1, ...)` is a [transform function](#transform-functions), which normalizes values for time series returned by `q1, ...` into `[0 ... 1]` range.
This function is useful for correlating time series with distinct value ranges.
#### range_quantile
`range_quantile(phi, q)` is a [transform function](#transform-functions), which returns `phi`-quantile across points per each time series returned by `q`.

2
go.mod
View File

@ -12,7 +12,7 @@ require (
// like https://github.com/valyala/fasthttp/commit/996610f021ff45fdc98c2ce7884d5fa4e7f9199b
github.com/VictoriaMetrics/fasthttp v1.1.0
github.com/VictoriaMetrics/metrics v1.23.0
github.com/VictoriaMetrics/metricsql v0.48.0
github.com/VictoriaMetrics/metricsql v0.49.0
github.com/aws/aws-sdk-go-v2 v1.17.1
github.com/aws/aws-sdk-go-v2/config v1.18.1
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.11.40

4
go.sum
View File

@ -100,8 +100,8 @@ github.com/VictoriaMetrics/fasthttp v1.1.0/go.mod h1:/7DMcogqd+aaD3G3Hg5kFgoFwlR
github.com/VictoriaMetrics/metrics v1.18.1/go.mod h1:ArjwVz7WpgpegX/JpB0zpNF2h2232kErkEnzH1sxMmA=
github.com/VictoriaMetrics/metrics v1.23.0 h1:WzfqyzCaxUZip+OBbg1+lV33WChDSu4ssYII3nxtpeA=
github.com/VictoriaMetrics/metrics v1.23.0/go.mod h1:rAr/llLpEnAdTehiNlUxKgnjcOuROSzpw0GvjpEbvFc=
github.com/VictoriaMetrics/metricsql v0.48.0 h1:rq8ULfIDJ0QyDbyQWRuWrMTffEqL2sevU2Zs3Vx1pfw=
github.com/VictoriaMetrics/metricsql v0.48.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VictoriaMetrics/metricsql v0.49.0 h1:7R04eab3gU0PKu8Ksak7SJnORXm0K+hSGt2+t3XGyKg=
github.com/VictoriaMetrics/metricsql v0.49.0/go.mod h1:6pP1ZeLVJHqJrHlF6Ij3gmpQIznSsgktEcZgsAWYel0=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=

View File

@ -386,7 +386,7 @@ func getTransformArgIdxForOptimization(funcName string, args []Expr) int {
return -1
}
switch funcName {
case "", "absent", "scalar", "union", "vector":
case "", "absent", "scalar", "union", "vector", "range_normalize":
return -1
case "end", "now", "pi", "ru", "start", "step", "time":
return -1

View File

@ -76,6 +76,7 @@ var transformFuncs = map[string]bool{
"range_linear_regression": true,
"range_max": true,
"range_min": true,
"range_normalize": true,
"range_quantile": true,
"range_stddev": true,
"range_stdvar": true,

2
vendor/modules.txt vendored
View File

@ -69,7 +69,7 @@ github.com/VictoriaMetrics/fasthttp/stackless
# github.com/VictoriaMetrics/metrics v1.23.0
## explicit; go 1.15
github.com/VictoriaMetrics/metrics
# github.com/VictoriaMetrics/metricsql v0.48.0
# github.com/VictoriaMetrics/metricsql v0.49.0
## explicit; go 1.13
github.com/VictoriaMetrics/metricsql
github.com/VictoriaMetrics/metricsql/binaryop