diff --git a/app/vmselect/main.go b/app/vmselect/main.go
index fec9d6d42b..e79f20bd6f 100644
--- a/app/vmselect/main.go
+++ b/app/vmselect/main.go
@@ -305,7 +305,7 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
fmt.Fprintf(w, "See docs")
fmt.Fprintf(w, "Useful endpoints:")
fmt.Fprintf(w, `Web UI
`)
- fmt.Fprintf(w, `Metric-level relabel debugging`)
+ fmt.Fprintf(w, `Metric-level relabel debugging`)
fmt.Fprintf(w, `tsdb status page
`)
fmt.Fprintf(w, `top queries
`)
fmt.Fprintf(w, `active queries
`)
@@ -594,6 +594,10 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
promscrapeTargetRelabelDebugRequests.Inc()
promscrape.WriteTargetRelabelDebug(w, r)
return true
+ case "prometheus/expand-with-exprs", "expand-with-exprs":
+ expandWithExprsRequests.Inc()
+ prometheus.ExpandWithExprs(w, r)
+ return true
case "prometheus/api/v1/rules", "prometheus/rules":
rulesRequests.Inc()
if len(*vmalertProxyURL) > 0 {
@@ -768,6 +772,8 @@ var (
promscrapeMetricRelabelDebugRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/metric-relabel-debug"}`)
promscrapeTargetRelabelDebugRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/target-relabel-debug"}`)
+ expandWithExprsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/expand-with-exprs"}`)
+
vmalertRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/vmalert"}`)
rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/rules"}`)
alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/select/{}/prometheus/api/v1/alerts"}`)
diff --git a/app/vmselect/prometheus/expand-with-exprs.qtpl b/app/vmselect/prometheus/expand-with-exprs.qtpl
new file mode 100644
index 0000000000..e64eeaefc8
--- /dev/null
+++ b/app/vmselect/prometheus/expand-with-exprs.qtpl
@@ -0,0 +1,245 @@
+{% import (
+ "github.com/VictoriaMetrics/metricsql"
+) %}
+
+{% stripspace %}
+
+// ExpandWithExprsResponse returns a webpage, which expands with templates in q MetricsQL.
+{% func ExpandWithExprsResponse(q string) %}
+
+
+ Let's look at the following real query from Node Exporter Full dashboard: +
+ ++( + ( + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} + - + node_memory_MemFree_bytes{instance=~"$node:$port", job=~"$job"} + ) + / + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} +) + * +100 ++ +
+ It is clear the query calculates the percentage of used memory + for the given $node, $port and $job. Isn't it? :) +
+ ++ What's wrong with this query? Copy-pasted label filters for distinct timeseries + which makes it easy to mistype these filters during modification. + Let's simplify the query with WITH expressions: +
+ ++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + node_memory_MemTotal_bytes{commonFilters} + - + node_memory_MemFree_bytes{commonFilters} +) + / +node_memory_MemTotal_bytes{commonFilters} * 100 ++ +
+ Now label filters are located in a single place instead of three distinct places. + The query mentions node_memory_MemTotal_bytes metric twice and {commonFilters} + three times. WITH expressions may improve this: +
+ ++WITH ( + my_resource_utilization(free, limit, filters) = (limit{filters} - free{filters}) / limit{filters} * 100 +) +my_resource_utilization( + node_memory_MemFree_bytes, + node_memory_MemTotal_bytes, + {instance=~"$node:$port",job=~"$job"}, +) ++ +
+ Now the template function my_resource_utilization() may be used for monitoring arbitrary + resources - memory, CPU, network, storage, you name it. +
+ ++ Let's take another nice query from Node Exporter Full dashboard: +
+ ++( + ( + ( + count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',instance=~"$node:$port",job=~"$job"}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) +) ++ +
+ Do you understand what this mess does? Is it manageable? :) WITH expressions are happy to help in a few iterations.
+
+
+ 1. Extract common filters used in multiple places into a commonFilters variable:
+
+WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + ( + ( + count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) +) ++ +
+ 2. Extract "count(count(...) by (cpu))" into cpuCount variable: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)) +) +( + ( + cpuCount + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) / cpuCount ++ +
+ 3. Extract rate(...) part into cpuIdle variable, since it is clear now that this part calculates the number of idle CPUs: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)), + cpuIdle = sum(rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ 4. Put node_cpu_seconds_total{commonFilters} into its own variable with the name cpuSeconds: +
++WITH ( + cpuSeconds = node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(cpuSeconds) by (cpu)), + cpuIdle = sum(rate(cpuSeconds{mode='idle'}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ Now the query is much clearer compared to the initial query. +
+ ++ WITH expressions may be nested and may be put anywhere. Try expanding the following query: +
+ ++WITH ( + f(a, b) = WITH ( + f1(x) = b-x, + f2(x) = x+x + ) f1(a)*f2(b) +) f(foo, with(x=bar) x) ++ +{% endfunc %} diff --git a/app/vmselect/prometheus/expand-with-exprs.qtpl.go b/app/vmselect/prometheus/expand-with-exprs.qtpl.go new file mode 100644 index 0000000000..875a33e8b9 --- /dev/null +++ b/app/vmselect/prometheus/expand-with-exprs.qtpl.go @@ -0,0 +1,345 @@ +// Code generated by qtc from "expand-with-exprs.qtpl". DO NOT EDIT. +// See https://github.com/valyala/quicktemplate for details. + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:1 +package prometheus + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:1 +import ( + "github.com/VictoriaMetrics/metricsql" +) + +// ExpandWithExprsResponse returns a webpage, which expands with templates in q MetricsQL. + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 +import ( + qtio422016 "io" + + qt422016 "github.com/valyala/quicktemplate" +) + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 +var ( + _ = qtio422016.Copy + _ = qt422016.AcquireByteBuffer +) + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 +func StreamExpandWithExprsResponse(qw422016 *qt422016.Writer, q string) { +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 + qw422016.N().S(`
+ Let's look at the following real query from Node Exporter Full dashboard: +
+ ++( + ( + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} + - + node_memory_MemFree_bytes{instance=~"$node:$port", job=~"$job"} + ) + / + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} +) + * +100 ++ +
+ It is clear the query calculates the percentage of used memory + for the given $node, $port and $job. Isn't it? :) +
+ ++ What's wrong with this query? Copy-pasted label filters for distinct timeseries + which makes it easy to mistype these filters during modification. + Let's simplify the query with WITH expressions: +
+ ++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + node_memory_MemTotal_bytes{commonFilters} + - + node_memory_MemFree_bytes{commonFilters} +) + / +node_memory_MemTotal_bytes{commonFilters} * 100 ++ +
+ Now label filters are located in a single place instead of three distinct places. + The query mentions node_memory_MemTotal_bytes metric twice and {commonFilters} + three times. WITH expressions may improve this: +
+ ++WITH ( + my_resource_utilization(free, limit, filters) = (limit{filters} - free{filters}) / limit{filters} * 100 +) +my_resource_utilization( + node_memory_MemFree_bytes, + node_memory_MemTotal_bytes, + {instance=~"$node:$port",job=~"$job"}, +) ++ +
+ Now the template function my_resource_utilization() may be used for monitoring arbitrary + resources - memory, CPU, network, storage, you name it. +
+ ++ Let's take another nice query from Node Exporter Full dashboard: +
+ ++( + ( + ( + count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',instance=~"$node:$port",job=~"$job"}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) +) ++ +
+ Do you understand what this mess does? Is it manageable? :) WITH expressions are happy to help in a few iterations.
+
+
+ 1. Extract common filters used in multiple places into a commonFilters variable:
+
+WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + ( + ( + count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) +) ++ +
+ 2. Extract "count(count(...) by (cpu))" into cpuCount variable: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)) +) +( + ( + cpuCount + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) / cpuCount ++ +
+ 3. Extract rate(...) part into cpuIdle variable, since it is clear now that this part calculates the number of idle CPUs: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)), + cpuIdle = sum(rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ 4. Put node_cpu_seconds_total{commonFilters} into its own variable with the name cpuSeconds: +
++WITH ( + cpuSeconds = node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(cpuSeconds) by (cpu)), + cpuIdle = sum(rate(cpuSeconds{mode='idle'}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ Now the query is much clearer compared to the initial query. +
+ ++ WITH expressions may be nested and may be put anywhere. Try expanding the following query: +
+ ++WITH ( + f(a, b) = WITH ( + f1(x) = b-x, + f2(x) = x+x + ) f1(a)*f2(b) +) f(foo, with(x=bar) x) ++ +`) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +} + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +func writewithExprsTutorial(qq422016 qtio422016.Writer) { +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + streamwithExprsTutorial(qw422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +} + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +func withExprsTutorial() string { +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + writewithExprsTutorial(qb422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + return qs422016 +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +} diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 5e35769c4e..c1c59e2c50 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -61,6 +61,15 @@ var ( // Default step used if not set. const defaultStep = 5 * 60 * 1000 +// ExpandWithExprs handles the request to /expand-with-exprs +func ExpandWithExprs(w http.ResponseWriter, r *http.Request) { + query := r.FormValue("query") + bw := bufferedwriter.Get(w) + defer bufferedwriter.Put(bw) + WriteExpandWithExprsResponse(bw, query) + _ = bw.Flush() +} + // FederateHandler implements /federate . 
See https://prometheus.io/docs/prometheus/latest/federation/ func FederateHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { defer federateDuration.UpdateDuration(startTime) @@ -413,10 +422,7 @@ func exportHandler(qt *querytracer.Tracer, at *auth.Token, w http.ResponseWriter if format == "promapi" { WriteExportPromAPIFooter(bw, qt) } - if err := bw.Flush(); err != nil { - return err - } - return nil + return bw.Flush() } type exportBlock struct { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index bebb919818..b71adbb922 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -673,7 +673,7 @@ Released at 12-04-2022 * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly propagate limits at `-search.max*` command-line flags from `vminsert` to `vmstorage`. The limits are `-search.maxUniqueTimeseries`, `-search.maxSeries`, `-search.maxFederateSeries`, `-search.maxExportSeries`, `-search.maxGraphiteSeries` and `-search.maxTSDBStatusSeries`. They weren't propagated to `vmstorage` because of the bug. These limits were introduced in [v1.76.0](https://docs.victoriametrics.com/CHANGELOG.html#v1760). See [this bug](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2450). * BUGFIX: fix goroutine leak and possible deadlock when importing invalid data via [native binary format](https://docs.victoriametrics.com/#how-to-import-data-in-native-format). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2423). * BUGFIX: [Graphite Render API](https://docs.victoriametrics.com/#graphite-render-api-usage): properly calculate [hitCount](https://graphite.readthedocs.io/en/latest/functions.html#graphite.render.functions.hitcount) function. Previously it could return empty results if there were no original samples in some parts of the selected time range. 
-* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow overriding built-in function names inside [WITH templates](https://play.victoriametrics.com/promql/expand-with-exprs). For example, `WITH (sum(a,b) = a + b + 1) sum(x,y)` now expands into `x + y + 1`. Previously such a query would fail with `cannot use reserved name` error. See [this bugreport](https://github.com/VictoriaMetrics/metricsql/issues/5). +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow overriding built-in function names inside [WITH templates](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs). For example, `WITH (sum(a,b) = a + b + 1) sum(x,y)` now expands into `x + y + 1`. Previously such a query would fail with `cannot use reserved name` error. See [this bugreport](https://github.com/VictoriaMetrics/metricsql/issues/5). * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly display values greater than 1000 on Y axis. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2409). diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 9b0a8c447f..f06e38a962 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -90,7 +90,7 @@ The list of MetricsQL features: * `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`. * `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`. * `WITH` templates. This feature simplifies writing and managing complex queries. - Go to [WITH templates playground](https://play.victoriametrics.com/promql/expand-with-exprs) and try it. + Go to [WITH templates playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs) and try it. * String literals may be concatenated. 
This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`. * `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions) and [transform functions](#transform-functions).