diff --git a/app/victoria-metrics/main.go b/app/victoria-metrics/main.go index 503598f00..3839feed9 100644 --- a/app/victoria-metrics/main.go +++ b/app/victoria-metrics/main.go @@ -102,6 +102,7 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool { {"targets", "status for discovered active targets"}, {"service-discovery", "labels before and after relabeling for discovered targets"}, {"metric-relabel-debug", "debug metric relabeling"}, + {"expand-with-exprs", "WITH expressions' tutorial"}, {"api/v1/targets", "advanced information about discovered targets in JSON format"}, {"config", "-promscrape.config contents"}, {"metrics", "available service metrics"}, diff --git a/app/vmselect/main.go b/app/vmselect/main.go index abcea87d8..f021a46c0 100644 --- a/app/vmselect/main.go +++ b/app/vmselect/main.go @@ -432,6 +432,10 @@ func RequestHandler(w http.ResponseWriter, r *http.Request) bool { promscrapeTargetRelabelDebugRequests.Inc() promscrape.WriteTargetRelabelDebug(w, r) return true + case "/expand-with-exprs": + expandWithExprsRequests.Inc() + prometheus.ExpandWithExprs(w, r) + return true case "/api/v1/rules", "/rules": rulesRequests.Inc() if len(*vmalertProxyURL) > 0 { @@ -601,6 +605,8 @@ var ( graphiteFunctionsRequests = metrics.NewCounter(`vm_http_requests_total{path="/functions"}`) + expandWithExprsRequests = metrics.NewCounter(`vm_http_requests_total{path="/expand-with-exprs"}`) + vmalertRequests = metrics.NewCounter(`vm_http_requests_total{path="/vmalert"}`) rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`) alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`) diff --git a/app/vmselect/prometheus/expand-with-exprs.qtpl b/app/vmselect/prometheus/expand-with-exprs.qtpl new file mode 100644 index 000000000..e64eeaefc --- /dev/null +++ b/app/vmselect/prometheus/expand-with-exprs.qtpl @@ -0,0 +1,245 @@ +{% import ( + "github.com/VictoriaMetrics/metricsql" +) %} + +{% stripspace %} + +// 
ExpandWithExprsResponse returns a webpage, which expands WITH templates in the MetricsQL query q. +{% func ExpandWithExprsResponse(q string) %} + +
++ Let's look at the following real query from Node Exporter Full dashboard: +
+ ++( + ( + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} + - + node_memory_MemFree_bytes{instance=~"$node:$port", job=~"$job"} + ) + / + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} +) + * +100 ++ +
+ It is clear the query calculates the percentage of used memory + for the given $node, $port and $job. Isn't it? :) +
+ ++ What's wrong with this query? It contains copy-pasted label filters for distinct timeseries, + which makes it easy to mistype these filters during modification. + Let's simplify the query with WITH expressions: +
+ ++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + node_memory_MemTotal_bytes{commonFilters} + - + node_memory_MemFree_bytes{commonFilters} +) + / +node_memory_MemTotal_bytes{commonFilters} * 100 ++ +
+ Now label filters are located in a single place instead of three distinct places. + The query mentions node_memory_MemTotal_bytes metric twice and {commonFilters} + three times. WITH expressions may improve this: +
+ ++WITH ( + my_resource_utilization(free, limit, filters) = (limit{filters} - free{filters}) / limit{filters} * 100 +) +my_resource_utilization( + node_memory_MemFree_bytes, + node_memory_MemTotal_bytes, + {instance=~"$node:$port",job=~"$job"}, +) ++ +
+ Now the template function my_resource_utilization() may be used for monitoring arbitrary + resources - memory, CPU, network, storage, you name it. +
+ ++ Let's take another nice query from Node Exporter Full dashboard: +
+ ++( + ( + ( + count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',instance=~"$node:$port",job=~"$job"}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) +) ++ +
+ Do you understand what this mess does? Is it manageable? :) WITH expressions are happy to help in a few iterations.
+
+
+ 1. Extract common filters used in multiple places into a commonFilters variable:
+
+WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + ( + ( + count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) +) ++ +
+ 2. Extract "count(count(...) by (cpu))" into cpuCount variable: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)) +) +( + ( + cpuCount + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) / cpuCount ++ +
+ 3. Extract rate(...) part into cpuIdle variable, since it is clear now that this part calculates the number of idle CPUs: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)), + cpuIdle = sum(rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ 4. Put node_cpu_seconds_total{commonFilters} into its own variable with the name cpuSeconds: +
++WITH ( + cpuSeconds = node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(cpuSeconds) by (cpu)), + cpuIdle = sum(rate(cpuSeconds{mode='idle'}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ Now the query has become clearer compared to the initial query. +
+ ++ WITH expressions may be nested and may be put anywhere. Try expanding the following query: +
+ ++WITH ( + f(a, b) = WITH ( + f1(x) = b-x, + f2(x) = x+x + ) f1(a)*f2(b) +) f(foo, with(x=bar) x) ++ +{% endfunc %} diff --git a/app/vmselect/prometheus/expand-with-exprs.qtpl.go b/app/vmselect/prometheus/expand-with-exprs.qtpl.go new file mode 100644 index 000000000..875a33e8b --- /dev/null +++ b/app/vmselect/prometheus/expand-with-exprs.qtpl.go @@ -0,0 +1,345 @@ +// Code generated by qtc from "expand-with-exprs.qtpl". DO NOT EDIT. +// See https://github.com/valyala/quicktemplate for details. + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:1 +package prometheus + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:1 +import ( + "github.com/VictoriaMetrics/metricsql" +) + +// ExpandWithExprsResponse returns a webpage, which expands with templates in q MetricsQL. + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 +import ( + qtio422016 "io" + + qt422016 "github.com/valyala/quicktemplate" +) + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 +var ( + _ = qtio422016.Copy + _ = qt422016.AcquireByteBuffer +) + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 +func StreamExpandWithExprsResponse(qw422016 *qt422016.Writer, q string) { +//line app/vmselect/prometheus/expand-with-exprs.qtpl:8 + qw422016.N().S(`
+ Let's look at the following real query from Node Exporter Full dashboard: +
+ ++( + ( + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} + - + node_memory_MemFree_bytes{instance=~"$node:$port", job=~"$job"} + ) + / + node_memory_MemTotal_bytes{instance=~"$node:$port", job=~"$job"} +) + * +100 ++ +
+ It is clear the query calculates the percentage of used memory + for the given $node, $port and $job. Isn't it? :) +
+ ++ What's wrong with this query? It contains copy-pasted label filters for distinct timeseries, + which makes it easy to mistype these filters during modification. + Let's simplify the query with WITH expressions: +
+ ++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + node_memory_MemTotal_bytes{commonFilters} + - + node_memory_MemFree_bytes{commonFilters} +) + / +node_memory_MemTotal_bytes{commonFilters} * 100 ++ +
+ Now label filters are located in a single place instead of three distinct places. + The query mentions node_memory_MemTotal_bytes metric twice and {commonFilters} + three times. WITH expressions may improve this: +
+ ++WITH ( + my_resource_utilization(free, limit, filters) = (limit{filters} - free{filters}) / limit{filters} * 100 +) +my_resource_utilization( + node_memory_MemFree_bytes, + node_memory_MemTotal_bytes, + {instance=~"$node:$port",job=~"$job"}, +) ++ +
+ Now the template function my_resource_utilization() may be used for monitoring arbitrary + resources - memory, CPU, network, storage, you name it. +
+ ++ Let's take another nice query from Node Exporter Full dashboard: +
+ ++( + ( + ( + count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',instance=~"$node:$port",job=~"$job"}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}) by (cpu) +) ++ +
+ Do you understand what this mess does? Is it manageable? :) WITH expressions are happy to help in a few iterations.
+
+
+ 1. Extract common filters used in multiple places into a commonFilters variable:
+
+WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"} +) +( + ( + ( + count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) + ) + ) + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) + / +count( + count(node_cpu_seconds_total{commonFilters}) by (cpu) +) ++ +
+ 2. Extract "count(count(...) by (cpu))" into cpuCount variable: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)) +) +( + ( + cpuCount + - + avg( + sum by (mode) (rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) + ) + ) + * + 100 +) / cpuCount ++ +
+ 3. Extract rate(...) part into cpuIdle variable, since it is clear now that this part calculates the number of idle CPUs: +
++WITH ( + commonFilters = {instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(node_cpu_seconds_total{commonFilters}) by (cpu)), + cpuIdle = sum(rate(node_cpu_seconds_total{mode='idle',commonFilters}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ 4. Put node_cpu_seconds_total{commonFilters} into its own variable with the name cpuSeconds: +
++WITH ( + cpuSeconds = node_cpu_seconds_total{instance=~"$node:$port",job=~"$job"}, + cpuCount = count(count(cpuSeconds) by (cpu)), + cpuIdle = sum(rate(cpuSeconds{mode='idle'}[5m])) +) +((cpuCount - cpuIdle) * 100) / cpuCount ++ +
+ Now the query has become clearer compared to the initial query. +
+ ++ WITH expressions may be nested and may be put anywhere. Try expanding the following query: +
+ ++WITH ( + f(a, b) = WITH ( + f1(x) = b-x, + f2(x) = x+x + ) f1(a)*f2(b) +) f(foo, with(x=bar) x) ++ +`) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +} + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +func writewithExprsTutorial(qq422016 qtio422016.Writer) { +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qw422016 := qt422016.AcquireWriter(qq422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + streamwithExprsTutorial(qw422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qt422016.ReleaseWriter(qw422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +} + +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +func withExprsTutorial() string { +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qb422016 := qt422016.AcquireByteBuffer() +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + writewithExprsTutorial(qb422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qs422016 := string(qb422016.B) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + qt422016.ReleaseByteBuffer(qb422016) +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 + return qs422016 +//line app/vmselect/prometheus/expand-with-exprs.qtpl:245 +} diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index d451e956f..4d858b4eb 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -57,6 +57,15 @@ var ( // Default step used if not set. const defaultStep = 5 * 60 * 1000 +// ExpandWithExprs handles the request to /expand-with-exprs +func ExpandWithExprs(w http.ResponseWriter, r *http.Request) { + query := r.FormValue("query") + bw := bufferedwriter.Get(w) + defer bufferedwriter.Put(bw) + WriteExpandWithExprsResponse(bw, query) + _ = bw.Flush() +} + // FederateHandler implements /federate . 
See https://prometheus.io/docs/prometheus/latest/federation/ func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { defer federateDuration.UpdateDuration(startTime) @@ -399,10 +408,7 @@ func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, cp *commonPara if format == "promapi" { WriteExportPromAPIFooter(bw, qt) } - if err := bw.Flush(); err != nil { - return err - } - return nil + return bw.Flush() } type exportBlock struct { diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index bebb91981..b71adbb92 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -673,7 +673,7 @@ Released at 12-04-2022 * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly propagate limits at `-search.max*` command-line flags from `vminsert` to `vmstorage`. The limits are `-search.maxUniqueTimeseries`, `-search.maxSeries`, `-search.maxFederateSeries`, `-search.maxExportSeries`, `-search.maxGraphiteSeries` and `-search.maxTSDBStatusSeries`. They weren't propagated to `vmstorage` because of the bug. These limits were introduced in [v1.76.0](https://docs.victoriametrics.com/CHANGELOG.html#v1760). See [this bug](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2450). * BUGFIX: fix goroutine leak and possible deadlock when importing invalid data via [native binary format](https://docs.victoriametrics.com/#how-to-import-data-in-native-format). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2423). * BUGFIX: [Graphite Render API](https://docs.victoriametrics.com/#graphite-render-api-usage): properly calculate [hitCount](https://graphite.readthedocs.io/en/latest/functions.html#graphite.render.functions.hitcount) function. Previously it could return empty results if there were no original samples in some parts of the selected time range. 
-* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow overriding built-in function names inside [WITH templates](https://play.victoriametrics.com/promql/expand-with-exprs). For example, `WITH (sum(a,b) = a + b + 1) sum(x,y)` now expands into `x + y + 1`. Previously such a query would fail with `cannot use reserved name` error. See [this bugreport](https://github.com/VictoriaMetrics/metricsql/issues/5). +* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): allow overriding built-in function names inside [WITH templates](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs). For example, `WITH (sum(a,b) = a + b + 1) sum(x,y)` now expands into `x + y + 1`. Previously such a query would fail with `cannot use reserved name` error. See [this bugreport](https://github.com/VictoriaMetrics/metricsql/issues/5). * BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly display values greater than 1000 on Y axis. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2409). diff --git a/docs/MetricsQL.md b/docs/MetricsQL.md index 9b0a8c447..f06e38a96 100644 --- a/docs/MetricsQL.md +++ b/docs/MetricsQL.md @@ -90,7 +90,7 @@ The list of MetricsQL features: * `if` binary operator. `q1 if q2` removes values from `q1` for missing values from `q2`. * `ifnot` binary operator. `q1 ifnot q2` removes values from `q1` for existing values from `q2`. * `WITH` templates. This feature simplifies writing and managing complex queries. - Go to [WITH templates playground](https://play.victoriametrics.com/promql/expand-with-exprs) and try it. + Go to [WITH templates playground](https://play.victoriametrics.com/select/accounting/1/6a716b0f-38bc-4856-90ce-448fd713e3fe/expand-with-exprs) and try it. * String literals may be concatenated. 
This is useful with `WITH` templates: `WITH (commonPrefix="long_metric_prefix_") {__name__=commonPrefix+"suffix1"} / {__name__=commonPrefix+"suffix2"}`. * `keep_metric_names` modifier can be applied to all the [rollup functions](#rollup-functions) and [transform functions](#transform-functions).