From 7d7cf2b6fdef6230b65caf5db45f208278a3076a Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Mon, 15 Aug 2022 13:50:14 +0300
Subject: [PATCH] app/vmselect: follow-up after
 63e0f16062c32a601a197d27ca6006077ba9bd8c

* Explicitly store a pointer to UserReadableError in the error interface.
  Previously Go automatically converted the value to a pointer before storing in the error interface.

* Add Unwrap() method to UserReadableError, so it can be used transparently with the other code,
  which calls errors.Is() and errors.As().

* Document the change in docs/CHANGELOG.md
---
 app/vmselect/main.go        |  5 ++---
 app/vmselect/promql/eval.go | 42 +++++++++++++++++++++++++------------
 app/vmselect/promql/exec.go | 13 +++++++++---
 docs/CHANGELOG.md           |  1 +
 4 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/app/vmselect/main.go b/app/vmselect/main.go
index ebea036352..4c458e55dc 100644
--- a/app/vmselect/main.go
+++ b/app/vmselect/main.go
@@ -627,12 +627,11 @@ func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
 	}
 	w.WriteHeader(statusCode)
 
-	var ure promql.UserReadableError
+	var ure *promql.UserReadableError
 	if errors.As(err, &ure) {
-		prometheus.WriteErrorResponse(w, statusCode, ure.Err)
+		prometheus.WriteErrorResponse(w, statusCode, ure)
 		return
 	}
-
 	prometheus.WriteErrorResponse(w, statusCode, err)
 }
 
diff --git a/app/vmselect/promql/eval.go b/app/vmselect/promql/eval.go
index 5c59831ae1..a0257e7f05 100644
--- a/app/vmselect/promql/eval.go
+++ b/app/vmselect/promql/eval.go
@@ -311,7 +311,9 @@ func evalExprInternal(qt *querytracer.Tracer, ec *EvalConfig, e metricsql.Expr)
 func evalTransformFunc(qt *querytracer.Tracer, ec *EvalConfig, fe *metricsql.FuncExpr) ([]*timeseries, error) {
 	tf := getTransformFunc(fe.Name)
 	if tf == nil {
-		return nil, UserReadableError{Err: fmt.Errorf(`unknown func %q`, fe.Name)}
+		return nil, &UserReadableError{
+			Err: fmt.Errorf(`unknown func %q`, fe.Name),
+		}
 	}
 	args, err := evalExprs(qt, ec, fe.Args)
 	if err != nil {
@@ -353,7 +355,9 @@ func evalAggrFunc(qt *querytracer.Tracer, ec *EvalConfig, ae *metricsql.AggrFunc
 	}
 	af := getAggrFunc(ae.Name)
 	if af == nil {
-		return nil, UserReadableError{Err: fmt.Errorf(`unknown func %q`, ae.Name)}
+		return nil, &UserReadableError{
+			Err: fmt.Errorf(`unknown func %q`, ae.Name),
+		}
 	}
 	afa := &aggrFuncArg{
 		ae:   ae,
@@ -696,10 +700,14 @@ func evalRollupFunc(qt *querytracer.Tracer, ec *EvalConfig, funcName string, rf
 	}
 	tssAt, err := evalExpr(qt, ec, re.At)
 	if err != nil {
-		return nil, UserReadableError{Err: fmt.Errorf("cannot evaluate `@` modifier: %w", err)}
+		return nil, &UserReadableError{
+			Err: fmt.Errorf("cannot evaluate `@` modifier: %w", err),
+		}
 	}
 	if len(tssAt) != 1 {
-		return nil, UserReadableError{Err: fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt))}
+		return nil, &UserReadableError{
+			Err: fmt.Errorf("`@` modifier must return a single series; it returns %d series instead", len(tssAt)),
+		}
 	}
 	atTimestamp := int64(tssAt[0].Values[0] * 1000)
 	ecNew := copyEvalConfig(ec)
@@ -759,7 +767,9 @@ func evalRollupFuncWithoutAt(qt *querytracer.Tracer, ec *EvalConfig, funcName st
 		rvs, err = evalRollupFuncWithSubquery(qt, ecNew, funcName, rf, expr, re)
 	}
 	if err != nil {
-		return nil, UserReadableError{Err: err}
+		return nil, &UserReadableError{
+			Err: err,
+		}
 	}
 	if funcName == "absent_over_time" {
 		rvs = aggregateAbsentOverTime(ec, re.Expr, rvs)
@@ -983,7 +993,9 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
 	sq := storage.NewSearchQuery(ec.AuthToken.AccountID, ec.AuthToken.ProjectID, minTimestamp, ec.End, tfss, ec.MaxSeries)
 	rss, isPartial, err := netstorage.ProcessSearchQuery(qt, ec.DenyPartialResponse, sq, ec.Deadline)
 	if err != nil {
-		return nil, UserReadableError{Err: err}
+		return nil, &UserReadableError{
+			Err: err,
+		}
 	}
 	ec.updateIsPartialResponse(isPartial)
 	rssLen := rss.Len()
@@ -1020,12 +1032,14 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
 	rml := getRollupMemoryLimiter()
 	if !rml.Get(uint64(rollupMemorySize)) {
 		rss.Cancel()
-		return nil, UserReadableError{Err: fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
-			"total available memory for concurrent requests: %d bytes; "+
-			"requested memory: %d bytes; "+
-			"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
-			"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
-			rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3)}
+		return nil, &UserReadableError{
+			Err: fmt.Errorf("not enough memory for processing %d data points across %d time series with %d points in each time series; "+
+				"total available memory for concurrent requests: %d bytes; "+
+				"requested memory: %d bytes; "+
+				"possible solutions are: reducing the number of matching time series; switching to node with more RAM; "+
+				"increasing -memory.allowedPercent; increasing `step` query arg (%gs)",
+				rollupPoints, timeseriesLen*len(rcs), pointsPerTimeseries, rml.MaxSize, uint64(rollupMemorySize), float64(ec.Step)/1e3),
+		}
 	}
 	defer rml.Put(uint64(rollupMemorySize))
 
@@ -1038,7 +1052,9 @@ func evalRollupFuncWithMetricExpr(qt *querytracer.Tracer, ec *EvalConfig, funcNa
 		tss, err = evalRollupNoIncrementalAggregate(qt, funcName, keepMetricNames, rss, rcs, preFunc, sharedTimestamps)
 	}
 	if err != nil {
-		return nil, UserReadableError{Err: err}
+		return nil, &UserReadableError{
+			Err: err,
+		}
 	}
 	tss = mergeTimeseries(tssCached, tss, start, ec)
 	if !isPartial {
diff --git a/app/vmselect/promql/exec.go b/app/vmselect/promql/exec.go
index 23785a73b2..47fb51460c 100644
--- a/app/vmselect/promql/exec.go
+++ b/app/vmselect/promql/exec.go
@@ -26,14 +26,21 @@ var (
 		`This option is DEPRECATED in favor of {__graphite__="a.*.c"} syntax for selecting metrics matching the given Graphite metrics filter`)
 )
 
-// UserReadableError is a type of error which supposed
-// to be returned to the user without additional context.
+// UserReadableError is a type of error which supposed to be returned to the user without additional context.
 type UserReadableError struct {
+	// Err is the error which needs to be returned to the user.
 	Err error
 }
 
+// Unwrap returns ure.Err.
+//
+// This is used by standard errors package. See https://golang.org/pkg/errors
+func (ure *UserReadableError) Unwrap() error {
+	return ure.Err
+}
+
 // Error satisfies Error interface
-func (ure UserReadableError) Error() string {
+func (ure *UserReadableError) Error() string {
 	return ure.Err.Error()
 }
 
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index e95f5ec389..0eec544550 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -19,6 +19,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
 
 * SECURITY: [vmalert](https://docs.victoriametrics.com/vmalert.html): do not expose `-remoteWrite.url`, `-remoteRead.url` and `-datasource.url` command-line flag values in logs and at `http://vmalert:8880/flags` page by default, since they may contain sensitive data such as auth keys. This aligns `vmalert` behaviour with [vmagent](https://docs.victoriametrics.com/vmagent.html), which doesn't expose `-remoteWrite.url` command-line flag value in logs and at `http://vmagent:8429/flags` page by default. Specify `-remoteWrite.showURL`, `-remoteRead.showURL` and `-datasource.showURL` command-line flags for showing values for the corresponding `-*.url` flags in logs. Thanks to @mble for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2965).
 
+* FEATURE: return shorter error messages to Grafana and to other clients requesting [/api/v1/query](https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries) and [/api/v1/query_range](https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries) endpoints. This should simplify reading these errors by humans. The long error message with full context is still written to logs.
 * FEATURE: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): improve performance for heavy queries on systems with many CPU cores.
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for MX record types in [dns_sd_configs](https://docs.victoriametrics.com/sd_configs.html#dns_sd_configs) in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/10099).
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `__meta_kubernetes_service_port_number` meta-label for `role: service` in [kubernetes_sd_configs](https://docs.victoriametrics.com/sd_configs.html#kubernetes_sd_configs) in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/11002).