From d8681557511e37087a33a4d7d47f3650a3523155 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 1 Dec 2023 17:24:59 +0200 Subject: [PATCH] app/vmselect: do not limit concurrency for static and fast queries Previously concurrency for static and fast queries was limited with the -search.maxConcurrentRequests command-line flag. This could complicate identifying heavy queries via `vmui` at `Top queries` and `Active queries` pages, since `vmui` and these pages couldn't be opened on overloaded vmselect. Thanks to @f41gh7 for the idea. --- app/vmselect/main.go | 323 ++++++++++++++------------ app/vmselect/prometheus/prometheus.go | 6 +- docs/CHANGELOG.md | 1 + 3 files changed, 175 insertions(+), 155 deletions(-) diff --git a/app/vmselect/main.go b/app/vmselect/main.go index fe1b7e9a7c..68886ffd63 100644 --- a/app/vmselect/main.go +++ b/app/vmselect/main.go @@ -179,17 +179,13 @@ var ( ) func requestHandler(w http.ResponseWriter, r *http.Request) bool { - if r.URL.Path == "/" { - if r.Method != http.MethodGet { - return false - } - w.Header().Add("Content-Type", "text/html; charset=utf-8") - fmt.Fprintf(w, `vmselect - a component of VictoriaMetrics cluster
-docs
-`) + path := strings.Replace(r.URL.Path, "//", "/", -1) + + if handleStaticAndSimpleRequests(w, r, path) { return true } + // Handle non-trivial dynamic requests, which may take big amounts of time and resources. startTime := time.Now() defer requestDuration.UpdateDuration(startTime) tracerEnabled := httputils.GetBool(r, "trace") @@ -248,7 +244,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool { }() } - path := strings.Replace(r.URL.Path, "//", "/", -1) if path == "/internal/resetRollupResultCache" { if !httpserver.CheckAuthFlag(w, r, *resetCacheAuthKey, "resetCacheAuthKey") { return true @@ -256,22 +251,6 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool { promql.ResetRollupResultCache() return true } - if path == "/api/v1/status/top_queries" { - globalTopQueriesRequests.Inc() - httpserver.EnableCORS(w, r) - if err := prometheus.QueryStatsHandler(startTime, nil, w, r); err != nil { - globalTopQueriesErrors.Inc() - sendPrometheusError(w, r, err) - return true - } - return true - } - if path == "/api/v1/status/active_queries" { - globalStatusActiveQueriesRequests.Inc() - httpserver.EnableCORS(w, r) - promql.ActiveQueriesHandler(nil, w, r) - return true - } if path == "/admin/tenants" { tenantsRequests.Inc() httpserver.EnableCORS(w, r) @@ -314,69 +293,6 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW httpRequests.Get(at).Inc() httpRequestsDuration.Get(at).Add(int(time.Since(startTime).Milliseconds())) }() - if p.Suffix == "" { - if r.Method != http.MethodGet { - return false - } - w.Header().Add("Content-Type", "text/html; charset=utf-8") - fmt.Fprintf(w, "

VictoriaMetrics cluster - vmselect


") - fmt.Fprintf(w, "See docs
") - fmt.Fprintf(w, "Useful endpoints:
") - fmt.Fprintf(w, `Web UI
`) - fmt.Fprintf(w, `metric-level relabel debugging
`) - fmt.Fprintf(w, `target-level relabel debugging
`) - fmt.Fprintf(w, `WITH expressions' tutorial
`) - fmt.Fprintf(w, `tsdb status page
`) - fmt.Fprintf(w, `top queries
`) - fmt.Fprintf(w, `active queries
`) - return true - } - if strings.HasPrefix(p.Suffix, "static") { - prefix := strings.Join([]string{"", p.Prefix, p.AuthToken}, "/") - http.StripPrefix(prefix, staticServer).ServeHTTP(w, r) - return true - } - if strings.HasPrefix(p.Suffix, "prometheus/static") { - prefix := strings.Join([]string{"", p.Prefix, p.AuthToken}, "/") - r.URL.Path = strings.Replace(r.URL.Path, "/prometheus/static", "/static", 1) - http.StripPrefix(prefix, staticServer).ServeHTTP(w, r) - return true - } - if p.Suffix == "vmui" || p.Suffix == "graph" || p.Suffix == "prometheus/vmui" || p.Suffix == "prometheus/graph" { - // VMUI access via incomplete url without `/` in the end. Redirect to complete url. - // Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics - // is hidden behind vmauth or similar proxy. - _ = r.ParseForm() - suffix := strings.Replace(p.Suffix, "prometheus/", "../prometheus/", 1) - newURL := suffix + "/?" + r.Form.Encode() - httpserver.Redirect(w, newURL) - return true - } - if strings.HasPrefix(p.Suffix, "graph/") || strings.HasPrefix(p.Suffix, "prometheus/graph/") { - // This is needed for serving /graph URLs from Prometheus datasource in Grafana. - p.Suffix = strings.Replace(p.Suffix, "graph/", "vmui/", 1) - r.URL.Path = strings.Replace(r.URL.Path, "/graph/", "/vmui/", 1) - } - if p.Suffix == "vmui/custom-dashboards" || p.Suffix == "prometheus/vmui/custom-dashboards" { - if err := handleVMUICustomDashboards(w); err != nil { - httpserver.Errorf(w, r, "%s", err) - return true - } - return true - } - if strings.HasPrefix(p.Suffix, "vmui/") || strings.HasPrefix(p.Suffix, "prometheus/vmui/") { - // vmui access. - if strings.HasPrefix(p.Suffix, "vmui/static/") || strings.HasPrefix(p.Suffix, "prometheus/vmui/static/") { - // Allow clients caching static contents for long period of time, since it shouldn't change over time. - // Path to static contents (such as js and css) must be changed whenever its contents is changed. - // See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/ - w.Header().Set("Cache-Control", "max-age=31536000") - } - prefix := strings.Join([]string{"", p.Prefix, p.AuthToken}, "/") - r.URL.Path = strings.Replace(r.URL.Path, "/prometheus/vmui/", "/vmui/", 1) - http.StripPrefix(prefix, vmuiFileServer).ServeHTTP(w, r) - return true - } if strings.HasPrefix(p.Suffix, "prometheus/api/v1/label/") { s := p.Suffix[len("prometheus/api/v1/label/"):] if strings.HasSuffix(s, "/values") { @@ -401,46 +317,6 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW } return true } - if strings.HasPrefix(p.Suffix, "graphite/functions") { - funcName := p.Suffix[len("graphite/functions"):] - funcName = strings.TrimPrefix(funcName, "/") - if funcName == "" { - graphiteFunctionsRequests.Inc() - if err := graphite.FunctionsHandler(w, r); err != nil { - graphiteFunctionsErrors.Inc() - httpserver.Errorf(w, r, "%s", err) - return true - } - return true - } - graphiteFunctionDetailsRequests.Inc() - if err := graphite.FunctionDetailsHandler(funcName, w, r); err != nil { - graphiteFunctionDetailsErrors.Inc() - httpserver.Errorf(w, r, "%s", err) - return true - } - return true - } - - if p.Suffix == "prometheus/vmalert" { - // vmalert access via incomplete url without `/` in the end. Redirect to complete url. - // Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics - // is hidden behind vmauth or similar proxy. - path := "../" + p.Suffix + "/" - httpserver.Redirect(w, path) - return true - } - if strings.HasPrefix(p.Suffix, "prometheus/vmalert/") { - vmalertRequests.Inc() - if len(*vmalertProxyURL) == 0 { - w.WriteHeader(http.StatusBadRequest) - w.Header().Set("Content-Type", "application/json") - fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`) - return true - } - proxyVMAlertRequests(w, r, p.Suffix) - return true - } switch p.Suffix { case "prometheus/api/v1/query": @@ -497,20 +373,6 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW return true } return true - case "prometheus/api/v1/status/active_queries": - statusActiveQueriesRequests.Inc() - httpserver.EnableCORS(w, r) - promql.ActiveQueriesHandler(at, w, r) - return true - case "prometheus/api/v1/status/top_queries": - topQueriesRequests.Inc() - httpserver.EnableCORS(w, r) - if err := prometheus.QueryStatsHandler(startTime, at, w, r); err != nil { - topQueriesErrors.Inc() - sendPrometheusError(w, r, err) - return true - } - return true case "prometheus/api/v1/export": exportRequests.Inc() if err := prometheus.ExportHandler(startTime, at, w, r); err != nil { @@ -519,14 +381,6 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW return true } return true - case "prometheus/api/v1/export/native": - exportNativeRequests.Inc() - if err := prometheus.ExportNativeHandler(startTime, at, w, r); err != nil { - exportNativeErrors.Inc() - httpserver.Errorf(w, r, "%s", err) - return true - } - return true case "prometheus/api/v1/export/csv": exportCSVRequests.Inc() if err := prometheus.ExportCSVHandler(startTime, at, w, r); err != nil { @@ -535,6 +389,14 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW return true } return true + case "prometheus/api/v1/export/native": + exportNativeRequests.Inc() + if err := prometheus.ExportNativeHandler(startTime, at, w, r); err != nil { + exportNativeErrors.Inc() + httpserver.Errorf(w, r, "%s", err) + return true + } + return true case "prometheus/federate": federateRequests.Inc() if err := prometheus.FederateHandler(startTime, at, w, r); err != nil { @@ -636,6 +498,167 @@ func selectHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW return true } return true + default: + return false + } +} + +func handleStaticAndSimpleRequests(w http.ResponseWriter, r *http.Request, path string) bool { + if path == "/" { + if r.Method != http.MethodGet { + return false + } + w.Header().Add("Content-Type", "text/html; charset=utf-8") + fmt.Fprintf(w, `vmselect - a component of VictoriaMetrics cluster
+docs
+`) + return true + } + if path == "/api/v1/status/top_queries" { + globalTopQueriesRequests.Inc() + httpserver.EnableCORS(w, r) + if err := prometheus.QueryStatsHandler(nil, w, r); err != nil { + globalTopQueriesErrors.Inc() + sendPrometheusError(w, r, err) + return true + } + return true + } + if path == "/api/v1/status/active_queries" { + globalStatusActiveQueriesRequests.Inc() + httpserver.EnableCORS(w, r) + promql.ActiveQueriesHandler(nil, w, r) + return true + } + p, err := httpserver.ParsePath(path) + if err != nil { + return false + } + if p.Suffix == "" { + if r.Method != http.MethodGet { + return false + } + w.Header().Add("Content-Type", "text/html; charset=utf-8") + fmt.Fprintf(w, "

VictoriaMetrics cluster - vmselect


") + fmt.Fprintf(w, "See docs
") + fmt.Fprintf(w, "Useful endpoints:
") + fmt.Fprintf(w, `Web UI
`) + fmt.Fprintf(w, `metric-level relabel debugging
`) + fmt.Fprintf(w, `target-level relabel debugging
`) + fmt.Fprintf(w, `WITH expressions' tutorial
`) + fmt.Fprintf(w, `tsdb status page
`) + fmt.Fprintf(w, `top queries
`) + fmt.Fprintf(w, `active queries
`) + return true + } + if strings.HasPrefix(p.Suffix, "static") { + prefix := strings.Join([]string{"", p.Prefix, p.AuthToken}, "/") + http.StripPrefix(prefix, staticServer).ServeHTTP(w, r) + return true + } + if strings.HasPrefix(p.Suffix, "prometheus/static") { + prefix := strings.Join([]string{"", p.Prefix, p.AuthToken}, "/") + r.URL.Path = strings.Replace(r.URL.Path, "/prometheus/static", "/static", 1) + http.StripPrefix(prefix, staticServer).ServeHTTP(w, r) + return true + } + if p.Suffix == "vmui" || p.Suffix == "graph" || p.Suffix == "prometheus/vmui" || p.Suffix == "prometheus/graph" { + // VMUI access via incomplete url without `/` in the end. Redirect to complete url. + // Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics + // is hidden behind vmauth or similar proxy. + _ = r.ParseForm() + suffix := strings.Replace(p.Suffix, "prometheus/", "../prometheus/", 1) + newURL := suffix + "/?" + r.Form.Encode() + httpserver.Redirect(w, newURL) + return true + } + if strings.HasPrefix(p.Suffix, "graph/") || strings.HasPrefix(p.Suffix, "prometheus/graph/") { + // This is needed for serving /graph URLs from Prometheus datasource in Grafana. + p.Suffix = strings.Replace(p.Suffix, "graph/", "vmui/", 1) + r.URL.Path = strings.Replace(r.URL.Path, "/graph/", "/vmui/", 1) + } + if p.Suffix == "vmui/custom-dashboards" || p.Suffix == "prometheus/vmui/custom-dashboards" { + if err := handleVMUICustomDashboards(w); err != nil { + httpserver.Errorf(w, r, "%s", err) + return true + } + return true + } + if strings.HasPrefix(p.Suffix, "vmui/") || strings.HasPrefix(p.Suffix, "prometheus/vmui/") { + // vmui access. + if strings.HasPrefix(p.Suffix, "vmui/static/") || strings.HasPrefix(p.Suffix, "prometheus/vmui/static/") { + // Allow clients caching static contents for long period of time, since it shouldn't change over time. + // Path to static contents (such as js and css) must be changed whenever its contents is changed. + // See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/ + w.Header().Set("Cache-Control", "max-age=31536000") + } + prefix := strings.Join([]string{"", p.Prefix, p.AuthToken}, "/") + r.URL.Path = strings.Replace(r.URL.Path, "/prometheus/vmui/", "/vmui/", 1) + http.StripPrefix(prefix, vmuiFileServer).ServeHTTP(w, r) + return true + } + if strings.HasPrefix(p.Suffix, "graphite/functions") { + funcName := p.Suffix[len("graphite/functions"):] + funcName = strings.TrimPrefix(funcName, "/") + if funcName == "" { + graphiteFunctionsRequests.Inc() + if err := graphite.FunctionsHandler(w, r); err != nil { + graphiteFunctionsErrors.Inc() + httpserver.Errorf(w, r, "%s", err) + return true + } + return true + } + graphiteFunctionDetailsRequests.Inc() + if err := graphite.FunctionDetailsHandler(funcName, w, r); err != nil { + graphiteFunctionDetailsErrors.Inc() + httpserver.Errorf(w, r, "%s", err) + return true + } + return true + } + if p.Suffix == "prometheus/vmalert" { + // vmalert access via incomplete url without `/` in the end. Redirect to complete url. + // Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics + // is hidden behind vmauth or similar proxy. + path := "../" + p.Suffix + "/" + httpserver.Redirect(w, path) + return true + } + if strings.HasPrefix(p.Suffix, "prometheus/vmalert/") { + vmalertRequests.Inc() + if len(*vmalertProxyURL) == 0 { + w.WriteHeader(http.StatusBadRequest) + w.Header().Set("Content-Type", "application/json") + fmt.Fprintf(w, "%s", `{"status":"error","msg":"for accessing vmalert flag '-vmalert.proxyURL' must be configured"}`) + return true + } + proxyVMAlertRequests(w, r, p.Suffix) + return true + } + switch p.Suffix { + case "prometheus/api/v1/status/active_queries": + at, err := auth.NewToken(p.AuthToken) + if err != nil { + return false + } + statusActiveQueriesRequests.Inc() + httpserver.EnableCORS(w, r) + promql.ActiveQueriesHandler(at, w, r) + return true + case "prometheus/api/v1/status/top_queries": + at, err := auth.NewToken(p.AuthToken) + if err != nil { + return false + } + topQueriesRequests.Inc() + httpserver.EnableCORS(w, r) + if err := prometheus.QueryStatsHandler(at, w, r); err != nil { + topQueriesErrors.Inc() + sendPrometheusError(w, r, err) + return true + } + return true case "prometheus/metric-relabel-debug", "metric-relabel-debug": promrelabelMetricRelabelDebugRequests.Inc() metric := r.FormValue("metric") diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 7644f43428..d6a7fa5d94 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -1162,9 +1162,7 @@ func getLatencyOffsetMilliseconds(r *http.Request) (int64, error) { } // QueryStatsHandler returns query stats at `/api/v1/status/top_queries` -func QueryStatsHandler(startTime time.Time, at *auth.Token, w http.ResponseWriter, r *http.Request) error { - defer queryStatsDuration.UpdateDuration(startTime) - +func QueryStatsHandler(at *auth.Token, w http.ResponseWriter, r *http.Request) error { topN := 20 topNStr := r.FormValue("topN") if len(topNStr) > 0 { @@ -1193,8 +1191,6 @@ func QueryStatsHandler(startTime time.Time, at *auth.Token, w http.ResponseWrite return nil } -var queryStatsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/status/top_queries"}`) - // commonParams contains common parameters for all /api/v1/* handlers // // timeout, start, end, match[], extra_label, extra_filters[] diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index a16b0d0f6a..a4b9f97db1 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -28,6 +28,7 @@ The sandbox cluster installation is running under the constant load generated by ## tip +* FEATURE: `vmselect`: allow opening [vmui](https://docs.victoriametrics.com/#vmui) and investigating [Top queries](https://docs.victoriametrics.com/#top-queries) and [Active queries](https://docs.victoriametrics.com/#active-queries) when the `vmselect` is overloaded with concurrent queries (e.g. when more than `-search.maxConcurrentRequests` concurrent queries are executed). Previously an attempt to open `Top queries` or `Active queries` at `vmui` could result in `couldn't start executing the request in ... seconds, since -search.maxConcurrentRequests=... concurrent requests are executed` error, which could complicate debugging of overloaded `vmselect` or single-node VictoriaMetrics. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for reading and writing samples via [Google PubSub](https://cloud.google.com/pubsub). See [these docs](https://docs.victoriametrics.com/vmagent.html#google-pubsub-integration). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for Datadog `/api/v2/series` and `/api/beta/sketches` ingestion protocols to vmagent/vminsert components. See this [doc](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) for examples. Thanks to @AndrewChubatiuk for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5094).