From 23bb38ef8098cf01246ce796d529b99ac04b9194 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 8 Sep 2023 22:39:17 +0200 Subject: [PATCH] app/vmauth: add ability to specify response status codes for retrying requests during load-balancing Response status codes for retrying can be specified via retry_status_codes list See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893 --- app/vmauth/README.md | 60 ++++++++++++++++++++++++------ app/vmauth/auth_config.go | 8 ++-- app/vmauth/example_config.yml | 16 ++++---- app/vmauth/example_config_ent.yml | 12 ------ app/vmauth/main.go | 31 ++++++++++------ app/vmauth/target_url.go | 8 ++-- app/vmauth/target_url_test.go | 62 ++++++++++++++++++------------- docs/CHANGELOG.md | 2 +- docs/vmauth.md | 60 ++++++++++++++++++++++++------ 9 files changed, 171 insertions(+), 88 deletions(-) diff --git a/app/vmauth/README.md b/app/vmauth/README.md index 6477b57dce..6104624123 100644 --- a/app/vmauth/README.md +++ b/app/vmauth/README.md @@ -35,9 +35,42 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls. In the latter case `vmauth` balances load among the configured urls in least-loaded round-robin manner. -`vmauth` retries failing `GET` requests across the configured list of urls. -This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes -in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). + +If the backend at the configured url isn't available, then `vmauth` tries sending the request to the remaining configured urls. + +It is possible to configure automatic retry of requests if the backend responds with status code from optional `retry_status_codes` list. 
+ +Load balancing feature can be used in the following cases: + +- Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). + The following `-auth.config` file can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests + or requests with 500 and 502 response status codes: + + ```yml + unauthorized_user: + url_prefix: + - http://vmselect1:8481/ + - http://vmselect2:8481/ + - http://vmselect3:8481/ + retry_status_codes: [500, 502] + ``` + +- Spreading select queries among multiple availability zones (AZs) with identical data. For example, the following config spreads select queries + among 3 AZs. Requests are re-tried if some AZs are temporarily unavailable or if some `vmstorage` nodes in some AZs are temporarily unavailable. + `vmauth` adds `deny_partial_response=1` query arg to all the queries in order to guarantee to get full response from every AZ. + See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details. + + ```yml + unauthorized_user: + url_prefix: + - https://vmselect-az1/?deny_partial_response=1 + - https://vmselect-az2/?deny_partial_response=1 + - https://vmselect-az3/?deny_partial_response=1 + retry_status_codes: [500, 502, 503] + ``` + +Load balancing can also be configured independently per each user and per each `url_map` entry. +See [auth config docs](#auth-config) for more details. ## Concurrency limiting @@ -177,6 +210,7 @@ users: # - http://vmselect2:8481/select/42/prometheus # For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8480/select/42/prometheus/api/v1/query # or to http://vmselect2:8480/select/42/prometheus/api/v1/query . + # Requests are re-tried at other url_prefix backends if response status codes match 500 or 502. 
# # - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write . # The "X-Scope-OrgID: abc" http header is added to these requests. @@ -196,6 +230,7 @@ users: url_prefix: - "http://vmselect1:8481/select/42/prometheus" - "http://vmselect2:8481/select/42/prometheus" + retry_status_codes: [500, 502] - src_paths: ["/api/v1/write"] url_prefix: "http://vminsert:8480/insert/42/prometheus" headers: @@ -209,16 +244,14 @@ users: - "http://default2:8888/unsupported_url_handler" # Requests without Authorization header are routed according to `unauthorized_user` section. +# Requests are routed in round-robin fashion between `url_prefix` backends. +# The deny_partial_response query arg is added to all the routed requests. +# The requests are re-tried if url_prefix backends send 500 or 503 response status codes. unauthorized_user: - url_map: - - src_paths: - - /api/v1/query - - /api/v1/query_range - url_prefix: - - http://vmselect1:8481/select/0/prometheus - - http://vmselect2:8481/select/0/prometheus - ip_filters: - allow_list: [8.8.8.8] + url_prefix: + - http://vmselect-az1/?deny_partial_response=1 + - http://vmselect-az2/?deny_partial_response=1 + retry_status_codes: [503, 500] ip_filters: allow_list: ["1.2.3.0/24", "127.0.0.1"] @@ -424,6 +457,9 @@ See the docs at https://docs.victoriametrics.com/vmauth.html . The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000) -maxIdleConnsPerBackend int The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100) + -maxRequestBodySizeToRetry size + The maximum request body size, which can be cached and re-tried at other backends. 
Bigger values may require more memory + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384) -memory.allowedBytes size Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0) diff --git a/app/vmauth/auth_config.go b/app/vmauth/auth_config.go index af1b0573f6..43b40bc94e 100644 --- a/app/vmauth/auth_config.go +++ b/app/vmauth/auth_config.go @@ -46,6 +46,7 @@ type UserInfo struct { HeadersConf HeadersConf `yaml:",inline"` MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"` DefaultURL *URLPrefix `yaml:"default_url,omitempty"` + RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"` concurrencyLimitCh chan struct{} concurrencyLimitReached *metrics.Counter @@ -111,9 +112,10 @@ func (h *Header) MarshalYAML() (interface{}, error) { // URLMap is a mapping from source paths to target urls. 
type URLMap struct { - SrcPaths []*SrcPath `yaml:"src_paths,omitempty"` - URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"` - HeadersConf HeadersConf `yaml:",inline"` + SrcPaths []*SrcPath `yaml:"src_paths,omitempty"` + URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"` + HeadersConf HeadersConf `yaml:",inline"` + RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"` } // SrcPath represents an src path diff --git a/app/vmauth/example_config.yml b/app/vmauth/example_config.yml index f9e149626f..b21771c03c 100644 --- a/app/vmauth/example_config.yml +++ b/app/vmauth/example_config.yml @@ -72,6 +72,7 @@ users: # - http://vmselect2:8481/select/42/prometheus # For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8480/select/42/prometheus/api/v1/query # or to http://vmselect2:8480/select/42/prometheus/api/v1/query . + # Requests are re-tried at other url_prefix backends if response status codes match 500 or 502. # # - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write . # The "X-Scope-OrgID: abc" http header is added to these requests. @@ -90,6 +91,7 @@ users: url_prefix: - "http://vmselect1:8481/select/42/prometheus" - "http://vmselect2:8481/select/42/prometheus" + retry_status_codes: [500, 502] - src_paths: ["/api/v1/write"] url_prefix: "http://vminsert:8480/insert/42/prometheus" headers: @@ -99,11 +101,11 @@ users: - "http://default2:8888/unsupported_url_handler" # Requests without Authorization header are routed according to `unauthorized_user` section. +# Requests are routed in round-robin fashion between `url_prefix` backends. +# The deny_partial_response query arg is added to all the routed requests. +# The requests are re-tried if url_prefix backends send 500 or 503 response status codes. 
unauthorized_user: - url_map: - - src_paths: - - /api/v1/query - - /api/v1/query_range - url_prefix: - - http://vmselect1:8481/select/0/prometheus - - http://vmselect2:8481/select/0/prometheus + url_prefix: + - http://vmselect-az1/?deny_partial_response=1 + - http://vmselect-az2/?deny_partial_response=1 + retry_status_codes: [503, 500] diff --git a/app/vmauth/example_config_ent.yml b/app/vmauth/example_config_ent.yml index ac0880a4ee..ed9ae26e81 100644 --- a/app/vmauth/example_config_ent.yml +++ b/app/vmauth/example_config_ent.yml @@ -39,18 +39,6 @@ users: - "http://default1:8888/unsupported_url_handler" - "http://default2:8888/unsupported_url_handler" -# Requests without Authorization header are routed according to `unauthorized_user` section. -unauthorized_user: - url_map: - - src_paths: - - /api/v1/query - - /api/v1/query_range - url_prefix: - - http://vmselect1:8481/select/0/prometheus - - http://vmselect2:8481/select/0/prometheus - ip_filters: - allow_list: [8.8.8.8] - ip_filters: allow_list: ["1.2.3.0/24", "127.0.0.1"] deny_list: diff --git a/app/vmauth/main.go b/app/vmauth/main.go index fa28dcb6e7..9e50366de2 100644 --- a/app/vmauth/main.go +++ b/app/vmauth/main.go @@ -155,7 +155,7 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { u := normalizeURL(r.URL) - up, headersConf := ui.getURLPrefixAndHeaders(u) + up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u) isDefault := false if up == nil { missingRouteRequests.Inc() @@ -163,7 +163,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { httpserver.Errorf(w, r, "missing route for %q", u.String()) return } - up, headersConf = ui.DefaultURL, ui.HeadersConf + up, hc, retryStatusCodes = ui.DefaultURL, ui.HeadersConf, ui.RetryStatusCodes isDefault = true } maxAttempts := up.getBackendsCount() @@ -183,7 +183,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui 
*UserInfo) { } else { // Update path for regular routes. targetURL = mergeURLs(targetURL, u) } - ok := tryProcessingRequest(w, r, targetURL, headersConf) + ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes) bu.put() if ok { return @@ -197,11 +197,11 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { httpserver.Errorf(w, r, "%s", err) } -func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, headersConf HeadersConf) bool { +func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url.URL, hc HeadersConf, retryStatusCodes []int) bool { // This code has been copied from net/http/httputil/reverseproxy.go req := sanitizeRequestHeaders(r) req.URL = targetURL - updateHeadersByConfig(req.Header, headersConf.RequestHeaders) + updateHeadersByConfig(req.Header, hc.RequestHeaders) transportOnce.Do(transportInit) res, err := transport.RoundTrip(req) rtb, rtbOK := req.Body.(*readTrackingBody) @@ -226,22 +226,22 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url remoteAddr := httpserver.GetQuotedRemoteAddr(r) // NOTE: do not use httpserver.GetRequestURI // it explicitly reads request body, which may fail retries. - logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of error: %s", remoteAddr, req.URL, targetURL, err) + logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of response error: %s", remoteAddr, req.URL, targetURL, err) return false } - if res.StatusCode/100 >= 5 && (rtbOK && rtb.canRetry()) { - // Retry requests at other backends on 5xx status codes. + if (rtbOK && rtb.canRetry()) && hasInt(retryStatusCodes, res.StatusCode) { + // Retry requests at other backends if it matches retryStatusCodes. 
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893 remoteAddr := httpserver.GetQuotedRemoteAddr(r) // NOTE: do not use httpserver.GetRequestURI // it explicitly reads request body, which may fail retries. - logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because of unexpected status code: %d; must be smaller than 500", - remoteAddr, req.URL, targetURL, res.StatusCode) + logger.Warnf("remoteAddr: %s; requestURI: %s; retrying the request to %s because response status code=%d belongs to retry_status_codes=%d", + remoteAddr, req.URL, targetURL, res.StatusCode, retryStatusCodes) return false } removeHopHeaders(res.Header) copyHeader(w.Header(), res.Header) - updateHeadersByConfig(w.Header(), headersConf.ResponseHeaders) + updateHeadersByConfig(w.Header(), hc.ResponseHeaders) w.WriteHeader(res.StatusCode) copyBuf := copyBufPool.Get() @@ -257,6 +257,15 @@ func tryProcessingRequest(w http.ResponseWriter, r *http.Request, targetURL *url return true } +func hasInt(a []int, n int) bool { + for _, x := range a { + if x == n { + return true + } + } + return false +} + var copyBufPool bytesutil.ByteBufferPool func copyHeader(dst, src http.Header) { diff --git a/app/vmauth/target_url.go b/app/vmauth/target_url.go index 9dfe7b070e..02022eba0b 100644 --- a/app/vmauth/target_url.go +++ b/app/vmauth/target_url.go @@ -32,18 +32,18 @@ func mergeURLs(uiURL, requestURI *url.URL) *url.URL { return &targetURL } -func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf) { +func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int) { for _, e := range ui.URLMaps { for _, sp := range e.SrcPaths { if sp.match(u.Path) { - return e.URLPrefix, e.HeadersConf + return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes } } } if ui.URLPrefix != nil { - return ui.URLPrefix, ui.HeadersConf + return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes } - return nil, HeadersConf{} + return nil, HeadersConf{}, nil } 
func normalizeURL(uOrig *url.URL) *url.URL { diff --git a/app/vmauth/target_url_test.go b/app/vmauth/target_url_test.go index 9a35d19d33..89b18bada3 100644 --- a/app/vmauth/target_url_test.go +++ b/app/vmauth/target_url_test.go @@ -3,18 +3,19 @@ package main import ( "fmt" "net/url" + "reflect" "testing" ) func TestCreateTargetURLSuccess(t *testing.T) { - f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string) { + f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string, expectedRetryStatusCodes []int) { t.Helper() u, err := url.Parse(requestURI) if err != nil { t.Fatalf("cannot parse %q: %s", requestURI, err) } u = normalizeURL(u) - up, headers := ui.getURLPrefixAndHeaders(u) + up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u) if up == nil { t.Fatalf("cannot determie backend: %s", err) } @@ -24,15 +25,18 @@ func TestCreateTargetURLSuccess(t *testing.T) { if target.String() != expectedTarget { t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget) } - headersStr := fmt.Sprintf("%q", headers.RequestHeaders) + headersStr := fmt.Sprintf("%q", hc.RequestHeaders) if headersStr != expectedRequestHeaders { - t.Fatalf("unexpected headers; got %s; want %s", headersStr, expectedRequestHeaders) + t.Fatalf("unexpected request headers; got %s; want %s", headersStr, expectedRequestHeaders) + } + if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) { + t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes) } } // Simple routing with `url_prefix` f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar"), - }, "", "http://foo.bar/.", "[]", "[]") + }, "", "http://foo.bar/.", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar"), HeadersConf: HeadersConf{ @@ -41,22 +45,23 @@ func TestCreateTargetURLSuccess(t *testing.T) { Value: "aaa", }}, }, - }, "/", "http://foo.bar", `[{"bb" "aaa"}]`, 
`[]`) + RetryStatusCodes: []int{503, 501}, + }, "/", "http://foo.bar", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar/federate"), - }, "/", "http://foo.bar/federate", "[]", "[]") + }, "/", "http://foo.bar/federate", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar"), - }, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]") + }, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("https://sss:3894/x/y"), - }, "/z", "https://sss:3894/x/y/z", "[]", "[]") + }, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("https://sss:3894/x/y"), - }, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]") + }, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("https://sss:3894/x/y"), - }, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]") + }, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil) // Complex routing with `url_map` ui := &UserInfo{ @@ -82,6 +87,7 @@ func TestCreateTargetURLSuccess(t *testing.T) { }, }, }, + RetryStatusCodes: []int{503, 500, 501}, }, { SrcPaths: getSrcPaths([]string{"/api/v1/write"}), @@ -99,10 +105,11 @@ func TestCreateTargetURLSuccess(t *testing.T) { Value: "y", }}, }, + RetryStatusCodes: []int{502}, } - f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`) - f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]") - f(ui, "/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`) + f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}) + f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil) + f(ui, 
"/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}) // Complex routing regexp paths in `url_map` ui = &UserInfo{ @@ -118,17 +125,17 @@ func TestCreateTargetURLSuccess(t *testing.T) { }, URLPrefix: mustParseURL("http://default-server"), } - f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]") - f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]") - f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]") - f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]") - f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]") + f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil) + f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil) + f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil) + f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil) + f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"), - }, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]") + }, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"), - }, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]") + }, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil) } func TestCreateTargetURLFailure(t *testing.T) { @@ -139,15 +146,18 @@ func TestCreateTargetURLFailure(t *testing.T) { t.Fatalf("cannot parse %q: %s", 
requestURI, err) } u = normalizeURL(u) - up, headers := ui.getURLPrefixAndHeaders(u) + up, hc, retryStatusCodes := ui.getURLPrefixAndHeaders(u) if up != nil { t.Fatalf("unexpected non-empty up=%#v", up) } - if headers.RequestHeaders != nil { - t.Fatalf("unexpected non-empty headers=%q", headers) + if hc.RequestHeaders != nil { + t.Fatalf("unexpected non-empty request headers=%q", hc.RequestHeaders) } - if headers.ResponseHeaders != nil { - t.Fatalf("unexpected non-empty headers=%q", headers) + if hc.ResponseHeaders != nil { + t.Fatalf("unexpected non-empty response headers=%q", hc.ResponseHeaders) + } + if retryStatusCodes != nil { + t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes) } } f(&UserInfo{}, "/foo/bar") diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c750dd1968..5d8e3c07de 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -42,7 +42,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components * FEATURE: dashboards: provide copies of Grafana dashboards alternated with VictoriaMetrics datasource at [dashboards/vm](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/dashboards/vm). * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): added ability to set, override and clear request and response headers on a per-user and per-path basis. See [this i ssue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4825) and [these docs](https://docs.victoriametrics.com/vmauth.html#auth-config) for details. -* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): automatically retry requests to the remaining backends if they return 5xx response codes and if the size of the request body doesn't exceed the value specified by `-maxRequestBodySizeToRetry` command-line flag. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893). 
+* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to retry requests to the [remaining backends](https://docs.victoriametrics.com/vmauth.html#load-balancing) if they return response status codes specified in the `retry_status_codes` list. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `eval_offset` attribute for [Groups](https://docs.victoriametrics.com/vmalert.html#groups). If specified, Group will be evaluated at the exact time offset on the range of [0...evaluationInterval]. The setting might be useful for cron-like rules which must be evaluated at specific moments of time. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3409) for details. * FEATURE: limit the length of string params in log messages to 500 chars. Longer string params are replaced with the `first_250_chars..last_250_chars`. This prevents from too long log lines, which can be emitted by VictoriaMetrics components. diff --git a/docs/vmauth.md b/docs/vmauth.md index cbe8b6520a..d6e631a7fd 100644 --- a/docs/vmauth.md +++ b/docs/vmauth.md @@ -46,9 +46,42 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls. In the latter case `vmauth` balances load among the configured urls in least-loaded round-robin manner. -`vmauth` retries failing `GET` requests across the configured list of urls. -This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes -in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). + +If the backend at the configured url isn't available, then `vmauth` tries sending the request to the remaining configured urls. 
+ +It is possible to configure automatic retry of requests if the backend responds with status code from optional `retry_status_codes` list. + +Load balancing feature can be used in the following cases: + +- Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). + The following `-auth.config` file can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests + or requests with 500 and 502 response status codes: + + ```yml + unauthorized_user: + url_prefix: + - http://vmselect1:8481/ + - http://vmselect2:8481/ + - http://vmselect3:8481/ + retry_status_codes: [500, 502] + ``` + +- Spreading select queries among multiple availability zones (AZs) with identical data. For example, the following config spreads select queries + among 3 AZs. Requests are re-tried if some AZs are temporarily unavailable or if some `vmstorage` nodes in some AZs are temporarily unavailable. + `vmauth` adds `deny_partial_response=1` query arg to all the queries in order to guarantee to get full response from every AZ. + See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details. + + ```yml + unauthorized_user: + url_prefix: + - https://vmselect-az1/?deny_partial_response=1 + - https://vmselect-az2/?deny_partial_response=1 + - https://vmselect-az3/?deny_partial_response=1 + retry_status_codes: [500, 502, 503] + ``` + +Load balancing can also be configured independently per each user and per each `url_map` entry. +See [auth config docs](#auth-config) for more details. ## Concurrency limiting @@ -188,6 +221,7 @@ users: # - http://vmselect2:8481/select/42/prometheus # For example, http://vmauth:8427/api/v1/query is proxied to http://vmselect1:8480/select/42/prometheus/api/v1/query # or to http://vmselect2:8480/select/42/prometheus/api/v1/query . 
+ # Requests are re-tried at other url_prefix backends if response status codes match 500 or 502. # # - Requests to http://vmauth:8427/api/v1/write are proxied to http://vminsert:8480/insert/42/prometheus/api/v1/write . # The "X-Scope-OrgID: abc" http header is added to these requests. @@ -207,6 +241,7 @@ users: url_prefix: - "http://vmselect1:8481/select/42/prometheus" - "http://vmselect2:8481/select/42/prometheus" + retry_status_codes: [500, 502] - src_paths: ["/api/v1/write"] url_prefix: "http://vminsert:8480/insert/42/prometheus" headers: @@ -220,16 +255,14 @@ users: - "http://default2:8888/unsupported_url_handler" # Requests without Authorization header are routed according to `unauthorized_user` section. +# Requests are routed in round-robin fashion between `url_prefix` backends. +# The deny_partial_response query arg is added to all the routed requests. +# The requests are re-tried if url_prefix backends send 500 or 503 response status codes. unauthorized_user: - url_map: - - src_paths: - - /api/v1/query - - /api/v1/query_range - url_prefix: - - http://vmselect1:8481/select/0/prometheus - - http://vmselect2:8481/select/0/prometheus - ip_filters: - allow_list: [8.8.8.8] + url_prefix: + - http://vmselect-az1/?deny_partial_response=1 + - http://vmselect-az2/?deny_partial_response=1 + retry_status_codes: [503, 500] ip_filters: allow_list: ["1.2.3.0/24", "127.0.0.1"] @@ -435,6 +468,9 @@ See the docs at https://docs.victoriametrics.com/vmauth.html . The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000) -maxIdleConnsPerBackend int The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100) + -maxRequestBodySizeToRetry size + The maximum request body size, which can be cached and re-tried at other backends. 
Bigger values may require more memory + Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 16384) -memory.allowedBytes size Allowed size of system memory VictoriaMetrics caches may occupy. This option overrides -memory.allowedPercent if set to a non-zero value. Too low a value may increase the cache miss rate usually resulting in higher CPU and disk IO usage. Too high a value may evict too much data from the OS page cache resulting in higher disk IO usage Supports the following optional suffixes for size values: KB, MB, GB, TB, KiB, MiB, GiB, TiB (default 0)