From 042267541f5c0cb5aa1d7b95fb2ce0c19b700c95 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 8 Dec 2023 23:27:53 +0200 Subject: [PATCH] app/vmauth: add support for `hot standby` mode via `first_available` load balancing policy vmauth in `hot standby` mode sends requests to the first url_prefix while it is available. If the first url_prefix becomes unavailable, then vmauth falls back to the next url_prefix. This allows building highly available setup as described at https://docs.victoriametrics.com/vmauth.html#high-availability Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4792 --- app/vmauth/auth_config.go | 156 +++++++++++++++++++++++++++------ app/vmauth/auth_config_test.go | 2 + app/vmauth/main.go | 8 +- app/vmauth/target_url.go | 8 +- app/vmauth/target_url_test.go | 55 ++++++------ docs/CHANGELOG.md | 3 +- docs/vmauth.md | 136 +++++++++++++++++++++++++--- lib/flagutil/array.go | 7 +- lib/flagutil/array_test.go | 12 ++- 9 files changed, 310 insertions(+), 77 deletions(-) diff --git a/app/vmauth/auth_config.go b/app/vmauth/auth_config.go index 9f12b9a42..c70682825 100644 --- a/app/vmauth/auth_config.go +++ b/app/vmauth/auth_config.go @@ -20,6 +20,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" @@ -30,6 +31,10 @@ var ( "See https://docs.victoriametrics.com/vmauth.html for details on the format of this auth config") configCheckInterval = flag.Duration("configCheckInterval", 0, "interval for config file re-read. "+ "Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.") + defaultRetryStatusCodes = flagutil.NewArrayInt("retryStatusCodes", 0, "Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. "+ + "See https://docs.victoriametrics.com/vmauth.html#load-balancing for details") + defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+ + "Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/vmauth.html#load-balancing for more details") ) // AuthConfig represents auth config. @@ -50,6 +55,7 @@ type UserInfo struct { MaxConcurrentRequests int `yaml:"max_concurrent_requests,omitempty"` DefaultURL *URLPrefix `yaml:"default_url,omitempty"` RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"` + LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"` DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"` TLSInsecureSkipVerify *bool `yaml:"tls_insecure_skip_verify,omitempty"` TLSCAFile string `yaml:"tls_ca_file,omitempty"` @@ -124,6 +130,7 @@ type URLMap struct { URLPrefix *URLPrefix `yaml:"url_prefix,omitempty"` HeadersConf HeadersConf `yaml:",inline"` RetryStatusCodes []int `yaml:"retry_status_codes,omitempty"` + LoadBalancingPolicy string `yaml:"load_balancing_policy,omitempty"` DropSrcPathPrefixParts int `yaml:"drop_src_path_prefix_parts,omitempty"` } @@ -135,8 +142,28 @@ type SrcPath struct { // URLPrefix represents passed `url_prefix` type URLPrefix struct { - n uint32 + n uint32 + + // the list of backend urls bus []*backendURL + + // requests are re-tried on other backend urls for these http response status codes + retryStatusCodes []int + + // load balancing policy used + loadBalancingPolicy string +} + +func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error { + switch loadBalancingPolicy { + case "", // empty string is equivalent to least_loaded + "least_loaded", + "first_available": + up.loadBalancingPolicy = loadBalancingPolicy + return nil + default: + return fmt.Errorf("unexpected load_balancing_policy: %q; want least_loaded or first_available", loadBalancingPolicy) + } } type backendURL struct { @@ -155,6 +182,10 @@ func (bu *backendURL) setBroken() { atomic.StoreUint64(&bu.brokenDeadline, deadline) } +func (bu *backendURL) get() { + atomic.AddInt32(&bu.concurrentRequests, 1) +} + func (bu *backendURL) put() { atomic.AddInt32(&bu.concurrentRequests, -1) } @@ -163,6 +194,40 @@ func (up *URLPrefix) getBackendsCount() int { return len(up.bus) } +// getBackendURL returns the backendURL depending on the load balance policy. +// +// backendURL.put() must be called on the returned backendURL after the request is complete. +func (up *URLPrefix) getBackendURL() *backendURL { + if up.loadBalancingPolicy == "first_available" { + return up.getFirstAvailableBackendURL() + } + return up.getLeastLoadedBackendURL() +} + +// getFirstAvailableBackendURL returns the first available backendURL, which isn't broken. +// +// backendURL.put() must be called on the returned backendURL after the request is complete. +func (up *URLPrefix) getFirstAvailableBackendURL() *backendURL { + bus := up.bus + + bu := bus[0] + if !bu.isBroken() { + // Fast path - send the request to the first url. + bu.get() + return bu + } + + // Slow path - the first url is temporarily unavailabel. Fall back to the remaining urls. + for i := 1; i < len(bus); i++ { + if !bus[i].isBroken() { + bu = bus[i] + break + } + } + bu.get() + return bu +} + // getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests. // // backendURL.put() must be called on the returned backendURL after the request is complete. @@ -171,7 +236,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL { if len(bus) == 1 { // Fast path - return the only backend url. bu := bus[0] - atomic.AddInt32(&bu.concurrentRequests, 1) + bu.get() return bu } @@ -202,7 +267,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL { minRequests = n } } - atomic.AddInt32(&buMin.concurrentRequests, 1) + buMin.get() return buMin } @@ -212,6 +277,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error { if err := f(&v); err != nil { return err } + var urls []string switch x := v.(type) { case string: @@ -232,6 +298,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error { default: return fmt.Errorf("unexpected type for `url_prefix`: %T; want string or []string", v) } + bus := make([]*backendURL, len(urls)) for i, u := range urls { pu, err := url.Parse(u) @@ -440,6 +507,9 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) { if ui.Name != "" { return nil, fmt.Errorf("field name can't be specified for unauthorized_user section") } + if err := ui.initURLs(); err != nil { + return nil, err + } ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total`) ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds`) ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests()) @@ -480,30 +550,11 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) { if byAuthToken[at2] != nil { return nil, fmt.Errorf("duplicate auth token found for bearer_token=%q, username=%q: %q", ui.BearerToken, ui.Username, at2) } - if ui.URLPrefix != nil { - if err := ui.URLPrefix.sanitize(); err != nil { - return nil, err - } - } - if ui.DefaultURL != nil { - if err := ui.DefaultURL.sanitize(); err != nil { - return nil, err - } - } - for _, e := range ui.URLMaps { - if len(e.SrcPaths) == 0 { - return nil, fmt.Errorf("missing `src_paths` in `url_map`") - } - if e.URLPrefix == nil { - return nil, fmt.Errorf("missing `url_prefix` in `url_map`") - } - if err := e.URLPrefix.sanitize(); err != nil { - return nil, err - } - } - if len(ui.URLMaps) == 0 && ui.URLPrefix == nil { - return nil, fmt.Errorf("missing `url_prefix`") + + if err := ui.initURLs(); err != nil { + return nil, err } + name := ui.name() if ui.BearerToken != "" { if ui.Password != "" { @@ -525,6 +576,7 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) { _ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 { return float64(len(ui.concurrencyLimitCh)) }) + tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile) if err != nil { return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err) @@ -537,6 +589,58 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) { return byAuthToken, nil } +func (ui *UserInfo) initURLs() error { + retryStatusCodes := defaultRetryStatusCodes.Values() + loadBalancingPolicy := *defaultLoadBalancingPolicy + if ui.URLPrefix != nil { + if err := ui.URLPrefix.sanitize(); err != nil { + return err + } + if len(ui.RetryStatusCodes) > 0 { + retryStatusCodes = ui.RetryStatusCodes + } + if ui.LoadBalancingPolicy != "" { + loadBalancingPolicy = ui.LoadBalancingPolicy + } + ui.URLPrefix.retryStatusCodes = retryStatusCodes + if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil { + return err + } + } + if ui.DefaultURL != nil { + if err := ui.DefaultURL.sanitize(); err != nil { + return err + } + } + for _, e := range ui.URLMaps { + if len(e.SrcPaths) == 0 { + return fmt.Errorf("missing `src_paths` in `url_map`") + } + if e.URLPrefix == nil { + return fmt.Errorf("missing `url_prefix` in `url_map`") + } + if err := e.URLPrefix.sanitize(); err != nil { + return err + } + rscs := retryStatusCodes + lbp := loadBalancingPolicy + if len(e.RetryStatusCodes) > 0 { + rscs = e.RetryStatusCodes + } + if e.LoadBalancingPolicy != "" { + lbp = e.LoadBalancingPolicy + } + e.URLPrefix.retryStatusCodes = rscs + if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil { + return err + } + } + if len(ui.URLMaps) == 0 && ui.URLPrefix == nil { + return fmt.Errorf("missing `url_prefix`") + } + return nil +} + func (ui *UserInfo) name() string { if ui.Name != "" { return ui.Name diff --git a/app/vmauth/auth_config_test.go b/app/vmauth/auth_config_test.go index 4d2882b2a..73d30c720 100644 --- a/app/vmauth/auth_config_test.go +++ b/app/vmauth/auth_config_test.go @@ -250,6 +250,7 @@ users: - http://node2:343/bbb tls_insecure_skip_verify: false retry_status_codes: [500, 501] + load_balancing_policy: first_available drop_src_path_prefix_parts: 1 `, map[string]*UserInfo{ getAuthToken("", "foo", "bar"): { @@ -261,6 +262,7 @@ users: }), TLSInsecureSkipVerify: &insecureSkipVerifyFalse, RetryStatusCodes: []int{500, 501}, + LoadBalancingPolicy: "first_available", DropSrcPathPrefixParts: 1, }, }) diff --git a/app/vmauth/main.go b/app/vmauth/main.go index 0747e07ff..197b640f7 100644 --- a/app/vmauth/main.go +++ b/app/vmauth/main.go @@ -164,7 +164,7 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { u := normalizeURL(r.URL) - up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u) + up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u) isDefault := false if up == nil { if ui.DefaultURL == nil { @@ -180,7 +180,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { httpserver.Errorf(w, r, "missing route for %q", u.String()) return } - up, hc, retryStatusCodes = ui.DefaultURL, ui.HeadersConf, ui.RetryStatusCodes + up, hc = ui.DefaultURL, ui.HeadersConf isDefault = true } maxAttempts := up.getBackendsCount() @@ -190,7 +190,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { } } for i := 0; i < maxAttempts; i++ { - bu := up.getLeastLoadedBackendURL() + bu := up.getBackendURL() targetURL := bu.url // Don't change path and add request_path query param for default route. if isDefault { @@ -200,7 +200,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) { } else { // Update path for regular routes. targetURL = mergeURLs(targetURL, u, dropSrcPathPrefixParts) } - ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes, ui.httpTransport) + ok := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui.httpTransport) bu.put() if ok { return diff --git a/app/vmauth/target_url.go b/app/vmauth/target_url.go index 46bb4f395..e1ce736d1 100644 --- a/app/vmauth/target_url.go +++ b/app/vmauth/target_url.go @@ -49,18 +49,18 @@ func dropPrefixParts(path string, parts int) string { return path } -func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int, int) { +func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, int) { for _, e := range ui.URLMaps { for _, sp := range e.SrcPaths { if sp.match(u.Path) { - return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes, e.DropSrcPathPrefixParts + return e.URLPrefix, e.HeadersConf, e.DropSrcPathPrefixParts } } } if ui.URLPrefix != nil { - return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes, ui.DropSrcPathPrefixParts + return ui.URLPrefix, ui.HeadersConf, ui.DropSrcPathPrefixParts } - return nil, HeadersConf{}, nil, 0 + return nil, HeadersConf{}, 0 } func normalizeURL(uOrig *url.URL) *url.URL { diff --git a/app/vmauth/target_url_test.go b/app/vmauth/target_url_test.go index b8aa3eb94..5b6b0ec38 100644 --- a/app/vmauth/target_url_test.go +++ b/app/vmauth/target_url_test.go @@ -79,14 +79,17 @@ func TestDropPrefixParts(t *testing.T) { func TestCreateTargetURLSuccess(t *testing.T) { f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string, - expectedRetryStatusCodes []int, expectedDropSrcPathPrefixParts int) { + expectedRetryStatusCodes []int, expectedLoadBalancingPolicy string, expectedDropSrcPathPrefixParts int) { t.Helper() + if err := ui.initURLs(); err != nil { + t.Fatalf("cannot initialize urls inside UserInfo: %s", err) + } u, err := url.Parse(requestURI) if err != nil { t.Fatalf("cannot parse %q: %s", requestURI, err) } u = normalizeURL(u) - up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u) + up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u) if up == nil { t.Fatalf("cannot determie backend: %s", err) } @@ -100,8 +103,11 @@ func TestCreateTargetURLSuccess(t *testing.T) { if headersStr != expectedRequestHeaders { t.Fatalf("unexpected request headers; got %s; want %s", headersStr, expectedRequestHeaders) } - if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) { - t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes) + if !reflect.DeepEqual(up.retryStatusCodes, expectedRetryStatusCodes) { + t.Fatalf("unexpected retryStatusCodes; got %d; want %d", up.retryStatusCodes, expectedRetryStatusCodes) + } + if up.loadBalancingPolicy != expectedLoadBalancingPolicy { + t.Fatalf("unexpected loadBalancingPolicy; got %q; want %q", up.loadBalancingPolicy, expectedLoadBalancingPolicy) } if dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts { t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts) @@ -110,7 +116,7 @@ func TestCreateTargetURLSuccess(t *testing.T) { // Simple routing with `url_prefix` f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar"), - }, "", "http://foo.bar/.", "[]", "[]", nil, 0) + }, "", "http://foo.bar/.", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar"), HeadersConf: HeadersConf{ @@ -120,23 +126,24 @@ func TestCreateTargetURLSuccess(t *testing.T) { }}, }, RetryStatusCodes: []int{503, 501}, + LoadBalancingPolicy: "first_available", DropSrcPathPrefixParts: 2, - }, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, 2) + }, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, "first_available", 2) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar/federate"), - }, "/", "http://foo.bar/federate", "[]", "[]", nil, 0) + }, "/", "http://foo.bar/federate", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar"), - }, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, 0) + }, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("https://sss:3894/x/y"), - }, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, 0) + }, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("https://sss:3894/x/y"), - }, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, 0) + }, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("https://sss:3894/x/y"), - }, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, 0) + }, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, "least_loaded", 0) // Complex routing with `url_map` ui := &UserInfo{ @@ -163,6 +170,7 @@ func TestCreateTargetURLSuccess(t *testing.T) { }, }, RetryStatusCodes: []int{503, 500, 501}, + LoadBalancingPolicy: "first_available", DropSrcPathPrefixParts: 1, }, { @@ -184,9 +192,9 @@ func TestCreateTargetURLSuccess(t *testing.T) { RetryStatusCodes: []int{502}, DropSrcPathPrefixParts: 2, } - f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, 1) - f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0) - f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, 2) + f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, "first_available", 1) + f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", []int{502}, "least_loaded", 0) + f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, "least_loaded", 2) // Complex routing regexp paths in `url_map` ui = &UserInfo{ @@ -202,17 +210,17 @@ func TestCreateTargetURLSuccess(t *testing.T) { }, URLPrefix: mustParseURL("http://default-server"), } - f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, 0) - f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, 0) - f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, 0) - f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0) - f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, 0) + f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, "least_loaded", 0) + f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, "least_loaded", 0) + f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, "least_loaded", 0) + f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, "least_loaded", 0) + f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"), - }, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, 0) + }, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, "least_loaded", 0) f(&UserInfo{ URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"), - }, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, 0) + }, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, "least_loaded", 0) } func TestCreateTargetURLFailure(t *testing.T) { @@ -223,7 +231,7 @@ func TestCreateTargetURLFailure(t *testing.T) { t.Fatalf("cannot parse %q: %s", requestURI, err) } u = normalizeURL(u) - up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u) + up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u) if up != nil { t.Fatalf("unexpected non-empty up=%#v", up) } @@ -233,9 +241,6 @@ func TestCreateTargetURLFailure(t *testing.T) { if hc.ResponseHeaders != nil { t.Fatalf("unexpected non-empty response headers=%q", hc.ResponseHeaders) } - if retryStatusCodes != nil { - t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes) - } if dropSrcPathPrefixParts != 0 { t.Fatalf("unexpected non-zero dropSrcPathPrefixParts=%d", dropSrcPathPrefixParts) } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index aa762fb13..b240b7ad2 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -33,6 +33,7 @@ The sandbox cluster installation is running under the constant load generated by * SECURITY: upgrade base docker image (Alpine) from 3.18.4 to 3.18.5. See [alpine 3.18.5 release notes](https://www.alpinelinux.org/posts/Alpine-3.15.11-3.16.8-3.17.6-3.18.5-released.html). * SECURITY: upgrade Go builder from Go1.21.4 to Go1.21.5. See [the list of issues addressed in Go1.21.5](https://github.com/golang/go/issues?q=milestone%3AGo1.21.5+label%3ACherryPickApproved). +* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to send requests to the first available backend and fall back to other `hot standby` backends when the first backend is unavailable. This allows building highly available setups as shown in [these docs](https://docs.victoriametrics.com/vmauth.html#high-availability). * FEATURE: `vmselect`: allow opening [vmui](https://docs.victoriametrics.com/#vmui) and investigating [Top queries](https://docs.victoriametrics.com/#top-queries) and [Active queries](https://docs.victoriametrics.com/#active-queries) when the `vmselect` is overloaded with concurrent queries (e.g. when more than `-search.maxConcurrentRequests` concurrent queries are executed). Previously an attempt to open `Top queries` or `Active queries` at `vmui` could result in `couldn't start executing the request in ... seconds, since -search.maxConcurrentRequests=... concurrent requests are executed` error, which could complicate debugging of overloaded `vmselect` or single-node VictoriaMetrics. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-enableMultitenantHandlers` command-line flag, which allows receiving data via [VictoriaMetrics cluster urls](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) at `vmagent` and converting [tenant ids](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) to (`vm_account_id`, `vm_project_id`) labels before sending the data to the configured `-remoteWrite.url`. See [these docs](https://docs.victoriametrics.com/vmagent.html#multitenancy) for details. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110). @@ -42,7 +43,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `keep_if_contains` and `drop_if_contains` relabeling actions. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) for details. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): export `vm_promscrape_scrape_pool_targets` [metric](https://docs.victoriametrics.com/vmagent.html#monitoring) to track the number of targets each scrape job discovers. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5311). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): provide `/vmalert/api/v1/rule` and `/api/v1/rule` API endpoints to get the rule object in JSON format. See [these docs](https://docs.victoriametrics.com/vmalert.html#web) for details. -* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): deprecate process gauge metrics `vmalert_alerting_rules_error` and `vmalert_recording_rules_error` in favour of `vmalert_alerting_rules_errors_total` and `vmalert_recording_rules_errors_total` counter metrics. [Counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric type is more suitable for error counting as it preserves the state change between the scrapes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5160) for details. +* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): deprecate process gauge metrics `vmalert_alerting_rules_error` and `vmalert_recording_rules_error` in favour of `vmalert_alerting_rules_errors_total` and `vmalert_recording_rules_errors_total` counter metrics. [Counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric type is more suitable for error counting as it preserves the state change between the scrapes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5160) for details. * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [day_of_year()](https://docs.victoriametrics.com/MetricsQL.html#day_of_year) function, which returns the day of the year for each of the given unix timestamps. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5345) for details. Thanks to @luckyxiaoqiang for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5368/). * FEATURE: all VictoriaMetrics binaries: expose additional metrics at `/metrics` page, which may simplify debugging of VictoriaMetrics components (see [this feature request](https://github.com/VictoriaMetrics/metrics/issues/54)): * `go_sched_latencies_seconds` - the [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram), which shows the time goroutines have spent in runnable state before actually running. Big values point to the lack of CPU time for the current workload. diff --git a/docs/vmauth.md b/docs/vmauth.md index dd80fb3dc..0a3b9a351 100644 --- a/docs/vmauth.md +++ b/docs/vmauth.md @@ -53,6 +53,7 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co * [Generic HTTP load balancer](#generic-http-load-balancer) * [Load balancer for vmagent](#load-balancer-for-vmagent) * [Load balancer for VictoriaMetrics cluster](#load-balancer-for-victoriametrics-cluster) +* [High availability](#high-availability) * [TLS termination proxy](#tls-termination-proxy) * [Basic Auth proxy](#basic-auth-proxy) * [Bearer Token auth proxy](#bearer-token-auth-proxy) @@ -69,6 +70,8 @@ unauthorized_user: url_prefix: "http://backend/" ``` +`vmauth` can balance load among multiple backends - see [these docs](#load-balancing) for details. + ### Generic HTTP proxy for different backends `vmauth` can proxy requests to different backends depending on the requested path. @@ -95,6 +98,9 @@ unauthorized_user: default_url: http://some-backend/404-page.html ``` +See [these docs](#dropping-request-path-prefix) for more details. + + ### Generic HTTP load balancer `vmauth` can balance load among multiple HTTP backends in least-loaded round-robin mode. @@ -108,6 +114,8 @@ unauthorized_user: - "http://app-instance-3/" ``` +See [load balancing docs](#load-balancing) for more details. + ### Load balancer for vmagent If [vmagent](https://docs.victoriametrics.com/vmagent.html) is used for processing [data push requests](https://docs.victoriametrics.com/vmagent.html#how-to-push-data-to-vmagent), @@ -128,6 +136,8 @@ unauthorized_user: - "http://vmagent-3:8429/" ``` +See [load balancing docs](#load-balancing) for more details. + ### Load balancer for VictoriaMetrics cluster [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) accepts incoming data via `vminsert` nodes @@ -150,6 +160,27 @@ unauthorized_user: - "http://vmselect-2:8481/" ``` +See [load balancing docs](#load-balancing) for more details. + +### High availability + +`vmauth` automatically switches from temporarily unavailable backend to other hot standby backends listed in `url_prefix` +if it runs with `-loadBalancingPolicy=first_available` command-line flag. The load balancing policy can be overridden at `user` and `url_map` sections +of [`-auth.config`](#auth-config) via `load_balancing_policy` option. For example, the following config instructs `vmauth` to proxy requests to `http://victoria-metrics-main:8428/` backend. +If this backend becomes unavailable, then `vmauth` starts proxying requests to `http://victoria-metrics-standby1:8428/`. +If this backend becomes also unavailable, then requests are proxied to the last specified backend - `http://victoria-metrics-standby2:8428/`: + +```yml +unauthorized_user: + url_prefix: + - "http://victoria-metrics-main:8428/" + - "http://victoria-metrics-standby1:8428/" + - "http://victoria-metrics-standby2:8428/" + load_balancing_policy: first_available +``` + +See [load-balancing docs](#load-balancing) for more details. + ### TLS termination proxy `vmauth` can terminate HTTPS requests to backend services when it runs with the following command-line flags: @@ -273,17 +304,66 @@ users: ## Load balancing -Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls. -In the latter case `vmauth` balances load among the configured urls in least-loaded round-robin manner. +Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the following forms: -If the backend at the configured url isn't available, then `vmauth` tries sending the request to the remaining configured urls. +- A single url. For example: -It is possible to configure automatic retry of requests if the backend responds with status code from optional `retry_status_codes` list. + ```yml + unauthorized_user: + url_prefix: 'http://vminsert:8480/insert/0/prometheus/` + ``` + + In this case `vmauth` proxies requests to the specified url. + +- A list of urls. For example: + + ```yml + unauthorized_user: + url_prefix: + - 'http://vminsert-1:8480/insert/0/prometheus/' + - 'http://vminsert-2:8480/insert/0/prometheus/' + - 'http://vminsert-3:8480/insert/0/prometheus/' + ``` + + In this case `vmauth` spreads requests among the specified urls using least-loaded round-robin policy. + This guarantees that incoming load is shared uniformly among the specified backends. + + `vmauth` automatically detects temporarily unavailable backends and spreads incoming queries among the remaining available backends. + This allows restarting the backends and peforming mantenance tasks on the backends without the need to remove them from the `url_prefix` list. + + By default `vmauth` returns backend responses with all the http status codes to the client. It is possible to configure automatic retry of requests + at other backends if the backend responds with status code specified in the `-retryStatusCodes` command-line flag. + It is possible to customize the list of http response status codes to retry via `retry_status_codes` list at `user` and `url_map` level of [`-auth.config`](#auth-config). + For example, the following config re-tries requests on other backends if the current backend returns response with `500` or `502` HTTP status code: + + ```yml + unauthorized_user: + url_prefix: + - http://vmselect1:8481/ + - http://vmselect2:8481/ + - http://vmselect3:8481/ + retry_status_codes: [500, 502] + ``` + + By default `vmauth` uses `least_loaded` policy for spreading incoming requests among available backends. + The policy can be changed to `first_available` via `-loadBalancingPolicy` command-line flag. In this case `vmauth` + sends all the requests to the first specified backend while it is available. `vmauth` starts sending requests to the next + specified backend when the first backend is temporarily unavailable. + It is possible to customize the load balancing policy at the `user` and `url_map` level. + For example, the following config specifies `first_available` load balancing policy for unauthorized requests: + + ```yml + unauthorized_user: + url_prefix: + - http://victoria-metrics-main:8428/ + - http://victoria-metrics-standby:8428/ + load_balancing_policy: first_available + ``` Load balancing feature can be used in the following cases: - Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html). - The following `-auth.config` file can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests + The following [`-auth.config`](#auth-config) can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests or requests with 500 and 502 response status codes: ```yml @@ -295,22 +375,49 @@ Load balancing feature can be used in the following cases: retry_status_codes: [500, 502] ``` -- Spreading select queries among multiple availability zones (AZs) with identical data. For example, the following config spreads select queries - among 3 AZs. Requests are re-tried if some AZs are temporarily unavailable or if some `vmstorage` nodes in some AZs are temporarily unavailable. - `vmauth` adds `deny_partial_response=1` query arg to all the queries in order to guarantee to get full response from every AZ. - See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details. +- Sending select queries to the closest availability zone (AZ), while falling back to other AZs with identical data if the closest AZ is unavaialable. + For example, the following [`-auth.config`](#auth-config) sends select queries to `https://vmselect-az1/` and uses the `https://vmselect-az2/` as a fallback + when `https://vmselect-az1/` is temporarily unavailable or cannot return full responses. + See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details about `deny_partial_response` query arg, + which is added to requests before they are proxied to backends. ```yml unauthorized_user: url_prefix: - https://vmselect-az1/?deny_partial_response=1 - https://vmselect-az2/?deny_partial_response=1 - - https://vmselect-az3/?deny_partial_response=1 retry_status_codes: [500, 502, 503] + load_balancing_policy: first_available ``` -Load balancig can also be configured independently per each user and per each `url_map` entry. -See [auth config docs](#auth-config) for more details. +Load balancig can be configured independently per each `user` entry and per each `url_map` entry. See [auth config docs](#auth-config) for more details. + +### Modifying HTTP headers + +`vmauth` supports the ability to set and remove HTTP request headers before sending the requests to backends. +This is done via `headers` option. For example, the following [`-auth.config`](#auth-config) adds `TenantID: foobar` header +to requests proxied to `http://backend:1234/`. It also overrides `X-Forwarded-For` request header with an empty value. This effectively +removes the `X-Forwarded-For` header from requests proxied to `http://backend:1234/`: + +```yml +unauthorized_user: + url_prefix: "http://backend:1234/" + headers: + - "TenantID: foobar" + - "X-Forwarded-For:" +``` + +`vmauth` also supports the ability to set and remove HTTP response headers before returning the response from the backend to client. +This is done via `response_headers` option. For example, the following [`-auth.config`](#auth-config) adds `Foo: bar` response header +and removes `Server` response header before returning the response to client: + +```yml +unauthorized_user: + url_prefix: "http://backend:1234/" + response_headers: + - "Foo: bar" + - "Server:" +``` ## Concurrency limiting @@ -719,6 +826,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html . Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries -licenseFile string Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag + -loadBalancingPolicy string + The default load balancing policy to use for backend urls specified inside url_prefix section. Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/vmauth.html#load-balancing for more details (default "least_loaded") -logInvalidAuthTokens Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page -loggerDisableTimestamps @@ -767,6 +876,9 @@ See the docs at https://docs.victoriametrics.com/vmauth.html . Auth key for /-/reload http endpoint. It must be passed as authKey=... -responseTimeout duration The timeout for receiving a response from backend (default 5m0s) + -retryStatusCodes array + Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. See https://docs.victoriametrics.com/vmauth.html#load-balancing for details (default 0) + Supports array of values separated by comma or specified via multiple flags. -tls Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set -tlsCertFile string diff --git a/lib/flagutil/array.go b/lib/flagutil/array.go index d90bfa680..8f008bb34 100644 --- a/lib/flagutil/array.go +++ b/lib/flagutil/array.go @@ -224,7 +224,7 @@ func (a *ArrayString) GetOptionalArg(argIdx int) string { // Has the same api as ArrayString. type ArrayBool []bool -// IsBoolFlag implements flag.IsBoolFlag interface +// IsBoolFlag implements flag.IsBoolFlag interface func (a *ArrayBool) IsBoolFlag() bool { return true } // String implements flag.Value interface @@ -312,6 +312,11 @@ type ArrayInt struct { a []int } +// Values returns all the values for a. +func (a *ArrayInt) Values() []int { + return a.a +} + // String implements flag.Value interface func (a *ArrayInt) String() string { x := a.a diff --git a/lib/flagutil/array_test.go b/lib/flagutil/array_test.go index 848fe1b54..ec4d87d76 100644 --- a/lib/flagutil/array_test.go +++ b/lib/flagutil/array_test.go @@ -286,7 +286,7 @@ func TestArrayInt(t *testing.T) { } func TestArrayInt_Set(t *testing.T) { - f := func(s, expectedResult string) { + f := func(s, expectedResult string, expectedValues []int) { t.Helper() var a ArrayInt if err := a.Set(s); err != nil { @@ -296,10 +296,14 @@ func TestArrayInt_Set(t *testing.T) { if result != expectedResult { t.Fatalf("unexpected values parsed;\ngot\n%q\nwant\n%q", result, expectedResult) } + values := a.Values() + if !reflect.DeepEqual(values, expectedValues) { + t.Fatalf("unexpected values;\ngot\n%d\nwant\n%d", values, expectedValues) + } } - f("", "") - f(`1`, `1`) - f(`-2,3,-64`, `-2,3,-64`) + f("", "", nil) + f(`1`, `1`, []int{1}) + f(`-2,3,-64`, `-2,3,-64`, []int{-2, 3, -64}) } func TestArrayInt_GetOptionalArg(t *testing.T) {