app/vmauth: add support for hot standby mode via first_available load balancing policy

vmauth in `hot standby` mode sends requests to the first url_prefix while it is available. If the first url_prefix becomes unavailable, then vmauth falls back to the next url_prefix. This allows building highly available setup as described at https://docs.victoriametrics.com/vmauth.html#high-availability Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4893 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4792
2024-11-23 12:31:07 +01:00 · 2023-12-08 23:27:53 +02:00 · 2023-12-08 23:27:53 +02:00 · 042267541f
commit 042267541f
parent b05e1512d4
9 changed files with 310 additions and 77 deletions
--- a/app/vmauth/auth_config.go
+++ b/app/vmauth/auth_config.go
@ -20,6 +20,7 @@ import (

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@ -30,6 +31,10 @@ var (
 		"See https://docs.victoriametrics.com/vmauth.html for details on the format of this auth config")
 	configCheckInterval = flag.Duration("configCheckInterval", 0, "interval for config file re-read. "+
 		"Zero value disables config re-reading. By default, refreshing is disabled, send SIGHUP for config refresh.")
+	defaultRetryStatusCodes = flagutil.NewArrayInt("retryStatusCodes", 0, "Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. "+
+		"See https://docs.victoriametrics.com/vmauth.html#load-balancing for details")
+	defaultLoadBalancingPolicy = flag.String("loadBalancingPolicy", "least_loaded", "The default load balancing policy to use for backend urls specified inside url_prefix section. "+
+		"Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/vmauth.html#load-balancing for more details")
 )

 // AuthConfig represents auth config.
@ -50,6 +55,7 @@ type UserInfo struct {
 	MaxConcurrentRequests  int         `yaml:"max_concurrent_requests,omitempty"`
 	DefaultURL             *URLPrefix  `yaml:"default_url,omitempty"`
 	RetryStatusCodes       []int       `yaml:"retry_status_codes,omitempty"`
+	LoadBalancingPolicy    string      `yaml:"load_balancing_policy,omitempty"`
 	DropSrcPathPrefixParts int         `yaml:"drop_src_path_prefix_parts,omitempty"`
 	TLSInsecureSkipVerify  *bool       `yaml:"tls_insecure_skip_verify,omitempty"`
 	TLSCAFile              string      `yaml:"tls_ca_file,omitempty"`
@ -124,6 +130,7 @@ type URLMap struct {
 	URLPrefix              *URLPrefix  `yaml:"url_prefix,omitempty"`
 	HeadersConf            HeadersConf `yaml:",inline"`
 	RetryStatusCodes       []int       `yaml:"retry_status_codes,omitempty"`
+	LoadBalancingPolicy    string      `yaml:"load_balancing_policy,omitempty"`
 	DropSrcPathPrefixParts int         `yaml:"drop_src_path_prefix_parts,omitempty"`
 }

@ -135,8 +142,28 @@ type SrcPath struct {

 // URLPrefix represents passed `url_prefix`
 type URLPrefix struct {
-	n   uint32
+	n uint32
+
+	// the list of backend urls
 	bus []*backendURL
+
+	// requests are re-tried on other backend urls for these http response status codes
+	retryStatusCodes []int
+
+	// load balancing policy used
+	loadBalancingPolicy string
+}
+
+func (up *URLPrefix) setLoadBalancingPolicy(loadBalancingPolicy string) error {
+	switch loadBalancingPolicy {
+	case "", // empty string is equivalent to least_loaded
+		"least_loaded",
+		"first_available":
+		up.loadBalancingPolicy = loadBalancingPolicy
+		return nil
+	default:
+		return fmt.Errorf("unexpected load_balancing_policy: %q; want least_loaded or first_available", loadBalancingPolicy)
+	}
 }

 type backendURL struct {
@ -155,6 +182,10 @@ func (bu *backendURL) setBroken() {
 	atomic.StoreUint64(&bu.brokenDeadline, deadline)
 }

+func (bu *backendURL) get() {
+	atomic.AddInt32(&bu.concurrentRequests, 1)
+}
+
 func (bu *backendURL) put() {
 	atomic.AddInt32(&bu.concurrentRequests, -1)
 }
@ -163,6 +194,40 @@ func (up *URLPrefix) getBackendsCount() int {
 	return len(up.bus)
 }

+// getBackendURL returns the backendURL depending on the load balance policy.
+//
+// backendURL.put() must be called on the returned backendURL after the request is complete.
+func (up *URLPrefix) getBackendURL() *backendURL {
+	if up.loadBalancingPolicy == "first_available" {
+		return up.getFirstAvailableBackendURL()
+	}
+	return up.getLeastLoadedBackendURL()
+}
+
+// getFirstAvailableBackendURL returns the first available backendURL, which isn't broken.
+//
+// backendURL.put() must be called on the returned backendURL after the request is complete.
+func (up *URLPrefix) getFirstAvailableBackendURL() *backendURL {
+	bus := up.bus
+
+	bu := bus[0]
+	if !bu.isBroken() {
+		// Fast path - send the request to the first url.
+		bu.get()
+		return bu
+	}
+
+	// Slow path - the first url is temporarily unavailabel. Fall back to the remaining urls.
+	for i := 1; i < len(bus); i++ {
+		if !bus[i].isBroken() {
+			bu = bus[i]
+			break
+		}
+	}
+	bu.get()
+	return bu
+}
+
 // getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
 //
 // backendURL.put() must be called on the returned backendURL after the request is complete.
@ -171,7 +236,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
 	if len(bus) == 1 {
 		// Fast path - return the only backend url.
 		bu := bus[0]
-		atomic.AddInt32(&bu.concurrentRequests, 1)
+		bu.get()
 		return bu
 	}

@ -202,7 +267,7 @@ func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
 			minRequests = n
 		}
 	}
-	atomic.AddInt32(&buMin.concurrentRequests, 1)
+	buMin.get()
 	return buMin
 }

@ -212,6 +277,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
 	if err := f(&v); err != nil {
 		return err
 	}
+
 	var urls []string
 	switch x := v.(type) {
 	case string:
@ -232,6 +298,7 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
 	default:
 		return fmt.Errorf("unexpected type for `url_prefix`: %T; want string or []string", v)
 	}
+
 	bus := make([]*backendURL, len(urls))
 	for i, u := range urls {
 		pu, err := url.Parse(u)
@ -440,6 +507,9 @@ func parseAuthConfig(data []byte) (*AuthConfig, error) {
 		if ui.Name != "" {
 			return nil, fmt.Errorf("field name can't be specified for unauthorized_user section")
 		}
+		if err := ui.initURLs(); err != nil {
+			return nil, err
+		}
 		ui.requests = metrics.GetOrCreateCounter(`vmauth_unauthorized_user_requests_total`)
 		ui.requestsDuration = metrics.GetOrCreateSummary(`vmauth_unauthorized_user_request_duration_seconds`)
 		ui.concurrencyLimitCh = make(chan struct{}, ui.getMaxConcurrentRequests())
@ -480,30 +550,11 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
 		if byAuthToken[at2] != nil {
 			return nil, fmt.Errorf("duplicate auth token found for bearer_token=%q, username=%q: %q", ui.BearerToken, ui.Username, at2)
 		}
-		if ui.URLPrefix != nil {
-			if err := ui.URLPrefix.sanitize(); err != nil {
-				return nil, err
-			}
-		}
-		if ui.DefaultURL != nil {
-			if err := ui.DefaultURL.sanitize(); err != nil {
-				return nil, err
-			}
-		}
-		for _, e := range ui.URLMaps {
-			if len(e.SrcPaths) == 0 {
-				return nil, fmt.Errorf("missing `src_paths` in `url_map`")
-			}
-			if e.URLPrefix == nil {
-				return nil, fmt.Errorf("missing `url_prefix` in `url_map`")
-			}
-			if err := e.URLPrefix.sanitize(); err != nil {
-				return nil, err
-			}
-		}
-		if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
-			return nil, fmt.Errorf("missing `url_prefix`")
+
+		if err := ui.initURLs(); err != nil {
+			return nil, err
 		}
+
 		name := ui.name()
 		if ui.BearerToken != "" {
 			if ui.Password != "" {
@ -525,6 +576,7 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
 		_ = metrics.GetOrCreateGauge(fmt.Sprintf(`vmauth_user_concurrent_requests_current{username=%q}`, name), func() float64 {
 			return float64(len(ui.concurrencyLimitCh))
 		})
+
 		tr, err := getTransport(ui.TLSInsecureSkipVerify, ui.TLSCAFile)
 		if err != nil {
 			return nil, fmt.Errorf("cannot initialize HTTP transport: %w", err)
@ -537,6 +589,58 @@ func parseAuthConfigUsers(ac *AuthConfig) (map[string]*UserInfo, error) {
 	return byAuthToken, nil
 }

+func (ui *UserInfo) initURLs() error {
+	retryStatusCodes := defaultRetryStatusCodes.Values()
+	loadBalancingPolicy := *defaultLoadBalancingPolicy
+	if ui.URLPrefix != nil {
+		if err := ui.URLPrefix.sanitize(); err != nil {
+			return err
+		}
+		if len(ui.RetryStatusCodes) > 0 {
+			retryStatusCodes = ui.RetryStatusCodes
+		}
+		if ui.LoadBalancingPolicy != "" {
+			loadBalancingPolicy = ui.LoadBalancingPolicy
+		}
+		ui.URLPrefix.retryStatusCodes = retryStatusCodes
+		if err := ui.URLPrefix.setLoadBalancingPolicy(loadBalancingPolicy); err != nil {
+			return err
+		}
+	}
+	if ui.DefaultURL != nil {
+		if err := ui.DefaultURL.sanitize(); err != nil {
+			return err
+		}
+	}
+	for _, e := range ui.URLMaps {
+		if len(e.SrcPaths) == 0 {
+			return fmt.Errorf("missing `src_paths` in `url_map`")
+		}
+		if e.URLPrefix == nil {
+			return fmt.Errorf("missing `url_prefix` in `url_map`")
+		}
+		if err := e.URLPrefix.sanitize(); err != nil {
+			return err
+		}
+		rscs := retryStatusCodes
+		lbp := loadBalancingPolicy
+		if len(e.RetryStatusCodes) > 0 {
+			rscs = e.RetryStatusCodes
+		}
+		if e.LoadBalancingPolicy != "" {
+			lbp = e.LoadBalancingPolicy
+		}
+		e.URLPrefix.retryStatusCodes = rscs
+		if err := e.URLPrefix.setLoadBalancingPolicy(lbp); err != nil {
+			return err
+		}
+	}
+	if len(ui.URLMaps) == 0 && ui.URLPrefix == nil {
+		return fmt.Errorf("missing `url_prefix`")
+	}
+	return nil
+}
+
 func (ui *UserInfo) name() string {
 	if ui.Name != "" {
 		return ui.Name
--- a/app/vmauth/auth_config_test.go
+++ b/app/vmauth/auth_config_test.go
@ -250,6 +250,7 @@ users:
  - http://node2:343/bbb
  tls_insecure_skip_verify: false
  retry_status_codes: [500, 501]
+  load_balancing_policy: first_available
  drop_src_path_prefix_parts: 1
 `, map[string]*UserInfo{
 		getAuthToken("", "foo", "bar"): {
@ -261,6 +262,7 @@ users:
 			}),
 			TLSInsecureSkipVerify:  &insecureSkipVerifyFalse,
 			RetryStatusCodes:       []int{500, 501},
+			LoadBalancingPolicy:    "first_available",
 			DropSrcPathPrefixParts: 1,
 		},
 	})
--- a/app/vmauth/main.go
+++ b/app/vmauth/main.go
@ -164,7 +164,7 @@ func processUserRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {

 func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
 	u := normalizeURL(r.URL)
-	up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
+	up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
 	isDefault := false
 	if up == nil {
 		if ui.DefaultURL == nil {
@ -180,7 +180,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
 			httpserver.Errorf(w, r, "missing route for %q", u.String())
 			return
 		}
-		up, hc, retryStatusCodes = ui.DefaultURL, ui.HeadersConf, ui.RetryStatusCodes
+		up, hc = ui.DefaultURL, ui.HeadersConf
 		isDefault = true
 	}
 	maxAttempts := up.getBackendsCount()
@ -190,7 +190,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
 		}
 	}
 	for i := 0; i < maxAttempts; i++ {
-		bu := up.getLeastLoadedBackendURL()
+		bu := up.getBackendURL()
 		targetURL := bu.url
 		// Don't change path and add request_path query param for default route.
 		if isDefault {
@ -200,7 +200,7 @@ func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
 		} else { // Update path for regular routes.
 			targetURL = mergeURLs(targetURL, u, dropSrcPathPrefixParts)
 		}
-		ok := tryProcessingRequest(w, r, targetURL, hc, retryStatusCodes, ui.httpTransport)
+		ok := tryProcessingRequest(w, r, targetURL, hc, up.retryStatusCodes, ui.httpTransport)
 		bu.put()
 		if ok {
 			return
--- a/app/vmauth/target_url.go
+++ b/app/vmauth/target_url.go
@ -49,18 +49,18 @@ func dropPrefixParts(path string, parts int) string {
 	return path
 }

-func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, []int, int) {
+func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, HeadersConf, int) {
 	for _, e := range ui.URLMaps {
 		for _, sp := range e.SrcPaths {
 			if sp.match(u.Path) {
-				return e.URLPrefix, e.HeadersConf, e.RetryStatusCodes, e.DropSrcPathPrefixParts
+				return e.URLPrefix, e.HeadersConf, e.DropSrcPathPrefixParts
 			}
 		}
 	}
 	if ui.URLPrefix != nil {
-		return ui.URLPrefix, ui.HeadersConf, ui.RetryStatusCodes, ui.DropSrcPathPrefixParts
+		return ui.URLPrefix, ui.HeadersConf, ui.DropSrcPathPrefixParts
 	}
-	return nil, HeadersConf{}, nil, 0
+	return nil, HeadersConf{}, 0
 }

 func normalizeURL(uOrig *url.URL) *url.URL {
--- a/app/vmauth/target_url_test.go
+++ b/app/vmauth/target_url_test.go
@ -79,14 +79,17 @@ func TestDropPrefixParts(t *testing.T) {

 func TestCreateTargetURLSuccess(t *testing.T) {
 	f := func(ui *UserInfo, requestURI, expectedTarget, expectedRequestHeaders, expectedResponseHeaders string,
-		expectedRetryStatusCodes []int, expectedDropSrcPathPrefixParts int) {
+		expectedRetryStatusCodes []int, expectedLoadBalancingPolicy string, expectedDropSrcPathPrefixParts int) {
 		t.Helper()
+		if err := ui.initURLs(); err != nil {
+			t.Fatalf("cannot initialize urls inside UserInfo: %s", err)
+		}
 		u, err := url.Parse(requestURI)
 		if err != nil {
 			t.Fatalf("cannot parse %q: %s", requestURI, err)
 		}
 		u = normalizeURL(u)
-		up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
+		up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
 		if up == nil {
 			t.Fatalf("cannot determie backend: %s", err)
 		}
@ -100,8 +103,11 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 		if headersStr != expectedRequestHeaders {
 			t.Fatalf("unexpected request headers; got %s; want %s", headersStr, expectedRequestHeaders)
 		}
-		if !reflect.DeepEqual(retryStatusCodes, expectedRetryStatusCodes) {
-			t.Fatalf("unexpected retryStatusCodes; got %d; want %d", retryStatusCodes, expectedRetryStatusCodes)
+		if !reflect.DeepEqual(up.retryStatusCodes, expectedRetryStatusCodes) {
+			t.Fatalf("unexpected retryStatusCodes; got %d; want %d", up.retryStatusCodes, expectedRetryStatusCodes)
+		}
+		if up.loadBalancingPolicy != expectedLoadBalancingPolicy {
+			t.Fatalf("unexpected loadBalancingPolicy; got %q; want %q", up.loadBalancingPolicy, expectedLoadBalancingPolicy)
 		}
 		if dropSrcPathPrefixParts != expectedDropSrcPathPrefixParts {
 			t.Fatalf("unexpected dropSrcPathPrefixParts; got %d; want %d", dropSrcPathPrefixParts, expectedDropSrcPathPrefixParts)
@ -110,7 +116,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 	// Simple routing with `url_prefix`
 	f(&UserInfo{
 		URLPrefix: mustParseURL("http://foo.bar"),
-	}, "", "http://foo.bar/.", "[]", "[]", nil, 0)
+	}, "", "http://foo.bar/.", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("http://foo.bar"),
 		HeadersConf: HeadersConf{
@ -120,23 +126,24 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 			}},
 		},
 		RetryStatusCodes:       []int{503, 501},
+		LoadBalancingPolicy:    "first_available",
 		DropSrcPathPrefixParts: 2,
-	}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, 2)
+	}, "/a/b/c", "http://foo.bar/c", `[{"bb" "aaa"}]`, `[]`, []int{503, 501}, "first_available", 2)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("http://foo.bar/federate"),
-	}, "/", "http://foo.bar/federate", "[]", "[]", nil, 0)
+	}, "/", "http://foo.bar/federate", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("http://foo.bar"),
-	}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, 0)
+	}, "a/b?c=d", "http://foo.bar/a/b?c=d", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("https://sss:3894/x/y"),
-	}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, 0)
+	}, "/z", "https://sss:3894/x/y/z", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("https://sss:3894/x/y"),
-	}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, 0)
+	}, "/../../aaa", "https://sss:3894/x/y/aaa", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("https://sss:3894/x/y"),
-	}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, 0)
+	}, "/./asd/../../aaa?a=d&s=s/../d", "https://sss:3894/x/y/aaa?a=d&s=s%2F..%2Fd", "[]", "[]", nil, "least_loaded", 0)

 	// Complex routing with `url_map`
 	ui := &UserInfo{
@ -163,6 +170,7 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 					},
 				},
 				RetryStatusCodes:       []int{503, 500, 501},
+				LoadBalancingPolicy:    "first_available",
 				DropSrcPathPrefixParts: 1,
 			},
 			{
@ -184,9 +192,9 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 		RetryStatusCodes:       []int{502},
 		DropSrcPathPrefixParts: 2,
 	}
-	f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, 1)
-	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
-	f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, 2)
+	f(ui, "/vmsingle/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", `[{"xx" "aa"} {"yy" "asdf"}]`, `[{"qwe" "rty"}]`, []int{503, 500, 501}, "first_available", 1)
+	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", []int{502}, "least_loaded", 0)
+	f(ui, "/foo/bar/api/v1/query_range", "http://default-server/api/v1/query_range", `[{"bb" "aaa"}]`, `[{"x" "y"}]`, []int{502}, "least_loaded", 2)

 	// Complex routing regexp paths in `url_map`
 	ui = &UserInfo{
@ -202,17 +210,17 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 		},
 		URLPrefix: mustParseURL("http://default-server"),
 	}
-	f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, 0)
-	f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, 0)
-	f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, 0)
-	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, 0)
-	f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, 0)
+	f(ui, "/api/v1/query?query=up", "http://vmselect/0/prometheus/api/v1/query?query=up", "[]", "[]", nil, "least_loaded", 0)
+	f(ui, "/api/v1/query_range?query=up", "http://vmselect/0/prometheus/api/v1/query_range?query=up", "[]", "[]", nil, "least_loaded", 0)
+	f(ui, "/api/v1/label/foo/values", "http://vmselect/0/prometheus/api/v1/label/foo/values", "[]", "[]", nil, "least_loaded", 0)
+	f(ui, "/api/v1/write", "http://vminsert/0/prometheus/api/v1/write", "[]", "[]", nil, "least_loaded", 0)
+	f(ui, "/api/v1/foo/bar", "http://default-server/api/v1/foo/bar", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("http://foo.bar?extra_label=team=dev"),
-	}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, 0)
+	}, "/api/v1/query", "http://foo.bar/api/v1/query?extra_label=team=dev", "[]", "[]", nil, "least_loaded", 0)
 	f(&UserInfo{
 		URLPrefix: mustParseURL("http://foo.bar?extra_label=team=mobile"),
-	}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, 0)
+	}, "/api/v1/query?extra_label=team=dev", "http://foo.bar/api/v1/query?extra_label=team%3Dmobile", "[]", "[]", nil, "least_loaded", 0)
 }

 func TestCreateTargetURLFailure(t *testing.T) {
@ -223,7 +231,7 @@ func TestCreateTargetURLFailure(t *testing.T) {
 			t.Fatalf("cannot parse %q: %s", requestURI, err)
 		}
 		u = normalizeURL(u)
-		up, hc, retryStatusCodes, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
+		up, hc, dropSrcPathPrefixParts := ui.getURLPrefixAndHeaders(u)
 		if up != nil {
 			t.Fatalf("unexpected non-empty up=%#v", up)
 		}
@ -233,9 +241,6 @@ func TestCreateTargetURLFailure(t *testing.T) {
 		if hc.ResponseHeaders != nil {
 			t.Fatalf("unexpected non-empty response headers=%q", hc.ResponseHeaders)
 		}
-		if retryStatusCodes != nil {
-			t.Fatalf("unexpected non-empty retryStatusCodes=%d", retryStatusCodes)
-		}
 		if dropSrcPathPrefixParts != 0 {
 			t.Fatalf("unexpected non-zero dropSrcPathPrefixParts=%d", dropSrcPathPrefixParts)
 		}
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -33,6 +33,7 @@ The sandbox cluster installation is running under the constant load generated by
 * SECURITY: upgrade base docker image (Alpine) from 3.18.4 to 3.18.5. See [alpine 3.18.5 release notes](https://www.alpinelinux.org/posts/Alpine-3.15.11-3.16.8-3.17.6-3.18.5-released.html).
 * SECURITY: upgrade Go builder from Go1.21.4 to Go1.21.5. See [the list of issues addressed in Go1.21.5](https://github.com/golang/go/issues?q=milestone%3AGo1.21.5+label%3ACherryPickApproved).

+* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add ability to send requests to the first available backend and fall back to other `hot standby` backends when the first backend is unavailable. This allows building highly available setups as shown in [these docs](https://docs.victoriametrics.com/vmauth.html#high-availability).
 * FEATURE: `vmselect`: allow opening [vmui](https://docs.victoriametrics.com/#vmui) and investigating [Top queries](https://docs.victoriametrics.com/#top-queries) and [Active queries](https://docs.victoriametrics.com/#active-queries) when the `vmselect` is overloaded with concurrent queries (e.g. when more than `-search.maxConcurrentRequests` concurrent queries are executed). Previously an attempt to open `Top queries` or `Active queries` at `vmui` could result in `couldn't start executing the request in ... seconds, since -search.maxConcurrentRequests=... concurrent requests are executed` error, which could complicate debugging of overloaded `vmselect` or single-node VictoriaMetrics.
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-enableMultitenantHandlers` command-line flag, which allows receiving data via [VictoriaMetrics cluster urls](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) at `vmagent` and converting [tenant ids](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) to (`vm_account_id`, `vm_project_id`) labels before sending the data to the configured `-remoteWrite.url`. See [these docs](https://docs.victoriametrics.com/vmagent.html#multitenancy) for details.
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110).
@ -42,7 +43,7 @@ The sandbox cluster installation is running under the constant load generated by
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `keep_if_contains` and `drop_if_contains` relabeling actions. See [these docs](https://docs.victoriametrics.com/vmagent.html#relabeling-enhancements) for details.
 * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): export `vm_promscrape_scrape_pool_targets` [metric](https://docs.victoriametrics.com/vmagent.html#monitoring) to track the number of targets each scrape job discovers. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5311).
 * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): provide `/vmalert/api/v1/rule` and `/api/v1/rule` API endpoints to get the rule object in JSON format. See [these docs](https://docs.victoriametrics.com/vmalert.html#web) for details.
-* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): deprecate process gauge metrics `vmalert_alerting_rules_error` and `vmalert_recording_rules_error` in favour of `vmalert_alerting_rules_errors_total` and `vmalert_recording_rules_errors_total` counter metrics. [Counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric type is more suitable for error counting as it preserves the state change between the scrapes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5160) for details. 
+* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): deprecate process gauge metrics `vmalert_alerting_rules_error` and `vmalert_recording_rules_error` in favour of `vmalert_alerting_rules_errors_total` and `vmalert_recording_rules_errors_total` counter metrics. [Counter](https://docs.victoriametrics.com/keyConcepts.html#counter) metric type is more suitable for error counting as it preserves the state change between the scrapes. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5160) for details.
 * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [day_of_year()](https://docs.victoriametrics.com/MetricsQL.html#day_of_year) function, which returns the day of the year for each of the given unix timestamps. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5345) for details. Thanks to @luckyxiaoqiang for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5368/).
 * FEATURE: all VictoriaMetrics binaries: expose additional metrics at `/metrics` page, which may simplify debugging of VictoriaMetrics components (see [this feature request](https://github.com/VictoriaMetrics/metrics/issues/54)):
  * `go_sched_latencies_seconds` - the [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram), which shows the time goroutines have spent in runnable state before actually running. Big values point to the lack of CPU time for the current workload.
--- a/docs/vmauth.md
+++ b/docs/vmauth.md
@ -53,6 +53,7 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co
 * [Generic HTTP load balancer](#generic-http-load-balancer)
 * [Load balancer for vmagent](#load-balancer-for-vmagent)
 * [Load balancer for VictoriaMetrics cluster](#load-balancer-for-victoriametrics-cluster)
+* [High availability](#high-availability)
 * [TLS termination proxy](#tls-termination-proxy)
 * [Basic Auth proxy](#basic-auth-proxy)
 * [Bearer Token auth proxy](#bearer-token-auth-proxy)
@ -69,6 +70,8 @@ unauthorized_user:
  url_prefix: "http://backend/"
 ```

+`vmauth` can balance load among multiple backends - see [these docs](#load-balancing) for details.
+
 ### Generic HTTP proxy for different backends

 `vmauth` can proxy requests to different backends depending on the requested path.
@ -95,6 +98,9 @@ unauthorized_user:
  default_url: http://some-backend/404-page.html
 ```

+See [these docs](#dropping-request-path-prefix) for more details.
+
+
 ### Generic HTTP load balancer

 `vmauth` can balance load among multiple HTTP backends in least-loaded round-robin mode.
@ -108,6 +114,8 @@ unauthorized_user:
  - "http://app-instance-3/"
 ```

+See [load balancing docs](#load-balancing) for more details.
+
 ### Load balancer for vmagent

 If [vmagent](https://docs.victoriametrics.com/vmagent.html) is used for processing [data push requests](https://docs.victoriametrics.com/vmagent.html#how-to-push-data-to-vmagent),
@ -128,6 +136,8 @@ unauthorized_user:
    - "http://vmagent-3:8429/"
 ```

+See [load balancing docs](#load-balancing) for more details.
+
 ### Load balancer for VictoriaMetrics cluster

 [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) accepts incoming data via `vminsert` nodes
@ -150,6 +160,27 @@ unauthorized_user:
    - "http://vmselect-2:8481/"
 ```

+See [load balancing docs](#load-balancing) for more details.
+
+### High availability
+
+`vmauth` automatically switches from temporarily unavailable backend to other hot standby backends listed in `url_prefix`
+if it runs with `-loadBalancingPolicy=first_available` command-line flag. The load balancing policy can be overridden at `user` and `url_map` sections
+of [`-auth.config`](#auth-config) via `load_balancing_policy` option. For example, the following config instructs `vmauth` to proxy requests to `http://victoria-metrics-main:8428/` backend.
+If this backend becomes unavailable, then `vmauth` starts proxying requests to `http://victoria-metrics-standby1:8428/`.
+If this backend becomes also unavailable, then requests are proxied to the last specified backend - `http://victoria-metrics-standby2:8428/`:
+
+```yml
+unauthorized_user:
+  url_prefix:
+  - "http://victoria-metrics-main:8428/"
+  - "http://victoria-metrics-standby1:8428/"
+  - "http://victoria-metrics-standby2:8428/"
+  load_balancing_policy: first_available
+```
+
+See [load-balancing docs](#load-balancing) for more details.
+
 ### TLS termination proxy

 `vmauth` can terminate HTTPS requests to backend services when it runs with the following command-line flags:
@ -273,17 +304,66 @@ users:

 ## Load balancing

-Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
-In the latter case `vmauth` balances load among the configured urls in least-loaded round-robin manner.
+Each `url_prefix` in the [-auth.config](#auth-config) can be specified in the following forms:

-If the backend at the configured url isn't available, then `vmauth` tries sending the request to the remaining configured urls.
+- A single url. For example:

-It is possible to configure automatic retry of requests if the backend responds with status code from optional `retry_status_codes` list.
+  ```yml
+  unauthorized_user:
+    url_prefix: 'http://vminsert:8480/insert/0/prometheus/`
+  ```
+
+  In this case `vmauth` proxies requests to the specified url.
+
+- A list of urls. For example:
+
+  ```yml
+  unauthorized_user:
+    url_prefix:
+    - 'http://vminsert-1:8480/insert/0/prometheus/'
+    - 'http://vminsert-2:8480/insert/0/prometheus/'
+    - 'http://vminsert-3:8480/insert/0/prometheus/'
+  ```
+
+  In this case `vmauth` spreads requests among the specified urls using least-loaded round-robin policy.
+  This guarantees that incoming load is shared uniformly among the specified backends.
+
+  `vmauth` automatically detects temporarily unavailable backends and spreads incoming queries among the remaining available backends.
+  This allows restarting the backends and peforming mantenance tasks on the backends without the need to remove them from the `url_prefix` list.
+
+  By default `vmauth` returns backend responses with all the http status codes to the client. It is possible to configure automatic retry of requests
+  at other backends if the backend responds with status code specified in the `-retryStatusCodes` command-line flag.
+  It is possible to customize the list of http response status codes to retry via `retry_status_codes` list at `user` and `url_map` level of [`-auth.config`](#auth-config).
+  For example, the following config re-tries requests on other backends if the current backend returns response with `500` or `502` HTTP status code:
+
+  ```yml
+  unauthorized_user:
+    url_prefix:
+    - http://vmselect1:8481/
+    - http://vmselect2:8481/
+    - http://vmselect3:8481/
+    retry_status_codes: [500, 502]
+  ```
+
+  By default `vmauth` uses `least_loaded` policy for spreading incoming requests among available backends.
+  The policy can be changed to `first_available` via `-loadBalancingPolicy` command-line flag. In this case `vmauth`
+  sends all the requests to the first specified backend while it is available. `vmauth` starts sending requests to the next
+  specified backend when the first backend is temporarily unavailable.
+  It is possible to customize the load balancing policy at the `user` and `url_map` level.
+  For example, the following config specifies `first_available` load balancing policy for unauthorized requests:
+
+  ```yml
+  unauthorized_user:
+    url_prefix:
+    - http://victoria-metrics-main:8428/
+    - http://victoria-metrics-standby:8428/
+    load_balancing_policy: first_available
+  ```

 Load balancing feature can be used in the following cases:

 - Balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
-  The following `-auth.config` file can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests
+  The following [`-auth.config`](#auth-config) can be used for spreading incoming requests among 3 vmselect nodes and re-trying failed requests
  or requests with 500 and 502 response status codes:

  ```yml
@ -295,22 +375,49 @@ Load balancing feature can be used in the following cases:
    retry_status_codes: [500, 502]
  ```

- Spreading select queries among multiple availability zones (AZs) with identical data. For example, the following config spreads select queries
-  among 3 AZs. Requests are re-tried if some AZs are temporarily unavailable or if some `vmstorage` nodes in some AZs are temporarily unavailable.
-  `vmauth` adds `deny_partial_response=1` query arg to all the queries in order to guarantee to get full response from every AZ.
-  See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details.
+- Sending select queries to the closest availability zone (AZ), while falling back to other AZs with identical data if the closest AZ is unavaialable.
+  For example, the following [`-auth.config`](#auth-config) sends select queries to `https://vmselect-az1/` and uses the `https://vmselect-az2/` as a fallback
+  when `https://vmselect-az1/` is temporarily unavailable or cannot return full responses.
+  See [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#cluster-availability) for details about `deny_partial_response` query arg,
+  which is added to requests before they are proxied to backends.

  ```yml
  unauthorized_user:
    url_prefix:
    - https://vmselect-az1/?deny_partial_response=1
    - https://vmselect-az2/?deny_partial_response=1
-    - https://vmselect-az3/?deny_partial_response=1
    retry_status_codes: [500, 502, 503]
+    load_balancing_policy: first_available
  ```

-Load balancig can also be configured independently per each user and per each `url_map` entry.
-See [auth config docs](#auth-config) for more details.
+Load balancig can be configured independently per each `user` entry and per each `url_map` entry. See [auth config docs](#auth-config) for more details.
+
+### Modifying HTTP headers
+
+`vmauth` supports the ability to set and remove HTTP request headers before sending the requests to backends.
+This is done via `headers` option. For example, the following [`-auth.config`](#auth-config) adds `TenantID: foobar` header
+to requests proxied to `http://backend:1234/`. It also overrides `X-Forwarded-For` request header with an empty value. This effectively
+removes the `X-Forwarded-For` header from requests proxied to `http://backend:1234/`:
+
+```yml
+unauthorized_user:
+  url_prefix: "http://backend:1234/"
+  headers:
+  - "TenantID: foobar"
+  - "X-Forwarded-For:"
+```
+
+`vmauth` also supports the ability to set and remove HTTP response headers before returning the response from the backend to client.
+This is done via `response_headers` option. For example, the following [`-auth.config`](#auth-config) adds `Foo: bar` response header
+and removes `Server` response header before returning the response to client:
+
+```yml
+unauthorized_user:
+  url_prefix: "http://backend:1234/"
+  response_headers:
+  - "Foo: bar"
+  - "Server:"
+```

 ## Concurrency limiting

@ -719,6 +826,8 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
     Whether to enable offline verification for VictoriaMetrics Enterprise license key, which has been passed either via -license or via -licenseFile command-line flag. The issued license key must support offline verification feature. Contact info@victoriametrics.com if you need offline license verification. This flag is avilable only in Enterprise binaries
  -licenseFile string
     Path to file with license key for VictoriaMetrics Enterprise. See https://victoriametrics.com/products/enterprise/ . Trial Enterprise license can be obtained from https://victoriametrics.com/products/enterprise/trial/ . This flag is available only in Enterprise binaries. The license key can be also passed inline via -license command-line flag
+  -loadBalancingPolicy string
+     The default load balancing policy to use for backend urls specified inside url_prefix section. Supported policies: least_loaded, first_available. See https://docs.victoriametrics.com/vmauth.html#load-balancing for more details (default "least_loaded")
  -logInvalidAuthTokens
     Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
  -loggerDisableTimestamps
@ -767,6 +876,9 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
     Auth key for /-/reload http endpoint. It must be passed as authKey=...
  -responseTimeout duration
     The timeout for receiving a response from backend (default 5m0s)
+  -retryStatusCodes array
+     Comma-separated list of default HTTP response status codes when vmauth re-tries the request on other backends. See https://docs.victoriametrics.com/vmauth.html#load-balancing for details (default 0)
+     Supports array of values separated by comma or specified via multiple flags.
  -tls
     Whether to enable TLS for incoming HTTP requests at -httpListenAddr (aka https). -tlsCertFile and -tlsKeyFile must be set if -tls is set
  -tlsCertFile string
--- a/lib/flagutil/array.go
+++ b/lib/flagutil/array.go
@ -224,7 +224,7 @@ func (a *ArrayString) GetOptionalArg(argIdx int) string {
 // Has the same api as ArrayString.
 type ArrayBool []bool

-// IsBoolFlag  implements flag.IsBoolFlag interface
+// IsBoolFlag implements flag.IsBoolFlag interface
 func (a *ArrayBool) IsBoolFlag() bool { return true }

 // String implements flag.Value interface
@ -312,6 +312,11 @@ type ArrayInt struct {
 	a            []int
 }

+// Values returns all the values for a.
+func (a *ArrayInt) Values() []int {
+	return a.a
+}
+
 // String implements flag.Value interface
 func (a *ArrayInt) String() string {
 	x := a.a
--- a/lib/flagutil/array_test.go
+++ b/lib/flagutil/array_test.go
@ -286,7 +286,7 @@ func TestArrayInt(t *testing.T) {
 }

 func TestArrayInt_Set(t *testing.T) {
-	f := func(s, expectedResult string) {
+	f := func(s, expectedResult string, expectedValues []int) {
 		t.Helper()
 		var a ArrayInt
 		if err := a.Set(s); err != nil {
@ -296,10 +296,14 @@ func TestArrayInt_Set(t *testing.T) {
 		if result != expectedResult {
 			t.Fatalf("unexpected values parsed;\ngot\n%q\nwant\n%q", result, expectedResult)
 		}
+		values := a.Values()
+		if !reflect.DeepEqual(values, expectedValues) {
+			t.Fatalf("unexpected values;\ngot\n%d\nwant\n%d", values, expectedValues)
+		}
 	}
-	f("", "")
-	f(`1`, `1`)
-	f(`-2,3,-64`, `-2,3,-64`)
+	f("", "", nil)
+	f(`1`, `1`, []int{1})
+	f(`-2,3,-64`, `-2,3,-64`, []int{-2, 3, -64})
 }

 func TestArrayInt_GetOptionalArg(t *testing.T) {