app/vmauth: improve load balancing by sending incoming requests to backends with the lowest number of concurrent requests

While at it, stop sending requests to an unavailable backend for 3 seconds before the next attempt. This should reduce the amount of useless work and the number of useless network packets when the backend is temporarily unavailable.
2024-11-23 12:31:07 +01:00 · 2023-02-11 00:27:40 -08:00 · 2023-02-11 00:27:40 -08:00 · 776391917f
commit 776391917f
parent f3625e4f3f
9 changed files with 175 additions and 48 deletions
--- a/app/vmauth/README.md
+++ b/app/vmauth/README.md
@ -28,7 +28,36 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co

 ## Load balancing

-Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls. In the latter case `vmauth` balances load among the configured urls in a round-robin manner. This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
+Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
+In the latter case `vmauth` balances load among the configured urls in a least-loaded round-robin manner.
+`vmauth` retries failing `GET` requests across the configured list of urls.
+This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes
+in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
+
+## Concurrency limiting
+
+`vmauth` limits the number of concurrent requests it can proxy according to the following command-line flags:
+
+- `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
+- `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
+
+It is also possible to set individual limits on the number of concurrent requests per each user
+with the `max_concurrent_requests` option - see [auth config example](#auth-config).
+
+`vmauth` responds with `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits.
+
+The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:
+
+- `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
+  It is set via `-maxConcurrentRequests` command-line flag.
+- `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
+- `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
+  because the global concurrency limit has been reached.
+- `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
+- `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
+- `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error
+  because the concurrency limit has been reached for the given `username`.
+

 ## Auth config

--- a/app/vmauth/auth_config.go
+++ b/app/vmauth/auth_config.go
@ -13,6 +13,7 @@ import (
 	"sync/atomic"

 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
+	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 	"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
@ -53,7 +54,7 @@ func (ui *UserInfo) beginConcurrencyLimit() error {
 		return nil
 	default:
 		ui.concurrencyLimitReached.Inc()
-		return fmt.Errorf("cannot handle more than max_concurrent_requests=%d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
+		return fmt.Errorf("cannot handle more than %d concurrent requests from user %s", ui.getMaxConcurrentRequests(), ui.name())
 	}
 }

@ -63,7 +64,7 @@ func (ui *UserInfo) endConcurrencyLimit() {

 func (ui *UserInfo) getMaxConcurrentRequests() int {
 	mcr := ui.MaxConcurrentRequests
-	if mcr > *maxConcurrentPerUserRequests {
+	if mcr <= 0 || mcr > *maxConcurrentPerUserRequests {
 		mcr = *maxConcurrentPerUserRequests
 	}
 	return mcr
@ -111,14 +112,75 @@ type SrcPath struct {

 // URLPrefix represents pased `url_prefix`
 type URLPrefix struct {
-	n    uint32
-	urls []*url.URL
+	n   uint32
+	bus []*backendURL
 }

-func (up *URLPrefix) getNextURL() *url.URL {
+type backendURL struct {
+	brokenDeadline     uint64
+	concurrentRequests int32
+	url                *url.URL
+}
+
+func (bu *backendURL) isBroken() bool {
+	ct := fasttime.UnixTimestamp()
+	return ct < atomic.LoadUint64(&bu.brokenDeadline)
+}
+
+func (bu *backendURL) setBroken() {
+	deadline := fasttime.UnixTimestamp() + 3
+	atomic.StoreUint64(&bu.brokenDeadline, deadline)
+}
+
+func (bu *backendURL) put() {
+	atomic.AddInt32(&bu.concurrentRequests, -1)
+}
+
+func (up *URLPrefix) getBackendsCount() int {
+	return len(up.bus)
+}
+
+// getLeastLoadedBackendURL returns the backendURL with the minimum number of concurrent requests.
+//
+// backendURL.put() must be called on the returned backendURL after the request is complete.
+func (up *URLPrefix) getLeastLoadedBackendURL() *backendURL {
+	bus := up.bus
+	if len(bus) == 1 {
+		// Fast path - return the only backend url.
+		bu := bus[0]
+		atomic.AddInt32(&bu.concurrentRequests, 1)
+		return bu
+	}
+
+	// Slow path - select other backend urls.
 	n := atomic.AddUint32(&up.n, 1)
-	idx := n % uint32(len(up.urls))
-	return up.urls[idx]
+
+	for i := uint32(0); i < uint32(len(bus)); i++ {
+		idx := (n + i) % uint32(len(bus))
+		bu := bus[idx]
+		if bu.isBroken() {
+			continue
+		}
+		if atomic.CompareAndSwapInt32(&bu.concurrentRequests, 0, 1) {
+			// Fast path - return the backend with zero concurrently executed requests.
+			return bu
+		}
+	}
+
+	// Slow path - return the backend with the minimum number of concurrently executed requests.
+	buMin := bus[n%uint32(len(bus))]
+	minRequests := atomic.LoadInt32(&buMin.concurrentRequests)
+	for _, bu := range bus {
+		if bu.isBroken() {
+			continue
+		}
+		if n := atomic.LoadInt32(&bu.concurrentRequests); n < minRequests {
+			buMin = bu
+			minRequests = n
+		}
+	}
+	atomic.AddInt32(&buMin.concurrentRequests, 1)
+	return buMin
 }

 // UnmarshalYAML unmarshals up from yaml.
@ -147,31 +209,33 @@ func (up *URLPrefix) UnmarshalYAML(f func(interface{}) error) error {
 	default:
 		return fmt.Errorf("unexpected type for `url_prefix`: %T; want string or []string", v)
 	}
-	pus := make([]*url.URL, len(urls))
+	bus := make([]*backendURL, len(urls))
 	for i, u := range urls {
 		pu, err := url.Parse(u)
 		if err != nil {
 			return fmt.Errorf("cannot unmarshal %q into url: %w", u, err)
 		}
-		pus[i] = pu
+		bus[i] = &backendURL{
+			url: pu,
+		}
 	}
-	up.urls = pus
+	up.bus = bus
 	return nil
 }

 // MarshalYAML marshals up to yaml.
 func (up *URLPrefix) MarshalYAML() (interface{}, error) {
 	var b []byte
-	if len(up.urls) == 1 {
-		u := up.urls[0].String()
+	if len(up.bus) == 1 {
+		u := up.bus[0].url.String()
 		b = strconv.AppendQuote(b, u)
 		return string(b), nil
 	}
 	b = append(b, '[')
-	for i, pu := range up.urls {
-		u := pu.String()
+	for i, bu := range up.bus {
+		u := bu.url.String()
 		b = strconv.AppendQuote(b, u)
-		if i+1 < len(up.urls) {
+		if i+1 < len(up.bus) {
 			b = append(b, ',')
 		}
 	}
@ -383,12 +447,12 @@ func getAuthToken(bearerToken, username, password string) string {
 }

 func (up *URLPrefix) sanitize() error {
-	for i, pu := range up.urls {
-		puNew, err := sanitizeURLPrefix(pu)
+	for _, bu := range up.bus {
+		puNew, err := sanitizeURLPrefix(bu.url)
 		if err != nil {
 			return err
 		}
-		up.urls[i] = puNew
+		bu.url = puNew
 	}
 	return nil
 }
--- a/app/vmauth/auth_config_test.go
+++ b/app/vmauth/auth_config_test.go
@ -392,15 +392,17 @@ func mustParseURL(u string) *URLPrefix {
 }

 func mustParseURLs(us []string) *URLPrefix {
-	pus := make([]*url.URL, len(us))
+	bus := make([]*backendURL, len(us))
 	for i, u := range us {
 		pu, err := url.Parse(u)
 		if err != nil {
 			panic(fmt.Errorf("BUG: cannot parse %q: %w", u, err))
 		}
-		pus[i] = pu
+		bus[i] = &backendURL{
+			url: pu,
+		}
 	}
 	return &URLPrefix{
-		urls: pus,
+		bus: bus,
 	}
 }
--- a/app/vmauth/main.go
+++ b/app/vmauth/main.go
@ -134,17 +134,21 @@ func requestHandler(w http.ResponseWriter, r *http.Request) bool {

 func processRequest(w http.ResponseWriter, r *http.Request, ui *UserInfo) {
 	u := normalizeURL(r.URL)
-	up, headers, err := ui.getURLPrefix(u)
+	up, headers, err := ui.getURLPrefixAndHeaders(u)
 	if err != nil {
 		httpserver.Errorf(w, r, "cannot determine targetURL: %s", err)
 		return
 	}
 	maxAttempts := up.getBackendsCount()
 	for i := 0; i < maxAttempts; i++ {
-		targetURL := up.mergeURLs(u)
-		if tryProcessingRequest(w, r, targetURL, headers) {
+		bu := up.getLeastLoadedBackendURL()
+		targetURL := mergeURLs(bu.url, u)
+		ok := tryProcessingRequest(w, r, targetURL, headers)
+		bu.put()
+		if ok {
 			return
 		}
+		bu.setBroken()
 	}
 	err = &httpserver.ErrorWithStatusCode{
 		Err:        fmt.Errorf("all the backends for the user %q are unavailable", ui.name()),
--- a/app/vmauth/target_url.go
+++ b/app/vmauth/target_url.go
@ -7,15 +7,6 @@ import (
 	"strings"
 )

-func (up *URLPrefix) mergeURLs(requestURI *url.URL) *url.URL {
-	pu := up.getNextURL()
-	return mergeURLs(pu, requestURI)
-}
-
-func (up *URLPrefix) getBackendsCount() int {
-	return len(up.urls)
-}
-
 func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
 	targetURL := *uiURL
 	targetURL.Path += requestURI.Path
@ -39,7 +30,7 @@ func mergeURLs(uiURL, requestURI *url.URL) *url.URL {
 	return &targetURL
 }

-func (ui *UserInfo) getURLPrefix(u *url.URL) (*URLPrefix, []Header, error) {
+func (ui *UserInfo) getURLPrefixAndHeaders(u *url.URL) (*URLPrefix, []Header, error) {
 	for _, e := range ui.URLMaps {
 		for _, sp := range e.SrcPaths {
 			if sp.match(u.Path) {
--- a/app/vmauth/target_url_test.go
+++ b/app/vmauth/target_url_test.go
@ -14,11 +14,13 @@ func TestCreateTargetURLSuccess(t *testing.T) {
 			t.Fatalf("cannot parse %q: %s", requestURI, err)
 		}
 		u = normalizeURL(u)
-		up, headers, err := ui.getURLPrefix(u)
+		up, headers, err := ui.getURLPrefixAndHeaders(u)
 		if err != nil {
 			t.Fatalf("unexpected error: %s", err)
 		}
-		target := up.mergeURLs(u)
+		bu := up.getLeastLoadedBackendURL()
+		target := mergeURLs(bu.url, u)
+		bu.put()
 		if target.String() != expectedTarget {
 			t.Fatalf("unexpected target; got %q; want %q", target, expectedTarget)
 		}
@ -122,12 +124,12 @@ func TestCreateTargetURLFailure(t *testing.T) {
 			t.Fatalf("cannot parse %q: %s", requestURI, err)
 		}
 		u = normalizeURL(u)
-		up, headers, err := ui.getURLPrefix(u)
+		up, headers, err := ui.getURLPrefixAndHeaders(u)
 		if err == nil {
 			t.Fatalf("expecting non-nil error")
 		}
 		if up != nil {
-			t.Fatalf("unexpected non-empty up=%q", up)
+			t.Fatalf("unexpected non-empty up=%#v", up)
 		}
 		if headers != nil {
 			t.Fatalf("unexpected non-empty headers=%q", headers)
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -15,8 +15,9 @@ The following tip changes can be tested by building VictoriaMetrics components f

 ## tip

-* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add the ability to limit the number of concurrent requests on a per-user basis via `-maxConcurrentPerUserRequests` command-line option and via `max_concurrent_requests` config option. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3346) and [these docs](https://docs.victoriametrics.com/vmauth.html#auth-config).
-* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): automatically retry failing GET requests on all the configured backends.
+* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add the ability to limit the number of concurrent requests on a per-user basis via `-maxConcurrentPerUserRequests` command-line flag and via `max_concurrent_requests` config option. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3346) and [these docs](https://docs.victoriametrics.com/vmauth.html#concurrency-limiting).
+* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): automatically retry failing `GET` requests on all [the configured backends](https://docs.victoriametrics.com/vmauth.html#load-balancing). Previously the backend error was immediately returned to the client without retrying the request on the remaining backends.
+* FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): choose the backend with the minimum number of concurrently executed requests [among the configured backends](https://docs.victoriametrics.com/vmauth.html#load-balancing) in a round-robin manner for serving the incoming requests. This allows spreading the load among backends more evenly, while improving the response time.
 * FEATURE: [vmalert enterprise](https://docs.victoriametrics.com/vmalert.html): add ability to read alerting and recording rules from S3, GCS or S3-compatible object storage. See [these docs](https://docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage).

 ## [v1.87.1](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.87.1)
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@ -52,7 +52,8 @@ Some facts about tenants in VictoriaMetrics:
 - Each `accountID` and `projectID` is identified by an arbitrary 32-bit integer in the range `[0 .. 2^32)`.
 If `projectID` is missing, then it is automatically assigned to `0`. It is expected that other information about tenants
 such as auth tokens, tenant names, limits, accounting, etc. is stored in a separate relational database. This database must be managed
-by a separate service sitting in front of VictoriaMetrics cluster such as [vmauth](https://docs.victoriametrics.com/vmauth.html) or [vmgateway](https://docs.victoriametrics.com/vmgateway.html). [Contact us](mailto:info@victoriametrics.com) if you need assistance with such service.
+by a separate service sitting in front of VictoriaMetrics cluster such as [vmauth](https://docs.victoriametrics.com/vmauth.html)
+or [vmgateway](https://docs.victoriametrics.com/vmgateway.html). [Contact us](mailto:info@victoriametrics.com) if you need assistance with such service.

 - Tenants are automatically created when the first data point is written into the given tenant.

@ -172,7 +173,8 @@ It is recommended to run at least two nodes for each service for high availabili

 It is preferred to run many small `vmstorage` nodes over a few big `vmstorage` nodes, since this reduces the workload increase on the remaining `vmstorage` nodes when some of `vmstorage` nodes become temporarily unavailable.

-An http load balancer such as [vmauth](https://docs.victoriametrics.com/vmauth.html) or `nginx` must be put in front of `vminsert` and `vmselect` nodes. It must contain the following routing configs according to [the url format](#url-format):
+An http load balancer such as [vmauth](https://docs.victoriametrics.com/vmauth.html) or `nginx` must be put in front of `vminsert` and `vmselect` nodes.
+It must contain the following routing configs according to [the url format](#url-format):

 - requests starting with `/insert` must be routed to port `8480` on `vminsert` nodes.
 - requests starting with `/select` must be routed to port `8481` on `vmselect` nodes.
@ -475,7 +477,8 @@ if some of its components are temporarily unavailable.

 VictoriaMetrics cluster remains available if the following conditions are met:

- HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes.
+- HTTP load balancer must stop routing requests to unavailable `vminsert` and `vmselect` nodes
+  ([vmauth](https://docs.victoriametrics.com/vmauth.html) stops routing requests to unavailable nodes).

 - At least a single `vminsert` node must remain available in the cluster for processing data ingestion workload.
  The remaining active `vminsert` nodes must have enough compute capacity (CPU, RAM, network bandwidth)
--- a/docs/vmauth.md
+++ b/docs/vmauth.md
@ -32,7 +32,36 @@ accounting and rate limiting such as [vmgateway](https://docs.victoriametrics.co

 ## Load balancing

-Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls. In the latter case `vmauth` balances load among the configured urls in a round-robin manner. This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
+Each `url_prefix` in the [-auth.config](#auth-config) may contain either a single url or a list of urls.
+In the latter case `vmauth` balances load among the configured urls in a least-loaded round-robin manner.
+`vmauth` retries failing `GET` requests across the configured list of urls.
+This feature is useful for balancing the load among multiple `vmselect` and/or `vminsert` nodes
+in [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html).
+
+## Concurrency limiting
+
+`vmauth` limits the number of concurrent requests it can proxy according to the following command-line flags:
+
+- `-maxConcurrentRequests` limits the global number of concurrent requests `vmauth` can serve across all the configured users.
+- `-maxConcurrentPerUserRequests` limits the number of concurrent requests `vmauth` can serve per each configured user.
+
+It is also possible to set individual limits on the number of concurrent requests per each user
+with the `max_concurrent_requests` option - see [auth config example](#auth-config).
+
+`vmauth` responds with `429 Too Many Requests` HTTP error when the number of concurrent requests exceeds the configured limits.
+
+The following [metrics](#monitoring) related to concurrency limits are exposed by `vmauth`:
+
+- `vmauth_concurrent_requests_capacity` - the global limit on the number of concurrent requests `vmauth` can serve.
+  It is set via `-maxConcurrentRequests` command-line flag.
+- `vmauth_concurrent_requests_current` - the current number of concurrent requests `vmauth` processes.
+- `vmauth_concurrent_requests_limit_reached_total` - the number of requests rejected with `429 Too Many Requests` error
+  because the global concurrency limit has been reached.
+- `vmauth_user_concurrent_requests_capacity{username="..."}` - the limit on the number of concurrent requests for the given `username`.
+- `vmauth_user_concurrent_requests_current{username="..."}` - the current number of concurrent requests for the given `username`.
+- `vmauth_user_concurrent_requests_limit_reached_total{username="..."}` - the number of requests rejected with `429 Too Many Requests` error
+  because the concurrency limit has been reached for the given `username`.
+

 ## Auth config

@ -65,7 +94,7 @@ users:
  #
  # The given user can send maximum 10 concurrent requests according to the provided max_concurrent_requests.
  # Excess concurrent requests are rejected with 429 HTTP status code.
-  # See also -maxConcurrentRequests command-line flag for limiting the global number of concurrent requests.
+  # See also -maxConcurrentPerUserRequests and -maxConcurrentRequests command-line flags.
 - username: "local-single-node"
  password: "***"
  url_prefix: "http://localhost:8428"
@ -268,7 +297,7 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
  -httpListenAddr.useProxyProtocol
     Whether to use proxy protocol for connections accepted at -httpListenAddr . See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt
  -internStringMaxLen int
-     The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 300)
+     The maximum length for strings to intern. Lower limit may save memory at the cost of higher CPU usage. See https://en.wikipedia.org/wiki/String_interning (default 500)
  -logInvalidAuthTokens
     Whether to log requests with invalid auth tokens. Such requests are always counted at vmauth_http_request_errors_total{reason="invalid_auth_token"} metric, which is exposed at /metrics page
  -loggerDisableTimestamps
@ -287,8 +316,10 @@ See the docs at https://docs.victoriametrics.com/vmauth.html .
     Timezone to use for timestamps in logs. Timezone must be a valid IANA Time Zone. For example: America/New_York, Europe/Berlin, Etc/GMT+3 or Local (default "UTC")
  -loggerWarnsPerSecondLimit int
     Per-second limit on the number of WARN messages. If more than the given number of warns are emitted per second, then the remaining warns are suppressed. Zero values disable the rate limit
+  -maxConcurrentPerUserRequests int
+     The maximum number of concurrent requests vmauth can process per each configured user. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentRequests command-line option and max_concurrent_requests option in per-user config (default 300)
  -maxConcurrentRequests int
-     The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxIdleConnsPerBackend and max_concurrent_requests option per each user config (default 1000)
+     The maximum number of concurrent requests vmauth can process. Other requests are rejected with '429 Too Many Requests' http status code. See also -maxConcurrentPerUserRequests and -maxIdleConnsPerBackend command-line options (default 1000)
  -maxIdleConnsPerBackend int
     The maximum number of idle connections vmauth can open per each backend host. See also -maxConcurrentRequests (default 100)
  -memory.allowedBytes size