VictoriaMetrics/app/vlselect/main.go

// Package vlselect provides HTTP request handlers for the VictoriaLogs
// select (querying) API and serves the embedded vmui web UI.
package vlselect

import (
	"embed"
	"flag"
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect/logsql"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
	"github.com/VictoriaMetrics/metrics"
)

var (
	maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", getDefaultMaxConcurrentRequests(), "The maximum number of concurrent search requests. "+
		"It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. "+
		"See also -search.maxQueueDuration")
	maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the search request waits for execution when -search.maxConcurrentRequests "+
		"limit is reached; see also -search.maxQueryDuration")
	maxQueryDuration = flag.Duration("search.maxQueryDuration", time.Second*30, "The maximum duration for query execution")
)

func getDefaultMaxConcurrentRequests() int {
	n := cgroup.AvailableCPUs()
	if n <= 4 {
		n *= 2
	}
	if n > 16 {
		// A single request can saturate all the CPU cores, so there is no sense
		// in allowing higher number of concurrent requests - they will just contend
		// for unavailable CPU time.
		n = 16
	}
	return n
}

// Init initializes vlselect
func Init() {
	concurrencyLimitCh = make(chan struct{}, *maxConcurrentRequests)
}

// Stop stops vlselect
func Stop() {
}

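// concurrencyLimitCh limits the number of concurrently executed select requests.
// Its capacity is set to -search.maxConcurrentRequests in Init().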
var concurrencyLimitCh chan struct{}

var (
	concurrencyLimitReached = metrics.NewCounter(`vl_concurrent_select_limit_reached_total`)
	concurrencyLimitTimeout = metrics.NewCounter(`vl_concurrent_select_limit_timeout_total`)
	_ = metrics.NewGauge(`vl_concurrent_select_capacity`, func() float64 {
		return float64(cap(concurrencyLimitCh))
	})
	_ = metrics.NewGauge(`vl_concurrent_select_current`, func() float64 {
		return float64(len(concurrencyLimitCh))
	})
)

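// vmuiFiles contains the embedded static files of the vmui web UI,
// which are served under the /select/vmui/ path.
//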
//go:embed vmui
var vmuiFiles embed.FS
var vmuiFileServer = http.FileServer(http.FS(vmuiFiles))

// RequestHandler handles select requests for VictoriaLogs
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
	path := r.URL.Path
	if !strings.HasPrefix(path, "/select/") {
		// Skip requests, which do not start with /select/, since these aren't our requests.
		return false
	}
	// Collapse duplicate slashes, so the path matching below works as expected.
	path = strings.ReplaceAll(path, "//", "/")

	if path == "/select/vmui" {
		// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
		// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
		// is hidden behind vmauth or similar proxy.
		_ = r.ParseForm()
		newURL := "vmui/?" + r.Form.Encode()
		httpserver.Redirect(w, newURL)
		return true
	}

	if strings.HasPrefix(path, "/select/vmui/") {
		if strings.HasPrefix(path, "/select/vmui/static/") {
			// Allow clients caching static contents for long period of time, since it shouldn't change over time.
			// Path to static contents (such as js and css) must be changed whenever its contents is changed.
			// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
			w.Header().Set("Cache-Control", "max-age=31536000")
		}
		r.URL.Path = path
		vmuiFileServer.ServeHTTP(w, r)
		return true
	}

	// Limit the number of concurrent queries, which can consume big amounts of CPU.
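	// A request first tries to grab a slot in concurrencyLimitCh; if the channel is full,
	// it waits for a free slot for up to -search.maxQueueDuration (but no longer than the
	// query timeout) before returning 503 Service Unavailable.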
	startTime := time.Now()
	ctx := r.Context()
	stopCh := ctx.Done()
	select {
	case concurrencyLimitCh <- struct{}{}:
		defer func() { <-concurrencyLimitCh }()
	default:
		// Sleep for a while until giving up. This should resolve short bursts in requests.
		concurrencyLimitReached.Inc()
		d := getMaxQueryDuration(r)
		if d > *maxQueueDuration {
			d = *maxQueueDuration
		}
		t := timerpool.Get(d)
		select {
		case concurrencyLimitCh <- struct{}{}:
			timerpool.Put(t)
			defer func() { <-concurrencyLimitCh }()
		case <-stopCh:
			timerpool.Put(t)
			remoteAddr := httpserver.GetQuotedRemoteAddr(r)
			requestURI := httpserver.GetRequestURI(r)
			logger.Infof("client has cancelled the request after %.3f seconds: remoteAddr=%s, requestURI: %q",
				time.Since(startTime).Seconds(), remoteAddr, requestURI)
			return true
		case <-t.C:
			timerpool.Put(t)
			concurrencyLimitTimeout.Inc()
			err := &httpserver.ErrorWithStatusCode{
				Err: fmt.Errorf("couldn't start executing the request in %.3f seconds, since -search.maxConcurrentRequests=%d concurrent requests "+
					"are executed. Possible solutions: to reduce query load; to add more compute resources to the server; "+
					"to increase -search.maxQueueDuration=%s; to increase -search.maxQueryDuration; to increase -search.maxConcurrentRequests",
					d.Seconds(), *maxConcurrentRequests, maxQueueDuration),
				StatusCode: http.StatusServiceUnavailable,
			}
			httpserver.Errorf(w, r, "%s", err)
			return true
		}
	}

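	// A concurrency slot has been acquired at this point (and is released via defer),
	// so the request can be dispatched to the matching /select/logsql/* handler.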
	httpserver.EnableCORS(w, r)

	switch path {
	case "/select/logsql/field_names":
		logsqlFieldNamesRequests.Inc()
		logsql.ProcessFieldNamesRequest(ctx, w, r)
		return true
	case "/select/logsql/field_values":
		logsqlFieldValuesRequests.Inc()
		logsql.ProcessFieldValuesRequest(ctx, w, r)
		return true
	case "/select/logsql/hits":
		logsqlHitsRequests.Inc()
		logsql.ProcessHitsRequest(ctx, w, r)
		return true
	case "/select/logsql/query":
		logsqlQueryRequests.Inc()
		logsql.ProcessQueryRequest(ctx, w, r)
		return true
	case "/select/logsql/stream_label_names":
		logsqlStreamLabelNamesRequests.Inc()
		logsql.ProcessStreamLabelNamesRequest(ctx, w, r)
		return true
	case "/select/logsql/stream_label_values":
		logsqlStreamLabelValuesRequests.Inc()
		logsql.ProcessStreamLabelValuesRequest(ctx, w, r)
		return true
	case "/select/logsql/streams":
		logsqlStreamsRequests.Inc()
		logsql.ProcessStreamsRequest(ctx, w, r)
		return true
	default:
		return false
	}
}

// getMaxQueryDuration returns the maximum duration for a query from r.
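//
// The duration is taken from the optional `timeout` request arg and is capped by -search.maxQueryDuration.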
func getMaxQueryDuration(r *http.Request) time.Duration {
	dms, err := httputils.GetDuration(r, "timeout", 0)
	if err != nil {
		dms = 0
	}
	d := time.Duration(dms) * time.Millisecond
	if d <= 0 || d > *maxQueryDuration {
		d = *maxQueryDuration
	}
	return d
}

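// Per-path request counters for the /select/logsql/* endpoints.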
var (
	logsqlFieldNamesRequests        = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_names"}`)
	logsqlFieldValuesRequests       = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/field_values"}`)
	logsqlHitsRequests              = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/hits"}`)
	logsqlQueryRequests             = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/query"}`)
	logsqlStreamLabelNamesRequests  = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_names"}`)
	logsqlStreamLabelValuesRequests = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/stream_label_values"}`)
	logsqlStreamsRequests           = metrics.NewCounter(`vl_http_requests_total{path="/select/logsql/streams"}`)
)