VictoriaMetrics/app/vmselect/main.go

package vmselect

import (
	"flag"
	"fmt"
	"net/http"
	"runtime"
	"strings"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
	"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
	"github.com/VictoriaMetrics/metrics"
)

// Command-line flags used by vmselect:
//
//   - deleteAuthKey is compared against the `authKey` request arg
//     of /api/v1/admin/tsdb/delete_series in RequestHandler.
//     With the default empty value a request without `authKey` passes the check.
//   - maxConcurrentRequests is the capacity of concurrencyCh allocated in Init.
//   - maxQueueDuration is how long RequestHandler waits for a free slot
//     in concurrencyCh before replying with an error.
var (
	deleteAuthKey         = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")
	maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)*2, "The maximum number of concurrent search requests. It shouldn't exceed 2*vCPUs for better performance. See also -search.maxQueueDuration")
	maxQueueDuration      = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")
)

// Init initializes vmselect.
//
// It calls fs.RemoveDirContents on the `tmp` sub-directory of the vmstorage data path
// before handing that directory to netstorage.InitTmpBlocksDir, then initializes
// the rollup result cache under `cache/rollupResult` and allocates concurrencyCh
// with the capacity taken from -search.maxConcurrentRequests.
func Init() {
	dataPath := *vmstorage.DataPath

	tmpDir := dataPath + "/tmp"
	fs.RemoveDirContents(tmpDir)
	netstorage.InitTmpBlocksDir(tmpDir)

	rollupCachePath := dataPath + "/cache/rollupResult"
	promql.InitRollupResultCache(rollupCachePath)

	concurrencyCh = make(chan struct{}, *maxConcurrentRequests)
}

// Stop stops vmselect.
//
// The only action taken here is the call to promql.StopRollupResultCache.
func Stop() {
	promql.StopRollupResultCache()
}

// concurrencyCh limits the number of requests concurrently processed by RequestHandler.
//
// RequestHandler sends an empty struct into the channel before processing a request
// and receives it back when done, so the channel capacity (set in Init) is the limit.
var concurrencyCh chan struct{}

var (
	// concurrencyLimitReached counts requests, which couldn't obtain a slot
	// in concurrencyCh immediately and had to wait.
	concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
	// concurrencyLimitTimeout counts requests, which gave up waiting for a slot
	// in concurrencyCh after -search.maxQueueDuration.
	concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)

	// The gauge reports the capacity of concurrencyCh.
	_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
		return float64(cap(concurrencyCh))
	})
	// The gauge reports the number of slots currently taken in concurrencyCh.
	_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
		return float64(len(concurrencyCh))
	})
)

// RequestHandler handles Prometheus querying API requests.
//
// It returns true if the request has been handled, i.e. either a successful response
// or an error has been written to w. It returns false without writing anything to w
// if the request path doesn't match any of the routes below.
//
// Every request must obtain a slot in concurrencyCh before it is routed.
// If no slot is free, the request waits up to -search.maxQueueDuration;
// when the wait times out, an error with the 503 status code is passed to httpserver.Errorf.
//
// Duplicate slashes in the request path are collapsed before routing.
func RequestHandler(w http.ResponseWriter, r *http.Request) bool {
	// Limit the number of concurrent queries.
	select {
	case concurrencyCh <- struct{}{}:
		defer func() { <-concurrencyCh }()
	default:
		// Sleep for a while until giving up. This should resolve short bursts in requests.
		concurrencyLimitReached.Inc()
		t := timerpool.Get(*maxQueueDuration)
		select {
		case concurrencyCh <- struct{}{}:
			timerpool.Put(t)
			defer func() { <-concurrencyCh }()
		case <-t.C:
			timerpool.Put(t)
			concurrencyLimitTimeout.Inc()
			err := &httpserver.ErrorWithStatusCode{
				Err: fmt.Errorf("cannot handle more than %d concurrent search requests during %s; possible solutions: "+
					"increase `-search.maxQueueDuration`, increase `-search.maxConcurrentRequests`, increase server capacity",
					*maxConcurrentRequests, *maxQueueDuration),
				StatusCode: http.StatusServiceUnavailable,
			}
			httpserver.Errorf(w, "%s", err)
			return true
		}
	}

	path := strings.Replace(r.URL.Path, "//", "/", -1)
	if strings.HasPrefix(path, "/api/v1/label/") {
		// The label name must be extracted from the normalized path, since the prefix
		// has been matched against it. Slicing r.URL.Path with the same offset yields
		// a wrong label name when the original path contains duplicate slashes.
		s := path[len("/api/v1/label/"):]
		if strings.HasSuffix(s, "/values") {
			labelValuesRequests.Inc()
			labelName := s[:len(s)-len("/values")]
			httpserver.EnableCORS(w, r)
			if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {
				labelValuesErrors.Inc()
				sendPrometheusError(w, r, err)
				return true
			}
			return true
		}
	}

	switch path {
	case "/api/v1/query":
		queryRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.QueryHandler(w, r); err != nil {
			queryErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		return true
	case "/api/v1/query_range":
		queryRangeRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.QueryRangeHandler(w, r); err != nil {
			queryRangeErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		return true
	case "/api/v1/series":
		seriesRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.SeriesHandler(w, r); err != nil {
			seriesErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		return true
	case "/api/v1/series/count":
		seriesCountRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.SeriesCountHandler(w, r); err != nil {
			seriesCountErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		return true
	case "/api/v1/labels":
		labelsRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.LabelsHandler(w, r); err != nil {
			labelsErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		return true
	case "/api/v1/labels/count":
		labelsCountRequests.Inc()
		httpserver.EnableCORS(w, r)
		if err := prometheus.LabelsCountHandler(w, r); err != nil {
			labelsCountErrors.Inc()
			sendPrometheusError(w, r, err)
			return true
		}
		return true
	case "/api/v1/export":
		exportRequests.Inc()
		if err := prometheus.ExportHandler(w, r); err != nil {
			exportErrors.Inc()
			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
			return true
		}
		return true
	case "/federate":
		federateRequests.Inc()
		if err := prometheus.FederateHandler(w, r); err != nil {
			federateErrors.Inc()
			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
			return true
		}
		return true
	case "/api/v1/rules":
		// Return dumb placeholder
		rulesRequests.Inc()
		w.Header().Set("Content-Type", "application/json")
		fmt.Fprintf(w, "%s", `{"status":"success","data":{"groups":[]}}`)
		return true
	case "/api/v1/alerts":
		// Return dumb placeholder
		alertsRequests.Inc()
		w.Header().Set("Content-Type", "application/json")
		fmt.Fprintf(w, "%s", `{"status":"success","data":{"alerts":[]}}`)
		return true
	case "/api/v1/admin/tsdb/delete_series":
		deleteRequests.Inc()
		// The request is rejected unless its `authKey` arg equals -deleteAuthKey.
		authKey := r.FormValue("authKey")
		if authKey != *deleteAuthKey {
			httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)
			return true
		}
		if err := prometheus.DeleteHandler(r); err != nil {
			deleteErrors.Inc()
			httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)
			return true
		}
		w.WriteHeader(http.StatusNoContent)
		return true
	default:
		return false
	}
}

// sendPrometheusError logs err together with the request path and sends
// an error response to w with the `application/json` content type.
//
// The response status code is taken from err if it is an *httpserver.ErrorWithStatusCode.
// Otherwise http.StatusUnprocessableEntity is used.
// The response body is written by prometheus.WriteErrorResponse.
func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {
	logger.Errorf("error in %q: %s", r.URL.Path, err)

	// Determine the status code to report to the client.
	code := http.StatusUnprocessableEntity
	if withCode, ok := err.(*httpserver.ErrorWithStatusCode); ok {
		code = withCode.StatusCode
	}

	// Headers must be set before the WriteHeader call.
	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	w.WriteHeader(code)
	prometheus.WriteErrorResponse(w, code, err)
}

// Per-path counters incremented by RequestHandler.
//
// `*Requests` counters are incremented for every request routed to the given path,
// while `*Errors` counters are incremented when the corresponding handler returns an error.
// /api/v1/rules and /api/v1/alerts have request counters only, since RequestHandler
// serves a static placeholder response for them.
var (
	labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/label/{}/values"}`)
	labelValuesErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/label/{}/values"}`)

	queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query"}`)
	queryErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query"}`)

	queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_range"}`)
	queryRangeErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query_range"}`)

	seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series"}`)
	seriesErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series"}`)

	seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series/count"}`)
	seriesCountErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series/count"}`)

	labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels"}`)
	labelsErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels"}`)

	labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`)
	labelsCountErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`)

	deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
	deleteErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)

	exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`)
	exportErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`)

	federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
	federateErrors   = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)

	rulesRequests  = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
	alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
)
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`package vmselect`

			`import (`
			`"flag"`
all: return 503 http error if service is temporarily unavailable Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156 2019-08-23 08:46:45 +02:00			`"fmt"`
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`"net/http"`
			`"runtime"`
			`"strings"`
			`"time"`

			`"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/prometheus"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/app/vmstorage"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"`
			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"`
lib/timerpool: use timer pool in concurrency limiters This should reduce the number of memory allocations in highly loaded system 2019-05-28 16:17:19 +02:00			`"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"`
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`"github.com/VictoriaMetrics/metrics"`
			`)`

			`var (`
			`deleteAuthKey = flag.String("deleteAuthKey", "", "authKey for metrics' deletion via /api/v1/admin/tsdb/delete_series")`
			`maxConcurrentRequests = flag.Int("search.maxConcurrentRequests", runtime.GOMAXPROCS(-1)2, "The maximum number of concurrent search requests. It shouldn't exceed 2vCPUs for better performance. See also -search.maxQueueDuration")`
			`maxQueueDuration = flag.Duration("search.maxQueueDuration", 10*time.Second, "The maximum time the request waits for execution when -search.maxConcurrentRequests limit is reached")`
			`)`

			`// Init initializes vmselect`
			`func Init() {`
			`tmpDirPath := *vmstorage.DataPath + "/tmp"`
			`fs.RemoveDirContents(tmpDirPath)`
			`netstorage.InitTmpBlocksDir(tmpDirPath)`
			`promql.InitRollupResultCache(*vmstorage.DataPath + "/cache/rollupResult")`
app: add `vm_concurrent_` metrics for visibility in concurrency limiters for vminsert and vmselect 2019-08-05 17:27:50 +02:00
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`concurrencyCh = make(chan struct{}, *maxConcurrentRequests)`
			`}`

			`// Stop stops vmselect`
			`func Stop() {`
			`promql.StopRollupResultCache()`
			`}`

app: add `vm_concurrent_` metrics for visibility in concurrency limiters for vminsert and vmselect 2019-08-05 17:27:50 +02:00			`var concurrencyCh chan struct{}`

			`var (`
			concurrencyLimitReached = metrics.NewCounter(`vm_concurrent_select_limit_reached_total`)
			concurrencyLimitTimeout = metrics.NewCounter(`vm_concurrent_select_limit_timeout_total`)

			_ = metrics.NewGauge(`vm_concurrent_select_capacity`, func() float64 {
			`return float64(cap(concurrencyCh))`
			`})`
			_ = metrics.NewGauge(`vm_concurrent_select_current`, func() float64 {
			`return float64(len(concurrencyCh))`
			`})`
			`)`

all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`// RequestHandler handles remote read API requests for Prometheus`
			`func RequestHandler(w http.ResponseWriter, r *http.Request) bool {`
			`// Limit the number of concurrent queries.`
			`select {`
			`case concurrencyCh <- struct{}{}:`
			`defer func() { <-concurrencyCh }()`
app: add `vm_concurrent_` metrics for visibility in concurrency limiters for vminsert and vmselect 2019-08-05 17:27:50 +02:00			`default:`
			`// Sleep for a while until giving up. This should resolve short bursts in requests.`
			`concurrencyLimitReached.Inc()`
			`t := timerpool.Get(*maxQueueDuration)`
			`select {`
			`case concurrencyCh <- struct{}{}:`
			`timerpool.Put(t)`
			`defer func() { <-concurrencyCh }()`
			`case <-t.C:`
			`timerpool.Put(t)`
			`concurrencyLimitTimeout.Inc()`
all: return 503 http error if service is temporarily unavailable Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156 2019-08-23 08:46:45 +02:00			`err := &httpserver.ErrorWithStatusCode{`
app/{vminsert,vmselect}: improve error messages when VictoriaMetrics cannot handle too high number of concurrent inserts / selects 2020-01-17 12:24:37 +01:00			`Err: fmt.Errorf("cannot handle more than %d concurrent search requests during %s; possible solutions: "+`
			"increase `-search.maxQueueDuration`, increase `-search.maxConcurrentRequests`, increase server capacity",
			`maxConcurrentRequests, maxQueueDuration),`
all: return 503 http error if service is temporarily unavailable Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156 2019-08-23 08:46:45 +02:00			`StatusCode: http.StatusServiceUnavailable,`
			`}`
			`httpserver.Errorf(w, "%s", err)`
app: add `vm_concurrent_` metrics for visibility in concurrency limiters for vminsert and vmselect 2019-08-05 17:27:50 +02:00			`return true`
			`}`
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`}`

			`path := strings.Replace(r.URL.Path, "//", "/", -1)`
			`if strings.HasPrefix(path, "/api/v1/label/") {`
			`s := r.URL.Path[len("/api/v1/label/"):]`
			`if strings.HasSuffix(s, "/values") {`
			`labelValuesRequests.Inc()`
			`labelName := s[:len(s)-len("/values")]`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.LabelValuesHandler(labelName, w, r); err != nil {`
			`labelValuesErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
			`}`
			`}`

			`switch path {`
			`case "/api/v1/query":`
			`queryRequests.Inc()`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.QueryHandler(w, r); err != nil {`
			`queryErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
			`case "/api/v1/query_range":`
			`queryRangeRequests.Inc()`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.QueryRangeHandler(w, r); err != nil {`
			`queryRangeErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
			`case "/api/v1/series":`
			`seriesRequests.Inc()`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.SeriesHandler(w, r); err != nil {`
			`seriesErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
			`case "/api/v1/series/count":`
			`seriesCountRequests.Inc()`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.SeriesCountHandler(w, r); err != nil {`
			`seriesCountErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
			`case "/api/v1/labels":`
			`labelsRequests.Inc()`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.LabelsHandler(w, r); err != nil {`
			`labelsErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
app/vmselect: add `/api/v1/labels/count` handler for quick detection of labels with the maximum number of distinct values 2019-06-10 17:55:20 +02:00			`case "/api/v1/labels/count":`
			`labelsCountRequests.Inc()`
			`httpserver.EnableCORS(w, r)`
			`if err := prometheus.LabelsCountHandler(w, r); err != nil {`
			`labelsCountErrors.Inc()`
			`sendPrometheusError(w, r, err)`
			`return true`
			`}`
			`return true`
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`case "/api/v1/export":`
			`exportRequests.Inc()`
			`if err := prometheus.ExportHandler(w, r); err != nil {`
			`exportErrors.Inc()`
			`httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)`
			`return true`
			`}`
			`return true`
			`case "/federate":`
			`federateRequests.Inc()`
			`if err := prometheus.FederateHandler(w, r); err != nil {`
			`federateErrors.Inc()`
			`httpserver.Errorf(w, "error int %q: %s", r.URL.Path, err)`
			`return true`
			`}`
			`return true`
app/vmselect: add placeholders for `/api/v1/rules` and `/api/v1/alerts` 2019-12-03 18:32:57 +01:00			`case "/api/v1/rules":`
			`// Return dumb placeholder`
			`rulesRequests.Inc()`
			`w.Header().Set("Content-Type", "application/json")`
			fmt.Fprintf(w, "%s", `{"status":"success","data":{"groups":[]}}`)
			`return true`
			`case "/api/v1/alerts":`
			`// Return dumb placehloder`
			`alertsRequests.Inc()`
			`w.Header().Set("Content-Type", "application/json")`
			fmt.Fprintf(w, "%s", `{"status":"success","data":{"alerts":[]}}`)
			`return true`
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`case "/api/v1/admin/tsdb/delete_series":`
			`deleteRequests.Inc()`
			`authKey := r.FormValue("authKey")`
			`if authKey != *deleteAuthKey {`
			`httpserver.Errorf(w, "invalid authKey %q. It must match the value from -deleteAuthKey command line flag", authKey)`
			`return true`
			`}`
			`if err := prometheus.DeleteHandler(r); err != nil {`
			`deleteErrors.Inc()`
			`httpserver.Errorf(w, "error in %q: %s", r.URL.Path, err)`
			`return true`
			`}`
			`w.WriteHeader(http.StatusNoContent)`
			`return true`
			`default:`
			`return false`
			`}`
			`}`

			`func sendPrometheusError(w http.ResponseWriter, r *http.Request, err error) {`
			`logger.Errorf("error in %q: %s", r.URL.Path, err)`

			`w.Header().Set("Content-Type", "application/json")`
all: return 503 http error if service is temporarily unavailable Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/156 2019-08-23 08:46:45 +02:00			`statusCode := http.StatusUnprocessableEntity`
			`if esc, ok := err.(*httpserver.ErrorWithStatusCode); ok {`
			`statusCode = esc.StatusCode`
			`}`
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`w.WriteHeader(statusCode)`
			`prometheus.WriteErrorResponse(w, statusCode, err)`
			`}`

			`var (`
			labelValuesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/label/{}/values"}`)
			labelValuesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/label/{}/values"}`)

			queryRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query"}`)
			queryErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query"}`)

			queryRangeRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/query_range"}`)
			queryRangeErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/query_range"}`)

			seriesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series"}`)
			seriesErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series"}`)

			seriesCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/series/count"}`)
			seriesCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/series/count"}`)

			labelsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels"}`)
			labelsErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels"}`)

app/vmselect: add `/api/v1/labels/count` handler for quick detection of labels with the maximum number of distinct values 2019-06-10 17:55:20 +02:00			labelsCountRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/labels/count"}`)
			labelsCountErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/labels/count"}`)

all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			deleteRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/admin/tsdb/delete_series"}`)
			deleteErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/admin/tsdb/delete_series"}`)

			exportRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/export"}`)
			exportErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/api/v1/export"}`)

			federateRequests = metrics.NewCounter(`vm_http_requests_total{path="/federate"}`)
			federateErrors = metrics.NewCounter(`vm_http_request_errors_total{path="/federate"}`)
app/vmselect: add placeholders for `/api/v1/rules` and `/api/v1/alerts` 2019-12-03 18:32:57 +01:00
			rulesRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/rules"}`)
			alertsRequests = metrics.NewCounter(`vm_http_requests_total{path="/api/v1/alerts"}`)
all: open-sourcing single-node version 2019-05-22 23:16:55 +02:00			`)`