2023-06-20 07:55:12 +02:00
package vlselect
import (
2024-06-27 14:18:42 +02:00
"context"
2023-06-21 16:57:09 +02:00
"embed"
2023-06-20 07:55:12 +02:00
"flag"
"fmt"
"net/http"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vlselect/logsql"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/metrics"
)
var (
maxConcurrentRequests = flag . Int ( "search.maxConcurrentRequests" , getDefaultMaxConcurrentRequests ( ) , "The maximum number of concurrent search requests. " +
"It shouldn't be high, since a single request can saturate all the CPU cores, while many concurrently executed requests may require high amounts of memory. " +
"See also -search.maxQueueDuration" )
maxQueueDuration = flag . Duration ( "search.maxQueueDuration" , 10 * time . Second , "The maximum time the search request waits for execution when -search.maxConcurrentRequests " +
"limit is reached; see also -search.maxQueryDuration" )
2024-06-27 14:18:42 +02:00
maxQueryDuration = flag . Duration ( "search.maxQueryDuration" , time . Second * 30 , "The maximum duration for query execution. It can be overridden on a per-query basis via 'timeout' query arg" )
2023-06-20 07:55:12 +02:00
)
// getDefaultMaxConcurrentRequests returns the default value for -search.maxConcurrentRequests.
//
// The value is derived from cgroup.AvailableCPUs(): it is doubled when it doesn't exceed 4,
// it is capped at 16 when it is bigger than 16, and it is returned as is otherwise.
func getDefaultMaxConcurrentRequests() int {
	// A single request can saturate all the CPU cores, so there is no sense
	// in allowing higher number of concurrent requests - they will just contend
	// for unavailable CPU time.
	const upperLimit = 16

	cpus := cgroup.AvailableCPUs()
	switch {
	case cpus <= 4:
		return 2 * cpus
	case cpus > upperLimit:
		return upperLimit
	default:
		return cpus
	}
}
// Init initializes vlselect.
//
// It creates concurrencyLimitCh with the capacity taken from -search.maxConcurrentRequests.
// NOTE(review): presumably this must be called after command-line flags are parsed - confirm against the caller.
func Init() {
	capacity := *maxConcurrentRequests
	concurrencyLimitCh = make(chan struct{}, capacity)
}
// Stop stops vlselect.
//
// It is currently a no-op.
func Stop() {}
// concurrencyLimitCh limits the number of concurrently executed select requests.
//
// A request sends a value to the channel before execution and receives it back when done.
// The channel is nil until Init is called.
var concurrencyLimitCh chan struct{}
// Metrics, which expose the state of the concurrency limiter for select requests.
//
// Metric names must not contain leading or trailing whitespace, since such names
// aren't valid Prometheus identifiers.
var (
	// concurrencyLimitReached counts requests, which had to wait for a free concurrency slot.
	concurrencyLimitReached = metrics.NewCounter(`vl_concurrent_select_limit_reached_total`)

	// concurrencyLimitTimeout counts requests, which hit the deadline while waiting for a free concurrency slot.
	concurrencyLimitTimeout = metrics.NewCounter(`vl_concurrent_select_limit_timeout_total`)

	// The capacity of concurrencyLimitCh.
	_ = metrics.NewGauge(`vl_concurrent_select_capacity`, func() float64 {
		return float64(cap(concurrencyLimitCh))
	})

	// The number of concurrency slots occupied at the moment.
	_ = metrics.NewGauge(`vl_concurrent_select_current`, func() float64 {
		return float64(len(concurrencyLimitCh))
	})
)
2023-06-21 16:57:09 +02:00
// vmuiFiles holds the contents of the vmui directory embedded into the binary at build time.
//
//go:embed vmui
var vmuiFiles embed . FS
// vmuiFileServer serves the files from vmuiFiles over HTTP.
var vmuiFileServer = http . FileServer ( http . FS ( vmuiFiles ) )
2023-06-20 07:55:12 +02:00
// RequestHandler handles select requests for VictoriaLogs
func RequestHandler ( w http . ResponseWriter , r * http . Request ) bool {
path := r . URL . Path
2023-06-22 04:52:48 +02:00
if ! strings . HasPrefix ( path , "/select/" ) {
// Skip requests, which do not start with /select/, since these aren't our requests.
return false
}
2023-06-20 07:55:12 +02:00
path = strings . ReplaceAll ( path , "//" , "/" )
2024-05-22 21:01:20 +02:00
if path == "/select/vmui" {
2023-06-22 04:52:48 +02:00
// VMUI access via incomplete url without `/` in the end. Redirect to complete url.
// Use relative redirect, since the hostname and path prefix may be incorrect if VictoriaMetrics
// is hidden behind vmauth or similar proxy.
_ = r . ParseForm ( )
newURL := "vmui/?" + r . Form . Encode ( )
httpserver . Redirect ( w , newURL )
return true
}
2024-05-22 21:01:20 +02:00
if strings . HasPrefix ( path , "/select/vmui/" ) {
if strings . HasPrefix ( path , "/select/vmui/static/" ) {
2023-10-12 09:30:39 +02:00
// Allow clients caching static contents for long period of time, since it shouldn't change over time.
// Path to static contents (such as js and css) must be changed whenever its contents is changed.
// See https://developer.chrome.com/docs/lighthouse/performance/uses-long-cache-ttl/
w . Header ( ) . Set ( "Cache-Control" , "max-age=31536000" )
}
2024-05-22 23:23:38 +02:00
r . URL . Path = strings . TrimPrefix ( path , "/select" )
2023-06-22 04:52:48 +02:00
vmuiFileServer . ServeHTTP ( w , r )
return true
}
2024-06-27 14:18:42 +02:00
// Limit the number of concurrent queries, which can consume big amounts of CPU time.
2023-06-20 07:55:12 +02:00
startTime := time . Now ( )
2024-02-18 22:01:34 +01:00
ctx := r . Context ( )
2024-06-27 14:18:42 +02:00
d := getMaxQueryDuration ( r )
ctxWithTimeout , cancel := context . WithTimeout ( ctx , d )
defer cancel ( )
stopCh := ctxWithTimeout . Done ( )
2023-06-20 07:55:12 +02:00
select {
case concurrencyLimitCh <- struct { } { } :
defer func ( ) { <- concurrencyLimitCh } ( )
default :
// Sleep for a while until giving up. This should resolve short bursts in requests.
concurrencyLimitReached . Inc ( )
select {
case concurrencyLimitCh <- struct { } { } :
defer func ( ) { <- concurrencyLimitCh } ( )
case <- stopCh :
2024-06-27 14:18:42 +02:00
switch ctxWithTimeout . Err ( ) {
case context . Canceled :
remoteAddr := httpserver . GetQuotedRemoteAddr ( r )
requestURI := httpserver . GetRequestURI ( r )
logger . Infof ( "client has canceled the pending request after %.3f seconds: remoteAddr=%s, requestURI: %q" ,
time . Since ( startTime ) . Seconds ( ) , remoteAddr , requestURI )
case context . DeadlineExceeded :
concurrencyLimitTimeout . Inc ( )
err := & httpserver . ErrorWithStatusCode {
Err : fmt . Errorf ( "couldn't start executing the request in %.3f seconds, since -search.maxConcurrentRequests=%d concurrent requests " +
"are executed. Possible solutions: to reduce query load; to add more compute resources to the server; " +
"to increase -search.maxQueueDuration=%s; to increase -search.maxQueryDuration=%s; to increase -search.maxConcurrentRequests; " +
"to pass bigger value to 'timeout' query arg" ,
d . Seconds ( ) , * maxConcurrentRequests , maxQueueDuration , maxQueryDuration ) ,
StatusCode : http . StatusServiceUnavailable ,
}
httpserver . Errorf ( w , r , "%s" , err )
2023-06-20 07:55:12 +02:00
}
return true
}
}
2024-06-27 14:18:42 +02:00
if path == "/select/logsql/tail" {
logsqlTailRequests . Inc ( )
// Process live tailing request without timeout (e.g. use ctx instead of ctxWithTimeout),
// since it is OK to run live tailing requests for very long time.
logsql . ProcessLiveTailRequest ( ctx , w , r )
return true
}
ok := processSelectRequest ( ctxWithTimeout , w , r , path )
if ! ok {
return false
}
err := ctxWithTimeout . Err ( )
switch err {
case nil :
// nothing to do
case context . Canceled :
remoteAddr := httpserver . GetQuotedRemoteAddr ( r )
requestURI := httpserver . GetRequestURI ( r )
logger . Infof ( "client has canceled the request after %.3f seconds: remoteAddr=%s, requestURI: %q" ,
time . Since ( startTime ) . Seconds ( ) , remoteAddr , requestURI )
case context . DeadlineExceeded :
err = & httpserver . ErrorWithStatusCode {
Err : fmt . Errorf ( "the request couldn't be executed in %.3f seconds; possible solutions: " +
"to increase -search.maxQueryDuration=%s; to pass bigger value to 'timeout' query arg" , d . Seconds ( ) , maxQueryDuration ) ,
StatusCode : http . StatusServiceUnavailable ,
}
httpserver . Errorf ( w , r , "%s" , err )
default :
httpserver . Errorf ( w , r , "unexpected error: %s" , err )
}
return true
}
func processSelectRequest ( ctx context . Context , w http . ResponseWriter , r * http . Request , path string ) bool {
2024-05-22 21:01:20 +02:00
httpserver . EnableCORS ( w , r )
2024-05-20 04:08:30 +02:00
switch path {
2024-05-22 21:01:20 +02:00
case "/select/logsql/field_names" :
logsqlFieldNamesRequests . Inc ( )
logsql . ProcessFieldNamesRequest ( ctx , w , r )
2023-06-20 07:55:12 +02:00
return true
2024-05-22 21:01:20 +02:00
case "/select/logsql/field_values" :
2024-05-20 04:08:30 +02:00
logsqlFieldValuesRequests . Inc ( )
logsql . ProcessFieldValuesRequest ( ctx , w , r )
return true
2024-05-22 21:01:20 +02:00
case "/select/logsql/hits" :
2024-05-20 04:08:30 +02:00
logsqlHitsRequests . Inc ( )
logsql . ProcessHitsRequest ( ctx , w , r )
return true
2024-05-22 21:01:20 +02:00
case "/select/logsql/query" :
logsqlQueryRequests . Inc ( )
logsql . ProcessQueryRequest ( ctx , w , r )
return true
2024-05-25 21:36:16 +02:00
case "/select/logsql/stream_field_names" :
logsqlStreamFieldNamesRequests . Inc ( )
logsql . ProcessStreamFieldNamesRequest ( ctx , w , r )
2024-05-22 21:01:20 +02:00
return true
2024-05-25 21:36:16 +02:00
case "/select/logsql/stream_field_values" :
logsqlStreamFieldValuesRequests . Inc ( )
logsql . ProcessStreamFieldValuesRequest ( ctx , w , r )
2024-05-22 21:01:20 +02:00
return true
2024-06-27 14:18:42 +02:00
case "/select/logsql/stream_ids" :
logsqlStreamIDsRequests . Inc ( )
logsql . ProcessStreamIDsRequest ( ctx , w , r )
return true
2024-05-22 21:01:20 +02:00
case "/select/logsql/streams" :
logsqlStreamsRequests . Inc ( )
logsql . ProcessStreamsRequest ( ctx , w , r )
return true
2023-06-20 07:55:12 +02:00
default :
return false
}
}
// getMaxQueryDuration returns the maximum duration for query from r.
func getMaxQueryDuration ( r * http . Request ) time . Duration {
dms , err := httputils . GetDuration ( r , "timeout" , 0 )
if err != nil {
dms = 0
}
d := time . Duration ( dms ) * time . Millisecond
if d <= 0 || d > * maxQueryDuration {
d = * maxQueryDuration
}
return d
}
var (
2024-05-22 21:01:20 +02:00
logsqlFieldNamesRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/field_names"} ` )
logsqlFieldValuesRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/field_values"} ` )
logsqlHitsRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/hits"} ` )
logsqlQueryRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/query"} ` )
2024-05-25 21:36:16 +02:00
logsqlStreamFieldNamesRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/stream_field_names"} ` )
logsqlStreamFieldValuesRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/stream_field_values"} ` )
2024-06-27 14:18:42 +02:00
logsqlStreamIDsRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/stream_ids"} ` )
2024-05-22 21:01:20 +02:00
logsqlStreamsRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/streams"} ` )
2024-06-27 14:18:42 +02:00
logsqlTailRequests = metrics . NewCounter ( ` vl_http_requests_total { path="/select/logsql/tail"} ` )
2023-06-20 07:55:12 +02:00
)