2019-05-22 23:16:55 +02:00
package promql
import (
"flag"
"fmt"
"math"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
2020-09-11 12:18:57 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
2019-05-22 23:23:23 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/auth"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
2020-12-08 19:49:32 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
2021-08-15 12:20:02 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/decimal"
2019-05-22 23:16:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/metrics"
2020-04-28 14:28:22 +02:00
"github.com/VictoriaMetrics/metricsql"
2019-05-22 23:16:55 +02:00
)
// Command-line flags consulted during query evaluation.
var (
	// disableCache switches off response caching and start/end adjustment.
	disableCache = flag.Bool(
		"search.disableCache",
		false,
		"Whether to disable response caching. This may be useful during data backfilling",
	)

	// maxPointsPerTimeseries is the limit enforced by ValidateMaxPointsPerTimeseries.
	maxPointsPerTimeseries = flag.Int(
		"search.maxPointsPerTimeseries",
		30e3,
		"The maximum points per a single timeseries returned from /api/v1/query_range. "+
			"This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points "+
			"returned to graphing UI such as Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph",
	)
)
2019-05-25 20:51:11 +02:00
// minTimeseriesPointsForTimeRounding is the smallest number of points per
// timeseries for which AdjustStartEnd rounds the requested time range.
//
// Rounding raises the cache hit ratio for frequently requested queries
// over big time ranges.
const minTimeseriesPointsForTimeRounding = 50
// ValidateMaxPointsPerTimeseries checks the maximum number of points that
// may be returned per each time series.
//
// The number mustn't exceed -search.maxPointsPerTimeseries.
func ValidateMaxPointsPerTimeseries ( start , end , step int64 ) error {
points := ( end - start ) / step + 1
if uint64 ( points ) > uint64 ( * maxPointsPerTimeseries ) {
2019-06-24 21:52:06 +02:00
return fmt . Errorf ( ` too many points for the given step=%d, start=%d and end=%d: %d; cannot exceed -search.maxPointsPerTimeseries=%d ` ,
2019-05-22 23:16:55 +02:00
step , start , end , uint64 ( points ) , * maxPointsPerTimeseries )
}
return nil
}
// AdjustStartEnd adjusts start and end values, so response caching may be enabled.
//
// See EvalConfig.mayCache for details.
func AdjustStartEnd ( start , end , step int64 ) ( int64 , int64 ) {
2020-07-30 22:14:15 +02:00
if * disableCache {
// Do not adjust start and end values when cache is disabled.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/563
return start , end
}
2019-05-22 23:16:55 +02:00
points := ( end - start ) / step + 1
if points < minTimeseriesPointsForTimeRounding {
// Too small number of points for rounding.
return start , end
}
// Round start and end to values divisible by step in order
// to enable response caching (see EvalConfig.mayCache).
2020-09-03 12:21:51 +02:00
start , end = alignStartEnd ( start , end , step )
2019-12-24 21:44:24 +01:00
// Make sure that the new number of points is the same as the initial number of points.
newPoints := ( end - start ) / step + 1
for newPoints > points {
end -= step
newPoints --
}
2019-05-22 23:16:55 +02:00
return start , end
}
2020-09-03 12:21:51 +02:00
// alignStartEnd widens [start, end] so both bounds become divisible by step.
//
// start is rounded down and end is rounded up. A floored remainder is used,
// since the Go % operator returns a negative remainder for negative operands,
// which would otherwise round a negative start up and leave a negative end unaligned.
//
// step must be positive.
func alignStartEnd(start, end, step int64) (int64, int64) {
	// Round start to the nearest smaller value divisible by step.
	if rem := start % step; rem != 0 {
		if rem < 0 {
			rem += step
		}
		start -= rem
	}
	// Round end to the nearest bigger value divisible by step.
	if rem := end % step; rem != 0 {
		if rem < 0 {
			rem += step
		}
		end += step - rem
	}
	return start, end
}
2019-05-22 23:16:55 +02:00
// EvalConfig is the configuration required for query evaluation via Exec
type EvalConfig struct {
2019-05-22 23:23:23 +02:00
AuthToken * auth . Token
Start int64
End int64
Step int64
2019-05-22 23:16:55 +02:00
2020-07-31 17:00:21 +02:00
// QuotedRemoteAddr contains quoted remote address.
QuotedRemoteAddr string
2020-09-11 12:18:57 +02:00
Deadline searchutils . Deadline
2019-05-22 23:16:55 +02:00
MayCache bool
2019-10-15 18:12:27 +02:00
// LookbackDelta is analog to `-query.lookback-delta` from Prometheus.
LookbackDelta int64
2021-03-15 11:35:44 +01:00
// How many decimal digits after the point to leave in response.
RoundDigits int
2021-02-01 16:42:35 +01:00
// EnforcedTagFilters used for apply additional label filters to query.
EnforcedTagFilters [ ] storage . TagFilter
2021-03-15 11:35:44 +01:00
// Whether to deny partial response.
2019-06-30 00:27:03 +02:00
DenyPartialResponse bool
2020-11-14 11:36:21 +01:00
// IsPartialResponse is set during query execution and can be used by Exec caller after query execution.
IsPartialResponse bool
2019-05-22 23:16:55 +02:00
timestamps [ ] int64
timestampsOnce sync . Once
}
// newEvalConfig returns new EvalConfig copy from src.
func newEvalConfig ( src * EvalConfig ) * EvalConfig {
var ec EvalConfig
2019-05-22 23:23:23 +02:00
ec . AuthToken = src . AuthToken
2019-05-22 23:16:55 +02:00
ec . Start = src . Start
ec . End = src . End
ec . Step = src . Step
ec . Deadline = src . Deadline
ec . MayCache = src . MayCache
2019-10-15 18:12:27 +02:00
ec . LookbackDelta = src . LookbackDelta
2021-03-15 11:35:44 +01:00
ec . RoundDigits = src . RoundDigits
2021-02-01 16:42:35 +01:00
ec . EnforcedTagFilters = src . EnforcedTagFilters
2019-06-30 00:27:03 +02:00
ec . DenyPartialResponse = src . DenyPartialResponse
2020-11-14 11:36:21 +01:00
ec . IsPartialResponse = src . IsPartialResponse
2019-05-22 23:16:55 +02:00
// do not copy src.timestamps - they must be generated again.
return & ec
}
2020-11-14 11:36:21 +01:00
// updateIsPartialResponse records isPartialResponse in ec.
//
// Once ec.IsPartialResponse is set, it is never reset by this method.
func (ec *EvalConfig) updateIsPartialResponse(isPartialResponse bool) {
	if ec.IsPartialResponse {
		return
	}
	ec.IsPartialResponse = isPartialResponse
}
2019-05-22 23:16:55 +02:00
// validate reports a BUG via logger.Panicf if ec holds an inverted time range
// or a non-positive step.
func (ec *EvalConfig) validate() {
	start, end, step := ec.Start, ec.End, ec.Step
	if start > end {
		logger.Panicf("BUG: start cannot exceed end; got %d vs %d", start, end)
	}
	if step <= 0 {
		logger.Panicf("BUG: step must be greater than 0; got %d", step)
	}
}
func ( ec * EvalConfig ) mayCache ( ) bool {
2020-07-30 22:14:15 +02:00
if * disableCache {
return false
}
2019-05-22 23:16:55 +02:00
if ! ec . MayCache {
return false
}
if ec . Start % ec . Step != 0 {
return false
}
if ec . End % ec . Step != 0 {
return false
}
return true
}
// getSharedTimestamps returns timestamps for ec.
//
// The timestamps are generated on the first call and the same slice
// is returned on subsequent calls.
func (ec *EvalConfig) getSharedTimestamps() []int64 {
	once := &ec.timestampsOnce
	once.Do(ec.timestampsInit)
	return ec.timestamps
}
// timestampsInit fills ec.timestamps from ec.Start, ec.End and ec.Step.
func (ec *EvalConfig) timestampsInit() {
	start, end, step := ec.Start, ec.End, ec.Step
	ec.timestamps = getTimestamps(start, end, step)
}
// getTimestamps returns timestamps start, start+step, ... which do not exceed end.
//
// The args must be validated by the caller; invalid args are reported
// as a BUG via logger.Panicf.
func getTimestamps(start, end, step int64) []int64 {
	// Sanity checks.
	if step <= 0 {
		logger.Panicf("BUG: Step must be bigger than 0; got %d", step)
	}
	if start > end {
		logger.Panicf("BUG: Start cannot exceed End; got %d vs %d", start, end)
	}
	if err := ValidateMaxPointsPerTimeseries(start, end, step); err != nil {
		logger.Panicf("BUG: %s; this must be validated before the call to getTimestamps", err)
	}

	// Prepare timestamps.
	n := (end-start)/step + 1
	timestamps := make([]int64, n)
	for i, ts := 0, start; i < len(timestamps); i, ts = i+1, ts+step {
		timestamps[i] = ts
	}
	return timestamps
}
2019-12-25 20:35:47 +01:00
func evalExpr ( ec * EvalConfig , e metricsql . Expr ) ( [ ] * timeseries , error ) {
if me , ok := e . ( * metricsql . MetricExpr ) ; ok {
re := & metricsql . RollupExpr {
2019-05-22 23:16:55 +02:00
Expr : me ,
}
2020-01-03 19:42:51 +01:00
rv , err := evalRollupFunc ( ec , "default_rollup" , rollupDefault , e , re , nil )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( ` cannot evaluate %q: %w ` , me . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
return rv , nil
}
2019-12-25 20:35:47 +01:00
if re , ok := e . ( * metricsql . RollupExpr ) ; ok {
2020-01-03 19:42:51 +01:00
rv , err := evalRollupFunc ( ec , "default_rollup" , rollupDefault , e , re , nil )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( ` cannot evaluate %q: %w ` , re . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
return rv , nil
}
2019-12-25 20:35:47 +01:00
if fe , ok := e . ( * metricsql . FuncExpr ) ; ok {
2019-05-22 23:16:55 +02:00
nrf := getRollupFunc ( fe . Name )
if nrf == nil {
args , err := evalExprs ( ec , fe . Args )
if err != nil {
return nil , err
}
tf := getTransformFunc ( fe . Name )
if tf == nil {
return nil , fmt . Errorf ( ` unknown func %q ` , fe . Name )
}
tfa := & transformFuncArg {
ec : ec ,
fe : fe ,
args : args ,
}
rv , err := tf ( tfa )
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( ` cannot evaluate %q: %w ` , fe . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
return rv , nil
}
args , re , err := evalRollupFuncArgs ( ec , fe )
if err != nil {
return nil , err
}
rf , err := nrf ( args )
if err != nil {
return nil , err
}
2020-01-03 19:42:51 +01:00
rv , err := evalRollupFunc ( ec , fe . Name , rf , e , re , nil )
2019-05-22 23:16:55 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( ` cannot evaluate %q: %w ` , fe . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
return rv , nil
}
2019-12-25 20:35:47 +01:00
if ae , ok := e . ( * metricsql . AggrFuncExpr ) ; ok {
2019-07-10 11:57:27 +02:00
if callbacks := getIncrementalAggrFuncCallbacks ( ae . Name ) ; callbacks != nil {
fe , nrf := tryGetArgRollupFuncWithMetricExpr ( ae )
if fe != nil {
2019-12-25 20:35:47 +01:00
// There is an optimized path for calculating metricsql.AggrFuncExpr over rollupFunc over metricsql.MetricExpr.
2019-07-10 11:57:27 +02:00
// The optimized path saves RAM for aggregates over big number of time series.
args , re , err := evalRollupFuncArgs ( ec , fe )
if err != nil {
return nil , err
}
rf , err := nrf ( args )
if err != nil {
return nil , err
}
iafc := newIncrementalAggrFuncContext ( ae , callbacks )
2020-01-03 19:42:51 +01:00
return evalRollupFunc ( ec , fe . Name , rf , e , re , iafc )
2019-07-10 11:57:27 +02:00
}
}
2019-05-22 23:16:55 +02:00
args , err := evalExprs ( ec , ae . Args )
if err != nil {
return nil , err
}
af := getAggrFunc ( ae . Name )
if af == nil {
return nil , fmt . Errorf ( ` unknown func %q ` , ae . Name )
}
afa := & aggrFuncArg {
ae : ae ,
args : args ,
ec : ec ,
}
rv , err := af ( afa )
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( ` cannot evaluate %q: %w ` , ae . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
return rv , nil
}
2019-12-25 20:35:47 +01:00
if be , ok := e . ( * metricsql . BinaryOpExpr ) ; ok {
2021-02-10 21:59:35 +01:00
// Execute left and right sides of the binary operation in parallel.
// This should reduce execution times for heavy queries.
// On the other side this can increase CPU and RAM usage when executing heavy queries.
// TODO: think on how to limit CPU and RAM usage while leaving short execution times.
var left , right [ ] * timeseries
var mu sync . Mutex
var wg sync . WaitGroup
var errGlobal error
wg . Add ( 1 )
go func ( ) {
defer wg . Done ( )
2021-02-11 11:37:19 +01:00
ecCopy := newEvalConfig ( ec )
tss , err := evalExpr ( ecCopy , be . Left )
2021-02-10 21:59:35 +01:00
mu . Lock ( )
if err != nil {
if errGlobal == nil {
errGlobal = err
}
}
left = tss
mu . Unlock ( )
} ( )
wg . Add ( 1 )
go func ( ) {
defer wg . Done ( )
2021-02-11 11:37:19 +01:00
ecCopy := newEvalConfig ( ec )
tss , err := evalExpr ( ecCopy , be . Right )
2021-02-10 21:59:35 +01:00
mu . Lock ( )
if err != nil {
if errGlobal == nil {
errGlobal = err
}
}
right = tss
mu . Unlock ( )
} ( )
wg . Wait ( )
if errGlobal != nil {
return nil , errGlobal
2019-05-22 23:16:55 +02:00
}
2021-02-10 21:59:35 +01:00
2019-05-22 23:16:55 +02:00
bf := getBinaryOpFunc ( be . Op )
if bf == nil {
return nil , fmt . Errorf ( ` unknown binary op %q ` , be . Op )
}
bfa := & binaryOpFuncArg {
be : be ,
left : left ,
right : right ,
}
rv , err := bf ( bfa )
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( ` cannot evaluate %q: %w ` , be . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
return rv , nil
}
2019-12-25 20:35:47 +01:00
if ne , ok := e . ( * metricsql . NumberExpr ) ; ok {
2019-05-22 23:16:55 +02:00
rv := evalNumber ( ec , ne . N )
return rv , nil
}
2019-12-25 20:35:47 +01:00
if se , ok := e . ( * metricsql . StringExpr ) ; ok {
2019-05-22 23:16:55 +02:00
rv := evalString ( ec , se . S )
return rv , nil
}
2021-07-12 16:16:38 +02:00
if de , ok := e . ( * metricsql . DurationExpr ) ; ok {
d := de . Duration ( ec . Step )
dSec := float64 ( d ) / 1000
rv := evalNumber ( ec , dSec )
return rv , nil
}
2019-05-22 23:16:55 +02:00
return nil , fmt . Errorf ( "unexpected expression %q" , e . AppendString ( nil ) )
}
2019-12-25 20:35:47 +01:00
func tryGetArgRollupFuncWithMetricExpr ( ae * metricsql . AggrFuncExpr ) ( * metricsql . FuncExpr , newRollupFunc ) {
2019-07-10 11:57:27 +02:00
if len ( ae . Args ) != 1 {
return nil , nil
}
e := ae . Args [ 0 ]
// Make sure e contains one of the following:
// - metricExpr
// - metricExpr[d]
// - rollupFunc(metricExpr)
// - rollupFunc(metricExpr[d])
2019-12-25 20:35:47 +01:00
if me , ok := e . ( * metricsql . MetricExpr ) ; ok {
2019-07-10 11:57:27 +02:00
// e = metricExpr
if me . IsEmpty ( ) {
return nil , nil
}
2019-12-25 20:35:47 +01:00
fe := & metricsql . FuncExpr {
2019-07-10 11:57:27 +02:00
Name : "default_rollup" ,
2019-12-25 20:35:47 +01:00
Args : [ ] metricsql . Expr { me } ,
2019-07-10 11:57:27 +02:00
}
nrf := getRollupFunc ( fe . Name )
return fe , nrf
}
2019-12-25 20:35:47 +01:00
if re , ok := e . ( * metricsql . RollupExpr ) ; ok {
if me , ok := re . Expr . ( * metricsql . MetricExpr ) ; ! ok || me . IsEmpty ( ) || re . ForSubquery ( ) {
2019-07-10 11:57:27 +02:00
return nil , nil
}
2019-09-13 20:40:46 +02:00
// e = metricExpr[d]
2019-12-25 20:35:47 +01:00
fe := & metricsql . FuncExpr {
2019-07-10 11:57:27 +02:00
Name : "default_rollup" ,
2019-12-25 20:35:47 +01:00
Args : [ ] metricsql . Expr { re } ,
2019-07-10 11:57:27 +02:00
}
nrf := getRollupFunc ( fe . Name )
return fe , nrf
}
2019-12-25 20:35:47 +01:00
fe , ok := e . ( * metricsql . FuncExpr )
2019-07-10 11:57:27 +02:00
if ! ok {
return nil , nil
}
nrf := getRollupFunc ( fe . Name )
if nrf == nil {
return nil , nil
}
rollupArgIdx := getRollupArgIdx ( fe . Name )
2020-01-15 15:26:02 +01:00
if rollupArgIdx >= len ( fe . Args ) {
// Incorrect number of args for rollup func.
return nil , nil
}
2019-07-10 11:57:27 +02:00
arg := fe . Args [ rollupArgIdx ]
2019-12-25 20:35:47 +01:00
if me , ok := arg . ( * metricsql . MetricExpr ) ; ok {
2019-07-10 11:57:27 +02:00
if me . IsEmpty ( ) {
return nil , nil
}
2019-09-13 20:40:46 +02:00
// e = rollupFunc(metricExpr)
2019-12-25 20:35:47 +01:00
return & metricsql . FuncExpr {
2019-07-10 11:57:27 +02:00
Name : fe . Name ,
2019-12-25 20:35:47 +01:00
Args : [ ] metricsql . Expr { me } ,
2019-07-10 11:57:27 +02:00
} , nrf
}
2019-12-25 20:35:47 +01:00
if re , ok := arg . ( * metricsql . RollupExpr ) ; ok {
if me , ok := re . Expr . ( * metricsql . MetricExpr ) ; ! ok || me . IsEmpty ( ) || re . ForSubquery ( ) {
2019-07-10 11:57:27 +02:00
return nil , nil
}
2019-09-13 20:40:46 +02:00
// e = rollupFunc(metricExpr[d])
2019-07-10 11:57:27 +02:00
return fe , nrf
}
return nil , nil
}
2019-12-25 20:35:47 +01:00
func evalExprs ( ec * EvalConfig , es [ ] metricsql . Expr ) ( [ ] [ ] * timeseries , error ) {
2019-05-22 23:16:55 +02:00
var rvs [ ] [ ] * timeseries
for _ , e := range es {
rv , err := evalExpr ( ec , e )
if err != nil {
return nil , err
}
rvs = append ( rvs , rv )
}
return rvs , nil
}
2019-12-25 20:35:47 +01:00
func evalRollupFuncArgs ( ec * EvalConfig , fe * metricsql . FuncExpr ) ( [ ] interface { } , * metricsql . RollupExpr , error ) {
var re * metricsql . RollupExpr
2019-05-22 23:16:55 +02:00
rollupArgIdx := getRollupArgIdx ( fe . Name )
2020-01-10 20:16:14 +01:00
if len ( fe . Args ) <= rollupArgIdx {
2020-01-15 15:26:02 +01:00
return nil , nil , fmt . Errorf ( "expecting at least %d args to %q; got %d args; expr: %q" , rollupArgIdx + 1 , fe . Name , len ( fe . Args ) , fe . AppendString ( nil ) )
2020-01-10 20:16:14 +01:00
}
2019-05-22 23:16:55 +02:00
args := make ( [ ] interface { } , len ( fe . Args ) )
for i , arg := range fe . Args {
if i == rollupArgIdx {
re = getRollupExprArg ( arg )
args [ i ] = re
continue
}
ts , err := evalExpr ( ec , arg )
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , nil , fmt . Errorf ( "cannot evaluate arg #%d for %q: %w" , i + 1 , fe . AppendString ( nil ) , err )
2019-05-22 23:16:55 +02:00
}
args [ i ] = ts
}
return args , re , nil
}
2019-12-25 20:35:47 +01:00
func getRollupExprArg ( arg metricsql . Expr ) * metricsql . RollupExpr {
re , ok := arg . ( * metricsql . RollupExpr )
2019-05-22 23:16:55 +02:00
if ! ok {
2019-12-25 20:35:47 +01:00
// Wrap non-rollup arg into metricsql.RollupExpr.
return & metricsql . RollupExpr {
2019-05-22 23:16:55 +02:00
Expr : arg ,
}
}
2019-09-13 20:40:46 +02:00
if ! re . ForSubquery ( ) {
// Return standard rollup if it doesn't contain subquery.
2019-05-22 23:16:55 +02:00
return re
}
2019-12-25 20:35:47 +01:00
me , ok := re . Expr . ( * metricsql . MetricExpr )
2019-05-22 23:16:55 +02:00
if ! ok {
// arg contains subquery.
return re
}
// Convert me[w:step] -> default_rollup(me)[w:step]
reNew := * re
2019-12-25 20:35:47 +01:00
reNew . Expr = & metricsql . FuncExpr {
2019-05-22 23:16:55 +02:00
Name : "default_rollup" ,
2019-12-25 20:35:47 +01:00
Args : [ ] metricsql . Expr {
& metricsql . RollupExpr { Expr : me } ,
2019-05-22 23:16:55 +02:00
} ,
}
return & reNew
}
2020-01-03 19:42:51 +01:00
func evalRollupFunc ( ec * EvalConfig , name string , rf rollupFunc , expr metricsql . Expr , re * metricsql . RollupExpr , iafc * incrementalAggrFuncContext ) ( [ ] * timeseries , error ) {
2019-05-22 23:16:55 +02:00
ecNew := ec
var offset int64
2021-07-12 16:16:38 +02:00
if re . Offset != nil {
offset = re . Offset . Duration ( ec . Step )
2020-02-04 22:23:37 +01:00
ecNew = newEvalConfig ( ecNew )
2019-05-22 23:16:55 +02:00
ecNew . Start -= offset
ecNew . End -= offset
2020-12-27 13:09:22 +01:00
// There is no need in calling AdjustStartEnd() on ecNew if ecNew.MayCache is set to true,
// since the time range alignment has been already performed by the caller,
// so cache hit rate should be quite good.
// See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/976
2019-05-22 23:16:55 +02:00
}
2020-02-04 22:31:06 +01:00
if name == "rollup_candlestick" {
2020-02-04 22:23:37 +01:00
// Automatically apply `offset -step` to `rollup_candlestick` function
// in order to obtain expected OHLC results.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/309#issuecomment-582113462
step := ecNew . Step
ecNew = newEvalConfig ( ecNew )
ecNew . Start += step
ecNew . End += step
offset -= step
}
2019-05-22 23:16:55 +02:00
var rvs [ ] * timeseries
var err error
2019-12-25 20:35:47 +01:00
if me , ok := re . Expr . ( * metricsql . MetricExpr ) ; ok {
2020-01-03 19:42:51 +01:00
rvs , err = evalRollupFuncWithMetricExpr ( ecNew , name , rf , expr , me , iafc , re . Window )
2019-05-22 23:16:55 +02:00
} else {
2019-07-10 11:57:27 +02:00
if iafc != nil {
logger . Panicf ( "BUG: iafc must be nil for rollup %q over subquery %q" , name , re . AppendString ( nil ) )
}
2020-01-10 20:16:14 +01:00
rvs , err = evalRollupFuncWithSubquery ( ecNew , name , rf , expr , re )
2019-05-22 23:16:55 +02:00
}
if err != nil {
return nil , err
}
2020-11-14 11:36:21 +01:00
ec . updateIsPartialResponse ( ecNew . IsPartialResponse )
2019-05-22 23:16:55 +02:00
if offset != 0 && len ( rvs ) > 0 {
// Make a copy of timestamps, since they may be used in other values.
srcTimestamps := rvs [ 0 ] . Timestamps
dstTimestamps := append ( [ ] int64 { } , srcTimestamps ... )
for i := range dstTimestamps {
dstTimestamps [ i ] += offset
}
for _ , ts := range rvs {
ts . Timestamps = dstTimestamps
}
}
return rvs , nil
}
2020-01-10 20:16:14 +01:00
func evalRollupFuncWithSubquery ( ec * EvalConfig , name string , rf rollupFunc , expr metricsql . Expr , re * metricsql . RollupExpr ) ( [ ] * timeseries , error ) {
2020-01-03 19:42:51 +01:00
// TODO: determine whether to use rollupResultCacheV here.
2021-07-12 16:16:38 +02:00
step := re . Step . Duration ( ec . Step )
if step == 0 {
2019-05-22 23:16:55 +02:00
step = ec . Step
}
2021-07-12 16:16:38 +02:00
window := re . Window . Duration ( ec . Step )
2019-05-22 23:16:55 +02:00
ecSQ := newEvalConfig ( ec )
2019-06-21 20:50:44 +02:00
ecSQ . Start -= window + maxSilenceInterval + step
2020-09-03 12:21:51 +02:00
ecSQ . End += step
2019-05-22 23:16:55 +02:00
ecSQ . Step = step
if err := ValidateMaxPointsPerTimeseries ( ecSQ . Start , ecSQ . End , ecSQ . Step ) ; err != nil {
return nil , err
}
2020-09-03 12:21:51 +02:00
// unconditionally align start and end args to step for subquery as Prometheus does.
ecSQ . Start , ecSQ . End = alignStartEnd ( ecSQ . Start , ecSQ . End , ecSQ . Step )
2019-05-22 23:16:55 +02:00
tssSQ , err := evalExpr ( ecSQ , re . Expr )
if err != nil {
return nil , err
}
2020-11-14 11:36:21 +01:00
ec . updateIsPartialResponse ( ecSQ . IsPartialResponse )
2020-01-03 23:46:39 +01:00
if len ( tssSQ ) == 0 {
if name == "absent_over_time" {
tss := evalNumber ( ec , 1 )
return tss , nil
}
return nil , nil
}
2019-05-22 23:16:55 +02:00
sharedTimestamps := getTimestamps ( ec . Start , ec . End , ec . Step )
2020-01-10 20:16:14 +01:00
preFunc , rcs , err := getRollupConfigs ( name , rf , expr , ec . Start , ec . End , ec . Step , window , ec . LookbackDelta , sharedTimestamps )
if err != nil {
return nil , err
}
2019-05-22 23:16:55 +02:00
tss := make ( [ ] * timeseries , 0 , len ( tssSQ ) * len ( rcs ) )
var tssLock sync . Mutex
2019-07-25 20:48:12 +02:00
removeMetricGroup := ! rollupFuncsKeepMetricGroup [ name ]
2019-05-22 23:16:55 +02:00
doParallel ( tssSQ , func ( tsSQ * timeseries , values [ ] float64 , timestamps [ ] int64 ) ( [ ] float64 , [ ] int64 ) {
values , timestamps = removeNanValues ( values [ : 0 ] , timestamps [ : 0 ] , tsSQ . Values , tsSQ . Timestamps )
preFunc ( values , timestamps )
for _ , rc := range rcs {
2020-01-03 22:50:47 +01:00
if tsm := newTimeseriesMap ( name , sharedTimestamps , & tsSQ . MetricName ) ; tsm != nil {
rc . DoTimeseriesMap ( tsm , values , timestamps )
tssLock . Lock ( )
tss = tsm . AppendTimeseriesTo ( tss )
tssLock . Unlock ( )
continue
}
2019-05-22 23:16:55 +02:00
var ts timeseries
2019-07-25 20:48:12 +02:00
doRollupForTimeseries ( rc , & ts , & tsSQ . MetricName , values , timestamps , sharedTimestamps , removeMetricGroup )
2019-05-22 23:16:55 +02:00
tssLock . Lock ( )
tss = append ( tss , & ts )
tssLock . Unlock ( )
}
return values , timestamps
} )
return tss , nil
}
func doParallel ( tss [ ] * timeseries , f func ( ts * timeseries , values [ ] float64 , timestamps [ ] int64 ) ( [ ] float64 , [ ] int64 ) ) {
2020-12-08 19:49:32 +01:00
concurrency := cgroup . AvailableCPUs ( )
2019-05-22 23:16:55 +02:00
if concurrency > len ( tss ) {
concurrency = len ( tss )
}
workCh := make ( chan * timeseries , concurrency )
var wg sync . WaitGroup
wg . Add ( concurrency )
for i := 0 ; i < concurrency ; i ++ {
go func ( ) {
defer wg . Done ( )
var tmpValues [ ] float64
var tmpTimestamps [ ] int64
for ts := range workCh {
tmpValues , tmpTimestamps = f ( ts , tmpValues , tmpTimestamps )
}
} ( )
}
for _ , ts := range tss {
workCh <- ts
}
close ( workCh )
wg . Wait ( )
}
// removeNanValues appends non-NaN values with the corresponding timestamps
// to dstValues and dstTimestamps and returns the extended slices.
func removeNanValues(dstValues []float64, dstTimestamps []int64, values []float64, timestamps []int64) ([]float64, []int64) {
	nanFound := false
	for i := range values {
		if math.IsNaN(values[i]) {
			nanFound = true
			break
		}
	}
	if nanFound {
		// Slow path - copy everything except NaNs.
		for i, v := range values {
			if !math.IsNaN(v) {
				dstValues = append(dstValues, v)
				dstTimestamps = append(dstTimestamps, timestamps[i])
			}
		}
		return dstValues, dstTimestamps
	}
	// Fast path - there are no NaNs, so copy the input as is.
	return append(dstValues, values...), append(dstTimestamps, timestamps...)
}
// Counters for rollup result cache lookups performed by evalRollupFuncWithMetricExpr.
var (
	// Incremented when the requested range is fully covered by the cached result.
	rollupResultCacheFullHits = metrics.NewCounter(` vm_rollup_result_cache_full_hits_total `)

	// Incremented when only a part of the requested range is covered by the cached result.
	rollupResultCachePartialHits = metrics.NewCounter(` vm_rollup_result_cache_partial_hits_total `)

	// Incremented when the cached result doesn't cover any part of the requested range.
	rollupResultCacheMiss = metrics.NewCounter(` vm_rollup_result_cache_miss_total `)
)
2020-01-03 19:42:51 +01:00
func evalRollupFuncWithMetricExpr ( ec * EvalConfig , name string , rf rollupFunc ,
2021-07-12 16:16:38 +02:00
expr metricsql . Expr , me * metricsql . MetricExpr , iafc * incrementalAggrFuncContext , windowExpr * metricsql . DurationExpr ) ( [ ] * timeseries , error ) {
2019-07-10 11:57:27 +02:00
if me . IsEmpty ( ) {
return evalNumber ( ec , nan ) , nil
}
2021-07-12 16:16:38 +02:00
window := windowExpr . Duration ( ec . Step )
2019-07-10 11:57:27 +02:00
2019-05-22 23:16:55 +02:00
// Search for partial results in cache.
2020-01-03 19:42:51 +01:00
tssCached , start := rollupResultCacheV . Get ( ec , expr , window )
2019-05-22 23:16:55 +02:00
if start > ec . End {
// The result is fully cached.
rollupResultCacheFullHits . Inc ( )
return tssCached , nil
}
if start > ec . Start {
rollupResultCachePartialHits . Inc ( )
} else {
rollupResultCacheMiss . Inc ( )
}
2020-01-11 13:40:32 +01:00
// Obtain rollup configs before fetching data from db,
// so type errors can be caught earlier.
sharedTimestamps := getTimestamps ( start , ec . End , ec . Step )
preFunc , rcs , err := getRollupConfigs ( name , rf , expr , start , ec . End , ec . Step , window , ec . LookbackDelta , sharedTimestamps )
if err != nil {
return nil , err
}
2019-05-22 23:16:55 +02:00
// Fetch the remaining part of the result.
2019-12-25 20:35:47 +01:00
tfs := toTagFilters ( me . LabelFilters )
2021-02-01 16:42:35 +01:00
// append external filters.
tfs = append ( tfs , ec . EnforcedTagFilters ... )
2020-02-05 18:20:54 +01:00
minTimestamp := start - maxSilenceInterval
if window > ec . Step {
minTimestamp -= window
} else {
minTimestamp -= ec . Step
}
2020-11-16 17:00:50 +01:00
sq := storage . NewSearchQuery ( ec . AuthToken . AccountID , ec . AuthToken . ProjectID , minTimestamp , ec . End , [ ] [ ] storage . TagFilter { tfs } )
2020-11-14 11:36:21 +01:00
rss , isPartial , err := netstorage . ProcessSearchQuery ( ec . AuthToken , ec . DenyPartialResponse , sq , true , ec . Deadline )
2019-05-22 23:16:55 +02:00
if err != nil {
return nil , err
}
2020-11-14 11:36:21 +01:00
ec . updateIsPartialResponse ( isPartial )
2019-05-22 23:16:55 +02:00
rssLen := rss . Len ( )
if rssLen == 0 {
rss . Cancel ( )
2020-01-03 23:46:39 +01:00
var tss [ ] * timeseries
if name == "absent_over_time" {
tss = getAbsentTimeseries ( ec , me )
}
2019-05-22 23:16:55 +02:00
// Add missing points until ec.End.
// Do not cache the result, since missing points
// may be backfilled in the future.
2020-01-03 23:46:39 +01:00
tss = mergeTimeseries ( tssCached , tss , start , ec )
2019-05-22 23:16:55 +02:00
return tss , nil
}
// Verify timeseries fit available memory after the rollup.
// Take into account points from tssCached.
pointsPerTimeseries := 1 + ( ec . End - ec . Start ) / ec . Step
2019-11-08 17:45:25 +01:00
timeseriesLen := rssLen
if iafc != nil {
2020-05-12 18:06:54 +02:00
// Incremental aggregates require holding only GOMAXPROCS timeseries in memory.
2020-12-08 19:49:32 +01:00
timeseriesLen = cgroup . AvailableCPUs ( )
2019-11-08 17:45:25 +01:00
if iafc . ae . Modifier . Op != "" {
2020-05-12 18:06:54 +02:00
if iafc . ae . Limit > 0 {
// There is an explicit limit on the number of output time series.
timeseriesLen *= iafc . ae . Limit
} else {
// Increase the number of timeseries for non-empty group list: `aggr() by (something)`,
// since each group can have own set of time series in memory.
timeseriesLen *= 1000
}
2019-11-08 17:45:25 +01:00
}
2020-04-28 23:18:47 +02:00
// The maximum number of output time series is limited by rssLen.
if timeseriesLen > rssLen {
timeseriesLen = rssLen
}
2019-11-08 17:45:25 +01:00
}
rollupPoints := mulNoOverflow ( pointsPerTimeseries , int64 ( timeseriesLen * len ( rcs ) ) )
2019-06-12 21:25:37 +02:00
rollupMemorySize := mulNoOverflow ( rollupPoints , 16 )
rml := getRollupMemoryLimiter ( )
if ! rml . Get ( uint64 ( rollupMemorySize ) ) {
2019-05-22 23:16:55 +02:00
rss . Cancel ( )
2019-06-12 21:25:37 +02:00
return nil , fmt . Errorf ( "not enough memory for processing %d data points across %d time series with %d points in each time series; " +
2020-07-20 18:51:00 +02:00
"total available memory for concurrent requests: %d bytes; " +
2019-06-12 21:25:37 +02:00
"possible solutions are: reducing the number of matching time series; switching to node with more RAM; " +
2020-05-12 18:47:33 +02:00
"increasing -memory.allowedPercent; increasing `step` query arg (%gs)" ,
2020-07-20 18:51:00 +02:00
rollupPoints , timeseriesLen * len ( rcs ) , pointsPerTimeseries , rml . MaxSize , float64 ( ec . Step ) / 1e3 )
2019-05-22 23:16:55 +02:00
}
2019-06-12 21:25:37 +02:00
defer rml . Put ( uint64 ( rollupMemorySize ) )
2019-05-22 23:16:55 +02:00
// Evaluate rollup
2019-07-25 20:48:12 +02:00
removeMetricGroup := ! rollupFuncsKeepMetricGroup [ name ]
2019-07-10 11:57:27 +02:00
var tss [ ] * timeseries
if iafc != nil {
2020-01-03 22:50:47 +01:00
tss , err = evalRollupWithIncrementalAggregate ( name , iafc , rss , rcs , preFunc , sharedTimestamps , removeMetricGroup )
2019-07-10 11:57:27 +02:00
} else {
2020-01-03 22:50:47 +01:00
tss , err = evalRollupNoIncrementalAggregate ( name , rss , rcs , preFunc , sharedTimestamps , removeMetricGroup )
2019-07-10 11:57:27 +02:00
}
2019-05-22 23:16:55 +02:00
if err != nil {
return nil , err
}
tss = mergeTimeseries ( tssCached , tss , start , ec )
2019-06-30 00:27:03 +02:00
if ! isPartial {
2020-01-03 19:42:51 +01:00
rollupResultCacheV . Put ( ec , expr , window , tss )
2019-05-22 23:23:23 +02:00
}
2019-05-22 23:16:55 +02:00
return tss , nil
}
2019-06-12 21:25:37 +02:00
var (
rollupMemoryLimiter memoryLimiter
rollupMemoryLimiterOnce sync . Once
)
func getRollupMemoryLimiter ( ) * memoryLimiter {
rollupMemoryLimiterOnce . Do ( func ( ) {
2021-05-12 13:42:38 +02:00
rollupMemoryLimiter . MaxSize = uint64 ( memory . Allowed ( ) ) / 2
2019-06-12 21:25:37 +02:00
} )
return & rollupMemoryLimiter
}
2020-01-03 22:50:47 +01:00
func evalRollupWithIncrementalAggregate ( name string , iafc * incrementalAggrFuncContext , rss * netstorage . Results , rcs [ ] * rollupConfig ,
2019-07-25 20:48:12 +02:00
preFunc func ( values [ ] float64 , timestamps [ ] int64 ) , sharedTimestamps [ ] int64 , removeMetricGroup bool ) ( [ ] * timeseries , error ) {
2020-09-27 22:17:14 +02:00
err := rss . RunParallel ( func ( rs * netstorage . Result , workerID uint ) error {
2021-08-17 10:00:17 +02:00
rs . Values , rs . Timestamps = dropStaleNaNs ( name , rs . Values , rs . Timestamps )
2019-07-10 11:57:27 +02:00
preFunc ( rs . Values , rs . Timestamps )
ts := getTimeseries ( )
defer putTimeseries ( ts )
for _ , rc := range rcs {
2020-01-03 22:50:47 +01:00
if tsm := newTimeseriesMap ( name , sharedTimestamps , & rs . MetricName ) ; tsm != nil {
rc . DoTimeseriesMap ( tsm , rs . Values , rs . Timestamps )
for _ , ts := range tsm . m {
iafc . updateTimeseries ( ts , workerID )
}
continue
}
2019-07-10 11:57:27 +02:00
ts . Reset ( )
2019-07-25 20:48:12 +02:00
doRollupForTimeseries ( rc , ts , & rs . MetricName , rs . Values , rs . Timestamps , sharedTimestamps , removeMetricGroup )
2019-07-12 14:51:02 +02:00
iafc . updateTimeseries ( ts , workerID )
2019-07-25 20:48:12 +02:00
// ts.Timestamps points to sharedTimestamps. Zero it, so it can be re-used.
2019-07-10 11:57:27 +02:00
ts . Timestamps = nil
2019-07-25 20:48:12 +02:00
ts . denyReuse = false
2019-07-10 11:57:27 +02:00
}
2020-09-27 22:17:14 +02:00
return nil
2019-07-10 11:57:27 +02:00
} )
if err != nil {
return nil , err
}
tss := iafc . finalizeTimeseries ( )
return tss , nil
}
2020-01-03 22:50:47 +01:00
func evalRollupNoIncrementalAggregate ( name string , rss * netstorage . Results , rcs [ ] * rollupConfig ,
2019-07-25 20:48:12 +02:00
preFunc func ( values [ ] float64 , timestamps [ ] int64 ) , sharedTimestamps [ ] int64 , removeMetricGroup bool ) ( [ ] * timeseries , error ) {
2019-07-10 11:57:27 +02:00
tss := make ( [ ] * timeseries , 0 , rss . Len ( ) * len ( rcs ) )
var tssLock sync . Mutex
2020-09-27 22:17:14 +02:00
err := rss . RunParallel ( func ( rs * netstorage . Result , workerID uint ) error {
2021-08-17 10:00:17 +02:00
rs . Values , rs . Timestamps = dropStaleNaNs ( name , rs . Values , rs . Timestamps )
2019-07-10 11:57:27 +02:00
preFunc ( rs . Values , rs . Timestamps )
for _ , rc := range rcs {
2020-01-03 22:50:47 +01:00
if tsm := newTimeseriesMap ( name , sharedTimestamps , & rs . MetricName ) ; tsm != nil {
rc . DoTimeseriesMap ( tsm , rs . Values , rs . Timestamps )
tssLock . Lock ( )
tss = tsm . AppendTimeseriesTo ( tss )
tssLock . Unlock ( )
continue
}
2019-07-10 11:57:27 +02:00
var ts timeseries
2019-07-25 20:48:12 +02:00
doRollupForTimeseries ( rc , & ts , & rs . MetricName , rs . Values , rs . Timestamps , sharedTimestamps , removeMetricGroup )
2019-07-10 11:57:27 +02:00
tssLock . Lock ( )
tss = append ( tss , & ts )
tssLock . Unlock ( )
}
2020-09-27 22:17:14 +02:00
return nil
2019-07-10 11:57:27 +02:00
} )
if err != nil {
return nil , err
}
return tss , nil
}
2019-07-25 20:48:12 +02:00
// doRollupForTimeseries fills tsDst with the rollup of the given source samples.
//
// The metric name is copied from mnSrc; a "rollup" tag with rc.TagValue is added when
// rc.TagValue is non-empty, and the metric group is reset when removeMetricGroup is set.
// tsDst.Values receives the output of rc.Do, tsDst.Timestamps is pointed at
// sharedTimestamps (no copy is made) and tsDst.denyReuse is set to true.
func doRollupForTimeseries(rc *rollupConfig, tsDst *timeseries, mnSrc *storage.MetricName, valuesSrc []float64, timestampsSrc []int64,
	sharedTimestamps []int64, removeMetricGroup bool) {
	mn := &tsDst.MetricName
	mn.CopyFrom(mnSrc)
	if len(rc.TagValue) != 0 {
		mn.AddTag("rollup", rc.TagValue)
	}
	if removeMetricGroup {
		mn.ResetMetricGroup()
	}

	// rc.Do gets the zero-length prefix of the existing Values slice as its destination
	// argument - presumably so the backing array can be reused; confirm against rc.Do.
	dstValues := tsDst.Values[:0]
	tsDst.Values = rc.Do(dstValues, valuesSrc, timestampsSrc)

	// Timestamps alias sharedTimestamps, so the series is flagged via denyReuse.
	tsDst.denyReuse = true
	tsDst.Timestamps = sharedTimestamps
}
2019-05-22 23:16:55 +02:00
// bbPool is a package-level bytesutil.ByteBufferPool.
// NOTE(review): its users are outside this part of the file; presumably it recycles
// temporary byte buffers - confirm against the callers.
var bbPool bytesutil . ByteBufferPool
func evalNumber ( ec * EvalConfig , n float64 ) [ ] * timeseries {
var ts timeseries
ts . denyReuse = true
2019-05-22 23:23:23 +02:00
ts . MetricName . AccountID = ec . AuthToken . AccountID
ts . MetricName . ProjectID = ec . AuthToken . ProjectID
2019-05-22 23:16:55 +02:00
timestamps := ec . getSharedTimestamps ( )
values := make ( [ ] float64 , len ( timestamps ) )
for i := range timestamps {
values [ i ] = n
}
ts . Values = values
ts . Timestamps = timestamps
return [ ] * timeseries { & ts }
}
// evalString returns a single time series built by evalNumber(ec, nan)
// with the MetricGroup of its metric name set to the bytes of s.
func evalString(ec *EvalConfig, s string) []*timeseries {
	tss := evalNumber(ec, nan)
	mn := &tss[0].MetricName
	mn.MetricGroup = append(mn.MetricGroup[:0], s...)
	return tss
}
func evalTime ( ec * EvalConfig ) [ ] * timeseries {
rv := evalNumber ( ec , nan )
timestamps := rv [ 0 ] . Timestamps
values := rv [ 0 ] . Values
for i , ts := range timestamps {
2020-09-08 13:00:47 +02:00
values [ i ] = float64 ( ts ) / 1e3
2019-05-22 23:16:55 +02:00
}
return rv
}
2019-06-12 21:25:37 +02:00
// mulNoOverflow returns a*b, saturating the result instead of wrapping around
// on int64 overflow.
//
// If the exact product doesn't fit into int64, math.MaxInt64 is returned when the
// operands have the same sign and math.MinInt64 is returned when their signs differ.
// A zero operand always yields 0, so a zero b no longer triggers a division by zero.
func mulNoOverflow(a, b int64) int64 {
	if a == 0 || b == 0 {
		return 0
	}
	product := a * b
	// math.MinInt64 * -1 wraps back to math.MinInt64 and passes the division check,
	// so it is tested explicitly (and first, before the division is evaluated).
	overflow := (a == math.MinInt64 && b == -1) || product/b != a
	if !overflow {
		return product
	}
	if (a < 0) != (b < 0) {
		return math.MinInt64
	}
	return math.MaxInt64
}
2019-12-25 20:35:47 +01:00
// toTagFilters converts label filters into storage tag filters.
//
// The returned slice has the same length and order as lfs; every item is
// filled by toTagFilter.
func toTagFilters(lfs []metricsql.LabelFilter) []storage.TagFilter {
	filters := make([]storage.TagFilter, len(lfs))
	for idx := 0; idx < len(lfs); idx++ {
		toTagFilter(&filters[idx], &lfs[idx])
	}
	return filters
}
// toTagFilter fills dst with the contents of src.
//
// The "__name__" label is mapped to a nil Key, while any other label is copied
// into Key as bytes. Value, IsRegexp and IsNegative are copied as is.
func toTagFilter(dst *storage.TagFilter, src *metricsql.LabelFilter) {
	// A nil key for "__name__" is required for storage.Search.
	dst.Key = nil
	if src.Label != "__name__" {
		dst.Key = []byte(src.Label)
	}
	dst.Value = []byte(src.Value)
	dst.IsNegative = src.IsNegative
	dst.IsRegexp = src.IsRegexp
}
2021-08-15 12:20:02 +02:00
2021-08-17 10:00:17 +02:00
func dropStaleNaNs ( name string , values [ ] float64 , timestamps [ ] int64 ) ( [ ] float64 , [ ] int64 ) {
if name == "default_rollup" {
// Do not drop Prometheus staleness marks (aka stale NaNs) for default_rollup() function,
// since it uses them for Prometheus-style staleness detection.
return values , timestamps
}
// Remove Prometheus staleness marks, so non-default rollup functions don't hit NaN values.
2021-08-15 12:20:02 +02:00
hasStaleSamples := false
for _ , v := range values {
if decimal . IsStaleNaN ( v ) {
hasStaleSamples = true
break
}
}
if ! hasStaleSamples {
// Fast path: values have no Prometheus staleness marks.
return values , timestamps
}
// Slow path: drop Prometheus staleness marks from values.
dstValues := values [ : 0 ]
dstTimestamps := timestamps [ : 0 ]
for i , v := range values {
if decimal . IsStaleNaN ( v ) {
continue
}
dstValues = append ( dstValues , v )
dstTimestamps = append ( dstTimestamps , timestamps [ i ] )
}
return dstValues , dstTimestamps
}