app/vmselect: properly adjust the lower bound for the time range where raw samples must be selected for default_rollup() function

Previously the lower bound could be too small, which could result in missing values at the beginning of the graph
for default_rollup() function. This function is automatically applied to all the series selectors if they aren't
explicitly wrapped into a rollup function - see https://docs.victoriametrics.com/MetricsQL.html#implicit-query-conversions

While at it, properly take into account `-search.minStalenessInterval` command-line flag when adjusting
the lower bound for the selected time range.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5388
This commit is contained in:
Aliaksandr Valialkin 2023-12-06 14:15:48 +02:00
parent 065f5a7f9e
commit 509339bf63
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
3 changed files with 19 additions and 10 deletions

View File

@ -945,7 +945,7 @@ func evalRollupFuncWithSubquery(qt *querytracer.Tracer, ec *EvalConfig, funcName
}
ecSQ := copyEvalConfig(ec)
ecSQ.Start -= window + maxSilenceInterval + step
ecSQ.Start -= window + step + maxSilenceInterval()
ecSQ.End += step
ecSQ.Step = step
ecSQ.MaxPointsPerSeries = *maxPointsSubqueryPerTimeseries
@ -1699,12 +1699,12 @@ func evalRollupFuncNoCache(qt *querytracer.Tracer, ec *EvalConfig, funcName stri
return nil, err
}
// Fetch the remaining part of the result.
// Fetch the result.
tfss := searchutils.ToTagFilterss(me.LabelFilterss)
tfss = searchutils.JoinTagFilterss(tfss, ec.EnforcedTagFilterss)
minTimestamp := ec.Start
if needSilenceIntervalForRollupFunc(funcName) {
minTimestamp -= maxSilenceInterval
minTimestamp -= maxSilenceInterval()
}
if window > ec.Step {
minTimestamp -= window
@ -1800,10 +1800,22 @@ func getRollupMemoryLimiter() *memoryLimiter {
return &rollupMemoryLimiter
}
func maxSilenceInterval() int64 {
d := minStalenessInterval.Milliseconds()
if d <= 0 {
d = 5 * 60 * 1000
}
return d
}
func needSilenceIntervalForRollupFunc(funcName string) bool {
// All rollup the functions, which do not rely on the previous sample
// before the lookbehind window (aka prevValue), do not need silence interval.
// All the rollup functions, which do not rely on the previous sample
// before the lookbehind window (aka prevValue and realPrevValue), do not need silence interval.
switch strings.ToLower(funcName) {
case "default_rollup":
// The default_rollup implicitly relies on the previous samples in order to fill gaps.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5388
return true
case
"absent_over_time",
"avg_over_time",
@ -1812,7 +1824,6 @@ func needSilenceIntervalForRollupFunc(funcName string) bool {
"count_le_over_time",
"count_ne_over_time",
"count_over_time",
"default_rollup",
"first_over_time",
"histogram_over_time",
"hoeffding_bound_lower",

View File

@ -561,9 +561,6 @@ var (
inf = math.Inf(1)
)
// The maximum interval without previous rows.
const maxSilenceInterval = 5 * 60 * 1000
type timeseriesMap struct {
origin *timeseries
h metrics.Histogram
@ -620,7 +617,7 @@ func (tsm *timeseriesMap) GetOrCreateTimeseries(labelName, labelValue string) *t
//
// rc.Timestamps are used as timestamps for dstValues.
//
// timestamps must cover time range [rc.Start - rc.Window - maxSilenceInterval ... rc.End].
// It is expected that timestamps cover the time range [rc.Start - rc.Window ... rc.End].
//
// Do cannot be called from concurrent goroutines.
func (rc *rollupConfig) Do(dstValues []float64, values []float64, timestamps []int64) ([]float64, uint64) {

View File

@ -49,6 +49,7 @@ The sandbox cluster installation is running under the constant load generated by
* `go_scavenge_cpu_seconds_total` - the [counter](https://docs.victoriametrics.com/keyConcepts.html#counter), which shows the total CPU time spent by Go runtime for returning memory to the Operating System.
* `go_memlimit_bytes` - the value of [GOMEMLIMIT](https://pkg.go.dev/runtime#hdr-Environment_Variables) environment variable.
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly calculate values for the first point on the graph for queries, which do not use [rollup functions](https://docs.victoriametrics.com/MetricsQL.html#rollup-functions). For example, previously `count(up)` could return lower than expected values for the first point on the graph. This also could result in lower than expected values in the middle of the graph like in [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5388) when the response caching isn't disabled. The issue has been introduced in [v1.95.0](https://docs.victoriametrics.com/CHANGELOG.html#v1950).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): prevent from `FATAL: cannot flush metainfo` panic when [`-remoteWrite.multitenantURL`](https://docs.victoriametrics.com/vmagent.html#multitenancy) command-line flag is set. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5357).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly decode zstd-encoded data blocks received via [VictoriaMetrics remote_write protocol](https://docs.victoriametrics.com/vmagent.html#victoriametrics-remote-write-protocol). See [this issue comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5301#issuecomment-1815871992).
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly add new labels at `output_relabel_configs` during [stream aggregation](https://docs.victoriametrics.com/stream-aggregation.html). Previously this could lead to corrupted labels in output samples. Thanks to @ChengChung for providing [detailed report for this bug](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5402).