VictoriaMetrics/app/vmselect/prometheus/prometheus.go
Zhu Jiekun 8c50c38a80
vmstorage: auto calculate maxUniqueTimeseries based on resources (#6961)
### Describe Your Changes

Add support for
https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6930

Calculate `-search.maxUniqueTimeseries` from
`-search.maxConcurrentRequests` and the remaining memory if it is **not set**
or **less than or equal to 0**.

The remaining memory is affected by `-memory.allowedPercent`,
`-memory.allowedBytes` and cgroup memory limit.
### Checklist

The following checks are **mandatory**:

- [x] My change adheres to the [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Roman Khavronenko <roman@victoriametrics.com>

(cherry picked from commit 85f60237e2)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2024-10-18 14:00:14 +02:00

1290 lines
43 KiB
Go

package prometheus
import (
"flag"
"fmt"
"math"
"net/http"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/metricsql"
"github.com/valyala/fastjson/fastfloat"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/netstorage"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/promql"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/querystats"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmselect/searchutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bufferedwriter"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httputils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
)
// Command-line flags controlling defaults and limits for the handlers in this file.
// Each flag's help string below is the authoritative user-facing description.
var (
// Timing-related flags: latency offset, lookback and staleness handling.
latencyOffset = flag.Duration("search.latencyOffset", time.Second*30, "The time when data points become visible in query results after the collection. "+
"It can be overridden on per-query basis via latency_offset arg. "+
"Too small value can result in incomplete last points for query results")
maxQueryLen = flagutil.NewBytes("search.maxQueryLen", 16*1024, "The maximum search query length in bytes")
maxLookback = flag.Duration("search.maxLookback", 0, "Synonym to -search.lookback-delta from Prometheus. "+
"The value is dynamically detected from interval between time series datapoints if not set. It can be overridden on per-query basis via max_lookback arg. "+
"See also '-search.maxStalenessInterval' flag, which has the same meaning due to historical reasons")
maxStalenessInterval = flag.Duration("search.maxStalenessInterval", 0, "The maximum interval for staleness calculations. "+
"By default, it is automatically calculated from the median interval between samples. This flag could be useful for tuning "+
"Prometheus data model closer to Influx-style data model. See https://prometheus.io/docs/prometheus/latest/querying/basics/#staleness for details. "+
"See also '-search.setLookbackToStep' flag")
setLookbackToStep = flag.Bool("search.setLookbackToStep", false, "Whether to fix lookback interval to 'step' query arg value. "+
"If set to true, the query model becomes closer to InfluxDB data model. If set to true, then -search.maxLookback and -search.maxStalenessInterval are ignored")
maxStepForPointsAdjustment = flag.Duration("search.maxStepForPointsAdjustment", time.Minute, "The maximum step when /api/v1/query_range handler adjusts "+
"points with timestamps closer than -search.latencyOffset to the current time. The adjustment is needed because such points may contain incomplete data")
// Per-endpoint series limits. The handlers below pass these values to storage.NewSearchQuery.
// NOTE(review): the code that derives the automatic value for a zero -search.maxUniqueTimeseries
// is not visible in this part of the file; confirm against GetMaxUniqueTimeSeries.
maxUniqueTimeseries = flag.Int("search.maxUniqueTimeseries", 0, "The maximum number of unique time series, which can be selected during /api/v1/query and /api/v1/query_range queries. This option allows limiting memory usage. "+
"When set to zero, the limit is automatically calculated based on -search.maxConcurrentRequests (inversely proportional) and memory available to the process (proportional).")
maxFederateSeries = flag.Int("search.maxFederateSeries", 1e6, "The maximum number of time series, which can be returned from /federate. This option allows limiting memory usage")
maxExportSeries = flag.Int("search.maxExportSeries", 10e6, "The maximum number of time series, which can be returned from /api/v1/export* APIs. This option allows limiting memory usage")
maxTSDBStatusSeries = flag.Int("search.maxTSDBStatusSeries", 10e6, "The maximum number of time series, which can be processed during the call to /api/v1/status/tsdb. This option allows limiting memory usage")
maxSeriesLimit = flag.Int("search.maxSeries", 30e3, "The maximum number of time series, which can be returned from /api/v1/series. This option allows limiting memory usage")
maxDeleteSeries = flag.Int("search.maxDeleteSeries", 1e6, "The maximum number of time series, which can be deleted using /api/v1/admin/tsdb/delete_series. This option allows limiting memory usage")
maxLabelsAPISeries = flag.Int("search.maxLabelsAPISeries", 1e6, "The maximum number of time series, which could be scanned when searching for the matching time series "+
"at /api/v1/labels and /api/v1/label/.../values. This option allows limiting memory usage and CPU usage. See also -search.maxLabelsAPIDuration, "+
"-search.maxTagKeys, -search.maxTagValues and -search.ignoreExtraFiltersAtLabelsAPI")
maxPointsPerTimeseries = flag.Int("search.maxPointsPerTimeseries", 30e3, "The maximum points per a single timeseries returned from /api/v1/query_range. "+
"This option doesn't limit the number of scanned raw samples in the database. The main purpose of this option is to limit the number of per-series points "+
"returned to graphing UI such as VMUI or Grafana. There is no sense in setting this limit to values bigger than the horizontal resolution of the graph. "+
"See also -search.maxResponseSeries")
ignoreExtraFiltersAtLabelsAPI = flag.Bool("search.ignoreExtraFiltersAtLabelsAPI", false, "Whether to ignore match[], extra_filters[] and extra_label query args at "+
"/api/v1/labels and /api/v1/label/.../values . This may be useful for decreasing load on VictoriaMetrics when extra filters "+
"match too many time series. The downside is that superfluous labels or series could be returned, which do not match the extra filters. "+
"See also -search.maxLabelsAPISeries and -search.maxLabelsAPIDuration")
)
// defaultStep is the fallback step in milliseconds (5 minutes).
//
// It is used when the `step` query arg resolves to a non-positive value, as the fallback
// lookback window in FederateHandler, and as the default width of the query_range time range.
const defaultStep = 5 * 60 * 1000
// ExpandWithExprs serves /expand-with-exprs.
//
// The `query` form value is rendered either as JSON (when `format=json`, with CORS
// headers enabled) or via the default non-JSON response writer.
func ExpandWithExprs(w http.ResponseWriter, r *http.Request) {
	q := r.FormValue("query")
	wantJSON := r.FormValue("format") == "json"

	out := bufferedwriter.Get(w)
	defer bufferedwriter.Put(out)

	switch {
	case wantJSON:
		w.Header().Set("Content-Type", "application/json")
		httpserver.EnableCORS(w, r)
		WriteExpandWithExprsJSONResponse(out, q)
	default:
		WriteExpandWithExprsResponse(out, q)
	}

	// The handler has no error return, so a failed flush is deliberately dropped.
	_ = out.Flush()
}
// PrettifyQuery serves /prettify-query.
//
// It always answers with a JSON object: {"status": "success", "query": ...} when the
// `query` form value can be prettified, or {"status": "error", "msg": ...} otherwise.
func PrettifyQuery(w http.ResponseWriter, r *http.Request) {
	q := r.FormValue("query")

	out := bufferedwriter.Get(w)
	defer bufferedwriter.Put(out)

	w.Header().Set("Content-Type", "application/json")
	httpserver.EnableCORS(w, r)

	if pretty, err := metricsql.Prettify(q); err == nil {
		fmt.Fprintf(out, `{"status": "success", "query": %q}`, pretty)
	} else {
		fmt.Fprintf(out, `{"status": "error", "msg": %q}`, err)
	}

	// The handler has no error return, so a failed flush is deliberately dropped.
	_ = out.Flush()
}
// FederateHandler serves /federate.
//
// See https://prometheus.io/docs/prometheus/latest/federation/
//
// When the request uses the default time range, the start is moved to
// end-lookback, where lookback falls back to defaultStep if it isn't positive.
func FederateHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer federateDuration.UpdateDuration(startTime)

	params, err := getCommonParams(r, startTime, true)
	if err != nil {
		return err
	}
	lookback, err := getMaxLookback(r)
	if err != nil {
		return err
	}
	if lookback <= 0 {
		lookback = defaultStep
	}
	if params.IsDefaultTimeRange() {
		params.start = params.end - lookback
	}

	sq := storage.NewSearchQuery(params.start, params.end, params.filterss, *maxFederateSeries)
	results, err := netstorage.ProcessSearchQuery(nil, sq, params.deadline)
	if err != nil {
		return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
	}

	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	sw := newScalableWriter(bw)

	// Each worker renders into its own buffer, which is flushed via sw when needed.
	writeResult := func(rs *netstorage.Result, workerID uint) error {
		// Stop early once the underlying writer has reported an error.
		if werr := bw.Error(); werr != nil {
			return werr
		}
		buf := sw.getBuffer(workerID)
		WriteFederate(buf, rs)
		return sw.maybeFlushBuffer(buf)
	}
	if runErr := results.RunParallel(nil, writeResult); runErr != nil {
		return fmt.Errorf("error during sending data to remote client: %w", runErr)
	}
	return sw.flush()
}

var federateDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/federate"}`)
// ExportCSVHandler serves /api/v1/export/csv and writes the selected data as CSV.
//
// The mandatory `format` arg is a comma-separated list of field names passed to
// WriteExportCSVLine. The `reduce_mem_usage` arg switches the data source from
// netstorage.ProcessSearchQuery to netstorage.ExportBlocks.
func ExportCSVHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer exportCSVDuration.UpdateDuration(startTime)

	cp, err := getExportParams(r, startTime)
	if err != nil {
		return err
	}

	format := r.FormValue("format")
	if format == "" {
		return fmt.Errorf("missing `format` arg; see https://docs.victoriametrics.com/#how-to-export-csv-data")
	}
	fieldNames := strings.Split(format, ",")
	reduceMemUsage := httputils.GetBool(r, "reduce_mem_usage")

	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxExportSeries)
	w.Header().Set("Content-Type", "text/csv; charset=utf-8")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	sw := newScalableWriter(bw)

	// emit renders a single non-empty block into the per-worker buffer.
	emit := func(xb *exportBlock, workerID uint) error {
		if len(xb.timestamps) == 0 {
			return nil
		}
		buf := sw.getBuffer(workerID)
		WriteExportCSVLine(buf, xb, fieldNames)
		return sw.maybeFlushBuffer(buf)
	}

	// export is the data-source specific producer; it runs in a background goroutine below.
	var export func() error
	if reduceMemUsage {
		export = func() error {
			return netstorage.ExportBlocks(nil, sq, cp.deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange, workerID uint) error {
				if werr := bw.Error(); werr != nil {
					return werr
				}
				if uerr := b.UnmarshalData(); uerr != nil {
					return fmt.Errorf("cannot unmarshal block during export: %w", uerr)
				}
				xb := exportBlockPool.Get().(*exportBlock)
				xb.mn = mn
				xb.timestamps, xb.values = b.AppendRowsWithTimeRangeFilter(xb.timestamps[:0], xb.values[:0], tr)
				if eerr := emit(xb, workerID); eerr != nil {
					return eerr
				}
				xb.reset()
				exportBlockPool.Put(xb)
				return nil
			})
		}
	} else {
		// The search itself runs synchronously, so its failure is reported directly.
		rss, err := netstorage.ProcessSearchQuery(nil, sq, cp.deadline)
		if err != nil {
			return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
		}
		export = func() error {
			return rss.RunParallel(nil, func(rs *netstorage.Result, workerID uint) error {
				if werr := bw.Error(); werr != nil {
					return werr
				}
				xb := exportBlockPool.Get().(*exportBlock)
				xb.mn = &rs.MetricName
				xb.timestamps = rs.Timestamps
				xb.values = rs.Values
				if eerr := emit(xb, workerID); eerr != nil {
					return eerr
				}
				xb.reset()
				exportBlockPool.Put(xb)
				return nil
			})
		}
	}

	doneCh := make(chan error, 1)
	go func() {
		doneCh <- export()
	}()
	if exportErr := <-doneCh; exportErr != nil {
		return fmt.Errorf("error during sending the exported csv data to remote client: %w", exportErr)
	}
	return sw.flush()
}

var exportCSVDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export/csv"}`)
// ExportNativeHandler serves /api/v1/export/native.
//
// The response starts with the marshaled time range (start, end as int64 values),
// followed by a sequence of records. Each record is a uint32 length plus the marshaled
// metric name, then a uint32 length plus the portable-marshaled block.
func ExportNativeHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer exportNativeDuration.UpdateDuration(startTime)

	cp, err := getExportParams(r, startTime)
	if err != nil {
		return err
	}

	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxExportSeries)
	w.Header().Set("Content-Type", "VictoriaMetrics/native")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	sw := newScalableWriter(bw)

	// Emit the time range header.
	header := encoding.MarshalInt64(make([]byte, 0, 16), cp.start)
	header = encoding.MarshalInt64(header, cp.end)
	// A write failure is picked up by the bw.Error() check in the block callback below.
	_, _ = bw.Write(header)

	// Emit the native blocks.
	writeBlock := func(mn *storage.MetricName, b *storage.Block, _ storage.TimeRange, workerID uint) error {
		if werr := bw.Error(); werr != nil {
			return werr
		}
		out := sw.getBuffer(workerID)
		scratch := bbPool.Get()

		// Length-prefixed metric name.
		scratch.B = mn.Marshal(scratch.B[:0])
		out.B = encoding.MarshalUint32(out.B, uint32(len(scratch.B)))
		out.B = append(out.B, scratch.B...)

		// Length-prefixed block.
		scratch.B = b.MarshalPortable(scratch.B[:0])
		out.B = encoding.MarshalUint32(out.B, uint32(len(scratch.B)))
		out.B = append(out.B, scratch.B...)

		bbPool.Put(scratch)
		return sw.maybeFlushBuffer(out)
	}
	if exportErr := netstorage.ExportBlocks(nil, sq, cp.deadline, writeBlock); exportErr != nil {
		return fmt.Errorf("error during sending native data to remote client: %w", exportErr)
	}
	return sw.flush()
}

var exportNativeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export/native"}`)

// bbPool holds scratch byte buffers used while marshaling native export records.
var bbPool bytesutil.ByteBufferPool
// ExportHandler serves /api/v1/export.
//
// It reads the `format`, `max_rows_per_line` and `reduce_mem_usage` args from the
// request and delegates the actual export to exportHandler.
func ExportHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer exportDuration.UpdateDuration(startTime)

	cp, err := getExportParams(r, startTime)
	if err != nil {
		return err
	}

	var (
		format         = r.FormValue("format")
		maxRowsPerLine = int(fastfloat.ParseInt64BestEffort(r.FormValue("max_rows_per_line")))
		reduceMemUsage = httputils.GetBool(r, "reduce_mem_usage")
	)
	err = exportHandler(nil, w, cp, format, maxRowsPerLine, reduceMemUsage)
	if err == nil {
		return nil
	}
	return fmt.Errorf("error when exporting data on the time range (start=%d, end=%d): %w", cp.start, cp.end, err)
}

var exportDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/export"}`)
// exportHandler writes the series selected by cp to w in the requested format.
//
// format selects the line writer and the Content-Type:
//   - "prometheus": WriteExportPrometheusLine, "text/plain; charset=utf-8"
//   - "promapi": WriteExportPromAPIHeader / comma-separated WriteExportPromAPILine entries /
//     WriteExportPromAPIFooter, with the default content type
//   - anything else: WriteExportJSONLine, "application/stream+json; charset=utf-8"
//
// If maxRowsPerLine > 0, every block is emitted in chunks of at most maxRowsPerLine rows.
// reduceMemUsage switches the data source from netstorage.ProcessSearchQuery + RunParallel
// to netstorage.ExportBlocks.
//
// qt may be nil; ExportHandler calls this function with a nil tracer.
func exportHandler(qt *querytracer.Tracer, w http.ResponseWriter, cp *commonParams, format string, maxRowsPerLine int, reduceMemUsage bool) error {
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
sw := newScalableWriter(bw)
// Default writer: one JSON line per block.
writeLineFunc := func(xb *exportBlock, workerID uint) error {
bb := sw.getBuffer(workerID)
WriteExportJSONLine(bb, xb)
return sw.maybeFlushBuffer(bb)
}
contentType := "application/stream+json; charset=utf-8"
if format == "prometheus" {
contentType = "text/plain; charset=utf-8"
writeLineFunc = func(xb *exportBlock, workerID uint) error {
bb := sw.getBuffer(workerID)
WriteExportPrometheusLine(bb, xb)
return sw.maybeFlushBuffer(bb)
}
} else if format == "promapi" {
WriteExportPromAPIHeader(bw)
// firstLineOnce elects the single worker that writes the first (comma-less) entry;
// firstLineSent tells the other workers that this entry has been handed to sw.bw.
var firstLineOnce atomic.Bool
var firstLineSent atomic.Bool
writeLineFunc = func(xb *exportBlock, workerID uint) error {
bb := sw.getBuffer(workerID)
// Use Load() in front of CompareAndSwap() in order to avoid slow inter-CPU synchronization
// in fast path after the first line has been already sent.
if !firstLineOnce.Load() && firstLineOnce.CompareAndSwap(false, true) {
// Send the first line to sw.bw
WriteExportPromAPILine(bb, xb)
_, err := sw.bw.Write(bb.B)
bb.Reset()
// firstLineSent is set even when the write failed, so waiting workers are always released.
firstLineSent.Store(true)
return err
}
for !firstLineSent.Load() {
// Busy wait until the first line is sent to sw.bw
runtime.Gosched()
}
// Every entry after the first one is prefixed with a comma separator.
bb.B = append(bb.B, ',')
WriteExportPromAPILine(bb, xb)
return sw.maybeFlushBuffer(bb)
}
}
if maxRowsPerLine > 0 {
// Wrap the selected writer so that each block is split into chunks of at most maxRowsPerLine rows.
writeLineFuncOrig := writeLineFunc
writeLineFunc = func(xb *exportBlock, workerID uint) error {
valuesOrig := xb.values
timestampsOrig := xb.timestamps
values := valuesOrig
timestamps := timestampsOrig
for len(values) > 0 {
var valuesChunk []float64
var timestampsChunk []int64
if len(values) > maxRowsPerLine {
valuesChunk = values[:maxRowsPerLine]
timestampsChunk = timestamps[:maxRowsPerLine]
values = values[maxRowsPerLine:]
timestamps = timestamps[maxRowsPerLine:]
} else {
valuesChunk = values
timestampsChunk = timestamps
values = nil
timestamps = nil
}
xb.values = valuesChunk
xb.timestamps = timestampsChunk
if err := writeLineFuncOrig(xb, workerID); err != nil {
return err
}
}
// Restore the full slices on success; on the error path above xb keeps the last chunk.
xb.values = valuesOrig
xb.timestamps = timestampsOrig
return nil
}
}
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxExportSeries)
w.Header().Set("Content-Type", contentType)
// doneCh is buffered so that the background goroutine can always deliver its result.
doneCh := make(chan error, 1)
if !reduceMemUsage {
// The search runs synchronously; only writing the results happens in the background goroutine.
rss, err := netstorage.ProcessSearchQuery(qt, sq, cp.deadline)
if err != nil {
return fmt.Errorf("cannot fetch data for %q: %w", sq, err)
}
qtChild := qt.NewChild("background export format=%s", format)
go func() {
err := rss.RunParallel(qtChild, func(rs *netstorage.Result, workerID uint) error {
if err := bw.Error(); err != nil {
return err
}
// xb borrows rs.Timestamps and rs.Values here instead of copying them.
xb := exportBlockPool.Get().(*exportBlock)
xb.mn = &rs.MetricName
xb.timestamps = rs.Timestamps
xb.values = rs.Values
if err := writeLineFunc(xb, workerID); err != nil {
return err
}
xb.reset()
exportBlockPool.Put(xb)
return nil
})
qtChild.Done()
doneCh <- err
}()
} else {
qtChild := qt.NewChild("background export format=%s", format)
go func() {
err := netstorage.ExportBlocks(qtChild, sq, cp.deadline, func(mn *storage.MetricName, b *storage.Block, tr storage.TimeRange, workerID uint) error {
if err := bw.Error(); err != nil {
return err
}
if err := b.UnmarshalData(); err != nil {
return fmt.Errorf("cannot unmarshal block during export: %w", err)
}
xb := exportBlockPool.Get().(*exportBlock)
xb.mn = mn
xb.timestamps, xb.values = b.AppendRowsWithTimeRangeFilter(xb.timestamps[:0], xb.values[:0], tr)
// Skip blocks that have no rows left after the time range filter.
if len(xb.timestamps) > 0 {
if err := writeLineFunc(xb, workerID); err != nil {
return err
}
}
xb.reset()
exportBlockPool.Put(xb)
return nil
})
qtChild.Done()
doneCh <- err
}()
}
// Wait for the background export to finish before flushing the remaining buffers.
err := <-doneCh
if err != nil {
return fmt.Errorf("cannot send data to remote client: %w", err)
}
if err := sw.flush(); err != nil {
return fmt.Errorf("cannot send data to remote client: %w", err)
}
if format == "promapi" {
WriteExportPromAPIFooter(bw, qt)
}
return bw.Flush()
}
// exportBlock is a unit of exported data: a metric name together with its samples.
type exportBlock struct {
	mn         *storage.MetricName
	timestamps []int64
	values     []float64
}

// reset drops the metric name reference and truncates the sample slices to zero length,
// keeping their backing arrays for reuse.
func (xb *exportBlock) reset() {
	xb.mn, xb.timestamps, xb.values = nil, xb.timestamps[:0], xb.values[:0]
}

// exportBlockPool recycles exportBlock values between export callbacks.
var exportBlockPool = &sync.Pool{
	New: func() any { return new(exportBlock) },
}
// DeleteHandler serves the /api/v1/admin/tsdb/delete_series Prometheus API request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#delete-series
//
// Requests with explicit start/end args are rejected. The rollup result cache is reset
// when at least one series has been deleted.
func DeleteHandler(startTime time.Time, r *http.Request) error {
	defer deleteDuration.UpdateDuration(startTime)

	cp, err := getCommonParams(r, startTime, true)
	if err != nil {
		return err
	}
	if !cp.IsDefaultTimeRange() {
		return fmt.Errorf("start=%d and end=%d args aren't supported. Remove these args from the query in order to delete all the matching metrics", cp.start, cp.end)
	}

	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxDeleteSeries)
	switch deleted, err := netstorage.DeleteSeries(nil, sq, cp.deadline); {
	case err != nil:
		return fmt.Errorf("cannot delete time series: %w", err)
	case deleted > 0:
		promql.ResetRollupResultCache()
	}
	return nil
}

var deleteDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/admin/tsdb/delete_series"}`)
// LabelValuesHandler processes /api/v1/label/<labelName>/values request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values
//
// The optional `limit` query arg is forwarded to netstorage.LabelValues, and the
// number of scanned series is bounded by -search.maxLabelsAPISeries.
// It returns an error if the request args cannot be parsed, if the label values
// cannot be obtained, or if the response cannot be flushed to the client.
func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName string, w http.ResponseWriter, r *http.Request) error {
	defer labelValuesDuration.UpdateDuration(startTime)

	cp, err := getCommonParamsForLabelsAPI(r, startTime, false)
	if err != nil {
		return err
	}
	limit, err := httputils.GetInt(r, "limit")
	if err != nil {
		return err
	}

	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxLabelsAPISeries)
	labelValues, err := netstorage.LabelValues(qt, labelName, sq, limit, cp.deadline)
	if err != nil {
		return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err)
	}

	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	WriteLabelValuesResponse(bw, labelValues, qt)
	if err := bw.Flush(); err != nil {
		return fmt.Errorf("cannot flush label values to remote client: %w", err)
	}
	return nil
}

var labelValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/label/{}/values"}`)
// secsPerDay is the number of seconds in a day.
const secsPerDay = 3600 * 24

// TSDBStatusHandler serves the /api/v1/status/tsdb request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#tsdb-stats
//
// `match[]` filters may be passed in order to narrow down the search.
//
// Supported args handled here:
//   - date: either "0" or a day in YYYY-MM-DD form; the current date is used when missing
//   - focusLabel: forwarded as is to netstorage.TSDBStatus
//   - topN: defaults to 10 and is clamped to the range [1, 1000]
func TSDBStatusHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer tsdbStatusDuration.UpdateDuration(startTime)

	cp, err := getCommonParams(r, startTime, false)
	if err != nil {
		return err
	}
	cp.deadline = searchutils.GetDeadlineForStatusRequest(r, startTime)

	// date is expressed in days since the Unix epoch.
	date := fasttime.UnixDate()
	switch dateStr := r.FormValue("date"); dateStr {
	case "":
		// Keep the current date.
	case "0":
		date = 0
	default:
		day, err := time.Parse("2006-01-02", dateStr)
		if err != nil {
			return fmt.Errorf("cannot parse `date` arg %q: %w", dateStr, err)
		}
		date = uint64(day.Unix()) / secsPerDay
	}

	focusLabel := r.FormValue("focusLabel")

	topN := 10
	if topNStr := r.FormValue("topN"); topNStr != "" {
		n, err := strconv.Atoi(topNStr)
		if err != nil {
			return fmt.Errorf("cannot parse `topN` arg %q: %w", topNStr, err)
		}
		switch {
		case n <= 0:
			topN = 1
		case n > 1000:
			topN = 1000
		default:
			topN = n
		}
	}

	// The search covers the whole selected day, in milliseconds.
	dayStart := int64(date*secsPerDay) * 1000
	dayEnd := int64((date+1)*secsPerDay)*1000 - 1
	sq := storage.NewSearchQuery(dayStart, dayEnd, cp.filterss, *maxTSDBStatusSeries)
	status, err := netstorage.TSDBStatus(qt, sq, focusLabel, topN, cp.deadline)
	if err != nil {
		return fmt.Errorf("cannot obtain tsdb stats: %w", err)
	}

	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	WriteTSDBStatusResponse(bw, status, qt)
	if flushErr := bw.Flush(); flushErr != nil {
		return fmt.Errorf("cannot send tsdb status response to remote client: %w", flushErr)
	}
	return nil
}

var tsdbStatusDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/status/tsdb"}`)
// LabelsHandler serves the /api/v1/labels request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names
//
// The optional `limit` arg is forwarded to netstorage.LabelNames.
func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer labelsDuration.UpdateDuration(startTime)

	cp, err := getCommonParamsForLabelsAPI(r, startTime, false)
	if err != nil {
		return err
	}
	limit, err := httputils.GetInt(r, "limit")
	if err != nil {
		return err
	}

	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxLabelsAPISeries)
	names, err := netstorage.LabelNames(qt, sq, limit, cp.deadline)
	if err != nil {
		return fmt.Errorf("cannot obtain labels: %w", err)
	}

	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	WriteLabelsResponse(bw, names, qt)
	if flushErr := bw.Flush(); flushErr != nil {
		return fmt.Errorf("cannot send labels response to remote client: %w", flushErr)
	}
	return nil
}

var labelsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/labels"}`)
// SeriesCountHandler serves the /api/v1/series/count request and responds with
// the series count reported by netstorage.SeriesCount as JSON.
func SeriesCountHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer seriesCountDuration.UpdateDuration(startTime)

	total, err := netstorage.SeriesCount(nil, searchutils.GetDeadlineForStatusRequest(r, startTime))
	if err != nil {
		return fmt.Errorf("cannot obtain series count: %w", err)
	}

	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	WriteSeriesCountResponse(bw, total)
	if flushErr := bw.Flush(); flushErr != nil {
		return fmt.Errorf("cannot send series count response to remote client: %w", flushErr)
	}
	return nil
}

var seriesCountDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/series/count"}`)
// SeriesHandler processes /api/v1/series request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers
//
// The number of fetched series is bounded by -search.maxSeries. The optional `limit`
// query arg additionally truncates the fetched metric names before they are written.
// It returns an error if the request args cannot be parsed, if the series cannot be
// fetched, or if the response cannot be flushed to the client.
func SeriesHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer seriesDuration.UpdateDuration(startTime)

	// Do not set start to httputils.minTimeMsecs by default as Prometheus does,
	// since this leads to fetching and scanning all the data from the storage,
	// which can take a lot of time for big storages.
	// It is better setting start as end-defaultStep by default.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/91
	cp, err := getCommonParamsForLabelsAPI(r, startTime, true)
	if err != nil {
		return err
	}
	limit, err := httputils.GetInt(r, "limit")
	if err != nil {
		return err
	}

	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxSeriesLimit)
	metricNames, err := netstorage.SearchMetricNames(qt, sq, cp.deadline)
	if err != nil {
		return fmt.Errorf("cannot fetch time series for %q: %w", sq, err)
	}

	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	if limit > 0 && limit < len(metricNames) {
		metricNames = metricNames[:limit]
	}
	qtDone := func() {
		qt.Donef("start=%d, end=%d", cp.start, cp.end)
	}
	WriteSeriesResponse(bw, metricNames, qt, qtDone)
	// Wrap the flush error with context, as the sibling handlers in this file do.
	if err := bw.Flush(); err != nil {
		return fmt.Errorf("cannot send series response to remote client: %w", err)
	}
	return nil
}

var seriesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/series"}`)
// QueryHandler processes /api/v1/query request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
//
// The query is served via one of three paths:
//   - a metric selector with a rollup window (promql.IsMetricSelectorWithRollup) is answered
//     by exporting samples through exportHandler in "promapi" format;
//   - a rollup over a subexpression (promql.IsRollup) is answered by queryRangeHandler;
//   - any other query is evaluated at a single timestamp via promql.Exec.
//
// All timestamps and durations handled here are in milliseconds.
func QueryHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, r *http.Request) error {
defer queryDuration.UpdateDuration(startTime)
// ct is the request start time in milliseconds.
ct := startTime.UnixNano() / 1e6
deadline := searchutils.GetDeadlineForQuery(r, startTime)
mayCache := !httputils.GetBool(r, "nocache")
query := r.FormValue("query")
if len(query) == 0 {
return fmt.Errorf("missing `query` arg")
}
start, err := httputils.GetTime(r, "time", ct)
if err != nil {
return err
}
lookbackDelta, err := getMaxLookback(r)
if err != nil {
return err
}
// lookbackDelta is passed as the fallback value for the `step` arg.
step, err := httputils.GetDuration(r, "step", lookbackDelta)
if err != nil {
return err
}
if step <= 0 {
step = defaultStep
}
if len(query) > maxQueryLen.IntN() {
return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", len(query), maxQueryLen.N)
}
etfs, err := searchutils.GetExtraTagFilters(r)
if err != nil {
return err
}
// Path 1: metric selector with a lookbehind window - export the samples on (end-window, end].
if childQuery, windowExpr, offsetExpr := promql.IsMetricSelectorWithRollup(query); childQuery != "" {
window, err := windowExpr.NonNegativeDuration(step)
if err != nil {
return fmt.Errorf("cannot parse lookbehind window in square brackets at %s: %w", query, err)
}
offset := offsetExpr.Duration(step)
start -= offset
end := start
start = end - window
// Do not include data point with a timestamp matching the lower boundary of the window as Prometheus does.
start++
// With a zero window the increment above moves start past end; clamp end to keep the range valid.
if end < start {
end = start
}
tagFilterss, err := getTagFilterssFromMatches([]string{childQuery})
if err != nil {
return err
}
filterss := searchutils.JoinTagFilterss(tagFilterss, etfs)
cp := &commonParams{
deadline: deadline,
start: start,
end: end,
filterss: filterss,
}
if err := exportHandler(qt, w, cp, "promapi", 0, false); err != nil {
return fmt.Errorf("error when exporting data for query=%q on the time range (start=%d, end=%d): %w", childQuery, start, end, err)
}
return nil
}
// Path 2: rollup over a subexpression - evaluate the inner query on the range [end-window, end].
if childQuery, windowExpr, stepExpr, offsetExpr := promql.IsRollup(query); childQuery != "" {
newStep, err := stepExpr.NonNegativeDuration(step)
if err != nil {
return fmt.Errorf("cannot parse step in square brackets at %s: %w", query, err)
}
// A positive step from the square brackets overrides the request step.
if newStep > 0 {
step = newStep
}
window, err := windowExpr.NonNegativeDuration(step)
if err != nil {
return fmt.Errorf("cannot parse lookbehind window in square brackets at %s: %w", query, err)
}
offset := offsetExpr.Duration(step)
start -= offset
end := start
start = end - window
if err := queryRangeHandler(qt, startTime, w, childQuery, start, end, step, r, ct, etfs); err != nil {
return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %w", childQuery, start, end, step, err)
}
return nil
}
// Path 3: evaluate the query at a single timestamp.
queryOffset, err := getLatencyOffsetMilliseconds(r)
if err != nil {
return err
}
// NOTE(review): the `nocache` lookup below repeats the one stored in mayCache above.
if !httputils.GetBool(r, "nocache") && ct-start < queryOffset && start-ct < queryOffset {
// Adjust start time only if `nocache` arg isn't set.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/241
startPrev := start
start = ct - queryOffset
// From here on queryOffset holds the shift needed to restore the requested timestamp in the results.
queryOffset = startPrev - start
} else {
queryOffset = 0
}
qs := &promql.QueryStats{}
ec := &promql.EvalConfig{
Start: start,
End: start,
Step: step,
MaxPointsPerSeries: *maxPointsPerTimeseries,
MaxSeries: GetMaxUniqueTimeSeries(),
QuotedRemoteAddr: httpserver.GetQuotedRemoteAddr(r),
Deadline: deadline,
MayCache: mayCache,
LookbackDelta: lookbackDelta,
RoundDigits: getRoundDigits(r),
EnforcedTagFilterss: etfs,
GetRequestURI: func() string {
return httpserver.GetRequestURI(r)
},
QueryStats: qs,
}
result, err := promql.Exec(qt, ec, query, true)
if err != nil {
return fmt.Errorf("error when executing query=%q for (time=%d, step=%d): %w", query, start, step, err)
}
if queryOffset > 0 {
// Shift result timestamps back by the amount the start time was moved above.
for i := range result {
// This r shadows the *http.Request parameter within the loop body only.
r := &result[i]
// Do not modify r.Timestamps, since they may be shared among multiple series.
// Make a copy instead.
timestamps := append([]int64{}, r.Timestamps...)
for j := range timestamps {
timestamps[j] += queryOffset
}
r.Timestamps = timestamps
}
}
w.Header().Set("Content-Type", "application/json")
bw := bufferedwriter.Get(w)
defer bufferedwriter.Put(bw)
qtDone := func() {
qt.Donef("query=%s, time=%d: series=%d", query, start, len(result))
}
WriteQueryResponse(bw, result, qt, qtDone, qs)
if err := bw.Flush(); err != nil {
return fmt.Errorf("cannot flush query response to remote client: %w", err)
}
return nil
}
// queryDuration tracks the duration of /api/v1/query requests.
var queryDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query"}`)
// QueryRangeHandler serves the /api/v1/query_range request.
//
// See https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
//
// It parses the `query`, `start`, `end` and `step` args plus the extra tag filters,
// and delegates the evaluation to queryRangeHandler.
func QueryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, r *http.Request) error {
	defer queryRangeDuration.UpdateDuration(startTime)

	// nowMs is the request start time in milliseconds.
	nowMs := startTime.UnixNano() / 1e6

	query := r.FormValue("query")
	if query == "" {
		return fmt.Errorf("missing `query` arg")
	}
	start, err := httputils.GetTime(r, "start", nowMs-defaultStep)
	if err != nil {
		return err
	}
	end, err := httputils.GetTime(r, "end", nowMs)
	if err != nil {
		return err
	}
	step, err := httputils.GetDuration(r, "step", defaultStep)
	if err != nil {
		return err
	}
	etfs, err := searchutils.GetExtraTagFilters(r)
	if err != nil {
		return err
	}

	err = queryRangeHandler(qt, startTime, w, query, start, end, step, r, nowMs, etfs)
	if err == nil {
		return nil
	}
	return fmt.Errorf("error when executing query=%q on the time range (start=%d, end=%d, step=%d): %w", query, start, end, step, err)
}
// queryRangeHandler evaluates query on the time range [start..end] with the given step
// and writes the result to w as a query_range JSON response.
//
// start, end, step and ct are in milliseconds; ct is the request start time.
// etfs holds the extra tag filters enforced on the query.
func queryRangeHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseWriter, query string,
	start, end, step int64, r *http.Request, ct int64, etfs [][]storage.TagFilter) error {
	deadline := searchutils.GetDeadlineForQuery(r, startTime)
	mayCache := !httputils.GetBool(r, "nocache")
	lookbackDelta, err := getMaxLookback(r)
	if err != nil {
		return err
	}

	// Validate input args.
	if queryLen := len(query); queryLen > maxQueryLen.IntN() {
		return fmt.Errorf("too long query; got %d bytes; mustn't exceed `-search.maxQueryLen=%d` bytes", queryLen, maxQueryLen.N)
	}
	if start > end {
		end = start + defaultStep
	}
	if verr := promql.ValidateMaxPointsPerSeries(start, end, step, *maxPointsPerTimeseries); verr != nil {
		return fmt.Errorf("%w; (see -search.maxPointsPerTimeseries command-line flag)", verr)
	}
	if mayCache {
		start, end = promql.AdjustStartEnd(start, end, step)
	}

	stats := &promql.QueryStats{}
	evalCfg := &promql.EvalConfig{
		Start:               start,
		End:                 end,
		Step:                step,
		MaxPointsPerSeries:  *maxPointsPerTimeseries,
		MaxSeries:           GetMaxUniqueTimeSeries(),
		QuotedRemoteAddr:    httpserver.GetQuotedRemoteAddr(r),
		Deadline:            deadline,
		MayCache:            mayCache,
		LookbackDelta:       lookbackDelta,
		RoundDigits:         getRoundDigits(r),
		EnforcedTagFilterss: etfs,
		GetRequestURI: func() string {
			return httpserver.GetRequestURI(r)
		},
		QueryStats: stats,
	}
	result, err := promql.Exec(qt, evalCfg, query, false)
	if err != nil {
		return err
	}

	// For small steps, adjust the points falling closer to the current time than the latency offset.
	if step < maxStepForPointsAdjustment.Milliseconds() {
		latencyMs, err := getLatencyOffsetMilliseconds(r)
		if err != nil {
			return err
		}
		if cutoff := ct - latencyMs; cutoff < end {
			result = adjustLastPoints(result, cutoff, ct+step)
		}
	}

	// Remove NaN values as Prometheus does.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/153
	result = removeEmptyValuesAndTimeseries(result)

	w.Header().Set("Content-Type", "application/json")
	bw := bufferedwriter.Get(w)
	defer bufferedwriter.Put(bw)
	finishTrace := func() {
		qt.Donef("start=%d, end=%d, step=%d, query=%q: series=%d", start, end, step, query, len(result))
	}
	WriteQueryRangeResponse(bw, result, qt, finishTrace, stats)
	if flushErr := bw.Flush(); flushErr != nil {
		return fmt.Errorf("cannot send query range response to remote client: %w", flushErr)
	}
	return nil
}
// removeEmptyValuesAndTimeseries drops NaN values from every series in tss
// and drops the series that have no values left.
//
// The filtering is performed in place: the returned slice shares its backing array with tss,
// and the Values slice of every series is compacted in place. Timestamps are copied
// into a new slice whenever at least one point is removed from the series.
func removeEmptyValuesAndTimeseries(tss []netstorage.Result) []netstorage.Result {
	kept := tss[:0]
	for i := range tss {
		series := &tss[i]

		containsNaN := false
		for _, v := range series.Values {
			if math.IsNaN(v) {
				containsNaN = true
				break
			}
		}

		if containsNaN {
			// Values can be compacted in place, while Timestamps must be copied,
			// since the Timestamps slice may be shared among multiple time series.
			origTimestamps := series.Timestamps
			values := series.Values[:0]
			timestamps := make([]int64, 0, len(series.Timestamps))
			for k, v := range series.Values {
				if !math.IsNaN(v) {
					values = append(values, v)
					timestamps = append(timestamps, origTimestamps[k])
				}
			}
			series.Values = values
			series.Timestamps = timestamps
		}

		// Keep only the series that still have data points.
		if len(series.Values) > 0 {
			kept = append(kept, *series)
		}
	}
	return kept
}
var (
	// queryRangeDuration is the summary registered under the request duration metric name
	// for the /api/v1/query_range path.
	queryRangeDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/api/v1/query_range"}`)

	// nan is a pre-computed NaN value used as a filler for points without a preceding value.
	nan = math.NaN()
)
// adjustLastPoints overwrites the trailing points of every series in tss, which fall
// into the time range (start..end], with the value of the last point at or before start.
// Such points may contain incomplete values.
//
// NaN is used as the replacement when the series has no points at or before start.
// The series are modified in place and tss is returned.
func adjustLastPoints(tss []netstorage.Result, start, end int64) []netstorage.Result {
	for i := range tss {
		series := &tss[i]
		vals, stamps := series.Values, series.Timestamps
		n := len(stamps)

		if n > 0 && stamps[n-1] > end {
			// It looks like the `offset` is used in the query, which shifts time range beyond the `end`.
			// Leave such a time series as is, since it is unclear which points may be incomplete in it.
			// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/625
			continue
		}

		// Walk backwards in order to locate the first point of the trailing run
		// with timestamps bigger than start.
		first := n
		for first > 0 && stamps[first-1] > start {
			first--
		}

		fill := nan
		if first > 0 {
			fill = vals[first-1]
		}

		for k := first; k < n && stamps[k] <= end; k++ {
			vals[k] = fill
		}
	}
	return tss
}
// getMaxLookback returns the lookback value for the request r.
//
// The default is taken from the maxLookback flag, falling back to maxStalenessInterval
// when maxLookback is zero. It can be overridden via the `max_lookback` request arg.
// If setLookbackToStep is enabled, then the `step` request arg takes precedence
// over the value obtained so far.
func getMaxLookback(r *http.Request) (int64, error) {
	defaultMs := maxLookback.Milliseconds()
	if defaultMs == 0 {
		defaultMs = maxStalenessInterval.Milliseconds()
	}

	lookbackMs, err := httputils.GetDuration(r, "max_lookback", defaultMs)
	if err != nil {
		return 0, err
	}
	if !*setLookbackToStep {
		return lookbackMs, nil
	}

	stepMs, err := httputils.GetDuration(r, "step", lookbackMs)
	if err != nil {
		return 0, err
	}
	return stepMs, nil
}
// getTagFilterssFromMatches parses every selector from matches and returns
// the concatenation of the resulting tag filters in the order of matches.
//
// The first selector that cannot be parsed aborts the processing with an error.
func getTagFilterssFromMatches(matches []string) ([][]storage.TagFilter, error) {
	result := make([][]storage.TagFilter, 0, len(matches))
	for i := range matches {
		selector := matches[i]
		parsed, err := searchutils.ParseMetricSelector(selector)
		if err != nil {
			return nil, fmt.Errorf("cannot parse matches[]=%s: %w", selector, err)
		}
		result = append(result, parsed...)
	}
	return result, nil
}
// getRoundDigits returns the value of the `round_digits` request arg.
//
// 100 is returned when the arg is missing, empty or isn't a valid integer.
func getRoundDigits(r *http.Request) int {
	const defaultRoundDigits = 100

	if raw := r.FormValue("round_digits"); raw != "" {
		if digits, err := strconv.Atoi(raw); err == nil {
			return digits
		}
	}
	return defaultRoundDigits
}
// getLatencyOffsetMilliseconds returns the latency offset for the request r.
//
// The default is taken from the latencyOffset flag, with negative values replaced by zero.
// It can be overridden via the `latency_offset` request arg.
func getLatencyOffsetMilliseconds(r *http.Request) (int64, error) {
	defaultOffsetMs := latencyOffset.Milliseconds()
	if defaultOffsetMs < 0 {
		// Negative offsets are clamped to zero. Zero latency offset may be useful for some use cases.
		// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2061#issuecomment-1299109836
		defaultOffsetMs = 0
	}
	return httputils.GetDuration(r, "latency_offset", defaultOffsetMs)
}
// QueryStatsHandler writes query stats for `/api/v1/status/top_queries` to w as JSON.
//
// The following request args are supported:
//
//   - topN - the number of entries to return; 20 by default
//   - maxLifetime - the duration arg passed to the stats writer; 10 minutes by default
func QueryStatsHandler(w http.ResponseWriter, r *http.Request) error {
	topN := 20
	if raw := r.FormValue("topN"); raw != "" {
		parsed, err := strconv.Atoi(raw)
		if err != nil {
			return fmt.Errorf("cannot parse `topN` arg %q: %w", raw, err)
		}
		topN = parsed
	}

	const defaultMaxLifetimeMsecs = 10 * 60 * 1000
	lifetimeMsecs, err := httputils.GetDuration(r, "maxLifetime", defaultMaxLifetimeMsecs)
	if err != nil {
		return fmt.Errorf("cannot parse `maxLifetime` arg: %w", err)
	}
	maxLifetime := time.Duration(lifetimeMsecs) * time.Millisecond

	w.Header().Set("Content-Type", "application/json")
	out := bufferedwriter.Get(w)
	defer bufferedwriter.Put(out)
	querystats.WriteJSONQueryStats(out, topN, maxLifetime)
	if err := out.Flush(); err != nil {
		return fmt.Errorf("cannot send query stats response to client: %w", err)
	}
	return nil
}
// commonParams contains common parameters for all /api/v1/* handlers
//
// timeout, start, end, match[], extra_label, extra_filters[]
type commonParams struct {
	// deadline is the deadline for the request. It is initialized with the query deadline
	// and may be replaced by the export or labels API deadline by the corresponding constructors.
	deadline searchutils.Deadline
	// start is the value of the `start` request arg; 0 is passed as its default.
	start int64
	// end is the value of the `end` request arg; currentTimestamp is passed as its default.
	// It is limited to the request start time plus 2 days and is never smaller than start.
	end int64
	// currentTimestamp is the request start time in milliseconds.
	currentTimestamp int64
	// filterss contains tag filters parsed from `match[]` and `match` request args,
	// optionally joined with the extra tag filters from the request.
	filterss [][]storage.TagFilter
}
// IsDefaultTimeRange reports whether cp has zero start and its end is less than 1000
// units (milliseconds, given how currentTimestamp is computed) behind currentTimestamp.
func (cp *commonParams) IsDefaultTimeRange() bool {
	if cp.start != 0 {
		return false
	}
	return cp.currentTimestamp-cp.end < 1000
}
// getExportParams extracts the params shared by /api/v1/export* handlers from r:
//
//   - timeout
//   - start
//   - end
//   - match[]
//   - extra_label
//   - extra_filters[]
//
// A non-empty match[] is required. The deadline in the returned params
// is the deadline for export requests.
func getExportParams(r *http.Request, startTime time.Time) (*commonParams, error) {
	params, err := getCommonParams(r, startTime, true)
	if err != nil {
		return nil, err
	}
	// Replace the generic query deadline with the export-specific one.
	params.deadline = searchutils.GetDeadlineForExport(r, startTime)
	return params, nil
}
// getCommonParamsForLabelsAPI extracts common params from r for labels API handlers.
//
// It differs from getCommonParams in the following ways:
//
//   - extra filters may be ignored when no match[] filters are passed
//     and ignoreExtraFiltersAtLabelsAPI is set
//   - zero start is replaced with end minus defaultStep
//   - the deadline for labels API requests is used
func getCommonParamsForLabelsAPI(r *http.Request, startTime time.Time, requireNonEmptyMatch bool) (*commonParams, error) {
	params, err := getCommonParamsInternal(r, startTime, requireNonEmptyMatch, true)
	if err != nil {
		return nil, err
	}
	if params.start == 0 {
		params.start = params.end - defaultStep
	}
	params.deadline = searchutils.GetDeadlineForLabelsAPI(r, startTime)
	return params, nil
}
// getCommonParams obtains common params from r, which are used in /api/v1/* handlers:
//
//   - timeout
//   - start
//   - end
//   - match[]
//   - extra_label
//   - extra_filters[]
//
// If requireNonEmptyMatch is set, then an error is returned when the request
// has no match[] args. Extra filters are always applied, since the request
// isn't treated as a labels API request.
func getCommonParams(r *http.Request, startTime time.Time, requireNonEmptyMatch bool) (*commonParams, error) {
	return getCommonParamsInternal(r, startTime, requireNonEmptyMatch, false)
}
// getCommonParamsInternal builds commonParams from the request r.
//
// startTime is the time the request started at; it is used for the deadline,
// as the default for the `end` arg and for limiting `end`.
// If requireNonEmptyMatch is set, then an error is returned when neither `match[]`
// nor `match` args are passed.
// isLabelsAPI allows skipping extra filters when no matches are passed
// and ignoreExtraFiltersAtLabelsAPI is set.
func getCommonParamsInternal(r *http.Request, startTime time.Time, requireNonEmptyMatch, isLabelsAPI bool) (*commonParams, error) {
	deadline := searchutils.GetDeadlineForQuery(r, startTime)
	start, err := httputils.GetTime(r, "start", 0)
	if err != nil {
		return nil, err
	}
	ct := startTime.UnixNano() / 1e6
	end, err := httputils.GetTime(r, "end", ct)
	if err != nil {
		return nil, err
	}

	// Limit the `end` arg to the current time +2 days in the same way
	// as it is limited during data ingestion.
	// See https://github.com/VictoriaMetrics/VictoriaMetrics/blob/ea06d2fd3ccbbb6aa4480ab3b04f7b671408be2a/lib/storage/table.go#L378
	// This should fix possible timestamp overflow - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2669
	const maxFutureMsecs = 2 * 24 * 3600 * 1000
	if limit := ct + maxFutureMsecs; end > limit {
		end = limit
	}
	if end < start {
		end = start
	}

	// Both `match[]` and `match` args are accepted.
	matches := make([]string, 0, len(r.Form["match[]"])+len(r.Form["match"]))
	matches = append(matches, r.Form["match[]"]...)
	matches = append(matches, r.Form["match"]...)
	if requireNonEmptyMatch && len(matches) == 0 {
		return nil, fmt.Errorf("missing `match[]` arg")
	}
	filterss, err := getTagFilterssFromMatches(matches)
	if err != nil {
		return nil, err
	}

	// If matches isn't empty, then there is no sense in ignoring extra filters
	// even if ignoreExtraFiltersAtLabelsAPI is set, since extra filters won't slow down
	// the query - they can only improve query performance by reducing the number
	// of matching series at the storage level.
	skipExtraFilters := len(filterss) == 0 && isLabelsAPI && *ignoreExtraFiltersAtLabelsAPI
	if !skipExtraFilters {
		etfs, err := searchutils.GetExtraTagFilters(r)
		if err != nil {
			return nil, err
		}
		filterss = searchutils.JoinTagFilterss(filterss, etfs)
	}

	return &commonParams{
		deadline:         deadline,
		start:            start,
		end:              end,
		currentTimestamp: ct,
		filterss:         filterss,
	}, nil
}
// scalableWriter accumulates data in per-worker buffers before writing it to bw.
type scalableWriter struct {
	// bw is the destination for the buffered data.
	bw *bufferedwriter.Writer
	// m maps workerID (uint) to the *bytesutil.ByteBuffer owned by the given worker.
	m sync.Map
}
// newScalableWriter returns a scalableWriter, which writes buffered data to bw.
func newScalableWriter(bw *bufferedwriter.Writer) *scalableWriter {
	return &scalableWriter{bw: bw}
}
// getBuffer returns the buffer registered for the given workerID.
//
// A new empty buffer is created and registered on the first call for the given workerID.
func (sw *scalableWriter) getBuffer(workerID uint) *bytesutil.ByteBuffer {
	if existing, found := sw.m.Load(workerID); found {
		return existing.(*bytesutil.ByteBuffer)
	}
	bb := &bytesutil.ByteBuffer{}
	sw.m.Store(workerID, bb)
	return bb
}
// maybeFlushBuffer writes bb contents to the underlying writer and resets bb
// if bb holds at least 1MiB of data. Smaller buffers are left untouched.
//
// bb is reset even if the write fails; the write error is returned to the caller.
func (sw *scalableWriter) maybeFlushBuffer(bb *bytesutil.ByteBuffer) error {
	const flushThresholdBytes = 1024 * 1024
	if len(bb.B) < flushThresholdBytes {
		return nil
	}
	_, writeErr := sw.bw.Write(bb.B)
	bb.Reset()
	return writeErr
}
// flush writes the contents of all the per-worker buffers to the underlying writer
// and then flushes the underlying writer.
//
// The iteration over buffers stops at the first failed write. The write error itself
// isn't returned; only the result of the final Flush call is.
func (sw *scalableWriter) flush() error {
	writeBuffer := func(_, v any) bool {
		_, err := sw.bw.Write(v.(*bytesutil.ByteBuffer).B)
		return err == nil
	}
	sw.m.Range(writeBuffer)
	return sw.bw.Flush()
}
var (
	// maxUniqueTimeseriesValueOnce guards the one-time initialization of maxUniqueTimeseriesValue
	// performed by InitMaxUniqueTimeseries.
	maxUniqueTimeseriesValueOnce sync.Once
	// maxUniqueTimeseriesValue is the limit returned by GetMaxUniqueTimeSeries.
	maxUniqueTimeseriesValue int
)
// InitMaxUniqueTimeseries initializes the limit on unique time series returned by GetMaxUniqueTimeSeries.
//
// The value of the maxUniqueTimeseries flag is used if it is positive. Otherwise the limit
// is calculated from maxConcurrentRequests and the remaining memory.
// The calculation is split into calculateMaxUniqueTimeSeriesForResource for unit testing.
//
// Only the first call has an effect; subsequent calls are no-ops.
func InitMaxUniqueTimeseries(maxConcurrentRequests int) {
	maxUniqueTimeseriesValueOnce.Do(func() {
		limit := *maxUniqueTimeseries
		if limit <= 0 {
			limit = calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, memory.Remaining())
		}
		maxUniqueTimeseriesValue = limit
	})
}
// calculateMaxUniqueTimeSeriesForResource calculates the limit on unique time series per request
// from maxConcurrentRequests and remainingMemory (in bytes).
//
// It returns 2e9 (treated as unlimited) when maxConcurrentRequests isn't positive.
// Otherwise it returns remainingMemory divided among maxConcurrentRequests requests,
// with 200 bytes accounted per series. The chosen limit is logged in both cases.
func calculateMaxUniqueTimeSeriesForResource(maxConcurrentRequests, remainingMemory int) int {
	const (
		// unlimitedSeries is typed as int, so it is logged as a plain integer
		// instead of the `2e+09` float notation.
		unlimitedSeries int = 2e9
		// bytesPerSeries is the approximate size of 1 unique series that could occupy in the vmstorage.
		bytesPerSeries = 200
	)

	if maxConcurrentRequests <= 0 {
		// This line should NOT be reached unless the user has set an incorrect `search.maxConcurrentRequests`.
		// In such cases, fallback to unlimited.
		logger.Warnf("limiting -search.maxUniqueTimeseries to %d because -search.maxConcurrentRequests=%d.", unlimitedSeries, maxConcurrentRequests)
		return unlimitedSeries
	}

	// Calculate the max metrics limit for a single request in the worst-case concurrent scenario.
	mts := remainingMemory / bytesPerSeries / maxConcurrentRequests
	logger.Infof("limiting -search.maxUniqueTimeseries to %d according to -search.maxConcurrentRequests=%d and remaining memory=%d bytes. To increase the limit, reduce -search.maxConcurrentRequests or increase memory available to the process.", mts, maxConcurrentRequests, remainingMemory)
	return mts
}
// GetMaxUniqueTimeSeries returns the limit on unique time series set by InitMaxUniqueTimeseries.
//
// The returned value is either the maxUniqueTimeseries flag value
// or the limit calculated from the available resources.
// The value is read without synchronization.
func GetMaxUniqueTimeSeries() int {
	return maxUniqueTimeseriesValue
}