From 89b778902b3a02a112c85272c49efd561cb411dd Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 10 Jun 2022 09:50:30 +0300 Subject: [PATCH] app/vmselect: add optional `limit` query arg to `/api/v1/labels` and `/api/v1/label_values` endpoints This arg allows limiting the number of sample values returned from these APIs --- README.md | 2 ++ app/vmselect/graphite/metrics_api.go | 2 +- app/vmselect/graphite/tags_api.go | 23 ++++-------------- app/vmselect/netstorage/netstorage.go | 32 +++++++++++++++++-------- app/vmselect/prometheus/prometheus.go | 30 ++++++++++++++++------- app/vmselect/searchutils/searchutils.go | 13 ++++++++++ docs/CHANGELOG.md | 1 + docs/README.md | 2 ++ docs/Single-server-VictoriaMetrics.md | 2 ++ 9 files changed, 70 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 084367ee3..8dba14810 100644 --- a/README.md +++ b/README.md @@ -607,6 +607,8 @@ For example, the following query would return data for the last 30 minutes: `/ap VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point. +VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used. + By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. 
Use `start` and `end` to select a different time range. Additionally, VictoriaMetrics provides the following handlers: diff --git a/app/vmselect/graphite/metrics_api.go b/app/vmselect/graphite/metrics_api.go index 1c328be21..c84df8c03 100644 --- a/app/vmselect/graphite/metrics_api.go +++ b/app/vmselect/graphite/metrics_api.go @@ -197,7 +197,7 @@ func MetricsExpandHandler(startTime time.Time, w http.ResponseWriter, r *http.Re func MetricsIndexHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { deadline := searchutils.GetDeadlineForQuery(r, startTime) jsonp := r.FormValue("jsonp") - metricNames, err := netstorage.GetLabelValues(nil, "__name__", deadline) + metricNames, err := netstorage.GetLabelValues(nil, "__name__", 0, deadline) if err != nil { return fmt.Errorf(`cannot obtain metric names: %w`, err) } diff --git a/app/vmselect/graphite/tags_api.go b/app/vmselect/graphite/tags_api.go index f39e19b1a..9cb78d8c9 100644 --- a/app/vmselect/graphite/tags_api.go +++ b/app/vmselect/graphite/tags_api.go @@ -5,7 +5,6 @@ import ( "net/http" "regexp" "sort" - "strconv" "strings" "time" @@ -159,7 +158,7 @@ var ( // See https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support func TagsAutoCompleteValuesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { deadline := searchutils.GetDeadlineForQuery(r, startTime) - limit, err := getInt(r, "limit") + limit, err := searchutils.GetInt(r, "limit") if err != nil { return err } @@ -245,7 +244,7 @@ var tagsAutoCompleteValuesDuration = metrics.NewSummary(`vm_request_duration_sec // See https://graphite.readthedocs.io/en/stable/tags.html#auto-complete-support func TagsAutoCompleteTagsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { deadline := searchutils.GetDeadlineForQuery(r, startTime) - limit, err := getInt(r, "limit") + limit, err := searchutils.GetInt(r, "limit") if err != nil { return err } @@ -324,7 +323,7 @@ var 
tagsAutoCompleteTagsDuration = metrics.NewSummary(`vm_request_duration_secon // See https://graphite.readthedocs.io/en/stable/tags.html#exploring-tags func TagsFindSeriesHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { deadline := searchutils.GetDeadlineForQuery(r, startTime) - limit, err := getInt(r, "limit") + limit, err := searchutils.GetInt(r, "limit") if err != nil { return err } @@ -392,7 +391,7 @@ var tagsFindSeriesDuration = metrics.NewSummary(`vm_request_duration_seconds{pat // See https://graphite.readthedocs.io/en/stable/tags.html#exploring-tags func TagValuesHandler(startTime time.Time, tagName string, w http.ResponseWriter, r *http.Request) error { deadline := searchutils.GetDeadlineForQuery(r, startTime) - limit, err := getInt(r, "limit") + limit, err := searchutils.GetInt(r, "limit") if err != nil { return err } @@ -420,7 +419,7 @@ var tagValuesDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/t // See https://graphite.readthedocs.io/en/stable/tags.html#exploring-tags func TagsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) error { deadline := searchutils.GetDeadlineForQuery(r, startTime) - limit, err := getInt(r, "limit") + limit, err := searchutils.GetInt(r, "limit") if err != nil { return err } @@ -443,18 +442,6 @@ func TagsHandler(startTime time.Time, w http.ResponseWriter, r *http.Request) er var tagsDuration = metrics.NewSummary(`vm_request_duration_seconds{path="/tags"}`) -func getInt(r *http.Request, argName string) (int, error) { - argValue := r.FormValue(argName) - if len(argValue) == 0 { - return 0, nil - } - n, err := strconv.Atoi(argValue) - if err != nil { - return 0, fmt.Errorf("cannot parse %q=%q: %w", argName, argValue, err) - } - return n, nil -} - func getSearchQueryForExprs(startTime time.Time, etfs [][]storage.TagFilter, exprs []string, maxMetrics int) (*storage.SearchQuery, error) { tfs, err := exprsToTagFilters(exprs) if err != nil { diff --git 
a/app/vmselect/netstorage/netstorage.go b/app/vmselect/netstorage/netstorage.go index 4f5e6054d..99b076cf4 100644 --- a/app/vmselect/netstorage/netstorage.go +++ b/app/vmselect/netstorage/netstorage.go @@ -612,13 +612,16 @@ func DeleteSeries(qt *querytracer.Tracer, sq *storage.SearchQuery, deadline sear } // GetLabelsOnTimeRange returns labels for the given tr until the given deadline. -func GetLabelsOnTimeRange(qt *querytracer.Tracer, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) { +func GetLabelsOnTimeRange(qt *querytracer.Tracer, tr storage.TimeRange, limit int, deadline searchutils.Deadline) ([]string, error) { qt = qt.NewChild("get labels on timeRange=%s", &tr) defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } - labels, err := vmstorage.SearchTagKeysOnTimeRange(tr, *maxTagKeysPerSearch, deadline.Deadline()) + if limit > *maxTagKeysPerSearch || limit <= 0 { + limit = *maxTagKeysPerSearch + } + labels, err := vmstorage.SearchTagKeysOnTimeRange(tr, limit, deadline.Deadline()) qt.Printf("get %d labels", len(labels)) if err != nil { return nil, fmt.Errorf("error during labels search on time range: %w", err) @@ -642,7 +645,7 @@ func GetGraphiteTags(qt *querytracer.Tracer, filter string, limit int, deadline if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } - labels, err := GetLabels(nil, deadline) + labels, err := GetLabels(nil, 0, deadline) if err != nil { return nil, err } @@ -683,13 +686,16 @@ func hasString(a []string, s string) bool { } // GetLabels returns labels until the given deadline. 
-func GetLabels(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]string, error) { +func GetLabels(qt *querytracer.Tracer, limit int, deadline searchutils.Deadline) ([]string, error) { qt = qt.NewChild("get labels") defer qt.Done() if deadline.Exceeded() { return nil, fmt.Errorf("timeout exceeded before starting the query processing: %s", deadline.String()) } - labels, err := vmstorage.SearchTagKeys(*maxTagKeysPerSearch, deadline.Deadline()) + if limit > *maxTagKeysPerSearch || limit <= 0 { + limit = *maxTagKeysPerSearch + } + labels, err := vmstorage.SearchTagKeys(limit, deadline.Deadline()) qt.Printf("get %d labels from global index", len(labels)) if err != nil { return nil, fmt.Errorf("error during labels search: %w", err) @@ -708,7 +714,7 @@ func GetLabels(qt *querytracer.Tracer, deadline searchutils.Deadline) ([]string, // GetLabelValuesOnTimeRange returns label values for the given labelName on the given tr // until the given deadline. -func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr storage.TimeRange, deadline searchutils.Deadline) ([]string, error) { +func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr storage.TimeRange, limit int, deadline searchutils.Deadline) ([]string, error) { qt = qt.NewChild("get values for label %s on a timeRange %s", labelName, &tr) defer qt.Done() if deadline.Exceeded() { @@ -718,7 +724,10 @@ func GetLabelValuesOnTimeRange(qt *querytracer.Tracer, labelName string, tr stor labelName = "" } // Search for tag values - labelValues, err := vmstorage.SearchTagValuesOnTimeRange([]byte(labelName), tr, *maxTagValuesPerSearch, deadline.Deadline()) + if limit > *maxTagValuesPerSearch || limit <= 0 { + limit = *maxTagValuesPerSearch + } + labelValues, err := vmstorage.SearchTagValuesOnTimeRange([]byte(labelName), tr, limit, deadline.Deadline()) qt.Printf("get %d label values", len(labelValues)) if err != nil { return nil, fmt.Errorf("error during label values search on time range for 
labelName=%q: %w", labelName, err) @@ -739,7 +748,7 @@ func GetGraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit if tagName == "name" { tagName = "" } - tagValues, err := GetLabelValues(nil, tagName, deadline) + tagValues, err := GetLabelValues(nil, tagName, 0, deadline) if err != nil { return nil, err } @@ -757,7 +766,7 @@ func GetGraphiteTagValues(qt *querytracer.Tracer, tagName, filter string, limit // GetLabelValues returns label values for the given labelName // until the given deadline. -func GetLabelValues(qt *querytracer.Tracer, labelName string, deadline searchutils.Deadline) ([]string, error) { +func GetLabelValues(qt *querytracer.Tracer, labelName string, limit int, deadline searchutils.Deadline) ([]string, error) { qt = qt.NewChild("get values for label %s", labelName) defer qt.Done() if deadline.Exceeded() { @@ -767,7 +776,10 @@ func GetLabelValues(qt *querytracer.Tracer, labelName string, deadline searchuti labelName = "" } // Search for tag values - labelValues, err := vmstorage.SearchTagValues([]byte(labelName), *maxTagValuesPerSearch, deadline.Deadline()) + if limit > *maxTagValuesPerSearch || limit <= 0 { + limit = *maxTagValuesPerSearch + } + labelValues, err := vmstorage.SearchTagValues([]byte(labelName), limit, deadline.Deadline()) qt.Printf("get %d label values", len(labelValues)) if err != nil { return nil, fmt.Errorf("error during label values search for labelName=%q: %w", labelName, err) diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 9d853a020..3c7dd973e 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -450,10 +450,14 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s if err != nil { return err } + limit, err := searchutils.GetInt(r, "limit") + if err != nil { + return err + } var labelValues []string if len(cp.filterss) == 0 { if cp.IsDefaultTimeRange() { - labelValues, err = 
netstorage.GetLabelValues(qt, labelName, cp.deadline) + labelValues, err = netstorage.GetLabelValues(qt, labelName, limit, cp.deadline) if err != nil { return fmt.Errorf(`cannot obtain label values for %q: %w`, labelName, err) } @@ -465,7 +469,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s MinTimestamp: cp.start, MaxTimestamp: cp.end, } - labelValues, err = netstorage.GetLabelValuesOnTimeRange(qt, labelName, tr, cp.deadline) + labelValues, err = netstorage.GetLabelValuesOnTimeRange(qt, labelName, tr, limit, cp.deadline) if err != nil { return fmt.Errorf(`cannot obtain label values on time range for %q: %w`, labelName, err) } @@ -478,7 +482,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s if cp.start == 0 { cp.start = cp.end - defaultStep } - labelValues, err = labelValuesWithMatches(qt, labelName, cp) + labelValues, err = labelValuesWithMatches(qt, labelName, cp, limit) if err != nil { return fmt.Errorf("cannot obtain label values for %q on time range [%d...%d]: %w", labelName, cp.start, cp.end, err) } @@ -494,7 +498,7 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s return nil } -func labelValuesWithMatches(qt *querytracer.Tracer, labelName string, cp *commonParams) ([]string, error) { +func labelValuesWithMatches(qt *querytracer.Tracer, labelName string, cp *commonParams, limit int) ([]string, error) { // Add `labelName!=''` tag filter in order to filter out series without the labelName. // There is no need in adding `__name__!=''` filter, since all the time series should // already have non-empty name. 
@@ -546,6 +550,9 @@ func labelValuesWithMatches(qt *querytracer.Tracer, labelName string, cp *common for labelValue := range m { labelValues = append(labelValues, labelValue) } + if limit > 0 && len(labelValues) > limit { + labelValues = labelValues[:limit] + } sort.Strings(labelValues) qt.Printf("sort %d label values", len(labelValues)) return labelValues, nil @@ -659,10 +666,14 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW if err != nil { return err } + limit, err := searchutils.GetInt(r, "limit") + if err != nil { + return err + } var labels []string if len(cp.filterss) == 0 { if cp.IsDefaultTimeRange() { - labels, err = netstorage.GetLabels(qt, cp.deadline) + labels, err = netstorage.GetLabels(qt, limit, cp.deadline) if err != nil { return fmt.Errorf("cannot obtain labels: %w", err) } @@ -674,7 +685,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW MinTimestamp: cp.start, MaxTimestamp: cp.end, } - labels, err = netstorage.GetLabelsOnTimeRange(qt, tr, cp.deadline) + labels, err = netstorage.GetLabelsOnTimeRange(qt, tr, limit, cp.deadline) if err != nil { return fmt.Errorf("cannot obtain labels on time range: %w", err) } @@ -685,7 +696,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW if cp.start == 0 { cp.start = cp.end - defaultStep } - labels, err = labelsWithMatches(qt, cp) + labels, err = labelsWithMatches(qt, cp, limit) if err != nil { return fmt.Errorf("cannot obtain labels for timeRange=[%d..%d]: %w", cp.start, cp.end, err) } @@ -701,7 +712,7 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW return nil } -func labelsWithMatches(qt *querytracer.Tracer, cp *commonParams) ([]string, error) { +func labelsWithMatches(qt *querytracer.Tracer, cp *commonParams, limit int) ([]string, error) { sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxSeriesLimit) m := make(map[string]struct{}) if cp.end-cp.start > 
24*3600*1000 { @@ -741,6 +752,9 @@ func labelsWithMatches(qt *querytracer.Tracer, cp *commonParams) ([]string, erro for label := range m { labels = append(labels, label) } + if limit > 0 && limit < len(labels) { + labels = labels[:limit] + } sort.Strings(labels) qt.Printf("sort %d labels", len(labels)) return labels, nil diff --git a/app/vmselect/searchutils/searchutils.go b/app/vmselect/searchutils/searchutils.go index c8ea77ae4..1ea6cb1c0 100644 --- a/app/vmselect/searchutils/searchutils.go +++ b/app/vmselect/searchutils/searchutils.go @@ -25,6 +25,19 @@ func roundToSeconds(ms int64) int64 { return ms - ms%1000 } +// GetInt returns integer value from the given argKey. +func GetInt(r *http.Request, argKey string) (int, error) { + argValue := r.FormValue(argKey) + if len(argValue) == 0 { + return 0, nil + } + n, err := strconv.Atoi(argValue) + if err != nil { + return 0, fmt.Errorf("cannot parse integer %q=%q: %w", argKey, argValue, err) + } + return n, nil +} + // GetTime returns time from the given argKey query arg. // // If argKey is missing in r, then defaultMs rounded to seconds is returned. diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f8c61fef6..18ec49947 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -23,6 +23,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: add support of `lowercase` and `uppercase` relabeling actions in the same way as [Prometheus 2.36.0 does](https://github.com/prometheus/prometheus/releases/tag/v2.36.0). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2664). * FEATURE: add ability to change the `indexdb` rotation timezone offset via `-retentionTimezoneOffset` command-line flag. Previously it was performed at 4am UTC time. This could lead to performance degradation in the middle of the day when VictoriaMetrics runs in time zones located too far from UTC. 
Thanks to @cnych for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2574). * FEATURE: limit the number of background merge threads on systems with big number of CPU cores by default. This increases the max size of parts, which can be created during background merge when `-storageDataPath` directory has limited free disk space. This may improve on-disk data compression efficiency and query performance. The limits can be tuned if needed with `-smallMergeConcurrency` and `-bigMergeConcurrency` command-line flags. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2673). +* FEATURE: accept optional `limit` query arg at [/api/v1/labels](https://prometheus.io/docs/prometheus/latest/querying/api/#getting-label-names) and [/api/v1/label_values](https://prometheus.io/docs/prometheus/latest/querying/api/#querying-label-values) for limiting the number of sample entries returned from these endpoints. See [these docs](https://docs.victoriametrics.com/#prometheus-querying-api-enhancements). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): support `limit` param per-group for limiting number of produced samples per each rule. Thanks to @Howie59 for [implementation](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2676). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): remove dependency on Internet access at [web API pages](https://docs.victoriametrics.com/vmalert.html#web). Previously the functionality and the layout of these pages was broken without Internet access. See [shis issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2594). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): implement the `http://vmagent:8429/service-discovery` page in the same way as Prometheus does. This page shows the original labels for all the discovered targets alongside the resulting labels after the relabeling. This simplifies service discovery debugging. 
diff --git a/docs/README.md b/docs/README.md index 084367ee3..8dba14810 100644 --- a/docs/README.md +++ b/docs/README.md @@ -607,6 +607,8 @@ For example, the following query would return data for the last 30 minutes: `/ap VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point. +VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used. + By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range. Additionally, VictoriaMetrics provides the following handlers: diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index c6b8e65c3..223f8d492 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -611,6 +611,8 @@ For example, the following query would return data for the last 30 minutes: `/ap VictoriaMetrics accepts `round_digits` query arg for `/api/v1/query` and `/api/v1/query_range` handlers. It can be used for rounding response values to the given number of digits after the decimal point. For example, `/api/v1/query?query=avg_over_time(temperature[1h])&round_digits=2` would round response values to up to two digits after the decimal point. 
+VictoriaMetrics accepts `limit` query arg for `/api/v1/labels` and `/api/v1/label/<labelName>/values` handlers for limiting the number of returned entries. For example, the query to `/api/v1/labels?limit=5` returns a sample of up to 5 unique labels, while ignoring the rest of labels. If the provided `limit` value exceeds the corresponding `-search.maxTagKeys` / `-search.maxTagValues` command-line flag values, then limits specified in the command-line flags are used. + By default, VictoriaMetrics returns time series for the last 5 minutes from `/api/v1/series`, while the Prometheus API defaults to all time. Use `start` and `end` to select a different time range. Additionally, VictoriaMetrics provides the following handlers: