From 5d66ee88bdb2bbab6419a2854a130c8838998923 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 29 Jan 2024 15:50:15 +0100 Subject: [PATCH] lib/storage: do not check the limit for -search.maxUniqueTimeseries when performing /api/v1/labels and /api/v1/label/.../values requests This limit has little sense for these APIs, since: - Thses APIs frequently result in scanning of all the time series on the given time range. For example, if extra_filters={datacenter="some_dc"} . - Users expect these APIs shouldn't hit the -search.maxUniqueTimeseries limit, which is intended for limiting resource usage at /api/v1/query and /api/v1/query_range requests. Also limit the concurrency for /api/v1/labels, /api/v1/label/.../values and /api/v1/series requests in order to limit the maximum memory usage and CPU usage for these API. This limit shouldn't affect typical use cases for these APIs: - Grafana dashboard load when dashboard labels should be loaded - Auto-suggestion list load when editing the query in Grafana or vmui Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055 --- app/vmselect/prometheus/prometheus.go | 10 ++++++++-- docs/CHANGELOG.md | 1 + lib/storage/storage.go | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 27fcf7096..1bed30bfa 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -499,7 +499,10 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s if err != nil { return err } - sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxUniqueTimeseries) + // Do not limit the number of unique time series, which could be scanned + // during the search for matching label values, since users expect this API + // must always work. + sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, -1) labelValues, err := netstorage.LabelValues(qt, labelName, sq, limit, cp.deadline) if err != nil { return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err) @@ -596,7 +599,10 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW if err != nil { return err } - sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxUniqueTimeseries) + // Do not limit the number of unique time series, which could be scanned + // during the search for matching label values, since users expect this API + // must always work. + sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, -1) labels, err := netstorage.LabelNames(qt, sq, limit, cp.deadline) if err != nil { return fmt.Errorf("cannot obtain labels: %w", err) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 05f484cb1..a5e224752 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -65,6 +65,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): include UTC in the timezone selection dropdown for standardized time referencing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5375). * FEATURE: add [VictoriaMetrics datasource](https://github.com/VictoriaMetrics/grafana-datasource) to docker compose environment. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5363). +* BUGFIX: properly return the list of matching label names and label values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the database contains more than `-search.maxUniqueTimeseries` unique [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) on the selected time range. Previously VictoriaMetrics could return `the number of matching timeseries exceeds ...` error in this case. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055). * BUGFIX: properly return errors from [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). Previously these errors were silently suppressed. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5649). * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly return full results when `-search.skipSlowReplicas` command-line flag is passed to `vmselect` and when [vmstorage groups](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmstorage-groups-at-vmselect) are in use. Previously partial results could be returned in this case. * BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped. diff --git a/lib/storage/storage.go b/lib/storage/storage.go index c6e59e165..59156badc 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1110,8 +1110,14 @@ func nextRetentionDeadlineSeconds(atSecs, retentionSecs, offsetSecs int64) int64 // // The marshaled metric names must be unmarshaled via MetricName.UnmarshalString(). func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]string, error) { + labelAPIConcurrencyCh <- struct{}{} + defer func() { + <-labelAPIConcurrencyCh + }() + qt = qt.NewChild("search for matching metric names: filters=%s, timeRange=%s", tfss, &tr) defer qt.Done() + metricIDs, err := s.idb().searchMetricIDs(qt, tfss, tr, maxMetrics, deadline) if err != nil { return nil, err @@ -1258,6 +1264,10 @@ func (s *Storage) DeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters) (int, // SearchLabelNamesWithFiltersOnTimeRange searches for label names matching the given tfss on tr. func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxLabelNames, maxMetrics int, deadline uint64, ) ([]string, error) { + labelAPIConcurrencyCh <- struct{}{} + defer func() { + <-labelAPIConcurrencyCh + }() return s.idb().SearchLabelNamesWithFiltersOnTimeRange(qt, tfss, tr, maxLabelNames, maxMetrics, deadline) } @@ -1265,9 +1275,22 @@ func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, func (s *Storage) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*TagFilters, tr TimeRange, maxLabelValues, maxMetrics int, deadline uint64, ) ([]string, error) { + labelAPIConcurrencyCh <- struct{}{} + defer func() { + <-labelAPIConcurrencyCh + }() return s.idb().SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline) } +// This channel limits the concurrency of apis, which return label names and label values. +// +// For example, /api/v1/labels or /api/v1/label//values +// +// These APIs are used infrequently (e.g. on Grafana dashboard load or when editing a query), +// so it is better limiting their concurrency in order to reduce the maximum memory usage and CPU usage +// when the database contains big number of time series. +var labelAPIConcurrencyCh = make(chan struct{}, 1) + // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr. // // This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.