lib/storage: do not check the limit for -search.maxUniqueTimeseries when performing /api/v1/labels and /api/v1/label/.../values requests

This limit has little sense for these APIs, since: - Thses APIs frequently result in scanning of all the time series on the given time range. For example, if extra_filters={datacenter="some_dc"} . - Users expect these APIs shouldn't hit the -search.maxUniqueTimeseries limit, which is intended for limiting resource usage at /api/v1/query and /api/v1/query_range requests. Also limit the concurrency for /api/v1/labels, /api/v1/label/.../values and /api/v1/series requests in order to limit the maximum memory usage and CPU usage for these API. This limit shouldn't affect typical use cases for these APIs: - Grafana dashboard load when dashboard labels should be loaded - Auto-suggestion list load when editing the query in Grafana or vmui Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055
2024-11-23 12:31:07 +01:00 · 2024-01-29 15:50:15 +01:00 · 2024-01-29 15:50:15 +01:00 · 5d66ee88bd
commit 5d66ee88bd
parent b9b18b5fd8
3 changed files with 32 additions and 2 deletions
--- a/app/vmselect/prometheus/prometheus.go
+++ b/app/vmselect/prometheus/prometheus.go
@ -499,7 +499,10 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
 	if err != nil {
 		return err
 	}
-	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxUniqueTimeseries)
+	// Do not limit the number of unique time series, which could be scanned
 	// during the search for matching label values, since users expect this API
 	// must always work.
 	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, -1)
 	labelValues, err := netstorage.LabelValues(qt, labelName, sq, limit, cp.deadline)
 	if err != nil {
 		return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err)
@ -596,7 +599,10 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
 	if err != nil {
 		return err
 	}
-	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxUniqueTimeseries)
+	// Do not limit the number of unique time series, which could be scanned
 	// during the search for matching label values, since users expect this API
 	// must always work.
 	sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, -1)
 	labels, err := netstorage.LabelNames(qt, sq, limit, cp.deadline)
 	if err != nil {
 		return fmt.Errorf("cannot obtain labels: %w", err)
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -65,6 +65,7 @@ The sandbox cluster installation is running under the constant load generated by
 * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): include UTC in the timezone selection dropdown for standardized time referencing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5375).
 * FEATURE: add [VictoriaMetrics datasource](https://github.com/VictoriaMetrics/grafana-datasource) to docker compose environment. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5363).
 * BUGFIX: properly return the list of matching label names and label values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the database contains more than `-search.maxUniqueTimeseries` unique [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) on the selected time range. Previously VictoriaMetrics could return `the number of matching timeseries exceeds ...`  error in this case. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055).
 * BUGFIX: properly return errors from [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). Previously these errors were silently suppressed. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5649).
 * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly return full results when `-search.skipSlowReplicas` command-line flag is passed to `vmselect` and when [vmstorage groups](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmstorage-groups-at-vmselect) are in use. Previously partial results could be returned in this case.
 * BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped.
--- a/lib/storage/storage.go
+++ b/lib/storage/storage.go
@ -1110,8 +1110,14 @@ func nextRetentionDeadlineSeconds(atSecs, retentionSecs, offsetSecs int64) int64
 //
 // The marshaled metric names must be unmarshaled via MetricName.UnmarshalString().
 func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]string, error) {
 	labelAPIConcurrencyCh <- struct{}{}
 	defer func() {
 		<-labelAPIConcurrencyCh
 	}()
 	qt = qt.NewChild("search for matching metric names: filters=%s, timeRange=%s", tfss, &tr)
 	defer qt.Done()
 	metricIDs, err := s.idb().searchMetricIDs(qt, tfss, tr, maxMetrics, deadline)
 	if err != nil {
 		return nil, err
@ -1258,6 +1264,10 @@ func (s *Storage) DeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters) (int,
 // SearchLabelNamesWithFiltersOnTimeRange searches for label names matching the given tfss on tr.
 func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxLabelNames, maxMetrics int, deadline uint64,
 ) ([]string, error) {
 	labelAPIConcurrencyCh <- struct{}{}
 	defer func() {
 		<-labelAPIConcurrencyCh
 	}()
 	return s.idb().SearchLabelNamesWithFiltersOnTimeRange(qt, tfss, tr, maxLabelNames, maxMetrics, deadline)
 }
@ -1265,9 +1275,22 @@ func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer,
 func (s *Storage) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*TagFilters,
 	tr TimeRange, maxLabelValues, maxMetrics int, deadline uint64,
 ) ([]string, error) {
 	labelAPIConcurrencyCh <- struct{}{}
 	defer func() {
 		<-labelAPIConcurrencyCh
 	}()
 	return s.idb().SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
 }
 // This channel limits the concurrency of apis, which return label names and label values.
 //
 // For example, /api/v1/labels or /api/v1/label/<labelName>/values
 //
 // These APIs are used infrequently (e.g. on Grafana dashboard load or when editing a query),
 // so it is better limiting their concurrency in order to reduce the maximum memory usage and CPU usage
 // when the database contains big number of time series.
 var labelAPIConcurrencyCh = make(chan struct{}, 1)
 // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr.
 //
 // This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.