diff --git a/app/vmselect/prometheus/prometheus.go b/app/vmselect/prometheus/prometheus.go index 5583bba04a..e19f442504 100644 --- a/app/vmselect/prometheus/prometheus.go +++ b/app/vmselect/prometheus/prometheus.go @@ -589,7 +589,10 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, at *auth.To return err } denyPartialResponse := httputils.GetDenyPartialResponse(r) - sq := storage.NewSearchQuery(at.AccountID, at.ProjectID, cp.start, cp.end, cp.filterss, *maxUniqueTimeseries) + // Do not limit the number of unique time series, which could be scanned + // during the search for matching label values, since users expect this API + // to always work. + sq := storage.NewSearchQuery(at.AccountID, at.ProjectID, cp.start, cp.end, cp.filterss, -1) labelValues, isPartial, err := netstorage.LabelValues(qt, denyPartialResponse, labelName, sq, limit, cp.deadline) if err != nil { return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err) @@ -688,7 +691,10 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, at *auth.Token, return err } denyPartialResponse := httputils.GetDenyPartialResponse(r) - sq := storage.NewSearchQuery(at.AccountID, at.ProjectID, cp.start, cp.end, cp.filterss, *maxUniqueTimeseries) + // Do not limit the number of unique time series, which could be scanned + // during the search for matching label names, since users expect this API + // to always work. 
+ sq := storage.NewSearchQuery(at.AccountID, at.ProjectID, cp.start, cp.end, cp.filterss, -1) labels, isPartial, err := netstorage.LabelNames(qt, denyPartialResponse, sq, limit, cp.deadline) if err != nil { return fmt.Errorf("cannot obtain labels: %w", err) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 05f484cb19..a5e224752c 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -65,6 +65,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): include UTC in the timezone selection dropdown for standardized time referencing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5375). * FEATURE: add [VictoriaMetrics datasource](https://github.com/VictoriaMetrics/grafana-datasource) to docker compose environment. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5363). +* BUGFIX: properly return the list of matching label names and label values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the database contains more than `-search.maxUniqueTimeseries` unique [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) on the selected time range. Previously VictoriaMetrics could return `the number of matching timeseries exceeds ...` error in this case. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055). * BUGFIX: properly return errors from [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). Previously these errors were silently suppressed. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5649). 
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly return full results when `-search.skipSlowReplicas` command-line flag is passed to `vmselect` and when [vmstorage groups](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmstorage-groups-at-vmselect) are in use. Previously partial results could be returned in this case. * BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped. diff --git a/lib/storage/storage.go b/lib/storage/storage.go index dc2790d47c..4626b852a1 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1173,8 +1173,14 @@ func nextRetentionDeadlineSeconds(atSecs, retentionSecs, offsetSecs int64) int64 // // The marshaled metric names must be unmarshaled via MetricName.UnmarshalString(). 
func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]string, error) { + labelAPIConcurrencyCh <- struct{}{} + defer func() { + <-labelAPIConcurrencyCh + }() + qt = qt.NewChild("search for matching metric names: filters=%s, timeRange=%s", tfss, &tr) defer qt.Done() + metricIDs, err := s.idb().searchMetricIDs(qt, tfss, tr, maxMetrics, deadline) if err != nil { return nil, err @@ -1326,6 +1332,10 @@ func (s *Storage) DeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters) (int, func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, accountID, projectID uint32, tfss []*TagFilters, tr TimeRange, maxLabelNames, maxMetrics int, deadline uint64, ) ([]string, error) { + labelAPIConcurrencyCh <- struct{}{} + defer func() { + <-labelAPIConcurrencyCh + }() return s.idb().SearchLabelNamesWithFiltersOnTimeRange(qt, accountID, projectID, tfss, tr, maxLabelNames, maxMetrics, deadline) } @@ -1333,9 +1343,22 @@ func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, func (s *Storage) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, accountID, projectID uint32, labelName string, tfss []*TagFilters, tr TimeRange, maxLabelValues, maxMetrics int, deadline uint64, ) ([]string, error) { + labelAPIConcurrencyCh <- struct{}{} + defer func() { + <-labelAPIConcurrencyCh + }() return s.idb().SearchLabelValuesWithFiltersOnTimeRange(qt, accountID, projectID, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline) } +// This channel limits the concurrency of APIs, which return label names and label values. +// +// For example, /api/v1/labels or /api/v1/label/.../values +// +// These APIs are used infrequently (e.g. on Grafana dashboard load or when editing a query), +// so it is better to limit their concurrency in order to reduce the maximum memory usage and CPU usage +// when the database contains a big number of time series. 
+var labelAPIConcurrencyCh = make(chan struct{}, 1) + // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr. // // This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.