lib/storage: do not check the limit for -search.maxUniqueTimeseries when performing /api/v1/labels and /api/v1/label/.../values requests

This limit has little sense for these APIs, since:

- Thses APIs frequently result in scanning of all the time series on the given time range.
  For example, if extra_filters={datacenter="some_dc"} .

- Users expect these APIs shouldn't hit the -search.maxUniqueTimeseries limit,
  which is intended for limiting resource usage at /api/v1/query and /api/v1/query_range requests.

Also limit the concurrency for /api/v1/labels, /api/v1/label/.../values
and /api/v1/series requests in order to limit the maximum memory usage and CPU usage for these API.
This limit shouldn't affect typical use cases for these APIs:

- Grafana dashboard load when dashboard labels should be loaded
- Auto-suggestion list load when editing the query in Grafana or vmui

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055
This commit is contained in:
Aliaksandr Valialkin 2024-01-29 15:50:15 +01:00
parent b9b18b5fd8
commit 5d66ee88bd
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
3 changed files with 32 additions and 2 deletions

View File

@ -499,7 +499,10 @@ func LabelValuesHandler(qt *querytracer.Tracer, startTime time.Time, labelName s
if err != nil { if err != nil {
return err return err
} }
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxUniqueTimeseries) // Do not limit the number of unique time series, which could be scanned
// during the search for matching label values, since users expect this API
// must always work.
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, -1)
labelValues, err := netstorage.LabelValues(qt, labelName, sq, limit, cp.deadline) labelValues, err := netstorage.LabelValues(qt, labelName, sq, limit, cp.deadline)
if err != nil { if err != nil {
return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err) return fmt.Errorf("cannot obtain values for label %q: %w", labelName, err)
@ -596,7 +599,10 @@ func LabelsHandler(qt *querytracer.Tracer, startTime time.Time, w http.ResponseW
if err != nil { if err != nil {
return err return err
} }
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, *maxUniqueTimeseries) // Do not limit the number of unique time series, which could be scanned
// during the search for matching label values, since users expect this API
// must always work.
sq := storage.NewSearchQuery(cp.start, cp.end, cp.filterss, -1)
labels, err := netstorage.LabelNames(qt, sq, limit, cp.deadline) labels, err := netstorage.LabelNames(qt, sq, limit, cp.deadline)
if err != nil { if err != nil {
return fmt.Errorf("cannot obtain labels: %w", err) return fmt.Errorf("cannot obtain labels: %w", err)

View File

@ -65,6 +65,7 @@ The sandbox cluster installation is running under the constant load generated by
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): include UTC in the timezone selection dropdown for standardized time referencing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5375). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): include UTC in the timezone selection dropdown for standardized time referencing. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5375).
* FEATURE: add [VictoriaMetrics datasource](https://github.com/VictoriaMetrics/grafana-datasource) to docker compose environment. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5363). * FEATURE: add [VictoriaMetrics datasource](https://github.com/VictoriaMetrics/grafana-datasource) to docker compose environment. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5363).
* BUGFIX: properly return the list of matching label names and label values from [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the database contains more than `-search.maxUniqueTimeseries` unique [time series](https://docs.victoriametrics.com/keyConcepts.html#time-series) on the selected time range. Previously VictoriaMetrics could return `the number of matching timeseries exceeds ...` error in this case. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055).
* BUGFIX: properly return errors from [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). Previously these errors were silently suppressed. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5649). * BUGFIX: properly return errors from [export APIs](https://docs.victoriametrics.com/#how-to-export-time-series). Previously these errors were silently suppressed. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5649).
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly return full results when `-search.skipSlowReplicas` command-line flag is passed to `vmselect` and when [vmstorage groups](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmstorage-groups-at-vmselect) are in use. Previously partial results could be returned in this case. * BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): properly return full results when `-search.skipSlowReplicas` command-line flag is passed to `vmselect` and when [vmstorage groups](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#vmstorage-groups-at-vmselect) are in use. Previously partial results could be returned in this case.
* BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped. * BUGFIX: `vminsert`: properly accept samples via [OpenTelemetry data ingestion protocol](https://docs.victoriametrics.com/#sending-data-via-opentelemetry) when these samples have no [resource attributes](https://opentelemetry.io/docs/instrumentation/go/resources/). Previously such samples were silently skipped.

View File

@ -1110,8 +1110,14 @@ func nextRetentionDeadlineSeconds(atSecs, retentionSecs, offsetSecs int64) int64
// //
// The marshaled metric names must be unmarshaled via MetricName.UnmarshalString(). // The marshaled metric names must be unmarshaled via MetricName.UnmarshalString().
func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]string, error) { func (s *Storage) SearchMetricNames(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxMetrics int, deadline uint64) ([]string, error) {
labelAPIConcurrencyCh <- struct{}{}
defer func() {
<-labelAPIConcurrencyCh
}()
qt = qt.NewChild("search for matching metric names: filters=%s, timeRange=%s", tfss, &tr) qt = qt.NewChild("search for matching metric names: filters=%s, timeRange=%s", tfss, &tr)
defer qt.Done() defer qt.Done()
metricIDs, err := s.idb().searchMetricIDs(qt, tfss, tr, maxMetrics, deadline) metricIDs, err := s.idb().searchMetricIDs(qt, tfss, tr, maxMetrics, deadline)
if err != nil { if err != nil {
return nil, err return nil, err
@ -1258,6 +1264,10 @@ func (s *Storage) DeleteSeries(qt *querytracer.Tracer, tfss []*TagFilters) (int,
// SearchLabelNamesWithFiltersOnTimeRange searches for label names matching the given tfss on tr. // SearchLabelNamesWithFiltersOnTimeRange searches for label names matching the given tfss on tr.
func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxLabelNames, maxMetrics int, deadline uint64, func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer, tfss []*TagFilters, tr TimeRange, maxLabelNames, maxMetrics int, deadline uint64,
) ([]string, error) { ) ([]string, error) {
labelAPIConcurrencyCh <- struct{}{}
defer func() {
<-labelAPIConcurrencyCh
}()
return s.idb().SearchLabelNamesWithFiltersOnTimeRange(qt, tfss, tr, maxLabelNames, maxMetrics, deadline) return s.idb().SearchLabelNamesWithFiltersOnTimeRange(qt, tfss, tr, maxLabelNames, maxMetrics, deadline)
} }
@ -1265,9 +1275,22 @@ func (s *Storage) SearchLabelNamesWithFiltersOnTimeRange(qt *querytracer.Tracer,
func (s *Storage) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*TagFilters, func (s *Storage) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, labelName string, tfss []*TagFilters,
tr TimeRange, maxLabelValues, maxMetrics int, deadline uint64, tr TimeRange, maxLabelValues, maxMetrics int, deadline uint64,
) ([]string, error) { ) ([]string, error) {
labelAPIConcurrencyCh <- struct{}{}
defer func() {
<-labelAPIConcurrencyCh
}()
return s.idb().SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline) return s.idb().SearchLabelValuesWithFiltersOnTimeRange(qt, labelName, tfss, tr, maxLabelValues, maxMetrics, deadline)
} }
// This channel limits the concurrency of apis, which return label names and label values.
//
// For example, /api/v1/labels or /api/v1/label/<labelName>/values
//
// These APIs are used infrequently (e.g. on Grafana dashboard load or when editing a query),
// so it is better limiting their concurrency in order to reduce the maximum memory usage and CPU usage
// when the database contains big number of time series.
var labelAPIConcurrencyCh = make(chan struct{}, 1)
// SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr. // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr.
// //
// This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs. // This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.