From dc929e0d168825d3894fb1fd74fbb94fb37f06da Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Tue, 16 Aug 2022 13:32:30 +0300 Subject: [PATCH] lib/storage: improve performance for /api/v1/labels and /api/v1/label/.../values endpoints when `match[]` filter matches small number of time series Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978 --- docs/CHANGELOG.md | 1 + lib/storage/index_db.go | 88 ++++++++++++++++++++++++++++++++++++++--- 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index d4304e5acc..db853ed95f 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -29,6 +29,7 @@ The following tip changes can be tested by building VictoriaMetrics components f * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `toTime()` template function in the same way as Prometheus 2.38 [does](https://github.com/prometheus/prometheus/pull/10993). See [these docs](https://prometheus.io/docs/prometheus/latest/configuration/template_reference/#numbers). * BUGFIX: prevent from excess CPU usage when the storage enters [read-only mode](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#readonly-mode). +* BUGFIX: improve performance for requests to [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues) when the filter in the `match[]` query arg matches a small number of time series. The performance for this case has been reduced in [v1.78.0](https://docs.victoriametrics.com/CHANGELOG.html#v1780). See [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978) and [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1533) issues. 
## [v1.80.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.80.0)
 
diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go
index 9823a8f602..6ca4fe55f5 100644
--- a/lib/storage/index_db.go
+++ b/lib/storage/index_db.go
@@ -873,9 +873,13 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
 	if err != nil {
 		return err
 	}
-	if filter != nil && filter.Len() == 0 {
-		qt.Printf("found zero label names for filter=%s", tfss)
-		return nil
+	if filter != nil && filter.Len() <= 100e3 {
+		// It is faster to obtain label names by metricIDs from the filter
+		// instead of scanning the inverted index for the matching filters.
+		// This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
+		metricIDs := filter.AppendTo(nil)
+		qt.Printf("sort %d metricIDs", len(metricIDs))
+		return is.getLabelNamesForMetricIDs(qt, metricIDs, lns, maxLabelNames)
 	}
 	var prevLabelName []byte
 	ts := &is.ts
@@ -935,6 +939,48 @@ func (is *indexSearch) searchLabelNamesWithFiltersOnDate(qt *querytracer.Tracer,
 	return nil
 }
 
+// getLabelNamesForMetricIDs adds label names of the series with the given metricIDs to lns.
+//
+// The __name__ label is added only if metricIDs is non-empty, so a filter
+// matching zero series results in zero label names.
+// The search stops when lns contains at least maxLabelNames entries.
+func (is *indexSearch) getLabelNamesForMetricIDs(qt *querytracer.Tracer, metricIDs []uint64, lns map[string]struct{}, maxLabelNames int) error {
+	if len(metricIDs) > 0 {
+		lns["__name__"] = struct{}{}
+	}
+	var mn MetricName
+	foundLabelNames := 0
+	var buf []byte
+	for _, metricID := range metricIDs {
+		var err error
+		buf, err = is.searchMetricNameWithCache(buf[:0], metricID)
+		if err != nil {
+			if err == io.EOF {
+				// It is likely the metricID->metricName entry didn't propagate to inverted index yet.
+				// Skip this metricID for now.
+				continue
+			}
+			return fmt.Errorf("cannot find metricName by metricID %d: %w", metricID, err)
+		}
+		if err := mn.Unmarshal(buf); err != nil {
+			return fmt.Errorf("cannot unmarshal metricName %q: %w", buf, err)
+		}
+		for _, tag := range mn.Tags {
+			_, ok := lns[string(tag.Key)]
+			if !ok {
+				foundLabelNames++
+				lns[string(tag.Key)] = struct{}{}
+				if len(lns) >= maxLabelNames {
+					qt.Printf("hit the limit on the number of unique label names: %d", maxLabelNames)
+					return nil
+				}
+			}
+		}
+	}
+	qt.Printf("get %d distinct label names from %d metricIDs", foundLabelNames, len(metricIDs))
+	return nil
+}
+
 // SearchLabelValuesWithFiltersOnTimeRange returns label values for the given labelName, tfss and tr.
 func (db *indexDB) SearchLabelValuesWithFiltersOnTimeRange(qt *querytracer.Tracer, accountID, projectID uint32, labelName string, tfss []*TagFilters, tr TimeRange,
 	maxLabelValues, maxMetrics int, deadline uint64) ([]string, error) {
@@ -1030,9 +1076,13 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
 	if err != nil {
 		return err
 	}
-	if filter != nil && filter.Len() == 0 {
-		qt.Printf("found zero label values for filter=%s", tfss)
-		return nil
+	if filter != nil && filter.Len() <= 100e3 {
+		// It is faster to obtain label values by metricIDs from the filter
+		// instead of scanning the inverted index for the matching filters.
+		// This should help https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2978
+		metricIDs := filter.AppendTo(nil)
+		qt.Printf("sort %d metricIDs", len(metricIDs))
+		return is.getLabelValuesForMetricIDs(qt, lvs, labelName, metricIDs, maxLabelValues)
 	}
 	if labelName == "__name__" {
 		// __name__ label is encoded as empty string in indexdb.
@@ -1091,6 +1134,44 @@ func (is *indexSearch) searchLabelValuesWithFiltersOnDate(qt *querytracer.Tracer
 	return nil
 }
 
+// getLabelValuesForMetricIDs collects into lvs the values of labelName across the series listed in metricIDs.
+//
+// Collection stops once lvs contains at least maxLabelValues entries.
+func (is *indexSearch) getLabelValuesForMetricIDs(qt *querytracer.Tracer, lvs map[string]struct{}, labelName string, metricIDs []uint64, maxLabelValues int) error {
+	var (
+		metricName MetricName
+		nameBuf    []byte
+		added      int
+	)
+	for _, id := range metricIDs {
+		var err error
+		nameBuf, err = is.searchMetricNameWithCache(nameBuf[:0], id)
+		if err == io.EOF {
+			// The metricID->metricName entry may not have reached the inverted index yet,
+			// so skip this metricID for now.
+			continue
+		}
+		if err != nil {
+			return fmt.Errorf("cannot find metricName by metricID %d: %w", id, err)
+		}
+		if err := metricName.Unmarshal(nameBuf); err != nil {
+			return fmt.Errorf("cannot unmarshal metricName %q: %w", nameBuf, err)
+		}
+		value := metricName.GetTagValue(labelName)
+		if _, seen := lvs[string(value)]; seen {
+			continue
+		}
+		added++
+		lvs[string(value)] = struct{}{}
+		if len(lvs) >= maxLabelValues {
+			qt.Printf("hit the limit on the number of unique label values for label %q: %d", labelName, maxLabelValues)
+			return nil
+		}
+	}
+	qt.Printf("get %d distinct values for label %q from %d metricIDs", added, labelName, len(metricIDs))
+	return nil
+}
+
 // SearchTagValueSuffixes returns all the tag value suffixes for the given tagKey and tagValuePrefix on the given tr.
 //
 // This allows implementing https://graphite-api.readthedocs.io/en/latest/api.html#metrics-find or similar APIs.