From 787b9cd9a09f49ace10f23e1273eb7e21a3c463d Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Fri, 20 Sep 2024 13:07:17 +0200 Subject: [PATCH] lib/storage: improve performance for indexSearch.containsTimeRange() The indexSearch.containsTimeRange() function is called for the current indexDB and the previous indexDB every time when searching for metricIDs by label filters. This function consumes a lot of additional CPU time for cases when queries with lightweight label filters are sent to VictoriaMetrics at high rate (e.g. thousands of RPS), like in the issue https://github.com/VictoriaMetrics/VictoriaMetrics/issues/7009 . Optimize indexSearch.containsTimeRange() function in the following ways: - Unconditionally return true if this function is called for the current indexDB, since there are very high chances that the current indexDB contains the data with timestamps in the requested time range. - Cache the minimum timestamp, which is missing in the indexed data for the previous indexDB. This is safe to do, since the previous indexDB is readonly. This optimization eliminates potentially slow lookup in the previous indexDB for typical use cases when the requested time range is close to the current time. --- lib/storage/index_db.go | 58 +++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 11 deletions(-) diff --git a/lib/storage/index_db.go b/lib/storage/index_db.go index 4d9ec3cd5a..d280fa65dd 100644 --- a/lib/storage/index_db.go +++ b/lib/storage/index_db.go @@ -91,6 +91,14 @@ type indexDB struct { // The db must be automatically recovered after that. missingMetricNamesForMetricID atomic.Uint64 + // minMissingTimestamp is the minimum timestamp, which is missing in the given indexDB. + // + // This field is used at containsTimeRange() function only for the previous indexDB, + // since this indexDB is readonly. + // This field cannot be used for the current indexDB, since it may receive data + // with bigger timestamps at any time. + minMissingTimestamp atomic.Int64 + // generation identifies the index generation ID // and is used for syncing items from different indexDBs generation uint64 @@ -274,6 +282,14 @@ func (db *indexDB) doExtDB(f func(extDB *indexDB)) { } } +// hasExtDB returns true if db.extDB != nil +func (db *indexDB) hasExtDB() bool { + db.extDBLock.Lock() + ok := db.extDB != nil + db.extDBLock.Unlock() + return ok +} + // SetExtDB sets external db to search. // // It decrements refCount for the previous extDB. @@ -1904,13 +1920,37 @@ func (is *indexSearch) searchMetricName(dst []byte, metricID uint64) ([]byte, bo return dst, true } -func (is *indexSearch) containsTimeRange(tr TimeRange) (bool, error) { +func (is *indexSearch) containsTimeRange(tr TimeRange) bool { + db := is.db + + if db.hasExtDB() { + // The db corresponds to the current indexDB, which is used for storing index data for newly registered time series. + // This means that it may contain data for the given tr with probability close to 100%. + return true + } + + // The db corresponds to the previous indexDB, which is readonly. + // So it is safe caching the minimum timestamp, which isn't covered by the db. + minMissingTimestamp := db.minMissingTimestamp.Load() + if minMissingTimestamp != 0 && tr.MinTimestamp >= minMissingTimestamp { + return false + } + + if is.containsTimeRangeSlow(tr) { + return true + } + + db.minMissingTimestamp.CompareAndSwap(minMissingTimestamp, tr.MinTimestamp) + return false +} + +func (is *indexSearch) containsTimeRangeSlow(tr TimeRange) bool { ts := &is.ts kb := &is.kb // Verify whether the tr.MinTimestamp is included into `ts` or is smaller than the minimum date stored in `ts`. // Do not check whether tr.MaxTimestamp is included into `ts` or is bigger than the max date stored in `ts` for performance reasons. - // This means that containsTimeRange() can return true if `tr` is located below the min date stored in `ts`. + // This means that containsTimeRangeSlow() can return true if `tr` is located below the min date stored in `ts`. // This is OK, since this case isn't encountered too much in practice. // The main practical case allows skipping searching in prev indexdb (`ts`) when `tr` // is located above the max date stored there. @@ -1921,15 +1961,15 @@ func (is *indexSearch) containsTimeRange(tr TimeRange) (bool, error) { ts.Seek(kb.B) if !ts.NextItem() { if err := ts.Error(); err != nil { - return false, fmt.Errorf("error when searching for minDate=%d, prefix %q: %w", minDate, kb.B, err) + logger.Panicf("FATAL: error when searching for minDate=%d, prefix %q: %w", minDate, kb.B, err) } - return false, nil + return false } if !bytes.HasPrefix(ts.Item, prefix) { // minDate exceeds max date from ts. - return false, nil + return false } - return true, nil + return true } func (is *indexSearch) getTSIDByMetricID(dst *TSID, metricID uint64) bool { @@ -2225,11 +2265,7 @@ func (is *indexSearch) searchMetricIDsInternal(qt *querytracer.Tracer, tfss []*T metricIDs := &uint64set.Set{} - ok, err := is.containsTimeRange(tr) - if err != nil { - return nil, err - } - if !ok { + if !is.containsTimeRange(tr) { qt.Printf("indexdb doesn't contain data for the given timeRange=%s", &tr) return metricIDs, nil }