lib/storage: skip searching in extDB if it doesn't contain items for the given time range

This should improve inverted index search performance for big amount
of unique time series when the search is performed only on recent data.
This commit is contained in:
Aliaksandr Valialkin 2019-06-25 12:55:27 +03:00
parent dbc1e87bac
commit 51e2f3b48f
2 changed files with 47 additions and 5 deletions

View File

@ -1104,6 +1104,18 @@ func mergeTSIDs(a, b []TSID) []TSID {
}
func (is *indexSearch) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int) ([]TSID, error) {
accountID := tfss[0].accountID
projectID := tfss[0].projectID
// Verify whether `is` contains data for the given tr.
ok, err := is.containsTimeRange(tr, accountID, projectID)
if err != nil {
return nil, fmt.Errorf("error in containsTimeRange(%s): %s", &tr, err)
}
if !ok {
// Fast path: nothing to search.
return nil, nil
}
metricIDs, err := is.searchMetricIDs(tfss, tr, maxMetrics)
if err != nil {
return nil, err
@ -1116,8 +1128,6 @@ func (is *indexSearch) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics
// Obtain TSID values for the given metricIDs.
tsids := make([]TSID, len(metricIDs))
i := 0
accountID := tfss[0].accountID
projectID := tfss[0].projectID
for _, metricID := range metricIDs {
// Try obtaining TSIDs from db.tsidCache. This is much faster
// than scanning the mergeset if it contains a lot of metricIDs.
@ -1669,15 +1679,15 @@ func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int, ac
// Slow path: collect the metric ids for all the days covering the given tr.
atomic.AddUint64(&is.db.dateMetricIDsSearchCalls, 1)
minDate := tr.MinTimestamp / msecPerDay
maxDate := tr.MaxTimestamp / msecPerDay
minDate := uint64(tr.MinTimestamp) / msecPerDay
maxDate := uint64(tr.MaxTimestamp) / msecPerDay
if maxDate-minDate > 40 {
// Too much dates must be covered. Give up.
return nil, errMissingMetricIDsForDate
}
metricIDs = make(map[uint64]struct{}, maxMetrics)
for minDate <= maxDate {
if err := is.getMetricIDsForDate(uint64(minDate), metricIDs, maxMetrics, accountID, projectID); err != nil {
if err := is.getMetricIDsForDate(minDate, metricIDs, maxMetrics, accountID, projectID); err != nil {
return nil, err
}
minDate++
@ -1845,6 +1855,28 @@ func (is *indexSearch) getMetricIDsForDate(date uint64, metricIDs map[uint64]str
return nil
}
func (is *indexSearch) containsTimeRange(tr TimeRange, accountID, projectID uint32) (bool, error) {
ts := &is.ts
kb := &is.kb
// Verify whether the maximum date in `ts` covers tr.MinTimestamp.
minDate := uint64(tr.MinTimestamp) / msecPerDay
kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateToMetricID, accountID, projectID)
kb.B = encoding.MarshalUint64(kb.B, minDate)
ts.Seek(kb.B)
if !ts.NextItem() {
if err := ts.Error(); err != nil {
return false, fmt.Errorf("error when searching for minDate=%d, prefix %q: %s", minDate, kb.B, err)
}
return false, nil
}
if !bytes.HasPrefix(ts.Item, kb.B[:1]) {
// minDate exceeds max date from ts.
return false, nil
}
return true, nil
}
func (is *indexSearch) updateMetricIDsForCommonPrefix(metricIDs map[uint64]struct{}, commonPrefix []byte, maxMetrics int) error {
ts := &is.ts
ts.Seek(commonPrefix)

View File

@ -301,6 +301,16 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
}
func testIndexDBCheckTSIDByName(db *indexDB, mns []MetricName, tsids []TSID, isConcurrent bool) error {
// fill Date -> MetricID cache
date := uint64(timestampFromTime(time.Now())) / msecPerDay
for i := range tsids {
tsid := &tsids[i]
if err := db.storeDateMetricID(date, tsid.MetricID, tsid.AccountID, tsid.ProjectID); err != nil {
return fmt.Errorf("error in storeDateMetricID(%d, %d, %d, %d): %s", date, tsid.MetricID, tsid.AccountID, tsid.ProjectID, err)
}
}
db.tb.DebugFlush()
hasValue := func(tvs []string, v []byte) bool {
for _, tv := range tvs {
if string(v) == tv {