diff --git a/lib/storage/part_search.go b/lib/storage/part_search.go index 88e92c501b..572b45553b 100644 --- a/lib/storage/part_search.go +++ b/lib/storage/part_search.go @@ -69,7 +69,7 @@ func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange) { if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) { logger.Panicf("BUG: tsids must be sorted; got %+v", tsids) } - // take ownership of of tsids. + // take ownership of tsids. ps.tsids = tsids } ps.tr = tr @@ -120,14 +120,38 @@ func (ps *partSearch) nextTSID() bool { return true } +func (ps *partSearch) skipTSIDsSmallerThan(tsid *TSID) bool { + if !ps.BlockRef.bh.TSID.Less(tsid) { + return true + } + if !ps.nextTSID() { + return false + } + if !ps.BlockRef.bh.TSID.Less(tsid) { + // Fast path: the next TSID isn't smaller than the tsid. + return true + } + + // Slower path - binary search for the next TSID, which isn't smaller than the tsid. + tsids := ps.tsids[ps.tsidIdx:] + ps.tsidIdx += sort.Search(len(tsids), func(i int) bool { + return !tsids[i].Less(tsid) + }) + if ps.tsidIdx >= len(ps.tsids) { + ps.tsidIdx = len(ps.tsids) + ps.err = io.EOF + return false + } + ps.BlockRef.bh.TSID = ps.tsids[ps.tsidIdx] + ps.tsidIdx++ + return true +} + func (ps *partSearch) nextBHS() bool { for len(ps.metaindex) > 0 { - // Optimization: skip tsid values smaller than the minimum value - // from ps.metaindex. - for ps.BlockRef.bh.TSID.Less(&ps.metaindex[0].TSID) { - if !ps.nextTSID() { - return false - } + // Optimization: skip tsid values smaller than the minimum value from ps.metaindex. + if !ps.skipTSIDsSmallerThan(&ps.metaindex[0].TSID) { + return false } // Invariant: ps.BlockRef.bh.TSID >= ps.metaindex[0].TSID @@ -247,7 +271,7 @@ func (ps *partSearch) searchBHS() bool { if bh.TSID.MetricID != tsid.MetricID { // tsid < bh.TSID: no more blocks with the given tsid. // Proceed to the next (bigger) tsid. - if !ps.nextTSID() { + if !ps.skipTSIDsSmallerThan(&bh.TSID) { return false } continue diff --git a/lib/storage/part_search_timing_test.go b/lib/storage/part_search_timing_test.go new file mode 100644 index 0000000000..cacd73bd70 --- /dev/null +++ b/lib/storage/part_search_timing_test.go @@ -0,0 +1,61 @@ +package storage + +import ( + "fmt" + "testing" +) + +func BenchmarkPartSearch(b *testing.B) { + for _, sparseness := range []int{1, 2, 10, 100} { + b.Run(fmt.Sprintf("sparseness-%d", sparseness), func(b *testing.B) { + benchmarkPartSearchWithSparseness(b, sparseness) + }) + } +} + +func benchmarkPartSearchWithSparseness(b *testing.B, sparseness int) { + blocksCount := 100000 + rows := make([]rawRow, blocksCount) + for i := 0; i < blocksCount; i++ { + r := &rows[i] + r.PrecisionBits = defaultPrecisionBits + r.TSID.MetricID = uint64(i * sparseness) + r.Timestamp = int64(i) * 1000 + r.Value = float64(i) + } + tr := TimeRange{ + MinTimestamp: rows[0].Timestamp, + MaxTimestamp: rows[len(rows)-1].Timestamp, + } + p := newTestPart(rows) + for _, tsidsCount := range []int{100, 1000, 10000, 100000} { + b.Run(fmt.Sprintf("tsids-%d", tsidsCount), func(b *testing.B) { + tsids := make([]TSID, tsidsCount) + for i := 0; i < tsidsCount; i++ { + tsids[i].MetricID = uint64(i) + } + benchmarkPartSearch(b, p, tsids, tr, sparseness) + }) + } +} + +func benchmarkPartSearch(b *testing.B, p *part, tsids []TSID, tr TimeRange, sparseness int) { + b.ReportAllocs() + b.RunParallel(func(pb *testing.PB) { + var ps partSearch + for pb.Next() { + blocksRead := 0 + ps.Init(p, tsids, tr) + for ps.NextBlock() { + blocksRead++ + } + if err := ps.Error(); err != nil { + panic(fmt.Errorf("BUG: unexpected error: %s", err)) + } + blocksWant := len(tsids) / sparseness + if blocksRead != blocksWant { + panic(fmt.Errorf("BUG: unexpected blocks read; got %d; want %d", blocksRead, blocksWant)) + } + } + }) +}