lib/storage: share tsids across all the partSearch instances

This should reduce memory usage when a big number of time series match the given query.
This commit is contained in:
Aliaksandr Valialkin 2019-09-23 22:34:04 +03:00
parent 4e26ad869b
commit c9063ece66
5 changed files with 22 additions and 6 deletions

View File

@ -925,7 +925,7 @@ func (is *indexSearch) loadDeletedMetricIDs() (map[uint64]struct{}, error) {
return dmis, nil
}
// searchTSIDs returns tsids matching the given tfss over the given tr.
// searchTSIDs returns sorted tsids matching the given tfss over the given tr.
func (db *indexDB) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int) ([]TSID, error) {
if len(tfss) == 0 {
return nil, nil

View File

@ -3,7 +3,9 @@ package storage
import (
"fmt"
"io"
"os"
"sort"
"strings"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
@ -49,7 +51,7 @@ type partSearch struct {
func (ps *partSearch) reset() {
ps.Block.Reset()
ps.p = nil
ps.tsids = ps.tsids[:0]
ps.tsids = nil
ps.tsidIdx = 0
ps.fetchData = true
ps.metaindex = nil
@ -64,16 +66,24 @@ func (ps *partSearch) reset() {
ps.err = nil
}
// isInTest reports whether the current process looks like a `go test` binary.
//
// The detection is based on the executable name in os.Args[0]: `go test`
// builds binaries named `<pkg>.test` (`<pkg>.test.exe` on Windows).
// The value is computed once during package initialization.
var isInTest = func() bool {
	if len(os.Args) == 0 {
		// argv may be empty when the process is started via a raw exec call;
		// do not panic during package initialization in this case.
		return false
	}
	exe := os.Args[0]
	return strings.HasSuffix(exe, ".test") || strings.HasSuffix(exe, ".test.exe")
}()
// Init initializes the ps with the given p, tsids and tr.
//
// tsids must be sorted.
// tsids cannot be modified after the Init call, since it is owned by ps.
func (ps *partSearch) Init(p *part, tsids []TSID, tr TimeRange, fetchData bool) {
ps.reset()
ps.p = p
if p.ph.MinTimestamp <= tr.MaxTimestamp && p.ph.MaxTimestamp >= tr.MinTimestamp {
if !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) {
if isInTest && !sort.SliceIsSorted(tsids, func(i, j int) bool { return tsids[i].Less(&tsids[j]) }) {
logger.Panicf("BUG: tsids must be sorted; got %+v", tsids)
}
ps.tsids = append(ps.tsids[:0], tsids...)
// take ownership of tsids.
ps.tsids = tsids
}
ps.tr = tr
ps.fetchData = fetchData

View File

@ -55,7 +55,10 @@ func (pts *partitionSearch) reset() {
// Init initializes the search in the given partition for the given tsids and tr.
//
// MustClose must be called when partition search is done.
// tsids must be sorted.
// tsids cannot be modified after the Init call, since it is owned by pts.
//
// MustClose must be called when partition search is done.
func (pts *partitionSearch) Init(pt *partition, tsids []TSID, tr TimeRange, fetchData bool) {
if pts.needClosing {
logger.Panicf("BUG: missing partitionSearch.MustClose call before the next call to Init")

View File

@ -579,7 +579,7 @@ func nextRetentionDuration(retentionMonths int) time.Duration {
return deadline.Sub(t)
}
// searchTSIDs returns TSIDs for the given tfss and the given tr.
// searchTSIDs returns sorted TSIDs for the given tfss and the given tr.
func (s *Storage) searchTSIDs(tfss []*TagFilters, tr TimeRange, maxMetrics int) ([]TSID, error) {
// Do not cache tfss -> tsids here, since the caching is performed
// on idb level.

View File

@ -54,6 +54,9 @@ func (ts *tableSearch) reset() {
// Init initializes the ts.
//
// tsids must be sorted.
// tsids cannot be modified after the Init call, since it is owned by ts.
//
// MustClose must be called when the tableSearch is done.
func (ts *tableSearch) Init(tb *table, tsids []TSID, tr TimeRange, fetchData bool) {
if ts.needClosing {