lib/storage: implement per-day inverted index

Aliaksandr Valialkin 2019-11-09 23:17:42 +02:00
parent 5810ba57c2
commit ee7765b10d
6 changed files with 453 additions and 55 deletions
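In short: besides the existing global tag->metricIDs rows, the index now also stores (date, tag)->metricIDs rows under the new nsPrefixDateTagToMetricIDs namespace, so queries over short time ranges only scan the days they touch. A minimal standalone sketch of that row layout follows; it is illustrative only: encoding/binary stands in for lib/encoding, and the tag-value escaping done by marshalTagValue is skipped, so the exact on-disk bytes differ.

    // Sketch of a (date,tag)->metricIDs row, assuming big-endian uint64s as in
    // lib/encoding; marshalTagValue escaping is omitted here.
    package main

    import (
        "encoding/binary"
        "fmt"
    )

    const nsPrefixDateTagToMetricIDs = 6 // namespace byte defined by this commit

    func marshalDateTagRow(date uint64, key, value string, metricIDs []uint64) []byte {
        b := []byte{nsPrefixDateTagToMetricIDs}
        b = binary.BigEndian.AppendUint64(b, date) // date = days since the Unix epoch
        b = append(b, key...)                      // tag key ("" would mean __name__)
        b = append(b, value...)                    // tag value
        for _, metricID := range metricIDs {       // sorted metricIDs form the row tail
            b = binary.BigEndian.AppendUint64(b, metricID)
        }
        return b
    }

    func main() {
        // 18209 == 2019-11-09, the day this commit was authored.
        fmt.Printf("%x\n", marshalDateTagRow(18209, "job", "node_exporter", []uint64{42, 43}))
    }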

View File

@@ -423,6 +423,13 @@ func registerStorageMetrics() {
 		return float64(idbm().RecentHourInvertedIndexSearchHits)
 	})
+	metrics.NewGauge(`vm_date_range_search_calls_total`, func() float64 {
+		return float64(idbm().DateRangeSearchCalls)
+	})
+	metrics.NewGauge(`vm_date_range_hits_total`, func() float64 {
+		return float64(idbm().DateRangeSearchHits)
+	})
 	metrics.NewGauge(`vm_cache_entries{type="storage/tsid"}`, func() float64 {
 		return float64(m().TSIDCacheSize)
 	})

View File

@@ -42,6 +42,9 @@ const (
 	// Prefix for Date->MetricID entries.
 	nsPrefixDateToMetricID = 5
+
+	// Prefix for (Date,Tag)->MetricID entries.
+	nsPrefixDateTagToMetricIDs = 6
 )

 func shouldCacheBlock(item []byte) bool {

@@ -50,8 +53,8 @@ func shouldCacheBlock(item []byte) bool {
 	}
 	// Do not cache items starting from
 	switch item[0] {
-	case nsPrefixTagToMetricIDs:
-		// Do not cache blocks with tag->metricIDs items, since:
+	case nsPrefixTagToMetricIDs, nsPrefixDateTagToMetricIDs:
+		// Do not cache blocks with tag->metricIDs and (date,tag)->metricIDs items, since:
 		// - these blocks are scanned sequentially, so the overhead
 		//   on their unmarshaling is amortized by the sequential scan.
 		// - these blocks can occupy high amounts of RAM in cache
@@ -98,8 +101,17 @@ type indexDB struct {
 	// The number of hits for recent hour searches over inverted index.
 	recentHourInvertedIndexSearchHits uint64

+	// The number of calls for date range searches.
+	dateRangeSearchCalls uint64
+
+	// The number of hits for date range searches.
+	dateRangeSearchHits uint64
+
 	mustDrop uint64

+	// Start date fully covered by per-day inverted index.
+	startDateForPerDayInvertedIndex uint64
+
 	name string
 	tb   *mergeset.Table
@@ -184,6 +196,14 @@ func openIndexDB(path string, metricIDCache, metricNameCache *workingsetcache.Ca
 	}
 	db.setDeletedMetricIDs(dmis)

+	is = db.getIndexSearch()
+	date, err := is.getStartDateForPerDayInvertedIndex()
+	db.putIndexSearch(is)
+	if err != nil {
+		return nil, fmt.Errorf("cannot obtain start date for per-day inverted index: %s", err)
+	}
+	db.startDateForPerDayInvertedIndex = date
+
 	return db, nil
 }
@@ -214,6 +234,9 @@ type IndexDBMetrics struct {
 	RecentHourInvertedIndexSearchCalls uint64
 	RecentHourInvertedIndexSearchHits  uint64

+	DateRangeSearchCalls uint64
+	DateRangeSearchHits  uint64
+
 	IndexBlocksWithMetricIDsProcessed      uint64
 	IndexBlocksWithMetricIDsIncorrectOrder uint64
@@ -255,6 +278,9 @@ func (db *indexDB) UpdateMetrics(m *IndexDBMetrics) {
 	m.RecentHourInvertedIndexSearchCalls += atomic.LoadUint64(&db.recentHourInvertedIndexSearchCalls)
 	m.RecentHourInvertedIndexSearchHits += atomic.LoadUint64(&db.recentHourInvertedIndexSearchHits)

+	m.DateRangeSearchCalls += atomic.LoadUint64(&db.dateRangeSearchCalls)
+	m.DateRangeSearchHits += atomic.LoadUint64(&db.dateRangeSearchHits)
+
 	m.IndexBlocksWithMetricIDsProcessed = atomic.LoadUint64(&indexBlocksWithMetricIDsProcessed)
 	m.IndexBlocksWithMetricIDsIncorrectOrder = atomic.LoadUint64(&indexBlocksWithMetricIDsIncorrectOrder)
@@ -734,7 +760,7 @@ func (is *indexSearch) searchTagKeys(tks map[string]struct{}, maxTagKeys int) er
 		if !bytes.HasPrefix(item, prefix) {
 			break
 		}
-		if err := mp.Init(item); err != nil {
+		if err := mp.Init(item, nsPrefixTagToMetricIDs); err != nil {
 			return err
 		}
 		if mp.IsDeletedTag(dmis) {
@@ -802,7 +828,7 @@ func (is *indexSearch) searchTagValues(tvs map[string]struct{}, tagKey []byte, m
 		if !bytes.HasPrefix(item, prefix) {
 			break
 		}
-		if err := mp.Init(item); err != nil {
+		if err := mp.Init(item, nsPrefixTagToMetricIDs); err != nil {
 			return err
 		}
 		if mp.IsDeletedTag(dmis) {
@@ -960,6 +986,35 @@ func (db *indexDB) updateDeletedMetricIDs(metricIDs *uint64set.Set) {
 	db.deletedMetricIDsUpdateLock.Unlock()
 }

+func (is *indexSearch) getStartDateForPerDayInvertedIndex() (uint64, error) {
+	minDate := uint64(timestampFromTime(time.Now())) / msecPerDay
+	kb := &is.kb
+	ts := &is.ts
+	kb.B = append(kb.B[:0], nsPrefixDateTagToMetricIDs)
+	prefix := kb.B
+	ts.Seek(kb.B)
+	for ts.NextItem() {
+		item := ts.Item
+		if !bytes.HasPrefix(item, prefix) {
+			break
+		}
+		suffix := item[len(prefix):]
+
+		// The suffix must contain an encoded 64-bit date.
+		if len(suffix) < 8 {
+			return 0, fmt.Errorf("unexpected (date, tag)->metricIDs row len; must be at least 8 bytes; got %d bytes", len(suffix))
+		}
+		minDate = encoding.UnmarshalUint64(suffix)
+		break
+	}
+	if err := ts.Error(); err != nil {
+		return 0, err
+	}
+	// The per-day inverted index for minDate may be incomplete, so start from the next day.
+	minDate++
+	return minDate, nil
+}
+
 func (is *indexSearch) loadDeletedMetricIDs() (*uint64set.Set, error) {
 	dmis := &uint64set.Set{}
 	ts := &is.ts
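A note on the date arithmetic used throughout: a "date" is simply the millisecond timestamp divided by msecPerDay, i.e. a day number since the Unix epoch, and minDate is bumped by one above because the oldest day found may be only partially indexed. A tiny self-contained sketch, assuming msecPerDay = 24*3600*1000 as elsewhere in lib/storage:

    package main

    import (
        "fmt"
        "time"
    )

    const msecPerDay = 24 * 3600 * 1000 // matches the constant used by lib/storage

    // dateFromTimestamp converts a millisecond timestamp into the day bucket
    // stored in per-day index rows.
    func dateFromTimestamp(tsMsec int64) uint64 {
        return uint64(tsMsec) / msecPerDay
    }

    func main() {
        ts := time.Date(2019, 11, 9, 23, 17, 42, 0, time.UTC).UnixMilli()
        fmt.Println(dateFromTimestamp(ts)) // 18209; any time on 2019-11-09 UTC maps here
    }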
@@ -1614,7 +1669,16 @@ func (is *indexSearch) updateMetricIDsForTagFilters(metricIDs *uint64set.Set, tf
 		// Fast path: found metricIDs in the inmemoryInvertedIndex for the last hour.
 		return nil
 	}
+	ok, err := is.tryUpdatingMetricIDsForDateRange(metricIDs, tfs, tr, maxMetrics)
+	if err != nil {
+		return err
+	}
+	if ok {
+		// Fast path: found metricIDs by date range.
+		return nil
+	}

+	// Slow path - try searching over the whole inverted index.
 	minTf, minMetricIDs, err := is.getTagFilterWithMinMetricIDsCountOptimized(tfs, tr, maxMetrics)
 	if err != nil {
 		return err
@@ -1922,7 +1986,8 @@ var errMissingMetricIDsForDate = errors.New("missing metricIDs for date")

 func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int) (*uint64set.Set, error) {
 	atomic.AddUint64(&is.db.recentHourMetricIDsSearchCalls, 1)
-	if metricIDs, ok := is.getMetricIDsForRecentHours(tr, maxMetrics); ok {
+	metricIDs, ok := is.getMetricIDsForRecentHours(tr, maxMetrics)
+	if ok {
 		// Fast path: tr covers the current and / or the previous hour.
 		// Return the full list of metric ids for this time range.
 		atomic.AddUint64(&is.db.recentHourMetricIDsSearchHits, 1)
@@ -1937,17 +2002,160 @@ func (is *indexSearch) getMetricIDsForTimeRange(tr TimeRange, maxMetrics int) (*uint64set.Set, error) {
 		// Too many dates must be covered. Give up.
 		return nil, errMissingMetricIDsForDate
 	}
-	metricIDs := &uint64set.Set{}
+
+	// Search for metricIDs for each day in parallel.
+	metricIDs = &uint64set.Set{}
+	var wg sync.WaitGroup
+	var errGlobal error
+	var mu sync.Mutex // protects metricIDs + errGlobal from concurrent access below.
 	for minDate <= maxDate {
-		if err := is.getMetricIDsForDate(minDate, metricIDs, maxMetrics); err != nil {
-			return nil, err
-		}
+		date := minDate
+		isLocal := is.db.getIndexSearch()
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			defer is.db.putIndexSearch(isLocal)
+			var result uint64set.Set
+			err := isLocal.getMetricIDsForDate(date, &result, maxMetrics)
+			mu.Lock()
+			if metricIDs.Len() < maxMetrics {
+				metricIDs.Union(&result)
+			}
+			if err != nil {
+				errGlobal = err
+			}
+			mu.Unlock()
+		}()
 		minDate++
 	}
+	wg.Wait()
+	if errGlobal != nil {
+		return nil, errGlobal
+	}
 	atomic.AddUint64(&is.db.dateMetricIDsSearchHits, 1)
 	return metricIDs, nil
 }
+
+func (is *indexSearch) tryUpdatingMetricIDsForDateRange(metricIDs *uint64set.Set, tfs *TagFilters, tr TimeRange, maxMetrics int) (bool, error) {
+	atomic.AddUint64(&is.db.dateRangeSearchCalls, 1)
+	minDate := uint64(tr.MinTimestamp) / msecPerDay
+	maxDate := uint64(tr.MaxTimestamp) / msecPerDay
+	if minDate < is.db.startDateForPerDayInvertedIndex || maxDate < minDate {
+		// Per-day inverted index doesn't cover the selected date range.
+		return false, nil
+	}
+	if maxDate-minDate > maxDaysForDateMetricIDs {
+		// Too many dates must be covered. Give up, since it may be slow.
+		return false, nil
+	}
+
+	// Search for metricIDs for each day in parallel.
+	var wg sync.WaitGroup
+	var errGlobal error
+	okGlobal := true
+	var mu sync.Mutex // protects metricIDs + *Global vars from concurrent access below
+	for minDate <= maxDate {
+		date := minDate
+		isLocal := is.db.getIndexSearch()
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			defer is.db.putIndexSearch(isLocal)
+			var result uint64set.Set
+			ok, err := isLocal.tryUpdatingMetricIDsForDate(date, &result, tfs, maxMetrics)
+			mu.Lock()
+			if metricIDs.Len() < maxMetrics {
+				metricIDs.Union(&result)
+			}
+			if !ok {
+				okGlobal = ok
+			}
+			if err != nil {
+				errGlobal = fmt.Errorf("cannot search for metricIDs on date %d: %s", date, err)
+			}
+			mu.Unlock()
+		}()
+		minDate++
+	}
+	wg.Wait()
+	if errGlobal != nil {
+		return false, errGlobal
+	}
+	atomic.AddUint64(&is.db.dateRangeSearchHits, 1)
+	return okGlobal, nil
+}
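Both getMetricIDsForTimeRange and tryUpdatingMetricIDsForDateRange above use the same fan-out shape: one goroutine per day, each with its own indexSearch from the pool, partial results merged under a mutex and capped at maxMetrics. A condensed standalone sketch of that shape, with searchDay standing in for the per-day lookup and a plain map standing in for uint64set.Set:

    package main

    import (
        "fmt"
        "sync"
    )

    // searchDay stands in for isLocal.getMetricIDsForDate / tryUpdatingMetricIDsForDate.
    func searchDay(date uint64) map[uint64]struct{} {
        return map[uint64]struct{}{date*10 + 1: {}, date*10 + 2: {}}
    }

    func searchDateRange(minDate, maxDate uint64, maxMetrics int) map[uint64]struct{} {
        metricIDs := make(map[uint64]struct{})
        var (
            wg sync.WaitGroup
            mu sync.Mutex // protects metricIDs, like mu in the commit
        )
        for date := minDate; date <= maxDate; date++ {
            wg.Add(1)
            go func(date uint64) {
                defer wg.Done()
                result := searchDay(date) // each goroutine works on its own day
                mu.Lock()
                if len(metricIDs) < maxMetrics { // stop merging once the cap is reached
                    for id := range result { // merge, like metricIDs.Union(&result)
                        metricIDs[id] = struct{}{}
                    }
                }
                mu.Unlock()
            }(date)
        }
        wg.Wait()
        return metricIDs
    }

    func main() {
        fmt.Println(len(searchDateRange(18205, 18209, 1000))) // 10 metricIDs over 5 days
    }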
+
+func (is *indexSearch) tryUpdatingMetricIDsForDate(date uint64, metricIDs *uint64set.Set, tfs *TagFilters, maxMetrics int) (bool, error) {
+	var tfFirst *tagFilter
+	for i := range tfs.tfs {
+		tf := &tfs.tfs[i]
+		if tf.isNegative {
+			continue
+		}
+		tfFirst = tf
+		break
+	}
+
+	var result *uint64set.Set
+	maxDateMetrics := maxMetrics * 20
+	if tfFirst == nil {
+		result = &uint64set.Set{}
+		if err := is.updateMetricIDsForDateAll(result, date, maxDateMetrics); err != nil {
+			if err == errMissingMetricIDsForDate {
+				// Zero data points were written on the given date.
+				// It is OK, since (date, metricID) entries must exist for the given date
+				// according to startDateForPerDayInvertedIndex.
+				return true, nil
+			}
+			return false, fmt.Errorf("cannot obtain all the metricIDs for date %d: %s", date, err)
+		}
+	} else {
+		m, err := is.getMetricIDsForDateTagFilter(tfFirst, date, tfs.commonPrefix, maxDateMetrics)
+		if err != nil {
+			if err == errFallbackToMetricNameMatch {
+				// The per-date search is too expensive. Probably it is better to perform global search
+				// using metric name match.
+				return false, nil
+			}
+			return false, err
+		}
+		result = m
+	}
+	if result.Len() >= maxDateMetrics {
+		return false, fmt.Errorf("more than %d time series found; narrow down the query or increase `-search.maxUniqueTimeseries`", maxDateMetrics)
+	}
+
+	for i := range tfs.tfs {
+		tf := &tfs.tfs[i]
+		if tf == tfFirst {
+			continue
+		}
+		m, err := is.getMetricIDsForDateTagFilter(tf, date, tfs.commonPrefix, maxDateMetrics)
+		if err != nil {
+			if err == errFallbackToMetricNameMatch {
+				// The per-date search is too expensive. Probably it is better to perform global search
+				// using metric name match.
+				return false, nil
+			}
+			return false, err
+		}
+		if m.Len() >= maxDateMetrics {
+			return false, fmt.Errorf("more than %d time series found for tag filter %s; narrow down the query or increase `-search.maxUniqueTimeseries`",
+				maxDateMetrics, tf)
+		}
+		if tf.isNegative {
+			result.Subtract(m)
+		} else {
+			result.Intersect(m)
+		}
+		if result.Len() == 0 {
+			return true, nil
+		}
+	}
+	metricIDs.Union(result)
+	return true, nil
+}
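The composition above reduces to plain set algebra: the first positive filter (or the full per-date set when every filter is negative) seeds the candidates, then each remaining filter intersects (positive) or subtracts (negative), bailing out early on an empty result. A sketch of those semantics with maps standing in for uint64set.Set:

    package main

    import "fmt"

    type set = map[uint64]struct{}

    // intersect keeps only IDs present in both sets, like result.Intersect(m).
    func intersect(dst, src set) {
        for id := range dst {
            if _, ok := src[id]; !ok {
                delete(dst, id)
            }
        }
    }

    // subtract removes src's IDs from dst, like result.Subtract(m).
    func subtract(dst, src set) {
        for id := range src {
            delete(dst, id)
        }
    }

    func main() {
        result := set{1: {}, 2: {}, 3: {}}   // seeded by the first positive filter
        intersect(result, set{2: {}, 3: {}}) // another positive filter
        subtract(result, set{3: {}})         // a negative filter
        fmt.Println(result)                  // map[2:{}]
    }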
 func (is *indexSearch) getMetricIDsForRecentHours(tr TimeRange, maxMetrics int) (*uint64set.Set, bool) {
 	minHour := uint64(tr.MinTimestamp) / msecPerHour
 	maxHour := uint64(tr.MaxTimestamp) / msecPerHour
@@ -2023,15 +2231,47 @@ func (db *indexDB) storeDateMetricID(date, metricID uint64) error {
 		return nil
 	}

-	// Slow path: create (date, metricID) entry.
+	// Slow path: create (date, metricID) entries.
 	items := getIndexItems()
-	items.B = marshalCommonPrefix(items.B[:0], nsPrefixDateToMetricID)
+	defer putIndexItems(items)
+
+	items.B = marshalCommonPrefix(items.B, nsPrefixDateToMetricID)
 	items.B = encoding.MarshalUint64(items.B, date)
 	items.B = encoding.MarshalUint64(items.B, metricID)
 	items.Next()
-	err = db.tb.AddItems(items.Items)
-	putIndexItems(items)
-	return err
+
+	// Create per-day inverted index entries for metricID.
+	kb := kbPool.Get()
+	defer kbPool.Put(kb)
+	mn := GetMetricName()
+	defer PutMetricName(mn)
+	kb.B, err = db.searchMetricName(kb.B[:0], metricID)
+	if err != nil {
+		return fmt.Errorf("cannot find metricName by metricID %d: %s", metricID, err)
+	}
+	if err = mn.Unmarshal(kb.B); err != nil {
+		return fmt.Errorf("cannot unmarshal metricName %q obtained by metricID %d: %s", kb.B, metricID, err)
+	}
+	kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
+	kb.B = encoding.MarshalUint64(kb.B, date)
+	items.B = append(items.B, kb.B...)
+	items.B = marshalTagValue(items.B, nil)
+	items.B = marshalTagValue(items.B, mn.MetricGroup)
+	items.B = encoding.MarshalUint64(items.B, metricID)
+	items.Next()
+	for i := range mn.Tags {
+		tag := &mn.Tags[i]
+		items.B = append(items.B, kb.B...)
+		items.B = tag.Marshal(items.B)
+		items.B = encoding.MarshalUint64(items.B, metricID)
+		items.Next()
+	}
+	if err = db.tb.AddItems(items.Items); err != nil {
+		return fmt.Errorf("cannot add per-day entries for metricID %d: %s", metricID, err)
+	}
+	return nil
 }

 func (is *indexSearch) hasDateMetricID(date, metricID uint64) (bool, error) {
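Concretely, for one (date, metricID) pair storeDateMetricID now emits a (date, __name__=MetricGroup)->metricID row (the nil tag key marshaled by marshalTagValue(items.B, nil) denotes the metric name) plus one (date, tag)->metricID row per tag. A sketch with a hypothetical metric http_requests_total{job="node",instance="host:9100"}:

    package main

    import "fmt"

    func main() {
        date, metricID := uint64(18209), uint64(42)
        // Rows emitted for this metricID on this date; key "" denotes __name__.
        rows := [][2]string{
            {"", "http_requests_total"}, // from mn.MetricGroup
            {"job", "node"},             // from mn.Tags
            {"instance", "host:9100"},
        }
        for _, kv := range rows {
            fmt.Printf("(ns=6, date=%d, %q=%q) -> %d\n", date, kv[0], kv[1], metricID)
        }
    }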
@@ -2052,6 +2292,23 @@ func (is *indexSearch) hasDateMetricID(date, metricID uint64) (bool, error) {
 	return true, nil
 }

+func (is *indexSearch) getMetricIDsForDateTagFilter(tf *tagFilter, date uint64, commonPrefix []byte, maxMetrics int) (*uint64set.Set, error) {
+	// Augment the tag filter prefix for per-date search instead of global search.
+	if !bytes.HasPrefix(tf.prefix, commonPrefix) {
+		logger.Panicf("BUG: unexpected tf.prefix %q; must start with commonPrefix %q", tf.prefix, commonPrefix)
+	}
+	kb := kbPool.Get()
+	defer kbPool.Put(kb)
+	kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
+	kb.B = encoding.MarshalUint64(kb.B, date)
+	kb.B = append(kb.B, tf.prefix[len(commonPrefix):]...)
+	tfNew := *tf
+	tfNew.isNegative = false // isNegative for the original tf is handled by the caller.
+	tfNew.prefix = kb.B
+	return is.getMetricIDsForTagFilter(&tfNew, maxMetrics)
+}
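The rewrite above turns a global filter prefix (nsPrefixTagToMetricIDs plus tag bytes) into a per-day one by splicing the new namespace byte and the date in front of the tag bytes, so the untouched getMetricIDsForTagFilter machinery works per day. A standalone sketch; the value of nsPrefixTagToMetricIDs below is illustrative (only 5 and 6 are confirmed by this diff), and encoding/binary stands in for lib/encoding:

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    const (
        nsPrefixTagToMetricIDs     = 1 // illustrative value; see lib/storage for the real one
        nsPrefixDateTagToMetricIDs = 6 // defined by this commit
        commonPrefixLen            = 1 // a common prefix is just the namespace byte
    )

    // perDayPrefix mirrors the prefix surgery in getMetricIDsForDateTagFilter:
    // replace the global namespace byte with (nsPrefixDateTagToMetricIDs, date)
    // and keep the tag bytes that follow the common prefix.
    func perDayPrefix(globalPrefix []byte, date uint64) []byte {
        b := []byte{nsPrefixDateTagToMetricIDs}
        b = binary.BigEndian.AppendUint64(b, date)
        return append(b, globalPrefix[commonPrefixLen:]...)
    }

    func main() {
        global := append([]byte{nsPrefixTagToMetricIDs}, "job=node"...)
        fmt.Printf("%x\n", perDayPrefix(global, 18209))
    }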
+
 func (is *indexSearch) getMetricIDsForDate(date uint64, metricIDs *uint64set.Set, maxMetrics int) error {
 	ts := &is.ts
 	kb := &is.kb
@@ -2105,15 +2362,28 @@ func (is *indexSearch) containsTimeRange(tr TimeRange) (bool, error) {
 	return true, nil
 }

-func (is *indexSearch) updateMetricIDsAll(metricIDs *uint64set.Set, maxMetrics int) error {
-	ts := &is.ts
-	kb := &is.kb
-	mp := &is.mp
-
-	// Extract all the mtricIDs from (__name__=value) metricIDs.
+func (is *indexSearch) updateMetricIDsForDateAll(metricIDs *uint64set.Set, date uint64, maxMetrics int) error {
+	// Extract all the metricIDs from (date, __name__=value)->metricIDs entries.
+	kb := kbPool.Get()
+	defer kbPool.Put(kb)
+	kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs)
+	kb.B = encoding.MarshalUint64(kb.B, date)
+	kb.B = marshalTagValue(kb.B, nil)
+	return is.updateMetricIDsForPrefix(kb.B, metricIDs, maxMetrics)
+}
+
+func (is *indexSearch) updateMetricIDsAll(metricIDs *uint64set.Set, maxMetrics int) error {
+	kb := kbPool.Get()
+	defer kbPool.Put(kb)
+	// Extract all the metricIDs from (__name__=value)->metricIDs entries.
 	kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixTagToMetricIDs)
 	kb.B = marshalTagValue(kb.B, nil)
-	prefix := kb.B
+	return is.updateMetricIDsForPrefix(kb.B, metricIDs, maxMetrics)
+}
+
+func (is *indexSearch) updateMetricIDsForPrefix(prefix []byte, metricIDs *uint64set.Set, maxMetrics int) error {
+	ts := &is.ts
+	mp := &is.mp
 	ts.Seek(prefix)
 	for ts.NextItem() {
 		item := ts.Item
@@ -2248,6 +2518,13 @@ func unmarshalCommonPrefix(src []byte) ([]byte, byte, error) {
 const commonPrefixLen = 1

 type tagToMetricIDsRowParser struct {
+	// NSPrefix contains the first byte parsed from the row after Init call.
+	// This is either nsPrefixTagToMetricIDs or nsPrefixDateTagToMetricIDs.
+	NSPrefix byte
+
+	// Date contains parsed date for nsPrefixDateTagToMetricIDs rows after Init call
+	Date uint64
+
 	// MetricIDs contains parsed MetricIDs after ParseMetricIDs call
 	MetricIDs []uint64

@@ -2259,6 +2536,8 @@ type tagToMetricIDsRowParser struct {
 }

 func (mp *tagToMetricIDsRowParser) Reset() {
+	mp.NSPrefix = 0
+	mp.Date = 0
 	mp.MetricIDs = mp.MetricIDs[:0]
 	mp.Tag.Reset()
 	mp.tail = nil
@@ -2267,14 +2546,23 @@ func (mp *tagToMetricIDsRowParser) Reset() {
 // Init initializes mp from b, which should contain encoded tag->metricIDs row.
 //
 // b cannot be re-used until Reset call.
-func (mp *tagToMetricIDsRowParser) Init(b []byte) error {
-	tail, prefix, err := unmarshalCommonPrefix(b)
+func (mp *tagToMetricIDsRowParser) Init(b []byte, nsPrefixExpected byte) error {
+	tail, nsPrefix, err := unmarshalCommonPrefix(b)
 	if err != nil {
 		return fmt.Errorf("invalid tag->metricIDs row %q: %s", b, err)
 	}
-	if prefix != nsPrefixTagToMetricIDs {
-		return fmt.Errorf("invalid prefix for tag->metricIDs row %q; got %d; want %d", b, prefix, nsPrefixTagToMetricIDs)
+	if nsPrefix != nsPrefixExpected {
+		return fmt.Errorf("invalid prefix for tag->metricIDs row %q; got %d; want %d", b, nsPrefix, nsPrefixExpected)
 	}
+	if nsPrefix == nsPrefixDateTagToMetricIDs {
+		// Unmarshal the date.
+		if len(tail) < 8 {
+			return fmt.Errorf("cannot unmarshal date from (date, tag)->metricIDs row %q from %d bytes; want at least 8 bytes", b, len(tail))
+		}
+		mp.Date = encoding.UnmarshalUint64(tail)
+		tail = tail[8:]
+	}
+	mp.NSPrefix = nsPrefix
 	tail, err = mp.Tag.Unmarshal(tail)
 	if err != nil {
 		return fmt.Errorf("cannot unmarshal tag from tag->metricIDs row %q: %s", b, err)
@@ -2282,6 +2570,16 @@ func (mp *tagToMetricIDsRowParser) Init(b []byte) error {
 	return mp.InitOnlyTail(b, tail)
 }

+// MarshalPrefix marshals row prefix without tail to dst.
+func (mp *tagToMetricIDsRowParser) MarshalPrefix(dst []byte) []byte {
+	dst = marshalCommonPrefix(dst, mp.NSPrefix)
+	if mp.NSPrefix == nsPrefixDateTagToMetricIDs {
+		dst = encoding.MarshalUint64(dst, mp.Date)
+	}
+	dst = mp.Tag.Marshal(dst)
+	return dst
+}
+
 // InitOnlyTail initializes mp.tail from tail.
 //
 // b must contain tag->metricIDs row.
@@ -2301,7 +2599,10 @@ func (mp *tagToMetricIDsRowParser) InitOnlyTail(b, tail []byte) error {
 //
 // Prefix contains (tag)
 func (mp *tagToMetricIDsRowParser) EqualPrefix(x *tagToMetricIDsRowParser) bool {
-	return mp.Tag.Equal(&x.Tag)
+	if !mp.Tag.Equal(&x.Tag) {
+		return false
+	}
+	return mp.Date == x.Date && mp.NSPrefix == x.NSPrefix
 }

 // FirstAndLastMetricIDs returns the first and the last metricIDs in the mp.tail.
@@ -2364,22 +2665,28 @@ func (mp *tagToMetricIDsRowParser) IsDeletedTag(dmis *uint64set.Set) bool {
 }

 func mergeTagToMetricIDsRows(data []byte, items [][]byte) ([]byte, [][]byte) {
-	// Perform quick checks whether items contain tag->metricIDs rows
+	data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixTagToMetricIDs)
+	data, items = mergeTagToMetricIDsRowsInternal(data, items, nsPrefixDateTagToMetricIDs)
+	return data, items
+}
+
+func mergeTagToMetricIDsRowsInternal(data []byte, items [][]byte, nsPrefix byte) ([]byte, [][]byte) {
+	// Perform quick checks whether items contain rows starting from nsPrefix
 	// based on the fact that items are sorted.
 	if len(items) <= 2 {
 		// The first and the last row must remain unchanged.
 		return data, items
 	}
 	firstItem := items[0]
-	if len(firstItem) > 0 && firstItem[0] > nsPrefixTagToMetricIDs {
+	if len(firstItem) > 0 && firstItem[0] > nsPrefix {
 		return data, items
 	}
 	lastItem := items[len(items)-1]
-	if len(lastItem) > 0 && lastItem[0] < nsPrefixTagToMetricIDs {
+	if len(lastItem) > 0 && lastItem[0] < nsPrefix {
 		return data, items
 	}
-	// items contain at least one tag->metricIDs row. Merge rows with common tag.
+	// items contain at least one row starting from nsPrefix. Merge rows with common tag.
 	tmm := getTagToMetricIDsRowsMerger()
 	tmm.dataCopy = append(tmm.dataCopy[:0], data...)
 	tmm.itemsCopy = append(tmm.itemsCopy[:0], items...)

@@ -2388,29 +2695,25 @@ func mergeTagToMetricIDsRows(data []byte, items [][]byte) ([]byte, [][]byte) {
 	dstData := data[:0]
 	dstItems := items[:0]
 	for i, item := range items {
-		if len(item) == 0 || item[0] != nsPrefixTagToMetricIDs || i == 0 || i == len(items)-1 {
-			// Write rows other than tag->metricIDs as-is.
+		if len(item) == 0 || item[0] != nsPrefix || i == 0 || i == len(items)-1 {
+			// Write rows not starting with nsPrefix as-is.
 			// Additionally write the first and the last row as-is in order to preserve
 			// sort order for adjacent blocks.
-			if len(tmm.pendingMetricIDs) > 0 {
-				dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
-			}
+			dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
 			dstData = append(dstData, item...)
 			dstItems = append(dstItems, dstData[len(dstData)-len(item):])
 			continue
 		}
-		if err := mp.Init(item); err != nil {
-			logger.Panicf("FATAL: cannot parse tag->metricIDs row during merge: %s", err)
+		if err := mp.Init(item, nsPrefix); err != nil {
+			logger.Panicf("FATAL: cannot parse row starting with nsPrefix %d during merge: %s", nsPrefix, err)
 		}
 		if mp.MetricIDsLen() >= maxMetricIDsPerRow {
-			if len(tmm.pendingMetricIDs) > 0 {
-				dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
-			}
+			dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
 			dstData = append(dstData, item...)
 			dstItems = append(dstItems, dstData[len(dstData)-len(item):])
 			continue
 		}
-		if len(tmm.pendingMetricIDs) > 0 && !mp.EqualPrefix(mpPrev) {
+		if !mp.EqualPrefix(mpPrev) {
 			dstData, dstItems = tmm.flushPendingMetricIDs(dstData, dstItems, mpPrev)
 		}
 		mp.ParseMetricIDs()
@@ -2510,7 +2813,8 @@ func (tmm *tagToMetricIDsRowsMerger) Reset() {

 func (tmm *tagToMetricIDsRowsMerger) flushPendingMetricIDs(dstData []byte, dstItems [][]byte, mp *tagToMetricIDsRowParser) ([]byte, [][]byte) {
 	if len(tmm.pendingMetricIDs) == 0 {
-		logger.Panicf("BUG: pendingMetricIDs must be non-empty")
+		// Nothing to flush
+		return dstData, dstItems
 	}
 	// Use sort.Sort instead of sort.Slice in order to reduce memory allocations.
 	sort.Sort(&tmm.pendingMetricIDs)

@@ -2518,8 +2822,7 @@ func (tmm *tagToMetricIDsRowsMerger) flushPendingMetricIDs(dstData []byte, dstIt
 	// Marshal pendingMetricIDs
 	dstDataLen := len(dstData)
-	dstData = marshalCommonPrefix(dstData, nsPrefixTagToMetricIDs)
-	dstData = mp.Tag.Marshal(dstData)
+	dstData = mp.MarshalPrefix(dstData)
 	for _, metricID := range tmm.pendingMetricIDs {
 		dstData = encoding.MarshalUint64(dstData, metricID)
 	}

View File

@@ -52,8 +52,11 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 			t.Fatalf("unexpected items;\ngot\n%X\nwant\n%X", resultItems, expectedItems)
 		}
 	}
-	x := func(key, value string, metricIDs []uint64) string {
-		dst := marshalCommonPrefix(nil, nsPrefixTagToMetricIDs)
+	xy := func(nsPrefix byte, key, value string, metricIDs []uint64) string {
+		dst := marshalCommonPrefix(nil, nsPrefix)
+		if nsPrefix == nsPrefixDateTagToMetricIDs {
+			dst = encoding.MarshalUint64(dst, 1234567901233)
+		}
 		t := &Tag{
 			Key:   []byte(key),
 			Value: []byte(value),

@@ -64,6 +67,12 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		}
 		return string(dst)
 	}
+	x := func(key, value string, metricIDs []uint64) string {
+		return xy(nsPrefixTagToMetricIDs, key, value, metricIDs)
+	}
+	y := func(key, value string, metricIDs []uint64) string {
+		return xy(nsPrefixDateTagToMetricIDs, key, value, metricIDs)
+	}

 	f(nil, nil)
 	f([]string{}, nil)
@@ -80,6 +89,19 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		x("", "", []uint64{0}),
 		x("", "", []uint64{0}),
 	})
+	f([]string{
+		x("", "", []uint64{0}),
+		x("", "", []uint64{0}),
+		x("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+	}, []string{
+		x("", "", []uint64{0}),
+		x("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+	})
 	f([]string{
 		x("", "", []uint64{0}),
 		x("", "", []uint64{0}),
@@ -102,6 +124,17 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		x("", "", []uint64{0}),
 		x("", "", []uint64{0}),
 	})
+	f([]string{
+		"\x00asdf",
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+	}, []string{
+		"\x00asdf",
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+	})
 	f([]string{
 		"\x00asdf",
 		x("", "", []uint64{0}),
@@ -114,6 +147,19 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		x("", "", []uint64{0}),
 		"xyz",
 	})
+	f([]string{
+		"\x00asdf",
+		x("", "", []uint64{0}),
+		x("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		"xyz",
+	}, []string{
+		"\x00asdf",
+		x("", "", []uint64{0}),
+		y("", "", []uint64{0}),
+		"xyz",
+	})
 	f([]string{
 		"\x00asdf",
 		x("", "", []uint64{1}),
@@ -210,10 +256,13 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		"\x00aa",
 		x("foo", "bar", metricIDs),
 		x("foo", "bar", metricIDs),
+		y("foo", "bar", metricIDs),
+		y("foo", "bar", metricIDs),
 		"x",
 	}, []string{
 		"\x00aa",
 		x("foo", "bar", metricIDs),
+		y("foo", "bar", metricIDs),
 		"x",
 	})
@@ -271,10 +320,12 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		"\x00aa",
 		x("foo", "bar", metricIDs),
 		x("foo", "bar", metricIDs),
+		y("foo", "bar", metricIDs),
 		"x",
 	}, []string{
 		"\x00aa",
 		x("foo", "bar", []uint64{123}),
+		y("foo", "bar", []uint64{123}),
 		"x",
 	})
@@ -301,11 +352,13 @@ func TestMergeTagToMetricIDsRows(t *testing.T) {
 		x("foo", "bar", metricIDs),
 		x("foo", "bar", []uint64{123, 123, 125}),
 		x("foo", "bar", []uint64{123, 124}),
+		y("foo", "bar", []uint64{123, 124}),
 	}, []string{
 		"\x00aa",
 		x("foo", "bar", metricIDs),
 		x("foo", "bar", []uint64{123, 123, 125}),
 		x("foo", "bar", []uint64{123, 124}),
+		y("foo", "bar", []uint64{123, 124}),
 	})
 	f([]string{
 		x("foo", "bar", metricIDs),

View File

@@ -71,7 +71,22 @@ func TestSearchQueryMarshalUnmarshal(t *testing.T) {
 }

 func TestSearch(t *testing.T) {
-	path := "TestSearch"
+	t.Run("global_inverted_index", func(t *testing.T) {
+		testSearchGeneric(t, false, false)
+	})
+	t.Run("perday_inverted_index", func(t *testing.T) {
+		testSearchGeneric(t, false, true)
+	})
+	t.Run("recent_hour_global_inverted_index", func(t *testing.T) {
+		testSearchGeneric(t, true, false)
+	})
+	t.Run("recent_hour_perday_inverted_index", func(t *testing.T) {
+		testSearchGeneric(t, true, true)
+	})
+}
+
+func testSearchGeneric(t *testing.T, forceRecentHourInvertedIndex, forcePerDayInvertedIndex bool) {
+	path := fmt.Sprintf("TestSearch_%v_%v", forceRecentHourInvertedIndex, forcePerDayInvertedIndex)
 	st, err := OpenStorage(path, 0)
 	if err != nil {
 		t.Fatalf("cannot open storage %q: %s", path, err)
@@ -125,6 +140,17 @@ func TestSearch(t *testing.T) {
 	if err != nil {
 		t.Fatalf("cannot re-open storage %q: %s", path, err)
 	}
+	if forcePerDayInvertedIndex {
+		idb := st.idb()
+		idb.startDateForPerDayInvertedIndex = 0
+		idb.doExtDB(func(extDB *indexDB) {
+			extDB.startDateForPerDayInvertedIndex = 0
+		})
+	}
+	if forceRecentHourInvertedIndex {
+		hm := st.currHourMetricIDs.Load().(*hourMetricIDs)
+		hm.isFull = true
+	}

 	// Run search.
 	tr := TimeRange{
@@ -133,7 +159,7 @@ func TestSearch(t *testing.T) {
 	}

 	t.Run("serial", func(t *testing.T) {
-		if err := testSearch(st, tr, mrs, accountsCount); err != nil {
+		if err := testSearchInternal(st, tr, mrs, accountsCount); err != nil {
 			t.Fatalf("unexpected error: %s", err)
 		}
 	})
@@ -142,7 +168,7 @@ func TestSearch(t *testing.T) {
 	ch := make(chan error, 3)
 	for i := 0; i < cap(ch); i++ {
 		go func() {
-			ch <- testSearch(st, tr, mrs, accountsCount)
+			ch <- testSearchInternal(st, tr, mrs, accountsCount)
 		}()
 	}
 	for i := 0; i < cap(ch); i++ {
@@ -158,7 +184,7 @@ func TestSearch(t *testing.T) {
 	})
 }

-func testSearch(st *Storage, tr TimeRange, mrs []MetricRow, accountsCount int) error {
+func testSearchInternal(st *Storage, tr TimeRange, mrs []MetricRow, accountsCount int) error {
 	var s Search
 	for i := 0; i < 10; i++ {
 		// Prepare TagFilters for search.

View File

@@ -883,7 +883,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 	if err := s.tb.AddRows(rows); err != nil {
 		lastError = fmt.Errorf("cannot add rows to table: %s", err)
 	}
-	if err := s.updateDateMetricIDCache(rows, lastError); err != nil {
+	if err := s.updatePerDateData(rows, lastError); err != nil {
 		lastError = err
 	}
 	if lastError != nil {

@@ -892,7 +892,7 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra
 	return rows, nil
 }

-func (s *Storage) updateDateMetricIDCache(rows []rawRow, lastError error) error {
+func (s *Storage) updatePerDateData(rows []rawRow, lastError error) error {
 	var date uint64
 	var hour uint64
 	var prevTimestamp int64
@@ -910,6 +910,7 @@ func (s *Storage) updateDateMetricIDCache(rows []rawRow, lastError error) error
 		// The r belongs to the current hour. Check for the current hour cache.
 		if hm.m.Has(metricID) {
 			// Fast path: the metricID is in the current hour cache.
+			// This means the metricID has already been added to the per-day inverted index.
 			continue
 		}
 		s.pendingHourEntriesLock.Lock()
@@ -920,17 +921,20 @@ func (s *Storage) updateDateMetricIDCache(rows []rawRow, lastError error) error

 		// Slower path: check global cache for (date, metricID) entry.
 		if s.dateMetricIDCache.Has(date, metricID) {
+			// The metricID has already been added to the per-day inverted index.
 			continue
 		}
-		// Slow path: store the entry in the (date, metricID) cache and in the indexDB.
+		// Slow path: store the (date, metricID) entry in the indexDB.
 		// It is OK if the (date, metricID) entry is added multiple times to db
 		// by concurrent goroutines.
-		s.dateMetricIDCache.Set(date, metricID)
 		if err := idb.storeDateMetricID(date, metricID); err != nil {
 			lastError = err
 			continue
 		}
+		// The metricID must be added to cache only after it has been successfully added to indexDB.
+		s.dateMetricIDCache.Set(date, metricID)
 	}
 	return lastError
 }
@@ -1192,6 +1196,11 @@ func openIndexDBTables(path string, metricIDCache, metricNameCache *workingsetca
 		return nil, nil, fmt.Errorf("cannot open prev indexdb table at %q: %s", prevPath, err)
 	}

+	// Adjust startDateForPerDayInvertedIndex for the previous index.
+	if prev.startDateForPerDayInvertedIndex > curr.startDateForPerDayInvertedIndex {
+		prev.startDateForPerDayInvertedIndex = curr.startDateForPerDayInvertedIndex
+	}
+
 	return curr, prev, nil
 }

View File

@@ -385,7 +385,7 @@ func TestStorageDeleteMetrics(t *testing.T) {
 	t.Run("serial", func(t *testing.T) {
 		for i := 0; i < 3; i++ {
 			if err = testStorageDeleteMetrics(s, 0); err != nil {
-				t.Fatalf("unexpected error: %s", err)
+				t.Fatalf("unexpected error on iteration %d: %s", i, err)
 			}

 			// Re-open the storage in order to check how deleted metricIDs

@@ -393,7 +393,7 @@ func TestStorageDeleteMetrics(t *testing.T) {
 			s.MustClose()
 			s, err = OpenStorage(path, 0)
 			if err != nil {
-				t.Fatalf("cannot open storage after closing: %s", err)
+				t.Fatalf("cannot open storage after closing on iteration %d: %s", i, err)
 			}
 		}
 	})