lib/mergeset: panic when too long item is passed to Table.AddItems()

Aliaksandr Valialkin 2022-12-03 23:30:31 -08:00
parent dccd70ce10
commit a13d21513e
6 changed files with 38 additions and 171 deletions

View File

@@ -138,12 +138,11 @@ func (riss *rawItemsShards) init() {
 	riss.shards = make([]rawItemsShard, rawItemsShardsPerTable)
 }
 
-func (riss *rawItemsShards) addItems(tb *Table, items [][]byte) error {
+func (riss *rawItemsShards) addItems(tb *Table, items [][]byte) {
 	n := atomic.AddUint32(&riss.shardIdx, 1)
 	shards := riss.shards
 	idx := n % uint32(len(shards))
-	shard := &shards[idx]
-	return shard.addItems(tb, items)
+	shards[idx].addItems(tb, items)
 }
 
 func (riss *rawItemsShards) Len() int {
@@ -180,8 +179,7 @@ func (ris *rawItemsShard) Len() int {
 	return n
 }
 
-func (ris *rawItemsShard) addItems(tb *Table, items [][]byte) error {
-	var err error
+func (ris *rawItemsShard) addItems(tb *Table, items [][]byte) {
 	var blocksToFlush []*inmemoryBlock
 
 	ris.mu.Lock()
@@ -193,17 +191,18 @@ func (ris *rawItemsShard) addItems(tb *Table, items [][]byte) error {
 	}
 	ib := ibs[len(ibs)-1]
 	for _, item := range items {
-		if !ib.Add(item) {
-			ib = getInmemoryBlock()
-			if !ib.Add(item) {
-				putInmemoryBlock(ib)
-				err = fmt.Errorf("cannot insert an item %q into an empty inmemoryBlock; it looks like the item is too large? len(item)=%d", item, len(item))
-				break
-			}
-			ibs = append(ibs, ib)
-			ris.ibs = ibs
-		}
+		if ib.Add(item) {
+			continue
+		}
+		ib = getInmemoryBlock()
+		if ib.Add(item) {
+			ibs = append(ibs, ib)
+			continue
+		}
+		putInmemoryBlock(ib)
+		logger.Panicf("BUG: cannot insert too big item into an empty inmemoryBlock len(item)=%d; the caller should be responsible for avoiding too big items", len(item))
 	}
+	ris.ibs = ibs
 	if len(ibs) >= maxBlocksPerShard {
 		blocksToFlush = append(blocksToFlush, ibs...)
 		for i := range ibs {
@@ -215,7 +214,6 @@ func (ris *rawItemsShard) addItems(tb *Table, items [][]byte) error {
 	ris.mu.Unlock()
 
 	tb.mergeRawItemsBlocks(blocksToFlush, false)
-	return err
 }
 
 type partWrapper struct {
@@ -457,17 +455,17 @@ func (tb *Table) UpdateMetrics(m *TableMetrics) {
 }
 
 // AddItems adds the given items to the tb.
-func (tb *Table) AddItems(items [][]byte) error {
-	if err := tb.rawItems.addItems(tb, items); err != nil {
-		return fmt.Errorf("cannot insert data into %q: %w", tb.path, err)
-	}
+//
+// The function panics when items contains an item with length exceeding maxInmemoryBlockSize.
+// It is caller's responsibility to make sure there are no too long items.
+func (tb *Table) AddItems(items [][]byte) {
+	tb.rawItems.addItems(tb, items)
 	atomic.AddUint64(&tb.itemsAdded, uint64(len(items)))
 	n := 0
 	for _, item := range items {
 		n += len(item)
 	}
 	atomic.AddUint64(&tb.itemsAddedSizeBytes, uint64(n))
-	return nil
 }
 
 // getParts appends parts snapshot to dst and returns it.
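Because AddItems no longer reports failures, a caller that may receive arbitrarily long items has to enforce the limit itself before handing them to the table. Below is a minimal sketch of such a guard; it assumes it lives in lib/mergeset (for access to Table and maxInmemoryBlockSize), and the skip-instead-of-truncate policy is illustrative rather than part of this commit:

// addItemsSkipTooLong is a hypothetical helper, not part of this commit:
// it drops items longer than maxInmemoryBlockSize so that Table.AddItems
// never reaches the new logger.Panicf path.
func addItemsSkipTooLong(tb *Table, items [][]byte) {
	kept := items[:0]
	for _, item := range items {
		if len(item) > maxInmemoryBlockSize {
			// Oversized item: skip it. A caller could truncate instead,
			// as the updated tests do with item[:maxInmemoryBlockSize].
			continue
		}
		kept = append(kept, item)
	}
	tb.AddItems(kept)
}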

View File

@@ -161,9 +161,7 @@ func newTestTable(path string, itemsCount int) (*Table, []string, error) {
 	items := make([]string, itemsCount)
 	for i := 0; i < itemsCount; i++ {
 		item := fmt.Sprintf("%d:%d", rand.Intn(1e9), i)
-		if err := tb.AddItems([][]byte{[]byte(item)}); err != nil {
-			return nil, nil, fmt.Errorf("cannot add item: %w", err)
-		}
+		tb.AddItems([][]byte{[]byte(item)})
 		items[i] = item
 	}
 	tb.DebugFlush()

View File

@@ -7,8 +7,6 @@ import (
 	"sync"
 	"sync/atomic"
 	"testing"
-
-	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
 )
 
 func TestTableOpenClose(t *testing.T) {
@@ -120,9 +118,7 @@ func testAddItemsSerial(tb *Table, itemsCount int) {
 		if len(item) > maxInmemoryBlockSize {
 			item = item[:maxInmemoryBlockSize]
 		}
-		if err := tb.AddItems([][]byte{item}); err != nil {
-			logger.Panicf("BUG: cannot add item to table: %s", err)
-		}
+		tb.AddItems([][]byte{item})
 	}
 }
@@ -146,9 +142,7 @@ func TestTableCreateSnapshotAt(t *testing.T) {
 	const itemsCount = 3e5
 	for i := 0; i < itemsCount; i++ {
 		item := []byte(fmt.Sprintf("item %d", i))
-		if err := tb.AddItems([][]byte{item}); err != nil {
-			t.Fatalf("cannot add item to table: %s", err)
-		}
+		tb.AddItems([][]byte{item})
 	}
 	tb.DebugFlush()
@@ -276,9 +270,7 @@ func testAddItemsConcurrent(tb *Table, itemsCount int) {
 			if len(item) > maxInmemoryBlockSize {
 				item = item[:maxInmemoryBlockSize]
 			}
-			if err := tb.AddItems([][]byte{item}); err != nil {
-				logger.Panicf("BUG: cannot add item to table: %s", err)
-			}
+			tb.AddItems([][]byte{item})
 		}
 	}()
 }

View File

@@ -408,12 +408,8 @@ func (is *indexSearch) maybeCreateIndexes(tsid *TSID, metricNameRaw []byte, date
 		return false, fmt.Errorf("cannot unmarshal metricNameRaw %q: %w", metricNameRaw, err)
 	}
 	mn.sortTags()
-	if err := is.createGlobalIndexes(tsid, mn); err != nil {
-		return false, fmt.Errorf("cannot create global indexes: %w", err)
-	}
-	if err := is.createPerDayIndexes(date, tsid.MetricID, mn); err != nil {
-		return false, fmt.Errorf("cannot create per-day indexes for date=%s: %w", dateToString(date), err)
-	}
+	is.createGlobalIndexes(tsid, mn)
+	is.createPerDayIndexes(date, tsid.MetricID, mn)
 	PutMetricName(mn)
 	atomic.AddUint64(&is.db.timeseriesRepopulated, 1)
 	return true, nil
@@ -619,12 +615,8 @@ func (is *indexSearch) createTSIDByName(dst *TSID, metricName, metricNameRaw []b
 	if err := is.db.s.registerSeriesCardinality(dst.MetricID, metricNameRaw); err != nil {
 		return err
 	}
-	if err := is.createGlobalIndexes(dst, mn); err != nil {
-		return fmt.Errorf("cannot create global indexes: %w", err)
-	}
-	if err := is.createPerDayIndexes(date, dst.MetricID, mn); err != nil {
-		return fmt.Errorf("cannot create per-day indexes for date=%s: %w", dateToString(date), err)
-	}
+	is.createGlobalIndexes(dst, mn)
+	is.createPerDayIndexes(date, dst.MetricID, mn)
 
 	// There is no need in invalidating tag cache, since it is invalidated
 	// on db.tb flush via invalidateTagFiltersCache flushCallback passed to OpenTable.
@@ -690,7 +682,7 @@ func generateTSID(dst *TSID, mn *MetricName) {
 	dst.MetricID = generateUniqueMetricID()
 }
 
-func (is *indexSearch) createGlobalIndexes(tsid *TSID, mn *MetricName) error {
+func (is *indexSearch) createGlobalIndexes(tsid *TSID, mn *MetricName) {
 	// The order of index items is important.
 	// It guarantees index consistency.
@@ -721,7 +713,7 @@ func (is *indexSearch) createGlobalIndexes(tsid *TSID, mn *MetricName) error {
 	ii.registerTagIndexes(prefix.B, mn, tsid.MetricID)
 	kbPool.Put(prefix)
-	return is.db.tb.AddItems(ii.Items)
+	is.db.tb.AddItems(ii.Items)
 }
 
 type indexItems struct {
@@ -1792,9 +1784,7 @@ func (db *indexDB) searchMetricNameWithCache(dst []byte, metricID uint64, accoun
 		// Mark the metricID as deleted, so it will be created again when new data point
 		// for the given time series will arrive.
-		if err := db.deleteMetricIDs([]uint64{metricID}); err != nil {
-			return dst, fmt.Errorf("cannot delete metricID for missing metricID->metricName entry; metricID=%d; error: %w", metricID, err)
-		}
+		db.deleteMetricIDs([]uint64{metricID})
 		return dst, io.EOF
 	}
@@ -1821,9 +1811,7 @@ func (db *indexDB) DeleteTSIDs(qt *querytracer.Tracer, tfss []*TagFilters) (int,
 	if err != nil {
 		return 0, err
 	}
-	if err := db.deleteMetricIDs(metricIDs); err != nil {
-		return 0, err
-	}
+	db.deleteMetricIDs(metricIDs)
 
 	// Delete TSIDs in the extDB.
 	deletedCount := len(metricIDs)
@@ -1841,10 +1829,10 @@ func (db *indexDB) DeleteTSIDs(qt *querytracer.Tracer, tfss []*TagFilters) (int,
 	return deletedCount, nil
 }
 
-func (db *indexDB) deleteMetricIDs(metricIDs []uint64) error {
+func (db *indexDB) deleteMetricIDs(metricIDs []uint64) {
 	if len(metricIDs) == 0 {
 		// Nothing to delete
-		return nil
+		return
 	}
 
 	// atomically add deleted metricIDs to an inmemory map.
@@ -1869,9 +1857,8 @@ func (db *indexDB) deleteMetricIDs(metricIDs []uint64) error {
 		items.B = encoding.MarshalUint64(items.B, metricID)
 		items.Next()
 	}
-	err := db.tb.AddItems(items.Items)
+	db.tb.AddItems(items.Items)
 	putIndexItems(items)
-	return err
 }
 
 func (db *indexDB) loadDeletedMetricIDs() (*uint64set.Set, error) {
@@ -2947,7 +2934,7 @@ const (
 	int64Max = int64((1 << 63) - 1)
 )
 
-func (is *indexSearch) createPerDayIndexes(date, metricID uint64, mn *MetricName) error {
+func (is *indexSearch) createPerDayIndexes(date, metricID uint64, mn *MetricName) {
 	ii := getIndexItems()
 	defer putIndexItems(ii)
@@ -2962,12 +2949,8 @@ func (is *indexSearch) createPerDayIndexes(date, metricID uint64, mn *MetricName
 	kb.B = marshalCommonPrefix(kb.B[:0], nsPrefixDateTagToMetricIDs, mn.AccountID, mn.ProjectID)
 	kb.B = encoding.MarshalUint64(kb.B, date)
 	ii.registerTagIndexes(kb.B, mn, metricID)
-
-	if err := is.db.tb.AddItems(ii.Items); err != nil {
-		return fmt.Errorf("cannot add per-day entires for metricID %d: %w", metricID, err)
-	}
+	is.db.tb.AddItems(ii.Items)
 	is.db.s.dateMetricIDCache.Set(date, metricID)
-	return nil
 }
 
 func (ii *indexItems) registerTagIndexes(prefix []byte, mn *MetricName, metricID uint64) {
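The index-deletion path above can treat AddItems as infallible because the items it builds are tiny: each deleted-metricID entry is a short constant prefix plus one 8-byte metricID (see the encoding.MarshalUint64 call in the hunk above), far below maxInmemoryBlockSize. A toy calculation, with the prefix length as an explicit assumption:

package main

import (
	"encoding/binary"
	"fmt"
)

// assumedPrefixLen stands in for the small namespace prefix written before
// each deleted-metricID entry; the real value is not taken from this commit.
const assumedPrefixLen = 9

func main() {
	item := make([]byte, assumedPrefixLen)
	item = binary.BigEndian.AppendUint64(item, 12345) // 8-byte metricID
	fmt.Println(len(item)) // 17 bytes; nowhere near an inmemoryBlock's capacity
}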

View File

@@ -541,22 +541,13 @@ func TestIndexDB(t *testing.T) {
 			}
 		}()
 
-		if err := testIndexDBBigMetricName(db); err != nil {
-			t.Fatalf("unexpected error: %s", err)
-		}
 		mns, tsids, tenants, err := testIndexDBGetOrCreateTSIDByName(db, accountsCount, projectsCount, metricGroups)
 		if err != nil {
 			t.Fatalf("unexpected error: %s", err)
 		}
-		if err := testIndexDBBigMetricName(db); err != nil {
-			t.Fatalf("unexpected error: %s", err)
-		}
 		if err := testIndexDBCheckTSIDByName(db, mns, tsids, tenants, false); err != nil {
 			t.Fatalf("unexpected error: %s", err)
 		}
-		if err := testIndexDBBigMetricName(db); err != nil {
-			t.Fatalf("unexpected error: %s", err)
-		}
 
 		// Re-open the db and verify it works as expected.
 		db.MustClose()
@@ -564,15 +555,9 @@ func TestIndexDB(t *testing.T) {
 		if err != nil {
 			t.Fatalf("cannot open indexDB: %s", err)
 		}
-		if err := testIndexDBBigMetricName(db); err != nil {
-			t.Fatalf("unexpected error: %s", err)
-		}
 		if err := testIndexDBCheckTSIDByName(db, mns, tsids, tenants, false); err != nil {
 			t.Fatalf("unexpected error: %s", err)
 		}
-		if err := testIndexDBBigMetricName(db); err != nil {
-			t.Fatalf("unexpected error: %s", err)
-		}
 	})
 
 	t.Run("concurrent", func(t *testing.T) {
@@ -595,27 +580,15 @@ func TestIndexDB(t *testing.T) {
 		ch := make(chan error, 3)
 		for i := 0; i < cap(ch); i++ {
 			go func() {
-				if err := testIndexDBBigMetricName(db); err != nil {
-					ch <- err
-					return
-				}
 				mns, tsid, tenants, err := testIndexDBGetOrCreateTSIDByName(db, accountsCount, projectsCount, metricGroups)
 				if err != nil {
 					ch <- err
 					return
 				}
-				if err := testIndexDBBigMetricName(db); err != nil {
-					ch <- err
-					return
-				}
 				if err := testIndexDBCheckTSIDByName(db, mns, tsid, tenants, true); err != nil {
 					ch <- err
 					return
 				}
-				if err := testIndexDBBigMetricName(db); err != nil {
-					ch <- err
-					return
-				}
 				ch <- nil
 			}()
 		}
@@ -636,74 +609,6 @@ func TestIndexDB(t *testing.T) {
 	})
 }
 
-func testIndexDBBigMetricName(db *indexDB) error {
-	var bigBytes []byte
-	for i := 0; i < 128*1000; i++ {
-		bigBytes = append(bigBytes, byte(i))
-	}
-	var mn MetricName
-	var tsid TSID
-
-	is := db.getIndexSearch(0, 0, noDeadline)
-	defer db.putIndexSearch(is)
-
-	// Try creating too big metric group
-	mn.Reset()
-	mn.MetricGroup = append(mn.MetricGroup[:0], bigBytes...)
-	mn.sortTags()
-	metricName := mn.Marshal(nil)
-	metricNameRaw := mn.marshalRaw(nil)
-	if err := is.GetOrCreateTSIDByName(&tsid, metricName, metricNameRaw, 0); err == nil {
-		return fmt.Errorf("expecting non-nil error on an attempt to insert metric with too big MetricGroup")
-	}
-
-	// Try creating too big tag key
-	mn.Reset()
-	mn.MetricGroup = append(mn.MetricGroup[:0], "xxx"...)
-	mn.Tags = []Tag{{
-		Key:   append([]byte(nil), bigBytes...),
-		Value: []byte("foobar"),
-	}}
-	mn.sortTags()
-	metricName = mn.Marshal(nil)
-	metricNameRaw = mn.marshalRaw(nil)
-	if err := is.GetOrCreateTSIDByName(&tsid, metricName, metricNameRaw, 0); err == nil {
-		return fmt.Errorf("expecting non-nil error on an attempt to insert metric with too big tag key")
-	}
-
-	// Try creating too big tag value
-	mn.Reset()
-	mn.MetricGroup = append(mn.MetricGroup[:0], "xxx"...)
-	mn.Tags = []Tag{{
-		Key:   []byte("foobar"),
-		Value: append([]byte(nil), bigBytes...),
-	}}
-	mn.sortTags()
-	metricName = mn.Marshal(nil)
-	metricNameRaw = mn.marshalRaw(nil)
-	if err := is.GetOrCreateTSIDByName(&tsid, metricName, metricNameRaw, 0); err == nil {
-		return fmt.Errorf("expecting non-nil error on an attempt to insert metric with too big tag value")
-	}
-
-	// Try creating metric name with too many tags
-	mn.Reset()
-	mn.MetricGroup = append(mn.MetricGroup[:0], "xxx"...)
-	for i := 0; i < 60000; i++ {
-		mn.Tags = append(mn.Tags, Tag{
-			Key:   []byte(fmt.Sprintf("foobar %d", i)),
-			Value: []byte(fmt.Sprintf("sdfjdslkfj %d", i)),
-		})
-	}
-	mn.sortTags()
-	metricName = mn.Marshal(nil)
-	metricNameRaw = mn.marshalRaw(nil)
-	if err := is.GetOrCreateTSIDByName(&tsid, metricName, metricNameRaw, 0); err == nil {
-		return fmt.Errorf("expecting non-nil error on an attempt to insert metric with too many tags")
-	}
-	return nil
-}
-
 func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount, metricGroups int) ([]MetricName, []TSID, []string, error) {
 	// Create tsids.
 	var mns []MetricName
@@ -756,9 +661,7 @@ func testIndexDBGetOrCreateTSIDByName(db *indexDB, accountsCount, projectsCount,
 	date := uint64(timestampFromTime(time.Now())) / msecPerDay
 	for i := range tsids {
 		tsid := &tsids[i]
-		if err := is.createPerDayIndexes(date, tsid.MetricID, &mns[i]); err != nil {
-			return nil, nil, nil, fmt.Errorf("error in createPerDayIndexes(%d, %d): %w", date, tsid.MetricID, err)
-		}
+		is.createPerDayIndexes(date, tsid.MetricID, &mns[i])
 	}
 
 	// Flush index to disk, so it becomes visible for search
@@ -1838,9 +1741,7 @@ func TestSearchTSIDWithTimeRange(t *testing.T) {
 		for i := range tsids {
 			tsid := &tsids[i]
 			metricIDs.Add(tsid.MetricID)
-			if err := is.createPerDayIndexes(date, tsid.MetricID, &mns[i]); err != nil {
-				t.Fatalf("error in createPerDayIndexes(%d, %d): %s", date, tsid.MetricID, err)
-			}
+			is.createPerDayIndexes(date, tsid.MetricID, &mns[i])
 		}
 		allMetricIDs.Union(&metricIDs)
 		perDayMetricIDs[date] = &metricIDs
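The deleted testIndexDBBigMetricName relied on oversized metric groups, tag keys, tag values, and tag counts surfacing as errors from GetOrCreateTSIDByName; after this commit such input would panic deep inside mergeset instead, so the equivalent rejection must happen before data reaches indexdb. A hedged sketch of what such an up-front check could look like (names and the limit are assumptions, not from this commit):

package main

import "fmt"

// maxLabelLen is a stand-in limit; the real enforcement point and value
// belong to the code that feeds metric names into indexdb.
const maxLabelLen = 16 * 1024

func validateLabel(kind string, b []byte) error {
	if len(b) > maxLabelLen {
		return fmt.Errorf("too long %s: %d bytes; max allowed %d", kind, len(b), maxLabelLen)
	}
	return nil
}

func main() {
	big := make([]byte, 128*1000) // the size the removed test used
	if err := validateLabel("MetricGroup", big); err != nil {
		fmt.Println(err) // rejected up front instead of panicking in AddItems
	}
}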

View File

@@ -2196,12 +2196,7 @@ func (s *Storage) updatePerDateData(rows []rawRow, mrs []*MetricRow) error {
 			continue
 		}
 		mn.sortTags()
-		if err := is.createPerDayIndexes(date, metricID, mn); err != nil {
-			if firstError == nil {
-				firstError = fmt.Errorf("error when storing per-date inverted index for (date=%s, metricID=%d): %w", dateToString(date), metricID, err)
-			}
-			continue
-		}
+		is.createPerDayIndexes(date, metricID, mn)
 	}
 	dateMetricIDsForCache = append(dateMetricIDsForCache, dateMetricID{
 		date: date,