2019-05-22 23:16:55 +02:00
|
|
|
package mergeset
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"path/filepath"
|
|
|
|
"sync"
|
|
|
|
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/filestream"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
|
|
|
|
)
|
|
|
|
|
|
|
|
// blockStreamReader reads blocks of items from a part (file-based,
// in-memory part or a single in-memory block) in streaming fashion.
type blockStreamReader struct {
	// Block contains the current block if Next returned true.
	Block inmemoryBlock

	// isInmemoryBlock is set to true if bsr was initialized with MustInitFromInmemoryBlock().
	isInmemoryBlock bool

	// The index of the current item in the Block, which is returned from CurrItem()
	currItemIdx int

	// path to the file-based part; empty for in-memory sources.
	path string

	// ph contains partHeader for the read part.
	ph partHeader

	// All the metaindexRows.
	// The blockStreamReader doesn't own mrs - it must be alive
	// during the read.
	mrs []metaindexRow

	// The index for the currently processed metaindexRow from mrs.
	mrIdx int

	// Currently processed blockHeaders.
	bhs []blockHeader

	// The index of the currently processed blockHeader.
	bhIdx int

	// Readers for the index, items and lens streams of the part.
	indexReader filestream.ReadCloser
	itemsReader filestream.ReadCloser
	lensReader filestream.ReadCloser

	// Contains the current blockHeader.
	bh *blockHeader

	// Contains the current storageBlock.
	sb storageBlock

	// The number of items read so far.
	itemsRead uint64

	// The number of blocks read so far.
	blocksRead uint64

	// Whether the first item in the reader was checked against ph.firstItem.
	firstItemChecked bool

	// Reusable buffers for the compressed and the decompressed index block.
	packedBuf []byte
	unpackedBuf []byte

	// The last error.
	err error
}
|
|
|
|
|
|
|
|
// reset returns bsr to the initial state, while retaining the capacity of
// reusable buffers (bhs, packedBuf, unpackedBuf), so bsr can be recycled
// via getBlockStreamReader/putBlockStreamReader.
func (bsr *blockStreamReader) reset() {
	bsr.Block.Reset()
	bsr.isInmemoryBlock = false
	bsr.currItemIdx = 0
	bsr.path = ""
	bsr.ph.Reset()
	// mrs isn't owned by bsr - just drop the reference.
	bsr.mrs = nil
	bsr.mrIdx = 0
	// Keep the underlying capacity of bhs for reuse.
	bsr.bhs = bsr.bhs[:0]
	bsr.bhIdx = 0

	bsr.indexReader = nil
	bsr.itemsReader = nil
	bsr.lensReader = nil

	bsr.bh = nil
	bsr.sb.Reset()

	bsr.itemsRead = 0
	bsr.blocksRead = 0
	bsr.firstItemChecked = false

	// Keep buffer capacities for reuse.
	bsr.packedBuf = bsr.packedBuf[:0]
	bsr.unpackedBuf = bsr.unpackedBuf[:0]

	bsr.err = nil
}
|
|
|
|
|
2019-06-24 13:05:42 +02:00
|
|
|
func (bsr *blockStreamReader) String() string {
|
|
|
|
if len(bsr.path) > 0 {
|
|
|
|
return bsr.path
|
|
|
|
}
|
|
|
|
return bsr.ph.String()
|
|
|
|
}
|
|
|
|
|
2023-04-15 00:46:09 +02:00
|
|
|
// MustInitFromInmemoryBlock initializes bsr from the given ib.
|
|
|
|
func (bsr *blockStreamReader) MustInitFromInmemoryBlock(ib *inmemoryBlock) {
|
2022-07-27 22:47:18 +02:00
|
|
|
bsr.reset()
|
|
|
|
bsr.Block.CopyFrom(ib)
|
|
|
|
bsr.Block.SortItems()
|
|
|
|
bsr.isInmemoryBlock = true
|
|
|
|
}
|
|
|
|
|
2023-04-15 00:46:09 +02:00
|
|
|
// MustInitFromInmemoryPart initializes bsr from the given mp.
|
|
|
|
func (bsr *blockStreamReader) MustInitFromInmemoryPart(mp *inmemoryPart) {
|
2019-05-22 23:16:55 +02:00
|
|
|
bsr.reset()
|
|
|
|
|
|
|
|
var err error
|
2022-03-03 14:48:22 +01:00
|
|
|
bsr.mrs, err = unmarshalMetaindexRows(bsr.mrs[:0], mp.metaindexData.NewReader())
|
2019-05-22 23:16:55 +02:00
|
|
|
if err != nil {
|
|
|
|
logger.Panicf("BUG: cannot unmarshal metaindex rows from inmemory part: %s", err)
|
|
|
|
}
|
|
|
|
|
2022-03-03 14:48:22 +01:00
|
|
|
bsr.ph.CopyFrom(&mp.ph)
|
|
|
|
bsr.indexReader = mp.indexData.NewReader()
|
|
|
|
bsr.itemsReader = mp.itemsData.NewReader()
|
|
|
|
bsr.lensReader = mp.lensData.NewReader()
|
2019-05-22 23:16:55 +02:00
|
|
|
|
|
|
|
if bsr.ph.itemsCount <= 0 {
|
|
|
|
logger.Panicf("BUG: source inmemoryPart must contain at least a single item")
|
|
|
|
}
|
|
|
|
if bsr.ph.blocksCount <= 0 {
|
|
|
|
logger.Panicf("BUG: source inmemoryPart must contain at least a single block")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-15 00:46:09 +02:00
|
|
|
// MustInitFromFilePart initializes bsr from a file-based part on the given path.
|
2019-05-22 23:16:55 +02:00
|
|
|
//
|
|
|
|
// Part files are read without OS cache pollution, since the part is usually
|
|
|
|
// deleted after the merge.
|
2023-04-15 00:46:09 +02:00
|
|
|
func (bsr *blockStreamReader) MustInitFromFilePart(path string) {
|
2019-05-22 23:16:55 +02:00
|
|
|
bsr.reset()
|
|
|
|
|
|
|
|
path = filepath.Clean(path)
|
|
|
|
|
2023-04-15 00:46:09 +02:00
|
|
|
bsr.ph.MustReadMetadata(path)
|
2019-05-22 23:16:55 +02:00
|
|
|
|
2023-03-25 21:39:38 +01:00
|
|
|
metaindexPath := filepath.Join(path, metaindexFilename)
|
2023-04-15 00:03:39 +02:00
|
|
|
metaindexFile := filestream.MustOpen(metaindexPath, true)
|
|
|
|
|
|
|
|
var err error
|
2019-05-22 23:16:55 +02:00
|
|
|
bsr.mrs, err = unmarshalMetaindexRows(bsr.mrs[:0], metaindexFile)
|
|
|
|
metaindexFile.MustClose()
|
|
|
|
if err != nil {
|
2023-04-15 00:03:39 +02:00
|
|
|
logger.Panicf("FATAL: cannot unmarshal metaindex rows from file %q: %s", metaindexPath, err)
|
2019-05-22 23:16:55 +02:00
|
|
|
}
|
|
|
|
|
2023-03-25 21:39:38 +01:00
|
|
|
indexPath := filepath.Join(path, indexFilename)
|
2023-04-15 00:03:39 +02:00
|
|
|
indexFile := filestream.MustOpen(indexPath, true)
|
2019-05-22 23:16:55 +02:00
|
|
|
|
2023-03-25 21:39:38 +01:00
|
|
|
itemsPath := filepath.Join(path, itemsFilename)
|
2023-04-15 00:03:39 +02:00
|
|
|
itemsFile := filestream.MustOpen(itemsPath, true)
|
2019-05-22 23:16:55 +02:00
|
|
|
|
2023-03-25 21:39:38 +01:00
|
|
|
lensPath := filepath.Join(path, lensFilename)
|
2023-04-15 00:03:39 +02:00
|
|
|
lensFile := filestream.MustOpen(lensPath, true)
|
2019-05-22 23:16:55 +02:00
|
|
|
|
|
|
|
bsr.path = path
|
|
|
|
bsr.indexReader = indexFile
|
|
|
|
bsr.itemsReader = itemsFile
|
|
|
|
bsr.lensReader = lensFile
|
|
|
|
}
|
|
|
|
|
|
|
|
// MustClose closes the bsr.
|
|
|
|
//
|
|
|
|
// It closes *Reader files passed to Init.
|
|
|
|
func (bsr *blockStreamReader) MustClose() {
|
2022-07-27 22:47:18 +02:00
|
|
|
if !bsr.isInmemoryBlock {
|
|
|
|
bsr.indexReader.MustClose()
|
|
|
|
bsr.itemsReader.MustClose()
|
|
|
|
bsr.lensReader.MustClose()
|
|
|
|
}
|
2019-05-22 23:16:55 +02:00
|
|
|
bsr.reset()
|
|
|
|
}
|
|
|
|
|
2022-07-27 22:04:58 +02:00
|
|
|
func (bsr *blockStreamReader) CurrItem() string {
|
|
|
|
return bsr.Block.items[bsr.currItemIdx].String(bsr.Block.data)
|
|
|
|
}
|
|
|
|
|
2019-05-22 23:16:55 +02:00
|
|
|
// Next reads the next block into bsr.Block.
//
// It returns true on success and false on error or when the stream is over;
// use Error() to distinguish the two cases.
func (bsr *blockStreamReader) Next() bool {
	if bsr.err != nil {
		return false
	}
	if bsr.isInmemoryBlock {
		// The single in-memory block is returned exactly once: err is set to
		// io.EOF so the subsequent Next() call returns false.
		bsr.err = io.EOF
		return true
	}

	if bsr.bhIdx >= len(bsr.bhs) {
		// The current index block is over. Try reading the next index block.
		if err := bsr.readNextBHS(); err != nil {
			if err == io.EOF {
				// Check the last item.
				b := &bsr.Block
				lastItem := b.items[len(b.items)-1].Bytes(b.data)
				if string(bsr.ph.lastItem) != string(lastItem) {
					err = fmt.Errorf("unexpected last item; got %X; want %X", lastItem, bsr.ph.lastItem)
				}
			} else {
				err = fmt.Errorf("cannot read the next index block: %w", err)
			}
			bsr.err = err
			return false
		}
	}

	// Advance to the next blockHeader in the current index block.
	bsr.bh = &bsr.bhs[bsr.bhIdx]
	bsr.bhIdx++

	// Read the raw items and lens data for the current block.
	bsr.sb.itemsData = bytesutil.ResizeNoCopyMayOverallocate(bsr.sb.itemsData, int(bsr.bh.itemsBlockSize))
	fs.MustReadData(bsr.itemsReader, bsr.sb.itemsData)

	bsr.sb.lensData = bytesutil.ResizeNoCopyMayOverallocate(bsr.sb.lensData, int(bsr.bh.lensBlockSize))
	fs.MustReadData(bsr.lensReader, bsr.sb.lensData)

	if err := bsr.Block.UnmarshalData(&bsr.sb, bsr.bh.firstItem, bsr.bh.commonPrefix, bsr.bh.itemsCount, bsr.bh.marshalType); err != nil {
		bsr.err = fmt.Errorf("cannot unmarshal inmemoryBlock from storageBlock with firstItem=%X, commonPrefix=%X, itemsCount=%d, marshalType=%d: %w",
			bsr.bh.firstItem, bsr.bh.commonPrefix, bsr.bh.itemsCount, bsr.bh.marshalType, err)
		return false
	}
	// Sanity-check the read counters against the part header.
	bsr.blocksRead++
	if bsr.blocksRead > bsr.ph.blocksCount {
		bsr.err = fmt.Errorf("too many blocks read: %d; must be smaller than partHeader.blocksCount %d", bsr.blocksRead, bsr.ph.blocksCount)
		return false
	}
	bsr.currItemIdx = 0
	bsr.itemsRead += uint64(len(bsr.Block.items))
	if bsr.itemsRead > bsr.ph.itemsCount {
		bsr.err = fmt.Errorf("too many items read: %d; must be smaller than partHeader.itemsCount %d", bsr.itemsRead, bsr.ph.itemsCount)
		return false
	}
	if !bsr.firstItemChecked {
		// Verify that the very first item of the part matches ph.firstItem.
		bsr.firstItemChecked = true
		b := &bsr.Block
		firstItem := b.items[0].Bytes(b.data)
		if string(bsr.ph.firstItem) != string(firstItem) {
			bsr.err = fmt.Errorf("unexpected first item; got %X; want %X", firstItem, bsr.ph.firstItem)
			return false
		}
	}
	return true
}
|
|
|
|
|
|
|
|
// readNextBHS reads the next batch of blockHeaders into bsr.bhs from the
// index block referenced by the next metaindexRow.
//
// It returns io.EOF when all the metaindex rows have been processed.
func (bsr *blockStreamReader) readNextBHS() error {
	if bsr.mrIdx >= len(bsr.mrs) {
		return io.EOF
	}

	mr := &bsr.mrs[bsr.mrIdx]
	bsr.mrIdx++

	// Read compressed index block.
	bsr.packedBuf = bytesutil.ResizeNoCopyMayOverallocate(bsr.packedBuf, int(mr.indexBlockSize))
	fs.MustReadData(bsr.indexReader, bsr.packedBuf)

	// Unpack the compressed index block.
	var err error
	bsr.unpackedBuf, err = encoding.DecompressZSTD(bsr.unpackedBuf[:0], bsr.packedBuf)
	if err != nil {
		return fmt.Errorf("cannot decompress index block: %w", err)
	}

	// Unmarshal the unpacked index block into bsr.bhs.
	// NOTE(review): judging by the NoCopy name, bhs entries presumably alias
	// unpackedBuf, so the buffer must stay untouched while bhs is in use - confirm.
	bsr.bhs, err = unmarshalBlockHeadersNoCopy(bsr.bhs[:0], bsr.unpackedBuf, int(mr.blockHeadersCount))
	if err != nil {
		return fmt.Errorf("cannot unmarshal blockHeaders in the index block #%d: %w", bsr.mrIdx, err)
	}
	bsr.bhIdx = 0
	return nil
}
|
|
|
|
|
|
|
|
func (bsr *blockStreamReader) Error() error {
|
|
|
|
if bsr.err == io.EOF {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return bsr.err
|
|
|
|
}
|
|
|
|
|
|
|
|
func getBlockStreamReader() *blockStreamReader {
|
|
|
|
v := bsrPool.Get()
|
|
|
|
if v == nil {
|
|
|
|
return &blockStreamReader{}
|
|
|
|
}
|
|
|
|
return v.(*blockStreamReader)
|
|
|
|
}
|
|
|
|
|
|
|
|
// putBlockStreamReader closes bsr and returns it to bsrPool.
//
// bsr must not be used after this call.
func putBlockStreamReader(bsr *blockStreamReader) {
	bsr.MustClose()
	bsrPool.Put(bsr)
}
|
|
|
|
|
|
|
|
// bsrPool reduces allocations of blockStreamReader objects;
// use getBlockStreamReader/putBlockStreamReader to access it.
var bsrPool sync.Pool
|