VictoriaMetrics/lib/logstorage/part_header.go
Aliaksandr Valialkin 7a62eefa34
lib/logstorage: dynamically adjust the number of (bloom, values) shards in a part depending on the number of non-const columns
This reduces the amount of data that must be read during queries over logs with a large number of fields (aka "wide events").
This, in turn, improves query performance when the data that needs to be scanned during the query doesn't fit the OS page cache.
2024-10-29 16:44:45 +01:00
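
The commit subject describes deriving the per-part (bloom, values) shard count from the number of non-const columns. The snippet below is a standalone, hypothetical sketch of that idea, not the VictoriaMetrics implementation: the helper name shardsForColumns, the power-of-two growth rule and the assumedMaxShardsCount cap are illustrative assumptions only.

// Standalone illustrative sketch; not part of part_header.go below.
package main

import "fmt"

// assumedMaxShardsCount stands in for the real bloomValuesMaxShardsCount constant
// defined elsewhere in lib/logstorage; the value here is only an assumption.
const assumedMaxShardsCount = 128

// shardsForColumns grows the shard count as a power of two with the number of
// non-const columns, so parts holding "wide events" spread their (bloom, values)
// data across more shards, while parts with few fields keep a single shard.
func shardsForColumns(nonConstColumns uint64) uint64 {
	shards := uint64(1)
	for shards*8 < nonConstColumns && shards < assumedMaxShardsCount {
		shards *= 2
	}
	return shards
}

func main() {
	for _, n := range []uint64{3, 30, 300, 3000} {
		fmt.Printf("non-const columns: %4d -> shards: %d\n", n, shardsForColumns(n))
	}
}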

package logstorage

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
	"strings"
	"time"

	"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
	"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)

// partHeader contains the information about a single part
type partHeader struct {
	// FormatVersion is the version of the part format
	FormatVersion uint

	// CompressedSizeBytes is the physical size of the part
	CompressedSizeBytes uint64

	// UncompressedSizeBytes is the original size of log entries stored in the part
	UncompressedSizeBytes uint64

	// RowsCount is the number of log entries in the part
	RowsCount uint64

	// BlocksCount is the number of blocks in the part
	BlocksCount uint64

	// MinTimestamp is the minimum timestamp seen in the part
	MinTimestamp int64

	// MaxTimestamp is the maximum timestamp seen in the part
	MaxTimestamp int64

	// BloomValuesShardsCount is the number of (bloom, values) shards in the part.
	BloomValuesShardsCount uint64

	// BloomValuesFieldsCount is the number of fields with (bloom, values) pairs in the given part.
	BloomValuesFieldsCount uint64
}

// reset resets ph for subsequent re-use
func (ph *partHeader) reset() {
	ph.FormatVersion = 0
	ph.CompressedSizeBytes = 0
	ph.UncompressedSizeBytes = 0
	ph.RowsCount = 0
	ph.BlocksCount = 0
	ph.MinTimestamp = 0
	ph.MaxTimestamp = 0
	ph.BloomValuesShardsCount = 0
	ph.BloomValuesFieldsCount = 0
}

// String returns string representation for ph.
func (ph *partHeader) String() string {
	return fmt.Sprintf("{FormatVersion=%d, CompressedSizeBytes=%d, UncompressedSizeBytes=%d, RowsCount=%d, BlocksCount=%d, "+
		"MinTimestamp=%s, MaxTimestamp=%s, BloomValuesShardsCount=%d, BloomValuesFieldsCount=%d}",
		ph.FormatVersion, ph.CompressedSizeBytes, ph.UncompressedSizeBytes, ph.RowsCount, ph.BlocksCount,
		timestampToString(ph.MinTimestamp), timestampToString(ph.MaxTimestamp), ph.BloomValuesShardsCount, ph.BloomValuesFieldsCount)
}
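
// mustReadMetadata reads ph from the metadata file stored in the part directory at partPath.
//
// It panics via logger.Panicf on a missing, unparseable or inconsistent metadata file.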
func (ph *partHeader) mustReadMetadata(partPath string) {
	ph.reset()

	metadataPath := filepath.Join(partPath, metadataFilename)
	metadata, err := os.ReadFile(metadataPath)
	if err != nil {
		logger.Panicf("FATAL: cannot read %q: %s", metadataPath, err)
	}
	if err := json.Unmarshal(metadata, ph); err != nil {
		logger.Panicf("FATAL: cannot parse %q: %s", metadataPath, err)
	}

	if ph.FormatVersion <= 1 {
		// Metadata files with FormatVersion<=1 don't persist the (bloom, values) counters,
		// so they must be zero here; for FormatVersion==1 fill them with the values implied by that format.
		if ph.BloomValuesShardsCount != 0 {
			logger.Panicf("FATAL: unexpected BloomValuesShardsCount for FormatVersion<=1; got %d; want 0", ph.BloomValuesShardsCount)
		}
		if ph.BloomValuesFieldsCount != 0 {
			logger.Panicf("FATAL: unexpected BloomValuesFieldsCount for FormatVersion<=1; got %d; want 0", ph.BloomValuesFieldsCount)
		}
		if ph.FormatVersion == 1 {
			ph.BloomValuesShardsCount = 8
			ph.BloomValuesFieldsCount = bloomValuesMaxShardsCount
		}
	}

	// Perform various checks
	if ph.FormatVersion > partFormatLatestVersion {
		logger.Panicf("FATAL: unsupported part format version; got %d; mustn't exceed %d", ph.FormatVersion, partFormatLatestVersion)
	}
	if ph.MinTimestamp > ph.MaxTimestamp {
		logger.Panicf("FATAL: MinTimestamp cannot exceed MaxTimestamp; got %d vs %d", ph.MinTimestamp, ph.MaxTimestamp)
	}
	if ph.BlocksCount > ph.RowsCount {
		logger.Panicf("FATAL: BlocksCount=%d cannot exceed RowsCount=%d", ph.BlocksCount, ph.RowsCount)
	}
}
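
// mustWriteMetadata writes ph in JSON form to the metadata file in the part directory at partPath.
//
// It panics via logger.Panicf if ph cannot be marshaled.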
func (ph *partHeader) mustWriteMetadata(partPath string) {
	metadata, err := json.Marshal(ph)
	if err != nil {
		logger.Panicf("BUG: cannot marshal partHeader: %s", err)
	}

	metadataPath := filepath.Join(partPath, metadataFilename)
	fs.MustWriteSync(metadataPath, metadata)
}
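
// timestampToString converts the given unix timestamp in nanoseconds into a compact UTC string,
// dropping the dot before the fractional seconds produced by the timestampForPathname layout.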
func timestampToString(timestamp int64) string {
	t := time.Unix(0, timestamp).UTC()
	return strings.Replace(t.Format(timestampForPathname), ".", "", 1)
}
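
// timestampForPathname is the time.Format layout used by timestampToString.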
const timestampForPathname = "20060102150405.000000000"