VictoriaMetrics/lib/storage/inmemory_part.go
Aliaksandr Valialkin 7d5c64eb7a
all: add -inmemoryDataFlushInterval command-line flag for controlling the frequency of saving in-memory data to disk
The main purpose of this command-line flag is to increase the lifetime of low-end flash storage
with the limited number of write operations it can perform. Such flash storage is usually
installed on Raspberry PI or similar appliances.

For example, `-inmemoryDataFlushInterval=1h` reduces the frequency of disk write operations
to up to once per hour if the ingested one-hour worth of data fits the limit for in-memory data.

The in-memory data is searchable in the same way as the data stored on disk.
VictoriaMetrics automatically flushes the in-memory data to disk on graceful shutdown via SIGINT signal.
The in-memory data is lost on unclean shutdown (hardware power loss, OOM crash, SIGKILL).

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337
2022-12-05 15:28:09 -08:00

117 lines
3.4 KiB
Go

package storage
import (
"fmt"
"path/filepath"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
// inmemoryPart represents in-memory partition.
type inmemoryPart struct {
ph partHeader
timestampsData bytesutil.ByteBuffer
valuesData bytesutil.ByteBuffer
indexData bytesutil.ByteBuffer
metaindexData bytesutil.ByteBuffer
creationTime uint64
}
// Reset resets mp.
func (mp *inmemoryPart) Reset() {
mp.ph.Reset()
mp.timestampsData.Reset()
mp.valuesData.Reset()
mp.indexData.Reset()
mp.metaindexData.Reset()
mp.creationTime = 0
}
// StoreToDisk stores the mp to the given path on disk.
func (mp *inmemoryPart) StoreToDisk(path string) error {
if err := fs.MkdirAllIfNotExist(path); err != nil {
return fmt.Errorf("cannot create directory %q: %w", path, err)
}
timestampsPath := path + "/timestamps.bin"
if err := fs.WriteFileAndSync(timestampsPath, mp.timestampsData.B); err != nil {
return fmt.Errorf("cannot store timestamps: %w", err)
}
valuesPath := path + "/values.bin"
if err := fs.WriteFileAndSync(valuesPath, mp.valuesData.B); err != nil {
return fmt.Errorf("cannot store values: %w", err)
}
indexPath := path + "/index.bin"
if err := fs.WriteFileAndSync(indexPath, mp.indexData.B); err != nil {
return fmt.Errorf("cannot store index: %w", err)
}
metaindexPath := path + "/metaindex.bin"
if err := fs.WriteFileAndSync(metaindexPath, mp.metaindexData.B); err != nil {
return fmt.Errorf("cannot store metaindex: %w", err)
}
if err := mp.ph.writeMinDedupInterval(path); err != nil {
return fmt.Errorf("cannot store min dedup interval: %w", err)
}
// Sync parent directory in order to make sure the written files remain visible after hardware reset
parentDirPath := filepath.Dir(path)
fs.MustSyncPath(parentDirPath)
return nil
}
// InitFromRows initializes mp from the given rows.
func (mp *inmemoryPart) InitFromRows(rows []rawRow) {
if len(rows) == 0 {
logger.Panicf("BUG: Inmemory.InitFromRows must accept at least one row")
}
mp.Reset()
rrm := getRawRowsMarshaler()
rrm.marshalToInmemoryPart(mp, rows)
putRawRowsMarshaler(rrm)
mp.creationTime = fasttime.UnixTimestamp()
}
// NewPart creates new part from mp.
//
// It is safe calling NewPart multiple times.
// It is unsafe re-using mp while the returned part is in use.
func (mp *inmemoryPart) NewPart() (*part, error) {
size := mp.size()
return newPart(&mp.ph, "", size, mp.metaindexData.NewReader(), &mp.timestampsData, &mp.valuesData, &mp.indexData)
}
func (mp *inmemoryPart) size() uint64 {
return uint64(cap(mp.timestampsData.B) + cap(mp.valuesData.B) + cap(mp.indexData.B) + cap(mp.metaindexData.B))
}
func getInmemoryPart() *inmemoryPart {
select {
case mp := <-mpPool:
return mp
default:
return &inmemoryPart{}
}
}
func putInmemoryPart(mp *inmemoryPart) {
mp.Reset()
select {
case mpPool <- mp:
default:
// Drop mp in order to reduce memory usage.
}
}
// Use chan instead of sync.Pool in order to reduce memory usage on systems with big number of CPU cores,
// since sync.Pool maintains per-CPU pool of inmemoryPart objects.
//
// The inmemoryPart object size can exceed 64KB, so it is better to use chan instead of sync.Pool for reducing memory usage.
var mpPool = make(chan *inmemoryPart, cgroup.AvailableCPUs())