VictoriaMetrics/lib/streamaggr/count_series.go
Roman Khavronenko 78121642df
lib/streamaggr: reduce number of inuse objects (#6402)
The main change is getting rid of interning of sample key. It was
discovered that for cases with many unique time series aggregated by
vmagent interned keys could grow up to hundreds of millions of objects.
This has negative impact on the following aspects:
1. It slows down garbage collection cycles, as GC has to scan all inuse
objects periodically. The higher is the number of inuse objects, the
longer it takes/the more CPU it takes.
2. It slows down the hot path of samples aggregation where each key
needs to be looked up in the map first.

The change makes code more fragile, but suppose to provide performance
optimization for heavy-loaded vmagents with stream aggregation enabled.

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
Co-authored-by: Aliaksandr Valialkin <valyala@victoriametrics.com>
2024-06-07 16:35:52 +02:00

93 lines
2.3 KiB
Go

package streamaggr
import (
"strings"
"sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/cespare/xxhash/v2"
)
// countSeriesAggrState calculates output=count_series, e.g. the number of unique series.
type countSeriesAggrState struct {
m sync.Map
}
type countSeriesStateValue struct {
mu sync.Mutex
m map[uint64]struct{}
deleted bool
}
func newCountSeriesAggrState() *countSeriesAggrState {
return &countSeriesAggrState{}
}
func (as *countSeriesAggrState) pushSamples(samples []pushSample) {
for i := range samples {
s := &samples[i]
inputKey, outputKey := getInputOutputKey(s.key)
// Count unique hashes over the inputKeys instead of unique inputKey values.
// This reduces memory usage at the cost of possible hash collisions for distinct inputKey values.
h := xxhash.Sum64(bytesutil.ToUnsafeBytes(inputKey))
again:
v, ok := as.m.Load(outputKey)
if !ok {
// The entry is missing in the map. Try creating it.
v = &countSeriesStateValue{
m: map[uint64]struct{}{
h: {},
},
}
vNew, loaded := as.m.LoadOrStore(strings.Clone(outputKey), v)
if !loaded {
// The entry has been added to the map.
continue
}
// Update the entry created by a concurrent goroutine.
v = vNew
}
sv := v.(*countSeriesStateValue)
sv.mu.Lock()
deleted := sv.deleted
if !deleted {
if _, ok := sv.m[h]; !ok {
sv.m[h] = struct{}{}
}
}
sv.mu.Unlock()
if deleted {
// The entry has been deleted by the concurrent call to flushState
// Try obtaining and updating the entry again.
goto again
}
}
}
func (as *countSeriesAggrState) flushState(ctx *flushCtx, resetState bool) {
currentTimeMsec := int64(fasttime.UnixTimestamp()) * 1000
m := &as.m
m.Range(func(k, v interface{}) bool {
if resetState {
// Atomically delete the entry from the map, so new entry is created for the next flush.
m.Delete(k)
}
sv := v.(*countSeriesStateValue)
sv.mu.Lock()
n := len(sv.m)
if resetState {
// Mark the entry as deleted, so it won't be updated anymore by concurrent pushSample() calls.
sv.deleted = true
}
sv.mu.Unlock()
key := k.(string)
ctx.appendSeries(key, "count_series", currentTimeMsec, float64(n))
return true
})
}