VictoriaMetrics/lib/bytesutil/internstring.go
Aliaksandr Valialkin 6470eac7dc
lib/bytesutil: reduce the number of memory allocations per each interned string in bytesutil.InternString() from 5 to 1
This should reduce GC overhead when tens of millions of strings are interned (for example, during stream deduplication
of millions of active time series).
2024-06-10 18:06:24 +02:00

155 lines
4.1 KiB
Go

package bytesutil
import (
"flag"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
)
var (
internStringMaxLen = flag.Int("internStringMaxLen", 500, "The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. "+
"See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration")
disableCache = flag.Bool("internStringDisableCache", false, "Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. "+
"See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen")
cacheExpireDuration = flag.Duration("internStringCacheExpireDuration", 6*time.Minute, "The expiry duration for caches for interned strings. "+
"See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache")
)
type internStringMap struct {
mu sync.Mutex
mutable map[string]string
mutableReads uint64
readonly atomic.Pointer[map[string]internStringMapEntry]
nextCleanupTime atomic.Uint64
}
type internStringMapEntry struct {
deadline uint64
s string
}
func newInternStringMap() *internStringMap {
ism := &internStringMap{
mutable: make(map[string]string),
}
readonly := make(map[string]internStringMapEntry)
ism.readonly.Store(&readonly)
ism.nextCleanupTime.Store(fasttime.UnixTimestamp() + 61)
return ism
}
func (m *internStringMap) getReadonly() map[string]internStringMapEntry {
return *m.readonly.Load()
}
func (m *internStringMap) intern(s string) string {
if *disableCache || len(s) > *internStringMaxLen {
return strings.Clone(s)
}
currentTime := fasttime.UnixTimestamp()
if currentTime >= m.nextCleanupTime.Load() {
m.nextCleanupTime.Store(currentTime + 61)
m.cleanup()
}
readonly := m.getReadonly()
e, ok := readonly[s]
if ok {
// Fast path - the string has been found in readonly map
return e.s
}
// Slower path - search for the string in mutable map
m.mu.Lock()
sInterned, ok := m.mutable[s]
if !ok {
// Verify whether the s has been already registered by concurrent goroutines in m.readonly
readonly = m.getReadonly()
e, ok = readonly[s]
if !ok {
// Slowest path - register the string in mutable map.
// Make a new copy for s in order to remove references from possible bigger string s refers to.
sInterned = strings.Clone(s)
m.mutable[sInterned] = sInterned
} else {
sInterned = e.s
}
}
m.mutableReads++
if m.mutableReads > uint64(len(readonly)) {
m.migrateMutableToReadonlyLocked()
m.mutableReads = 0
}
m.mu.Unlock()
return sInterned
}
func (m *internStringMap) migrateMutableToReadonlyLocked() {
readonly := m.getReadonly()
readonlyCopy := make(map[string]internStringMapEntry, len(readonly)+len(m.mutable))
for k, e := range readonly {
readonlyCopy[k] = e
}
deadline := fasttime.UnixTimestamp() + uint64(cacheExpireDuration.Seconds()+0.5)
for k, s := range m.mutable {
readonlyCopy[k] = internStringMapEntry{
s: s,
deadline: deadline,
}
}
m.mutable = make(map[string]string)
m.readonly.Store(&readonlyCopy)
}
func (m *internStringMap) cleanup() {
m.mu.Lock()
defer m.mu.Unlock()
readonly := m.getReadonly()
currentTime := fasttime.UnixTimestamp()
needCleanup := false
for _, e := range readonly {
if e.deadline <= currentTime {
needCleanup = true
break
}
}
if !needCleanup {
return
}
readonlyCopy := make(map[string]internStringMapEntry, len(readonly))
for k, e := range readonly {
if e.deadline > currentTime {
readonlyCopy[k] = e
}
}
m.readonly.Store(&readonlyCopy)
}
func isSkipCache(s string) bool {
return *disableCache || len(s) > *internStringMaxLen
}
// InternBytes interns b as a string
func InternBytes(b []byte) string {
s := ToUnsafeString(b)
return InternString(s)
}
// InternString returns interned s.
//
// This may be needed for reducing the amounts of allocated memory.
func InternString(s string) string {
return ism.intern(s)
}
var ism = newInternStringMap()