2022-09-30 06:34:14 +02:00
package bytesutil
2022-08-26 23:12:39 +02:00
import (
2023-01-24 08:28:10 +01:00
"flag"
2022-12-12 23:31:16 +01:00
"strings"
2022-08-26 23:12:39 +02:00
"sync"
"sync/atomic"
2023-02-27 23:15:49 +01:00
"time"
2022-12-12 23:31:16 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2022-08-26 23:12:39 +02:00
)
2023-02-27 23:15:49 +01:00
// Command-line flags that tune the interned strings cache.
var (
	// internStringMaxLen is the length limit for interned strings;
	// longer strings bypass the cache.
	internStringMaxLen = flag.Int(
		"internStringMaxLen",
		500,
		"The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. "+
			"See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration",
	)

	// disableCache turns the interned strings cache off entirely when set.
	disableCache = flag.Bool(
		"internStringDisableCache",
		false,
		"Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. "+
			"See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen",
	)

	// cacheExpireDuration is the lifetime assigned to cache entries.
	cacheExpireDuration = flag.Duration(
		"internStringCacheExpireDuration",
		6*time.Minute,
		"The expiry duration for caches for interned strings. "+
			"See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache",
	)
)
2024-06-10 17:58:02 +02:00
type internStringMap struct {
mu sync . Mutex
mutable map [ string ] string
mutableReads uint64
readonly atomic . Pointer [ map [ string ] internStringMapEntry ]
2024-06-11 17:50:32 +02:00
cleanupInterval uint64
2024-06-10 17:58:02 +02:00
nextCleanupTime atomic . Uint64
2023-02-27 23:15:49 +01:00
}
2023-01-24 08:28:10 +01:00
2024-06-10 17:58:02 +02:00
// internStringMapEntry is a single record stored in the readonly map of internStringMap.
type internStringMapEntry struct {
	// deadline is the timestamp at which the entry is considered expired by cleanup.
	deadline uint64

	// s is the interned string.
	s string
}
2024-06-10 17:58:02 +02:00
func newInternStringMap ( ) * internStringMap {
2024-06-11 17:50:32 +02:00
m := & internStringMap {
2024-06-10 17:58:02 +02:00
mutable : make ( map [ string ] string ) ,
}
readonly := make ( map [ string ] internStringMapEntry )
2024-06-11 17:50:32 +02:00
m . readonly . Store ( & readonly )
m . cleanupInterval = uint64 ( cacheExpireDuration . Seconds ( ) / 3 )
m . nextCleanupTime . Store ( fasttime . UnixTimestamp ( ) + m . cleanupInterval )
return m
2024-06-10 17:58:02 +02:00
}
// getReadonly returns the currently published readonly map.
//
// The returned map must not be modified by the caller.
func (m *internStringMap) getReadonly() map[string]internStringMapEntry {
	published := m.readonly.Load()
	return *published
}
func ( m * internStringMap ) intern ( s string ) string {
2024-06-11 17:50:32 +02:00
if isSkipCache ( s ) {
2023-02-27 23:15:49 +01:00
return strings . Clone ( s )
}
2024-06-10 17:58:02 +02:00
currentTime := fasttime . UnixTimestamp ( )
if currentTime >= m . nextCleanupTime . Load ( ) {
2024-06-11 17:50:32 +02:00
m . nextCleanupTime . Store ( currentTime + m . cleanupInterval )
2024-06-10 17:58:02 +02:00
m . cleanup ( )
}
2023-02-27 23:15:49 +01:00
2024-06-10 17:58:02 +02:00
readonly := m . getReadonly ( )
e , ok := readonly [ s ]
if ok {
// Fast path - the string has been found in readonly map
2022-12-12 23:31:16 +01:00
return e . s
2022-08-26 23:12:39 +02:00
}
2024-06-10 17:58:02 +02:00
2024-06-11 17:50:32 +02:00
// Slower path - search for the string in mutable map under the lock.
2024-06-10 17:58:02 +02:00
m . mu . Lock ( )
sInterned , ok := m . mutable [ s ]
if ! ok {
2024-06-11 17:50:32 +02:00
// Verify whether s has been already registered by concurrent goroutines in m.readonly
2024-06-10 17:58:02 +02:00
readonly = m . getReadonly ( )
e , ok = readonly [ s ]
if ! ok {
// Slowest path - register the string in mutable map.
// Make a new copy for s in order to remove references from possible bigger string s refers to.
sInterned = strings . Clone ( s )
m . mutable [ sInterned ] = sInterned
} else {
sInterned = e . s
}
2022-12-12 23:31:16 +01:00
}
2024-06-10 17:58:02 +02:00
m . mutableReads ++
if m . mutableReads > uint64 ( len ( readonly ) ) {
m . migrateMutableToReadonlyLocked ( )
m . mutableReads = 0
2022-08-26 23:12:39 +02:00
}
2024-06-10 17:58:02 +02:00
m . mu . Unlock ( )
2022-12-12 23:31:16 +01:00
2024-06-10 17:58:02 +02:00
return sInterned
2022-08-26 23:12:39 +02:00
}
2024-06-10 17:58:02 +02:00
func ( m * internStringMap ) migrateMutableToReadonlyLocked ( ) {
readonly := m . getReadonly ( )
readonlyCopy := make ( map [ string ] internStringMapEntry , len ( readonly ) + len ( m . mutable ) )
for k , e := range readonly {
readonlyCopy [ k ] = e
}
deadline := fasttime . UnixTimestamp ( ) + uint64 ( cacheExpireDuration . Seconds ( ) + 0.5 )
for k , s := range m . mutable {
readonlyCopy [ k ] = internStringMapEntry {
s : s ,
deadline : deadline ,
}
}
m . mutable = make ( map [ string ] string )
m . readonly . Store ( & readonlyCopy )
2022-12-12 23:31:16 +01:00
}
2024-06-10 17:58:02 +02:00
// cleanup drops the entries with deadline not exceeding the current timestamp
// from the readonly map.
//
// The readonly map is left as is if it contains no such entries.
// Otherwise a filtered copy is built and published in its place.
func (m *internStringMap) cleanup() {
	m.mu.Lock()
	defer m.mu.Unlock()

	current := m.getReadonly()
	now := fasttime.UnixTimestamp()

	// Look for the first expired entry in order to avoid copying the map when nothing has expired.
	hasExpired := false
	for _, entry := range current {
		if entry.deadline <= now {
			hasExpired = true
			break
		}
	}

	if hasExpired {
		survivors := make(map[string]internStringMapEntry, len(current))
		for key, entry := range current {
			if entry.deadline <= now {
				continue
			}
			survivors[key] = entry
		}
		m.readonly.Store(&survivors)
	}
}
// isSkipCache reports whether s must bypass the interned strings cache:
// either the cache is disabled via -internStringDisableCache,
// or s is longer than -internStringMaxLen bytes.
func isSkipCache(s string) bool {
	if *disableCache {
		return true
	}
	return len(s) > *internStringMaxLen
}
// InternBytes interns b as a string
func InternBytes ( b [ ] byte ) string {
s := ToUnsafeString ( b )
return InternString ( s )
}
// InternString returns the interned copy of s obtained from the package-level cache.
//
// Interning may help to reduce the amounts of allocated memory.
func InternString(s string) string {
	interned := ism.intern(s)
	return interned
}
// ism is the package-level cache used by InternString.
var ism = newInternStringMap()