2022-09-30 06:34:14 +02:00
package bytesutil
2022-08-26 23:12:39 +02:00
import (
2023-01-24 08:28:10 +01:00
"flag"
2022-12-12 23:31:16 +01:00
"strings"
2022-08-26 23:12:39 +02:00
"sync"
"sync/atomic"
2023-02-27 23:15:49 +01:00
"time"
2022-12-12 23:31:16 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2022-08-26 23:12:39 +02:00
)
2023-02-27 23:15:49 +01:00
// Command-line flags controlling the behavior of the interned strings cache.
var (
	// internStringMaxLen is the maximum length of a string that goes through the cache;
	// longer strings are cloned on every call instead of being cached.
	internStringMaxLen = flag.Int("internStringMaxLen", 500, "The maximum length for strings to intern. A lower limit may save memory at the cost of higher CPU usage. "+
		"See https://en.wikipedia.org/wiki/String_interning . See also -internStringDisableCache and -internStringCacheExpireDuration")

	// disableCache turns the cache off entirely when set.
	disableCache = flag.Bool("internStringDisableCache", false, "Whether to disable caches for interned strings. This may reduce memory usage at the cost of higher CPU usage. "+
		"See https://en.wikipedia.org/wiki/String_interning . See also -internStringCacheExpireDuration and -internStringMaxLen")

	// cacheExpireDuration is how long an entry may stay unused before a cleanup pass removes it.
	cacheExpireDuration = flag.Duration("internStringCacheExpireDuration", 6*time.Minute, "The expiry duration for caches for interned strings. "+
		"See https://en.wikipedia.org/wiki/String_interning . See also -internStringMaxLen and -internStringDisableCache")
)
// isSkipCache reports whether s must bypass the interned strings cache.
//
// The cache is bypassed when -internStringDisableCache is set
// or when s is longer than -internStringMaxLen bytes.
func isSkipCache(s string) bool {
	if *disableCache {
		return true
	}
	return len(s) > *internStringMaxLen
}
2023-01-24 08:28:10 +01:00
2023-01-04 07:14:20 +01:00
// InternBytes interns b as a string
func InternBytes ( b [ ] byte ) string {
s := ToUnsafeString ( b )
return InternString ( s )
}
2022-08-26 23:12:39 +02:00
// InternString returns interned s.
//
// This may be needed for reducing the amounts of allocated memory.
func InternString ( s string ) string {
2023-02-27 23:15:49 +01:00
if isSkipCache ( s ) {
// Make a new copy for s in order to remove references from possible bigger string s refers to.
// This also protects from cases when s points to unsafe string - see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3227
return strings . Clone ( s )
}
2022-12-12 23:31:16 +01:00
ct := fasttime . UnixTimestamp ( )
if v , ok := internStringsMap . Load ( s ) ; ok {
e := v . ( * ismEntry )
2024-02-24 01:07:51 +01:00
if e . lastAccessTime . Load ( ) + 10 < ct {
2022-12-12 23:31:16 +01:00
// Reduce the frequency of e.lastAccessTime update to once per 10 seconds
// in order to improve the fast path speed on systems with many CPU cores.
2024-02-24 01:07:51 +01:00
e . lastAccessTime . Store ( ct )
2022-12-12 23:31:16 +01:00
}
return e . s
2022-08-26 23:12:39 +02:00
}
// Make a new copy for s in order to remove references from possible bigger string s refers to.
2022-12-12 23:31:16 +01:00
sCopy := strings . Clone ( s )
e := & ismEntry {
2024-02-24 01:07:51 +01:00
s : sCopy ,
2022-12-12 23:31:16 +01:00
}
2024-02-24 01:07:51 +01:00
e . lastAccessTime . Store ( ct )
2022-12-12 23:31:16 +01:00
internStringsMap . Store ( sCopy , e )
2022-12-21 21:57:28 +01:00
if needCleanup ( & internStringsMapLastCleanupTime , ct ) {
// Perform a global cleanup for internStringsMap by removing items, which weren't accessed during the last 5 minutes.
2022-12-12 23:31:16 +01:00
m := & internStringsMap
2023-02-27 23:15:49 +01:00
deadline := ct - uint64 ( cacheExpireDuration . Seconds ( ) )
2022-12-12 23:31:16 +01:00
m . Range ( func ( k , v interface { } ) bool {
e := v . ( * ismEntry )
2024-02-24 01:07:51 +01:00
if e . lastAccessTime . Load ( ) < deadline {
2022-12-12 23:31:16 +01:00
m . Delete ( k )
}
return true
} )
2022-08-26 23:12:39 +02:00
}
2022-12-12 23:31:16 +01:00
2022-08-26 23:12:39 +02:00
return sCopy
}
2022-12-12 23:31:16 +01:00
// ismEntry is a single entry stored in internStringsMap.
type ismEntry struct {
	// lastAccessTime is the timestamp of the most recent recorded access to the entry.
	// It is read and updated atomically, since entries are shared between goroutines.
	lastAccessTime atomic.Uint64

	// s is the interned copy of the string.
	s string
}
2022-08-26 23:12:39 +02:00
// Package-level state of the interned strings cache.
var (
	// internStringsMap maps a string to its *ismEntry.
	internStringsMap sync.Map

	// internStringsMapLastCleanupTime is passed to needCleanup together with the current timestamp
	// in order to decide when the next cleanup of internStringsMap must run.
	internStringsMapLastCleanupTime atomic.Uint64
)