all: add up to 10% random jitter to the interval between periodic tasks performed by various components

This should smooth CPU and RAM usage spikes related to these periodic tasks,
by reducing the probability that multiple concurrent periodic tasks are performed at the same time.
This commit is contained in:
Aliaksandr Valialkin 2024-01-22 18:12:37 +02:00
parent 9b4294e53e
commit 3449d563bd
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
14 changed files with 110 additions and 45 deletions

View File

@ -18,6 +18,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/protoparser/common"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
)
@ -395,7 +396,8 @@ func (c *client) newRequest(url string, body []byte) (*http.Request, error) {
// Otherwise it tries sending the block to remote storage indefinitely.
func (c *client) sendBlockHTTP(block []byte) bool {
c.rl.register(len(block), c.stopCh)
retryDuration := time.Second
maxRetryDuration := timeutil.AddJitterToDuration(time.Minute)
retryDuration := timeutil.AddJitterToDuration(time.Second)
retriesCount := 0
again:
@ -405,8 +407,8 @@ again:
if err != nil {
c.errorsCount.Inc()
retryDuration *= 2
if retryDuration > time.Minute {
retryDuration = time.Minute
if retryDuration > maxRetryDuration {
retryDuration = maxRetryDuration
}
logger.Warnf("couldn't send a block with size %d bytes to %q: %s; re-sending the block in %.3f seconds",
len(block), c.sanitizedURL, err, retryDuration.Seconds())
@ -452,8 +454,8 @@ again:
// Unexpected status code returned
retriesCount++
retryDuration *= 2
if retryDuration > time.Minute {
retryDuration = time.Minute
if retryDuration > maxRetryDuration {
retryDuration = maxRetryDuration
}
body, err := io.ReadAll(resp.Body)
_ = resp.Body.Close()

View File

@ -15,6 +15,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/persistentqueue"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promrelabel"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/metrics"
"github.com/golang/snappy"
)
@ -69,7 +70,8 @@ func (ps *pendingSeries) periodicFlusher() {
if flushSeconds <= 0 {
flushSeconds = 1
}
ticker := time.NewTicker(*flushInterval)
d := timeutil.AddJitterToDuration(*flushInterval)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {

View File

@ -10,6 +10,8 @@ import (
"sync"
"time"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/encoding"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
@ -20,7 +22,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/storage"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
"github.com/VictoriaMetrics/metrics"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
@ -400,7 +402,8 @@ func initStaleSnapshotsRemover(strg *storage.Storage) {
staleSnapshotsRemoverWG.Add(1)
go func() {
defer staleSnapshotsRemoverWG.Done()
t := time.NewTicker(11 * time.Second)
d := timeutil.AddJitterToDuration(time.Second * 11)
t := time.NewTicker(d)
defer t.Stop()
for {
select {

View File

@ -10,6 +10,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/cespare/xxhash/v2"
)
@ -138,9 +139,12 @@ func (c *Cache) Misses() uint64 {
}
func (c *Cache) cleaner() {
ticker := time.NewTicker(57 * time.Second)
d := timeutil.AddJitterToDuration(time.Minute)
ticker := time.NewTicker(d)
defer ticker.Stop()
perKeyMissesTicker := time.NewTicker(3 * time.Minute)
d = timeutil.AddJitterToDuration(time.Minute * 3)
perKeyMissesTicker := time.NewTicker(d)
defer perKeyMissesTicker.Stop()
for {
select {

View File

@ -11,6 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
)
@ -318,7 +319,8 @@ func (s *Storage) runRetentionWatcher() {
}
func (s *Storage) watchRetention() {
ticker := time.NewTicker(time.Hour)
d := timeutil.AddJitterToDuration(time.Hour)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
var ptwsToDelete []*partitionWrapper

View File

@ -9,6 +9,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/cespare/xxhash/v2"
)
@ -127,7 +128,8 @@ func (c *Cache) Misses() uint64 {
}
func (c *Cache) cleaner() {
ticker := time.NewTicker(53 * time.Second)
d := timeutil.AddJitterToDuration(time.Second * 53)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {

View File

@ -19,6 +19,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
// maxInmemoryParts is the maximum number of inmemory parts in the table.
@ -353,7 +354,8 @@ func MustOpenTable(path string, flushCallback func(), prepareBlock PrepareBlockC
go func() {
// call flushCallback once per 10 seconds in order to improve the effectiveness of caches,
// which are reset by the flushCallback.
tc := time.NewTicker(10 * time.Second)
d := timeutil.AddJitterToDuration(time.Second * 10)
tc := time.NewTicker(d)
for {
select {
case <-tb.stopCh:
@ -603,7 +605,8 @@ func (tb *Table) startPendingItemsFlusher() {
}
func (tb *Table) inmemoryPartsFlusher() {
ticker := time.NewTicker(dataFlushInterval)
d := timeutil.AddJitterToDuration(dataFlushInterval)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {

View File

@ -24,6 +24,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promauth"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timerpool"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
var (
@ -581,7 +582,7 @@ func newURLWatcher(role, apiURL string, gw *groupWatcher) *urlWatcher {
}
func (uw *urlWatcher) recreateScrapeWorks() {
const minSleepTime = 5 * time.Second
minSleepTime := timeutil.AddJitterToDuration(5 * time.Second)
sleepTime := minSleepTime
gw := uw.gw
stopCh := gw.ctx.Done()
@ -756,8 +757,9 @@ func (uw *urlWatcher) reloadObjects() string {
func (uw *urlWatcher) watchForUpdates() {
gw := uw.gw
stopCh := gw.ctx.Done()
backoffDelay := time.Second
maxBackoffDelay := 30 * time.Second
minBackoffDelay := timeutil.AddJitterToDuration(time.Second)
maxBackoffDelay := timeutil.AddJitterToDuration(time.Second * 30)
backoffDelay := minBackoffDelay
backoffSleep := func() {
t := timerpool.Get(backoffDelay)
select {
@ -802,7 +804,7 @@ func (uw *urlWatcher) watchForUpdates() {
if resp.StatusCode != http.StatusOK {
if resp.StatusCode == 410 {
// There is no need for sleep on 410 error. See https://kubernetes.io/docs/reference/using-api/api-concepts/#410-gone-responses
backoffDelay = time.Second
backoffDelay = minBackoffDelay
uw.staleResourceVersions.Inc()
uw.resourceVersion = ""
} else {
@ -813,7 +815,7 @@ func (uw *urlWatcher) watchForUpdates() {
}
continue
}
backoffDelay = time.Second
backoffDelay = minBackoffDelay
err = uw.readObjectUpdateStream(resp.Body)
_ = resp.Body.Close()
if err != nil {

View File

@ -20,6 +20,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
// The maximum size of big part.
@ -815,7 +816,8 @@ func (pt *partition) startPendingRowsFlusher() {
}
func (pt *partition) inmemoryPartsFlusher() {
ticker := time.NewTicker(dataFlushInterval)
d := timeutil.AddJitterToDuration(dataFlushInterval)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {
@ -828,7 +830,8 @@ func (pt *partition) inmemoryPartsFlusher() {
}
func (pt *partition) pendingRowsFlusher() {
ticker := time.NewTicker(pendingRowsFlushInterval)
d := timeutil.AddJitterToDuration(pendingRowsFlushInterval)
ticker := time.NewTicker(d)
defer ticker.Stop()
var rows []rawRow
for {
@ -1574,7 +1577,8 @@ func (pt *partition) startStalePartsRemover() {
}
func (pt *partition) stalePartsRemover() {
ticker := time.NewTicker(7 * time.Minute)
d := timeutil.AddJitterToDuration(7 * time.Minute)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {

View File

@ -26,11 +26,11 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/memory"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/querytracer"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/snapshot"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/uint64set"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/workingsetcache"
"github.com/VictoriaMetrics/fastcache"
"github.com/VictoriaMetrics/metricsql"
"github.com/valyala/fastrand"
)
const (
@ -671,7 +671,8 @@ func (s *Storage) startFreeDiskSpaceWatcher() {
s.freeDiskSpaceWatcherWG.Add(1)
go func() {
defer s.freeDiskSpaceWatcherWG.Done()
ticker := time.NewTicker(time.Second)
d := timeutil.AddJitterToDuration(time.Second)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {
@ -720,10 +721,9 @@ func (s *Storage) startNextDayMetricIDsUpdater() {
}()
}
var currHourMetricIDsUpdateInterval = time.Second * 10
func (s *Storage) currHourMetricIDsUpdater() {
ticker := time.NewTicker(currHourMetricIDsUpdateInterval)
d := timeutil.AddJitterToDuration(time.Second * 10)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {
@ -738,10 +738,9 @@ func (s *Storage) currHourMetricIDsUpdater() {
}
}
var nextDayMetricIDsUpdateInterval = time.Second * 11
func (s *Storage) nextDayMetricIDsUpdater() {
ticker := time.NewTicker(nextDayMetricIDsUpdateInterval)
d := timeutil.AddJitterToDuration(time.Second * 11)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {
@ -1214,9 +1213,8 @@ func (s *Storage) prefetchMetricNames(qt *querytracer.Tracer, srcMetricIDs []uin
if fasttime.UnixTimestamp() > atomic.LoadUint64(&s.prefetchedMetricIDsDeadline) {
// Periodically reset the prefetchedMetricIDs in order to limit its size.
s.prefetchedMetricIDs = &uint64set.Set{}
const deadlineSec = 20 * 60
jitterSec := fastrand.Uint32n(deadlineSec / 10)
metricIDsDeadline := fasttime.UnixTimestamp() + deadlineSec + uint64(jitterSec)
d := timeutil.AddJitterToDuration(time.Second * 20 * 60)
metricIDsDeadline := fasttime.UnixTimestamp() + uint64(d.Seconds())
atomic.StoreUint64(&s.prefetchedMetricIDsDeadline, metricIDsDeadline)
}
s.prefetchedMetricIDs.AddMulti(metricIDs)

View File

@ -11,6 +11,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fs"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
)
// table represents a single table with time series data.
@ -366,7 +367,8 @@ func (tb *table) startRetentionWatcher() {
}
func (tb *table) retentionWatcher() {
ticker := time.NewTicker(time.Minute)
d := timeutil.AddJitterToDuration(time.Minute)
ticker := time.NewTicker(d)
defer ticker.Stop()
for {
select {
@ -433,7 +435,8 @@ func (tb *table) finalDedupWatcher() {
}
}
}
t := time.NewTicker(time.Hour)
d := timeutil.AddJitterToDuration(time.Hour)
t := time.NewTicker(d)
defer t.Stop()
for {
select {

19
lib/timeutil/timeutil.go Normal file
View File

@ -0,0 +1,19 @@
package timeutil
import (
"time"
"github.com/valyala/fastrand"
)
// AddJitterToDuration adds up to 10% random jitter to d and returns the resulting duration.
//
// The maximum jitter is limited by 10 seconds.
func AddJitterToDuration(d time.Duration) time.Duration {
dv := d / 10
if dv > 10*time.Second {
dv = 10 * time.Second
}
p := float64(fastrand.Uint32()) / (1 << 32)
return d + time.Duration(p*float64(dv))
}

View File

@ -0,0 +1,27 @@
package timeutil
import (
"testing"
"time"
)
func TestAddJitterToDuration(t *testing.T) {
f := func(d time.Duration) {
t.Helper()
result := AddJitterToDuration(d)
if result < d {
t.Fatalf("unexpected negative jitter")
}
variance := (float64(result) - float64(d)) / float64(d)
if variance > 0.1 {
t.Fatalf("too big variance=%.2f for result=%s, d=%s; mustn't exceed 0.1", variance, result, d)
}
}
f(time.Nanosecond)
f(time.Microsecond)
f(time.Millisecond)
f(time.Second)
f(time.Hour)
f(24 * time.Hour)
}

View File

@ -8,6 +8,7 @@ import (
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/cgroup"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/timeutil"
"github.com/VictoriaMetrics/fastcache"
)
@ -132,7 +133,7 @@ func (c *Cache) runWatchers(expireDuration time.Duration) {
}
func (c *Cache) expirationWatcher(expireDuration time.Duration) {
expireDuration += timeJitter(expireDuration / 10)
expireDuration = timeutil.AddJitterToDuration(expireDuration)
t := time.NewTicker(expireDuration)
defer t.Stop()
for {
@ -170,8 +171,7 @@ func (c *Cache) prevCacheWatcher() {
// Watch for the usage of the prev cache and drop it whenever it receives
// less than prevCacheRemovalPercent requests comparing to the curr cache during the last 60 seconds.
checkInterval := 60 * time.Second
checkInterval += timeJitter(checkInterval / 10)
checkInterval := timeutil.AddJitterToDuration(time.Second * 60)
t := time.NewTicker(checkInterval)
defer t.Stop()
prevGetCalls := uint64(0)
@ -216,8 +216,7 @@ func (c *Cache) prevCacheWatcher() {
}
func (c *Cache) cacheSizeWatcher() {
checkInterval := 1500 * time.Millisecond
checkInterval += timeJitter(checkInterval / 10)
checkInterval := timeutil.AddJitterToDuration(time.Millisecond * 1500)
t := time.NewTicker(checkInterval)
defer t.Stop()
@ -451,8 +450,3 @@ func (c *Cache) SetBig(key, value []byte) {
curr := c.curr.Load()
curr.SetBig(key, value)
}
func timeJitter(d time.Duration) time.Duration {
n := float64(time.Now().UnixNano()%1e9) / 1e9
return time.Duration(float64(d) * n)
}