From c3362e3db472af2b95f06c88c4855b5547e32107 Mon Sep 17 00:00:00 2001
From: Aliaksandr Valialkin <valyala@victoriametrics.com>
Date: Wed, 16 Nov 2022 12:37:55 +0200
Subject: [PATCH] lib/workingsetcache: add `-prevCacheRemovalPercent`
 command-line flag for tuning memory usage vs CPU usage ratio

Reduce the default value of this flag from 1% to 0.2% after 71335e6024831408f79d3d086a7cbfb8ebd670e5

This flag should help determine the best ratio for https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3343
---
 README.md                       |  2 ++
 docs/Cluster-VictoriaMetrics.md |  2 ++
 lib/workingsetcache/cache.go    | 17 ++++++++++++++---
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 0634c1fcc..5182cace1 100644
--- a/README.md
+++ b/README.md
@@ -2162,6 +2162,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
      Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
   -precisionBits int
      The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64)
+  -prevCacheRemovalPercent float
+     The previous cache is removed when the percent of requests it serves becomes lower than this value. Higher values reduce average memory usage at the cost of higher CPU usage (default 0.2)
   -promscrape.azureSDCheckInterval duration
      Interval for checking for changes in Azure. This works only if azure_sd_configs is configured in '-promscrape.config' file. See https://docs.victoriametrics.com/sd_configs.html#azure_sd_configs for details (default 1m0s)
   -promscrape.cluster.memberNum string
diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md
index a557ddeae..c2d072ef9 100644
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@@ -1174,6 +1174,8 @@ Below is the output for `/path/to/vmstorage -help`:
      Auth key for /debug/pprof/* endpoints. It must be passed via authKey query arg. It overrides httpAuth.* settings
   -precisionBits int
      The number of precision bits to store per each value. Lower precision bits improves data compression at the cost of precision loss (default 64)
+  -prevCacheRemovalPercent float
+     The previous cache is removed when the percent of requests it serves becomes lower than this value. Higher values reduce average memory usage at the cost of higher CPU usage (default 0.2)
   -pushmetrics.extraLabel array
      Optional labels to add to metrics pushed to -pushmetrics.url . For example, -pushmetrics.extraLabel='instance="foo"' adds instance="foo" label to all the metrics pushed to -pushmetrics.url
      Supports an array of values separated by comma or specified via multiple flags.
diff --git a/lib/workingsetcache/cache.go b/lib/workingsetcache/cache.go
index 84a7d436b..3e271029b 100644
--- a/lib/workingsetcache/cache.go
+++ b/lib/workingsetcache/cache.go
@@ -1,6 +1,7 @@
 package workingsetcache
 
 import (
+	"flag"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -10,6 +11,9 @@ import (
 	"github.com/VictoriaMetrics/fastcache"
 )
 
+var prevCacheRemovalPercent = flag.Float64("prevCacheRemovalPercent", 0.2, "The previous cache is removed when the percent of requests it serves becomes lower than this value. "+
+	"Higher values reduce average memory usage at the cost of higher CPU usage")
+
 // Cache modes.
 const (
 	split     = 0
@@ -163,9 +167,16 @@ func (c *Cache) expirationWatcher(expireDuration time.Duration) {
 }
 
 func (c *Cache) prevCacheWatcher() {
+	p := *prevCacheRemovalPercent / 100
+	if p <= 0 {
+		// There is no need to remove the previous cache.
+		return
+	}
+	minCurrRequests := uint64(1 / p)
+
 	// Watch for the usage of the prev cache and drop it whenever it receives
-	// less than 1% of requests comparing to the curr cache during the last 10 seconds.
-	checkInterval := 10 * time.Second
+	// less than prevCacheRemovalPercent percent of requests compared to the curr cache during the last 30 seconds.
+	checkInterval := 30 * time.Second
 	checkInterval += timeJitter(checkInterval / 10)
 	t := time.NewTicker(checkInterval)
 	defer t.Stop()
@@ -198,7 +209,7 @@ func (c *Cache) prevCacheWatcher() {
 		}
 		currGetCalls = csCurr.GetCalls
 		prevGetCalls = csPrev.GetCalls
-		if currRequests >= 100 && float64(prevRequests)/float64(currRequests) < 0.01 {
+		if currRequests >= minCurrRequests && float64(prevRequests)/float64(currRequests) < p {
 			// The majority of requests are served from the curr cache,
 			// so the prev cache can be deleted in order to free up memory.
 			if csPrev.EntriesCount > 0 {