lib/blockcache: do not cache entries that were requested only 1 or 2 times

Previously, only entries that were accessed a single time weren't cached. It turned out that some rarely executed heavy queries may read an indexdb block twice in a row instead of once. There is no need to cache such a block. This change should eliminate cache size spikes for indexdb/dataBlocks when such heavy queries are executed. Expose the -blockcache.missesBeforeCaching command-line flag, which can be used for fine-tuning the number of cache misses needed before storing the block in the cache.
2024-11-23 12:31:07 +01:00 · 2023-11-10 21:54:25 +01:00 · 2023-11-10 21:54:25 +01:00 · 010dc15d16
commit 010dc15d16
parent 22498c5087
7 changed files with 42 additions and 26 deletions
--- a/README.md
+++ b/README.md
@ -2526,6 +2526,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
 ```
  -bigMergeConcurrency int
     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
+  -blockcache.missesBeforeCaching int
+     The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -configAuthKey string
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -78,6 +78,7 @@ The sandbox cluster installation is running under the constant load generated by
 * FEATURE: dashboards/vmalert: add new panel `Missed evaluations` for indicating alerting groups that miss their evaluations.
 * FEATURE: all: track requests with wrong auth key and wrong basic auth at `vm_http_request_errors_total` [metric](https://docs.victoriametrics.com/#monitoring) with `reason="wrong_auth_key"` and `reason="wrong_basic_auth"`. See [this issue](https://github.com/victoriaMetrics/victoriaMetrics/issues/4590). Thanks to @venkatbvc for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5166).
 * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth.html): add `tls_insecure_skip_verify` parameter which allows to disable TLS verification for backend connection. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5240).
+* FEATURE: `vmstorage`: add `-blockcache.missesBeforeCaching` command-line flag, which can be used for fine-tuning RAM usage of the `indexdb/dataBlocks` cache when queries touching a big number of time series are executed.

 * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): strip sensitive information such as auth headers or passwords from datasource, remote-read, remote-write or notifier URLs in log messages or UI. This behavior is by default and is controlled via `-datasource.showURL`, `-remoteRead.showURL`, `remoteWrite.showURL` or `-notifier.showURL` cmd-line flags. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5044).
 * BUGFIX: [vmalert](https://docs.victoriametrics.com/vmalert.html): fix vmalert web UI when running on 32-bit architectures machine.
--- a/docs/Cluster-VictoriaMetrics.md
+++ b/docs/Cluster-VictoriaMetrics.md
@ -1332,6 +1332,8 @@ Below is the output for `/path/to/vmstorage -help`:
 ```
  -bigMergeConcurrency int
     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
+  -blockcache.missesBeforeCaching int
+     The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -cluster.tls
--- a/docs/README.md
+++ b/docs/README.md
@ -2529,6 +2529,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
 ```
  -bigMergeConcurrency int
     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
+  -blockcache.missesBeforeCaching int
+     The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -configAuthKey string
--- a/docs/Single-server-VictoriaMetrics.md
+++ b/docs/Single-server-VictoriaMetrics.md
@ -2537,6 +2537,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
 ```
  -bigMergeConcurrency int
     Deprecated: this flag does nothing. Please use -smallMergeConcurrency for controlling the concurrency of background merges. See https://docs.victoriametrics.com/#storage
+  -blockcache.missesBeforeCaching int
+     The number of cache misses before putting the block into cache. Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage (default 2)
  -cacheExpireDuration duration
     Items are removed from in-memory caches after they aren't accessed for this duration. Lower values may reduce memory usage at the cost of higher CPU usage. See also -prevCacheRemovalPercent (default 30m0s)
  -configAuthKey string
--- a/lib/blockcache/blockcache.go
+++ b/lib/blockcache/blockcache.go
@ -2,6 +2,7 @@ package blockcache

 import (
 	"container/heap"
+	"flag"
 	"sync"
 	"sync/atomic"
 	"time"
@ -12,6 +13,9 @@ import (
 	"github.com/cespare/xxhash/v2"
 )

+var missesBeforeCaching = flag.Int("blockcache.missesBeforeCaching", 2, "The number of cache misses before putting the block into cache. "+
+	"Higher values may reduce indexdb/dataBlocks cache size at the cost of higher CPU and disk read usage")
+
 // Cache caches Block entries.
 //
 // Call NewCache() for creating new Cache.
@ -184,7 +188,7 @@ type cache struct {

 	// perKeyMisses contains per-block cache misses.
 	//
-	// Blocks with less than 2 cache misses aren't stored in the cache in order to prevent from eviction for frequently accessed items.
+	// Blocks with up to *missesBeforeCaching cache misses aren't stored in the cache in order to prevent eviction of frequently accessed items.
 	perKeyMisses map[Key]int

 	// The heap for removing the least recently used entries from m.
@ -300,13 +304,14 @@ func (c *cache) GetBlock(k Key) Block {
 func (c *cache) PutBlock(k Key, b Block) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
-	// If the entry wasn't accessed yet (e.g. c.perKeyMisses[k] == 0), then cache it, since it is likely it will be accessed soon.
-	// Do not cache the entry only if there was only a single unsuccessful attempt to access it.
-	// This may be one-time-wonders entry, which won't be accessed more, so there is no need in caching it.
-	doNotCache := c.perKeyMisses[k] == 1
-	if doNotCache {
-		// Do not cache b if it has been requested only once (aka one-time-wonders items).
-		// This should reduce memory usage for the cache.
+	misses := c.perKeyMisses[k]
+	if misses > 0 && misses <= *missesBeforeCaching {
+		// If the entry wasn't accessed yet (e.g. misses == 0), then cache it,
+		// since it has been just created without consulting the cache and will be accessed soon.
+		//
+		// Do not cache the entry if there were up to *missesBeforeCaching unsuccessful attempts to access it.
+		// This may be one-time-wonders entry, which won't be accessed more, so do not cache it
+		// in order to save memory for frequently accessed items.
 		return
 	}

--- a/lib/blockcache/blockcache_test.go
+++ b/lib/blockcache/blockcache_test.go
@ -83,6 +83,7 @@ func TestCache(t *testing.T) {
 	if n := c.Misses(); n != 2 {
 		t.Fatalf("unexpected number of misses; got %d; want %d", n, 2)
 	}
+	for i := 0; i < *missesBeforeCaching; i++ {
 		// Store the missed entry to the cache. It shouldn't be stored because of the previous cache miss
 		c.PutBlock(k, &b)
 		if n := c.SizeBytes(); n != 0 {
@ -92,11 +93,12 @@ func TestCache(t *testing.T) {
 		if b1 := c.GetBlock(k); b1 != nil {
 			t.Fatalf("unexpected non-nil block obtained after removing all the blocks for the part; got %v", b1)
 		}
-	if n := c.Requests(); n != 4 {
-		t.Fatalf("unexpected number of requests; got %d; want %d", n, 4)
+		if n := c.Requests(); n != uint64(4+i) {
+			t.Fatalf("unexpected number of requests; got %d; want %d", n, 4+i)
+		}
+		if n := c.Misses(); n != uint64(3+i) {
+			t.Fatalf("unexpected number of misses; got %d; want %d", n, 3+i)
 		}
-	if n := c.Misses(); n != 3 {
-		t.Fatalf("unexpected number of misses; got %d; want %d", n, 3)
 	}
 	// Store the entry again. Now it must be stored because of the second cache miss.
 	c.PutBlock(k, &b)
@ -106,11 +108,11 @@ func TestCache(t *testing.T) {
 	if b1 := c.GetBlock(k); b1 != &b {
 		t.Fatalf("unexpected block obtained; got %v; want %v", b1, &b)
 	}
-	if n := c.Requests(); n != 5 {
-		t.Fatalf("unexpected number of requests; got %d; want %d", n, 5)
+	if n := c.Requests(); n != uint64(4+*missesBeforeCaching) {
+		t.Fatalf("unexpected number of requests; got %d; want %d", n, 4+*missesBeforeCaching)
 	}
-	if n := c.Misses(); n != 3 {
-		t.Fatalf("unexpected number of misses; got %d; want %d", n, 3)
+	if n := c.Misses(); n != uint64(2+*missesBeforeCaching) {
+		t.Fatalf("unexpected number of misses; got %d; want %d", n, 2+*missesBeforeCaching)
 	}

 	// Manually clean the cache. The entry shouldn't be deleted because it was recently accessed.