From b6ed9afd6d14c4a042625a811377051993e95160 Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Mon, 21 Feb 2022 13:50:34 +0200 Subject: [PATCH] lib: allow to configure cache size by type (#2206) * lib: allow to configure cache size by type https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1940 Signed-off-by: hagen1778 * Apply suggestions from code review * wip Co-authored-by: Aliaksandr Valialkin --- README.md | 34 +++++++++++++++++++++++++++ app/vmstorage/main.go | 8 +++++++ docs/CHANGELOG.md | 6 +++++ docs/README.md | 34 +++++++++++++++++++++++++++ docs/Single-server-VictoriaMetrics.md | 34 +++++++++++++++++++++++++++ lib/mergeset/part.go | 18 ++++++++++++-- lib/storage/storage.go | 16 ++++++++++++- 7 files changed, 147 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c829d2664..d76674a45 100644 --- a/README.md +++ b/README.md @@ -1428,6 +1428,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details. +## Cache tuning + +VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance. 
+The following metrics for each type of cache are exported at [`/metrics` page](#monitoring): +- `vm_cache_size_bytes` - the actual cache size +- `vm_cache_size_max_bytes` - cache size limit +- `vm_cache_requests_total` - the number of requests to the cache +- `vm_cache_misses_total` - the number of cache misses +- `vm_cache_entries` - the number of entries in the cache + +Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) +and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176) +contain a `Caches` section with cache metrics visualized. The panels show the current +memory usage by each type of cache, and also a cache hit rate. If the hit rate is close to 100%, +then cache efficiency is already very high and does not need any tuning. +The panel `Cache usage %` in the `Troubleshooting` section shows the percentage of used cache size +from the allowed size by type. If the percentage is below 100%, then no further tuning is needed. + +Please note, default cache sizes were carefully adjusted according to the most +practical scenarios and workloads. Change the defaults only if you understand the implications. + +To override the default values, see command-line flags with `-storage.cacheSize` prefix. +See the full description of flags [here](#list-of-command-line-flags). + + ## Data migration Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types: @@ -1898,6 +1923,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li authKey, which must be passed in query string to /snapshot* pages -sortLabels Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. 
Enabled sorting for labels can slow down ingestion performance a bit + -storage.cacheSizeIndexDBDataBlocks size + Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) + -storage.cacheSizeIndexDBIndexBlocks size + Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) + -storage.cacheSizeStorageTSID size + Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) -storage.maxDailySeries int The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries -storage.maxHourlySeries int diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go index 3e1ddd1e9..4d45253b6 100644 --- a/app/vmstorage/main.go +++ b/app/vmstorage/main.go @@ -15,6 +15,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/lib/fs" "github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver" "github.com/VictoriaMetrics/VictoriaMetrics/lib/logger" + "github.com/VictoriaMetrics/VictoriaMetrics/lib/mergeset" "github.com/VictoriaMetrics/VictoriaMetrics/lib/storage" "github.com/VictoriaMetrics/VictoriaMetrics/lib/syncwg" "github.com/VictoriaMetrics/metrics" @@ -48,6 +49,10 @@ var ( "Excess series are logged and dropped. This can be useful for limiting series churn rate. 
See also -storage.maxHourlySeries") minFreeDiskSpaceBytes = flagutil.NewBytes("storage.minFreeDiskSpaceBytes", 10e6, "The minimum free disk space at -storageDataPath after which the storage stops accepting new data") + + cacheSizeStorageTSID = flagutil.NewBytes("storage.cacheSizeStorageTSID", 0, "Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning") + cacheSizeIndexDBIndexBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBIndexBlocks", 0, "Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning") + cacheSizeIndexDBDataBlocks = flagutil.NewBytes("storage.cacheSizeIndexDBDataBlocks", 0, "Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning") ) // CheckTimeRange returns true if the given tr is denied for querying. @@ -85,6 +90,9 @@ func InitWithoutMetrics(resetCacheIfNeeded func(mrs []storage.MetricRow)) { storage.SetBigMergeWorkersCount(*bigMergeConcurrency) storage.SetSmallMergeWorkersCount(*smallMergeConcurrency) storage.SetFreeDiskSpaceLimit(minFreeDiskSpaceBytes.N) + storage.SetTSIDCacheSize(cacheSizeStorageTSID.N) + mergeset.SetIndexBlocksCacheSize(cacheSizeIndexDBIndexBlocks.N) + mergeset.SetDataBlocksCacheSize(cacheSizeIndexDBDataBlocks.N) logger.Infof("opening storage at %q with -retentionPeriod=%s", *DataPath, retentionPeriod) startTime := time.Now() diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 41af4f1e1..f1cae3d22 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -14,9 +14,15 @@ The following tip changes can be tested by building VictoriaMetrics components f ## tip +* FEATURE: allow overriding default limits for the following in-memory caches, which usually occupy the most memory: + * `storage/tsid` - the cache speeds up lookups of internal metric ids by `metric_name{labels...}` during data ingestion. 
The size for this cache can be tuned with `-storage.cacheSizeStorageTSID` command-line flag. + * `indexdb/dataBlocks` - the cache speeds up data lookups in `<-storageDataPath>/indexdb` files. The size for this cache can be tuned with `-storage.cacheSizeIndexDBDataBlocks` command-line flag. + * `indexdb/indexBlocks` - the cache speeds up index lookups in `<-storageDataPath>/indexdb` files. The size for this cache can be tuned with `-storage.cacheSizeIndexDBIndexBlocks` command-line flag. + See also [cache tuning docs](https://docs.victoriametrics.com/#cache-tuning). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1940). * FEATURE: add `-influxDBLabel` command-line flag for overriding `db` label name for the data [imported into VictoriaMetrics via InfluxDB line protocol](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf). Thanks to @johnatannvmd for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2203). * FEATURE: return `X-Influxdb-Version` HTTP header in responses to [InfluxDB write requests](https://docs.victoriametrics.com/#how-to-send-data-from-influxdb-compatible-agents-such-as-telegraf). This is needed for some InfluxDB clients. See, for example, [this comment](https://github.com/ntop/ntopng/issues/5449#issuecomment-1005347597). +* BUGFIX: reduce memory usage during the first three hours after the upgrade from versions older than v1.73.0. The memory usage spike was related to the need of in-memory caches' re-population after the upgrade because of the fix for [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1401). Now cache size limits are reduced in order to occupy less memory during the upgrade. * BUGFIX: fix a bug, which could significantly slow down requests to `/api/v1/labels` and `/api/v1/label//values`. These APIs are used by Grafana for auto-completion of label names and label values. 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2200). * BUGFIX: vmalert: add support for `$externalLabels` and `$externalURL` template vars in the same way as Prometheus does. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2193). * BUGFIX: update default value for `-promscrape.fileSDCheckInterval`, so it matches default duration used by Prometheus for checking for updates in `file_sd_configs`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2187). Thanks to @corporate-gadfly for the fix. diff --git a/docs/README.md b/docs/README.md index c829d2664..d76674a45 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1428,6 +1428,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details. +## Cache tuning + +VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance. 
+The following metrics for each type of cache are exported at [`/metrics` page](#monitoring): +- `vm_cache_size_bytes` - the actual cache size +- `vm_cache_size_max_bytes` - cache size limit +- `vm_cache_requests_total` - the number of requests to the cache +- `vm_cache_misses_total` - the number of cache misses +- `vm_cache_entries` - the number of entries in the cache + +Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) +and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176) +contain a `Caches` section with cache metrics visualized. The panels show the current +memory usage by each type of cache, and also a cache hit rate. If the hit rate is close to 100%, +then cache efficiency is already very high and does not need any tuning. +The panel `Cache usage %` in the `Troubleshooting` section shows the percentage of used cache size +from the allowed size by type. If the percentage is below 100%, then no further tuning is needed. + +Please note, default cache sizes were carefully adjusted according to the most +practical scenarios and workloads. Change the defaults only if you understand the implications. + +To override the default values, see command-line flags with `-storage.cacheSize` prefix. +See the full description of flags [here](#list-of-command-line-flags). + + ## Data migration Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types: @@ -1898,6 +1923,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li authKey, which must be passed in query string to /snapshot* pages -sortLabels Whether to sort labels for incoming samples before writing them to storage. This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. 
Enabled sorting for labels can slow down ingestion performance a bit + -storage.cacheSizeIndexDBDataBlocks size + Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) + -storage.cacheSizeIndexDBIndexBlocks size + Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) + -storage.cacheSizeStorageTSID size + Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) -storage.maxDailySeries int The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. See also -storage.maxHourlySeries -storage.maxHourlySeries int diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index bdec1734d..bb003a758 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1432,6 +1432,31 @@ See also more advanced [cardinality limiter in vmagent](https://docs.victoriamet VictoriaMetrics uses various internal caches. These caches are stored to `<-storageDataPath>/cache` directory during graceful shutdown (e.g. when VictoriaMetrics is stopped by sending `SIGINT` signal). The caches are read on the next VictoriaMetrics startup. Sometimes it is needed to remove such caches on the next startup. This can be performed by placing `reset_cache_on_startup` file inside the `<-storageDataPath>/cache` directory before the restart of VictoriaMetrics. 
See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1447) for details. +## Cache tuning + +VictoriaMetrics uses various in-memory caches for faster data ingestion and query performance. +The following metrics for each type of cache are exported at [`/metrics` page](#monitoring): +- `vm_cache_size_bytes` - the actual cache size +- `vm_cache_size_max_bytes` - cache size limit +- `vm_cache_requests_total` - the number of requests to the cache +- `vm_cache_misses_total` - the number of cache misses +- `vm_cache_entries` - the number of entries in the cache + +Both Grafana dashboards for [single-node VictoriaMetrics](https://grafana.com/dashboards/10229) +and [clustered VictoriaMetrics](https://grafana.com/grafana/dashboards/11176) +contain a `Caches` section with cache metrics visualized. The panels show the current +memory usage by each type of cache, and also a cache hit rate. If the hit rate is close to 100%, +then cache efficiency is already very high and does not need any tuning. +The panel `Cache usage %` in the `Troubleshooting` section shows the percentage of used cache size +from the allowed size by type. If the percentage is below 100%, then no further tuning is needed. + +Please note, default cache sizes were carefully adjusted according to the most +practical scenarios and workloads. Change the defaults only if you understand the implications. + +To override the default values, see command-line flags with `-storage.cacheSize` prefix. +See the full description of flags [here](#list-of-command-line-flags). + + ## Data migration Use [vmctl](https://docs.victoriametrics.com/vmctl.html) for data migration. It supports the following data migration types: @@ -1902,6 +1927,15 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li authKey, which must be passed in query string to /snapshot* pages -sortLabels Whether to sort labels for incoming samples before writing them to storage. 
This may be needed for reducing memory usage at storage when the order of labels in incoming samples is random. For example, if m{k1="v1",k2="v2"} may be sent as m{k2="v2",k1="v1"}. Enabled sorting for labels can slow down ingestion performance a bit + -storage.cacheSizeIndexDBDataBlocks size + Overrides max size for indexdb/dataBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) + -storage.cacheSizeIndexDBIndexBlocks size + Overrides max size for indexdb/indexBlocks cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) + -storage.cacheSizeStorageTSID size + Overrides max size for storage/tsid cache. See https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#cache-tuning + Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 0) -storage.maxDailySeries int The maximum number of unique series can be added to the storage during the last 24 hours. Excess series are logged and dropped. This can be useful for limiting series churn rate. 
See also -storage.maxHourlySeries -storage.maxHourlySeries int diff --git a/lib/mergeset/part.go b/lib/mergeset/part.go index 927b6545b..dc1b92ab8 100644 --- a/lib/mergeset/part.go +++ b/lib/mergeset/part.go @@ -15,9 +15,16 @@ import ( var idxbCache = blockcache.NewCache(getMaxIndexBlocksCacheSize) var ibCache = blockcache.NewCache(getMaxInmemoryBlocksCacheSize) +// SetIndexBlocksCacheSize overrides the default size of indexdb/indexBlocks cache. Call it before the cache size is first requested; size <= 0 leaves the default in place. +func SetIndexBlocksCacheSize(size int) { + maxIndexBlockCacheSize = size +} + func getMaxIndexBlocksCacheSize() int { maxIndexBlockCacheSizeOnce.Do(func() { - maxIndexBlockCacheSize = int(0.15 * float64(memory.Allowed())) + if maxIndexBlockCacheSize <= 0 { + maxIndexBlockCacheSize = int(0.10 * float64(memory.Allowed())) + } }) return maxIndexBlockCacheSize } @@ -27,9 +34,16 @@ var ( maxIndexBlockCacheSizeOnce sync.Once ) +// SetDataBlocksCacheSize overrides the default size of indexdb/dataBlocks cache. Call it before the cache size is first requested; size <= 0 leaves the default in place. +func SetDataBlocksCacheSize(size int) { + maxInmemoryBlockCacheSize = size +} + func getMaxInmemoryBlocksCacheSize() int { maxInmemoryBlockCacheSizeOnce.Do(func() { - maxInmemoryBlockCacheSize = int(0.4 * float64(memory.Allowed())) + if maxInmemoryBlockCacheSize <= 0 { + maxInmemoryBlockCacheSize = int(0.25 * float64(memory.Allowed())) + } }) return maxInmemoryBlockCacheSize } diff --git a/lib/storage/storage.go b/lib/storage/storage.go index 4bd15f762..18e194e61 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -202,7 +202,7 @@ func OpenStorage(path string, retentionMsecs int64, maxHourlySeries, maxDailySer // Load caches. 
mem := memory.Allowed() - s.tsidCache = s.mustLoadCache("MetricName->TSID", "metricName_tsid", int(float64(mem)*0.35)) + s.tsidCache = s.mustLoadCache("MetricName->TSID", "metricName_tsid", getTSIDCacheSize()) s.metricIDCache = s.mustLoadCache("MetricID->TSID", "metricID_tsid", mem/16) s.metricNameCache = s.mustLoadCache("MetricID->MetricName", "metricID_metricName", mem/10) s.dateMetricIDCache = newDateMetricIDCache() @@ -271,6 +271,20 @@ func OpenStorage(path string, retentionMsecs int64, maxHourlySeries, maxDailySer return s, nil } +var maxTSIDCacheSize int + +// SetTSIDCacheSize overrides the default size of storage/tsid cache; size <= 0 keeps the default of 37% of memory.Allowed(). +func SetTSIDCacheSize(size int) { + maxTSIDCacheSize = size +} + +func getTSIDCacheSize() int { + if maxTSIDCacheSize <= 0 { + return int(float64(memory.Allowed()) * 0.37) + } + return maxTSIDCacheSize +} + func (s *Storage) getDeletedMetricIDs() *uint64set.Set { return s.deletedMetricIDs.Load().(*uint64set.Set) }