From 8d9d7a8a12c1f3fd3604ad14d47847bec0d09bbf Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 22 Feb 2024 18:32:53 +0200 Subject: [PATCH] app/vmstorage: expose vm_snapshots metric, which shows the current number of snapshots While at it, refresh docs about snapshots - https://docs.victoriametrics.com/#how-to-work-with-snapshots --- README.md | 34 +++++++++++++++++++-------- app/vmstorage/main.go | 1 + docs/CHANGELOG.md | 1 + docs/README.md | 34 +++++++++++++++++++-------- docs/Single-server-VictoriaMetrics.md | 34 +++++++++++++++++++-------- lib/storage/storage.go | 10 ++++++++ 6 files changed, 84 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index a0554334b..a90190dd9 100644 --- a/README.md +++ b/README.md @@ -1129,17 +1129,16 @@ as a service for your OS. A [snap package](https://snapcraft.io/victoriametrics) ## How to work with snapshots -VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) -for all the data stored under `-storageDataPath` directory. -Navigate to `http://:8428/snapshot/create` in order to create an instant snapshot. -The page will return the following JSON response: +Send a request to `http://:8428/snapshot/create` endpoint in order to create +an [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +The page returns the following JSON response on successful creation of snapshot: ```json {"status":"ok","snapshot":""} ``` Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>` -is the command-line flag value. Snapshots can be archived to backup storage at any time +is the corresponding command-line flag value. Snapshots can be archived to backup storage at any time with [vmbackup](https://docs.victoriametrics.com/vmbackup.html). 
Snapshots consist of a mix of hard-links and soft-links to various files and directories inside `-storageDataPath`. @@ -1151,20 +1150,32 @@ for more details. This adds some restrictions on what can be done with the conte - Do not copy subdirectories inside `<-storageDataPath>/snapshot` with `cp`, `rsync` or similar commands, since there are high chances that these commands won't copy some data stored in the snapshot. Prefer using [vmbackup](https://docs.victoriametrics.com/vmbackup.html) for making copies of snapshot data. -The `http://:8428/snapshot/list` page contains the list of available snapshots. +See also [snapshot troubleshooting](#snapshot-troubleshooting). -Navigate to `http://:8428/snapshot/delete?snapshot=` in order -to delete `` snapshot. +The `http://:8428/snapshot/list` endpoint returns the list of available snapshots. + +Send a query to `http://:8428/snapshot/delete?snapshot=` in order +to delete the snapshot with `` name. Navigate to `http://:8428/snapshot/delete_all` in order to delete all the snapshots. -Steps for restoring from a snapshot: +### How to restore from a snapshot 1. Stop VictoriaMetrics with `kill -INT`. 1. Restore snapshot contents from backup with [vmrestore](https://docs.victoriametrics.com/vmrestore.html) to the directory pointed by `-storageDataPath`. 1. Start VictoriaMetrics. +### Snapshot troubleshooting + +Snapshot doesn't occupy disk space just after its creation thanks to the [used approach](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +Old snapshots may start occupying additional disk space if they refer to old parts, which were already deleted during [background merge](#storage). +That's why it is recommended to delete old snapshots after they are no longer needed in order to free up disk space used by old snapshots. +This can be done either manually or automatically if the `-snapshotsMaxAge` command-line flag is set.
Make sure that the backup process has enough time to complete +when setting `-snapshotsMaxAge` command-line flag. + +VictoriaMetrics exposes the current number of available snapshots via `vm_snapshots` metric at [`/metrics`](#monitoring) page. + ## How to delete time series Send a request to `http://:8428/api/v1/admin/tsdb/delete_series?match[]=`, @@ -2260,7 +2271,10 @@ and [cardinality explorer docs](#cardinality-explorer). * VictoriaMetrics ignores `NaN` values during data ingestion. -See also [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html). +See also: + +- [Snapshot troubleshooting](#snapshot-troubleshooting). +- [General troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html). ## Push metrics diff --git a/app/vmstorage/main.go b/app/vmstorage/main.go index 2cd7d371f..5af4768d8 100644 --- a/app/vmstorage/main.go +++ b/app/vmstorage/main.go @@ -524,6 +524,7 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) { metrics.WriteCounterUint64(w, `vm_rows_added_to_storage_total`, m.RowsAddedTotal) metrics.WriteCounterUint64(w, `vm_deduplicated_samples_total{type="merge"}`, m.DedupsDuringMerge) + metrics.WriteGaugeUint64(w, `vm_snapshots`, m.SnapshotsCount) metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="big_timestamp"}`, m.TooBigTimestampRows) metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="small_timestamp"}`, m.TooSmallTimestampRows) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index dea139352..4f8247d49 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -33,6 +33,7 @@ See also [LTS releases](https://docs.victoriametrics.com/LTS-releases.html). * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) via all the [label manipulation functions](https://docs.victoriametrics.com/metricsql/#label-manipulation-functions). 
For example, `label_del(some_metric{job="foo"}, "instance") + other_metric{pod="bar"}` is now transformed to `label_del(some_metric{job="foo",pod="bar"}, "instance") + other_metric{job="foo",pod="bar"}`. This should reduce the amounts of time series processed during query execution. * FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): add [count_values_over_time](https://docs.victoriametrics.com/MetricsQL.html#count_values_over_time) function. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5847). * FEATURE: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): expose `vm_last_partition_parts` [metrics](https://docs.victoriametrics.com/#monitoring), which show the number of [parts in the latest partition](https://docs.victoriametrics.com/#storage). These metrics may help debugging query performance slowdown related to the increased number of parts in the last partition, since usually all the ingested data is written to the last partition and all the queries are performed over the recently ingested data, e.g. the last partition. +* FEATURE: [Single-node VictoriaMetrics](https://docs.victoriametrics.com/) and `vmstorage` in [VictoriaMetrics cluster](https://docs.victoriametrics.com/cluster-victoriametrics/): expose `vm_snapshots` [metric](https://docs.victoriametrics.com/#monitoring), which shows the current number of snapshots created via [snapshot API](https://docs.victoriametrics.com/#how-to-work-with-snapshots). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for `client_id` option into [kuma_sd_configs](https://docs.victoriametrics.com/sd_configs/#kuma_sd_configs) in the same way as Prometheus does. See [this pull request](https://github.com/prometheus/prometheus/pull/13278).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for `enable_compression` option in [scrape_configs](https://docs.victoriametrics.com/sd_configs/#scrape_configs) in order to be compatible with Prometheus scrape configs. See [this pull request](https://github.com/prometheus/prometheus/pull/13166) and [this feature request](https://github.com/prometheus/prometheus/issues/12319). Note that `vmagent` was always supporting [`disable_compression` option](https://docs.victoriametrics.com/vmagent/#scrape_config-enhancements) before Prometheus added `enable_compression` option. * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol) and [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for pull requests [1](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5783), [2](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5798), [3](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5797). diff --git a/docs/README.md b/docs/README.md index eed799e22..716351888 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1132,17 +1132,16 @@ as a service for your OS. A [snap package](https://snapcraft.io/victoriametrics) ## How to work with snapshots -VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) -for all the data stored under `-storageDataPath` directory. -Navigate to `http://:8428/snapshot/create` in order to create an instant snapshot. 
-The page will return the following JSON response: +Send a request to `http://:8428/snapshot/create` endpoint in order to create +an [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +The page returns the following JSON response on successful creation of snapshot: ```json {"status":"ok","snapshot":""} ``` Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>` -is the command-line flag value. Snapshots can be archived to backup storage at any time +is the corresponding command-line flag value. Snapshots can be archived to backup storage at any time with [vmbackup](https://docs.victoriametrics.com/vmbackup.html). Snapshots consist of a mix of hard-links and soft-links to various files and directories inside `-storageDataPath`. @@ -1154,20 +1153,32 @@ for more details. This adds some restrictions on what can be done with the conte - Do not copy subdirectories inside `<-storageDataPath>/snapshot` with `cp`, `rsync` or similar commands, since there are high chances that these commands won't copy some data stored in the snapshot. Prefer using [vmbackup](https://docs.victoriametrics.com/vmbackup.html) for making copies of snapshot data. -The `http://:8428/snapshot/list` page contains the list of available snapshots. +See also [snapshot troubleshooting](#snapshot-troubleshooting). -Navigate to `http://:8428/snapshot/delete?snapshot=` in order -to delete `` snapshot. +The `http://:8428/snapshot/list` endpoint returns the list of available snapshots. + +Send a query to `http://:8428/snapshot/delete?snapshot=` in order +to delete the snapshot with `` name. Navigate to `http://:8428/snapshot/delete_all` in order to delete all the snapshots. -Steps for restoring from a snapshot: +### How to restore from a snapshot 1. Stop VictoriaMetrics with `kill -INT`. 1. 
Restore snapshot contents from backup with [vmrestore](https://docs.victoriametrics.com/vmrestore.html) to the directory pointed by `-storageDataPath`. 1. Start VictoriaMetrics. +### Snapshot troubleshooting + +Snapshot doesn't occupy disk space just after its creation thanks to the [used approach](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +Old snapshots may start occupying additional disk space if they refer to old parts, which were already deleted during [background merge](#storage). +That's why it is recommended to delete old snapshots after they are no longer needed in order to free up disk space used by old snapshots. +This can be done either manually or automatically if the `-snapshotsMaxAge` command-line flag is set. Make sure that the backup process has enough time to complete +when setting `-snapshotsMaxAge` command-line flag. + +VictoriaMetrics exposes the current number of available snapshots via `vm_snapshots` metric at [`/metrics`](#monitoring) page. + ## How to delete time series Send a request to `http://:8428/api/v1/admin/tsdb/delete_series?match[]=`, @@ -2263,7 +2274,10 @@ and [cardinality explorer docs](#cardinality-explorer). * VictoriaMetrics ignores `NaN` values during data ingestion. -See also [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html). +See also: + +- [Snapshot troubleshooting](#snapshot-troubleshooting). +- [General troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html). ## Push metrics diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index d365642c5..cb43eeca2 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1140,17 +1140,16 @@ as a service for your OS.
A [snap package](https://snapcraft.io/victoriametrics) ## How to work with snapshots -VictoriaMetrics can create [instant snapshots](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282) -for all the data stored under `-storageDataPath` directory. -Navigate to `http://:8428/snapshot/create` in order to create an instant snapshot. -The page will return the following JSON response: +Send a request to `http://:8428/snapshot/create` endpoint in order to create +an [instant snapshot](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +The page returns the following JSON response on successful creation of snapshot: ```json {"status":"ok","snapshot":""} ``` Snapshots are created under `<-storageDataPath>/snapshots` directory, where `<-storageDataPath>` -is the command-line flag value. Snapshots can be archived to backup storage at any time +is the corresponding command-line flag value. Snapshots can be archived to backup storage at any time with [vmbackup](https://docs.victoriametrics.com/vmbackup.html). Snapshots consist of a mix of hard-links and soft-links to various files and directories inside `-storageDataPath`. @@ -1162,20 +1161,32 @@ for more details. This adds some restrictions on what can be done with the conte - Do not copy subdirectories inside `<-storageDataPath>/snapshot` with `cp`, `rsync` or similar commands, since there are high chances that these commands won't copy some data stored in the snapshot. Prefer using [vmbackup](https://docs.victoriametrics.com/vmbackup.html) for making copies of snapshot data. -The `http://:8428/snapshot/list` page contains the list of available snapshots. +See also [snapshot troubleshooting](#snapshot-troubleshooting). -Navigate to `http://:8428/snapshot/delete?snapshot=` in order -to delete `` snapshot. +The `http://:8428/snapshot/list` endpoint returns the list of available snapshots. 
+ +Send a query to `http://:8428/snapshot/delete?snapshot=` in order +to delete the snapshot with `` name. Navigate to `http://:8428/snapshot/delete_all` in order to delete all the snapshots. -Steps for restoring from a snapshot: +### How to restore from a snapshot 1. Stop VictoriaMetrics with `kill -INT`. 1. Restore snapshot contents from backup with [vmrestore](https://docs.victoriametrics.com/vmrestore.html) to the directory pointed by `-storageDataPath`. 1. Start VictoriaMetrics. +### Snapshot troubleshooting + +Snapshot doesn't occupy disk space just after its creation thanks to the [used approach](https://medium.com/@valyala/how-victoriametrics-makes-instant-snapshots-for-multi-terabyte-time-series-data-e1f3fb0e0282). +Old snapshots may start occupying additional disk space if they refer to old parts, which were already deleted during [background merge](#storage). +That's why it is recommended to delete old snapshots after they are no longer needed in order to free up disk space used by old snapshots. +This can be done either manually or automatically if the `-snapshotsMaxAge` command-line flag is set. Make sure that the backup process has enough time to complete +when setting `-snapshotsMaxAge` command-line flag. + +VictoriaMetrics exposes the current number of available snapshots via `vm_snapshots` metric at [`/metrics`](#monitoring) page. + ## How to delete time series Send a request to `http://:8428/api/v1/admin/tsdb/delete_series?match[]=`, @@ -2271,7 +2282,10 @@ and [cardinality explorer docs](#cardinality-explorer). * VictoriaMetrics ignores `NaN` values during data ingestion. -See also [troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html). +See also: + +- [Snapshot troubleshooting](#snapshot-troubleshooting). +- [General troubleshooting docs](https://docs.victoriametrics.com/Troubleshooting.html).
## Push metrics diff --git a/lib/storage/storage.go b/lib/storage/storage.go index cc7f87734..b61d88e8d 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -394,6 +394,14 @@ func (s *Storage) CreateSnapshot(deadline uint64) (string, error) { return snapshotName, nil } +func (s *Storage) mustGetSnapshotsCount() int { + snapshotNames, err := s.ListSnapshots() + if err != nil { + logger.Panicf("FATAL: cannot list snapshots: %s", err) + } + return len(snapshotNames) +} + // ListSnapshots returns sorted list of existing snapshots for s. func (s *Storage) ListSnapshots() ([]string, error) { snapshotsPath := filepath.Join(s.path, snapshotsDirname) @@ -467,6 +475,7 @@ func (s *Storage) idb() *indexDB { type Metrics struct { RowsAddedTotal uint64 DedupsDuringMerge uint64 + SnapshotsCount uint64 TooSmallTimestampRows uint64 TooBigTimestampRows uint64 @@ -539,6 +548,7 @@ func (m *Metrics) Reset() { func (s *Storage) UpdateMetrics(m *Metrics) { m.RowsAddedTotal = atomic.LoadUint64(&rowsAddedTotal) m.DedupsDuringMerge = atomic.LoadUint64(&dedupsDuringMerge) + m.SnapshotsCount += uint64(s.mustGetSnapshotsCount()) m.TooSmallTimestampRows += atomic.LoadUint64(&s.tooSmallTimestampRows) m.TooBigTimestampRows += atomic.LoadUint64(&s.tooBigTimestampRows)