From f3e84b4deaa003051d57a72586cdd201c8ce91b0 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 5 Dec 2022 15:59:52 -0800 Subject: [PATCH] {dashboards,alerts}: substitute `{type="indexdb"}` with `{type=~"indexdb.*"}` inside queries after 8189770c50165b62867327ad388f2c2ef237ab6f Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337 --- README.md | 2 +- dashboards/victoriametrics-cluster.json | 18 +++++++++--------- dashboards/victoriametrics.json | 12 ++++++------ deployment/docker/alerts-cluster.yml | 4 ++-- deployment/docker/alerts.yml | 4 ++-- docs/CHANGELOG.md | 2 +- docs/README.md | 2 +- docs/Single-server-VictoriaMetrics.md | 2 +- 8 files changed, 23 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index c19d003e3..c697791ee 100644 --- a/README.md +++ b/README.md @@ -1745,7 +1745,7 @@ and [cardinality explorer docs](#cardinality-explorer). by requesting `/internal/force_flush` http handler. This handler is mostly needed for testing and debugging purposes. * The last few seconds of inserted data may be lost on unclean shutdown (i.e. OOM, `kill -9` or hardware reset). The `-inmemoryDataFlushInterval` command-line flag allows controlling the frequency of in-memory data flush to persistent storage. - See [this article for technical details](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704). + See [storage docs](#storage) and [this article](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704) for more details. * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second, then it is likely you have too many [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) for the current amount of RAM. 
diff --git a/dashboards/victoriametrics-cluster.json b/dashboards/victoriametrics-cluster.json index 1274cd6f2..98d9f20ac 100644 --- a/dashboards/victoriametrics-cluster.json +++ b/dashboards/victoriametrics-cluster.json @@ -179,7 +179,7 @@ "uid": "$ds" }, "exemplar": true, - "expr": "sum(vm_rows{job=~\"$job_storage\", type!=\"indexdb\"})", + "expr": "sum(vm_rows{job=~\"$job_storage\", type!~\"indexdb.*\"})", "format": "time_series", "instant": true, "interval": "", @@ -599,7 +599,7 @@ "uid": "$ds" }, "exemplar": true, - "expr": "sum(vm_data_size_bytes{job=~\"$job_storage\", type!=\"indexdb\"}) / sum(vm_rows{job=~\"$job_storage\", type!=\"indexdb\"})", + "expr": "sum(vm_data_size_bytes{job=~\"$job_storage\", type!~\"indexdb.*\"}) / sum(vm_rows{job=~\"$job_storage\", type!~\"indexdb.*\"})", "format": "time_series", "instant": true, "interval": "", @@ -4484,7 +4484,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "min(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n - \n ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!=\"indexdb\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!=\"indexdb\"})\n )\n))", + "expr": "min(vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n - \n ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n))", 
"format": "time_series", "interval": "", "intervalFactor": 1, @@ -5584,7 +5584,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=\"indexdb\"}) by(job, instance)\n / \n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n)", + "expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=~\"indexdb.*\"}) by(job, instance)\n / \n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n)", "format": "time_series", "intervalFactor": 1, "legendFormat": "indexdb", @@ -5597,7 +5597,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"}) by(job, instance)\n / \n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n)", + "expr": "max(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) by(job, instance)\n / \n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -8374,7 +8374,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n - \n ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!=\"indexdb\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!=\"indexdb\"})\n )\n)", + "expr": "vm_free_disk_space_bytes{job=~\"$job_storage\", instance=~\"$instance\"} \n/ \nignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job_storage\", instance=~\"$instance\"}[1d])\n - \n ignoring(type) 
rate(vm_deduplicated_samples_total{job=~\"$job_storage\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n / \n sum(vm_rows{job=~\"$job_storage\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n)", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -8579,7 +8579,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=\"indexdb\"}) by(job, instance)", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=~\"indexdb.*\"}) by(job, instance)", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{job}}:{{instance}} (indexdb)", @@ -8592,7 +8592,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"}) by(job, instance)", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) by(job, instance)", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -8791,4 +8791,4 @@ "uid": "oS7Bi_0Wz", "version": 1, "weekStart": "" -} \ No newline at end of file +} diff --git a/dashboards/victoriametrics.json b/dashboards/victoriametrics.json index f5bce5bd0..9975145de 100644 --- a/dashboards/victoriametrics.json +++ b/dashboards/victoriametrics.json @@ -225,7 +225,7 @@ "uid": "$ds" }, "exemplar": false, - "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"})", + "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", "format": "time_series", "instant": true, "interval": "", @@ -3767,7 +3767,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} \n/ ignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n - ignoring(type) 
rate(vm_deduplicated_samples_total{job=~\"$job\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"})\n )\n )", + "expr": "vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"} \n/ ignoring(path) (\n (\n rate(vm_rows_added_to_storage_total{job=~\"$job\", instance=~\"$instance\"}[1d]) \n - ignoring(type) rate(vm_deduplicated_samples_total{job=~\"$job\", instance=~\"$instance\", type=\"merge\"}[1d])\n ) * scalar(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"}) \n / sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})\n )\n )", "format": "time_series", "hide": false, "interval": "", @@ -3874,7 +3874,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!=\"indexdb\"})", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -3900,7 +3900,7 @@ "uid": "$ds" }, "editorMode": "code", - "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=\"indexdb\"})", + "expr": "sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\", type=~\"indexdb.*\"})", "format": "time_series", "hide": false, "interval": "", @@ -4156,7 +4156,7 @@ "type": "prometheus", "uid": "$ds" }, - "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type != \"indexdb\"})", + "expr": "sum(vm_rows{job=~\"$job\", instance=~\"$instance\", type!~\"indexdb.*\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -5306,4 +5306,4 @@ "uid": "wNf0q_kZk", "version": 1, "weekStart": "" -} \ No newline at end of file +} diff --git a/deployment/docker/alerts-cluster.yml b/deployment/docker/alerts-cluster.yml index 15c305452..3e68bd6e3 100644 --- 
a/deployment/docker/alerts-cluster.yml +++ b/deployment/docker/alerts-cluster.yml @@ -18,8 +18,8 @@ groups: ignoring(type) rate(vm_deduplicated_samples_total{type="merge"}[1d]) ) * scalar( - sum(vm_data_size_bytes{type!="indexdb"}) / - sum(vm_rows{type!="indexdb"}) + sum(vm_data_size_bytes{type!~"indexdb.*"}) / + sum(vm_rows{type!~"indexdb.*"}) ) ) < 3 * 24 * 3600 > 0 for: 30m diff --git a/deployment/docker/alerts.yml b/deployment/docker/alerts.yml index 5d478f0c7..efa3c5f7e 100644 --- a/deployment/docker/alerts.yml +++ b/deployment/docker/alerts.yml @@ -18,8 +18,8 @@ groups: ignoring(type) rate(vm_deduplicated_samples_total{type="merge"}[1d]) ) * scalar( - sum(vm_data_size_bytes{type!="indexdb"}) / - sum(vm_rows{type!="indexdb"}) + sum(vm_data_size_bytes{type!~"indexdb.*"}) / + sum(vm_rows{type!~"indexdb.*"}) ) ) < 3 * 24 * 3600 > 0 for: 30m diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 96694cc09..e6a92f3b5 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -17,7 +17,7 @@ The following tip changes can be tested by building VictoriaMetrics components f **Update note 1:** this release drops support for direct upgrade from VictoriaMetrics versions prior [v1.28.0](https://github.com/VictoriaMetrics/VictoriaMetrics/releases/tag/v1.28.0). Please upgrade to `v1.84.0`, wait until `finished round 2 of background conversion` line is emitted to log by single-node VictoriaMetrics or by `vmstorage`, and then upgrade to newer releases. -**Update note 2:** this release splits `type="indexdb"` metrics into `type="indexdb/inmemory"` and `type="indexdb/file"` metrics. This may break old dashboards and alerting rules, which contain label filters on `{type="indexdb"}`. It is recommended upgrading to the latest available dashboards and alerting rules mentioned in [these docs](https://docs.victoriametrics.com/#monitoring). +**Update note 2:** this release splits `type="indexdb"` metrics into `type="indexdb/inmemory"` and `type="indexdb/file"` metrics. 
This may break old dashboards and alerting rules, which contain a [label filter](https://docs.victoriametrics.com/keyConcepts.html#filtering) on `{type="indexdb"}`. Such a label filter must be substituted with `{type=~"indexdb.*"}`, so it matches `indexdb` from the previous releases and `indexdb/inmemory` + `indexdb/file` from new releases. It is recommended to upgrade to the latest available dashboards and alerting rules mentioned in [these docs](https://docs.victoriametrics.com/#monitoring), since they already contain fixed label filters. * FEATURE: add `-inmemoryDataFlushInterval` command-line flag, which can be used for controlling the frequency of in-memory data flush to disk. The data flush frequency can be reduced when VictoriaMetrics stores data to low-end flash device with limited number of write cycles (for example, on Raspberry PI). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3337). * FEATURE: expose additional metrics for `indexdb` and `storage` parts stored in memory and for `indexdb` parts stored in files (see [storage docs](https://docs.victoriametrics.com/#storage) for technical details): diff --git a/docs/README.md b/docs/README.md index 1fe27c686..7a716a12e 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1746,7 +1746,7 @@ and [cardinality explorer docs](#cardinality-explorer). by requesting `/internal/force_flush` http handler. This handler is mostly needed for testing and debugging purposes. * The last few seconds of inserted data may be lost on unclean shutdown (i.e. OOM, `kill -9` or hardware reset). The `-inmemoryDataFlushInterval` command-line flag allows controlling the frequency of in-memory data flush to persistent storage. - See [this article for technical details](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704). 
+ See [storage docs](#storage) and [this article](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704) for more details. * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second, then it is likely you have too many [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) for the current amount of RAM. diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index ed764040b..8fe2a4103 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -1749,7 +1749,7 @@ and [cardinality explorer docs](#cardinality-explorer). by requesting `/internal/force_flush` http handler. This handler is mostly needed for testing and debugging purposes. * The last few seconds of inserted data may be lost on unclean shutdown (i.e. OOM, `kill -9` or hardware reset). The `-inmemoryDataFlushInterval` command-line flag allows controlling the frequency of in-memory data flush to persistent storage. - See [this article for technical details](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704). + See [storage docs](#storage) and [this article](https://valyala.medium.com/wal-usage-looks-broken-in-modern-time-series-databases-b62a627ab704) for more details. * If VictoriaMetrics works slowly and eats more than a CPU core per 100K ingested data points per second, then it is likely you have too many [active time series](https://docs.victoriametrics.com/FAQ.html#what-is-an-active-time-series) for the current amount of RAM.