mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2025-01-20 07:19:17 +01:00
vmalert: add default list of alerting rules (#3373)
The default list of alerting rules contains the basic rules for checking vmalert's health state and is recommended to use for monitoring vmalert deployments. Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
parent
20d758e3e4
commit
84742f229a
@ -673,10 +673,12 @@ See full description for these flags in `./vmalert -help`.
|
||||
## Monitoring
|
||||
|
||||
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
||||
The default list of alerting rules for these metric can be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker).
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
|
||||
metrics may be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview. Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
|
72
deployment/docker/alerts-vmalert.yml
Normal file
72
deployment/docker/alerts-vmalert.yml
Normal file
@ -0,0 +1,72 @@
|
||||
# File contains default list of alerts for мьфдуке service.
|
||||
# The alerts below are just recommendations and may require some updates
|
||||
# and threshold calibration according to every specific setup.
|
||||
groups:
|
||||
# Alerts group for vmalert assumes that Grafana dashboard
|
||||
# https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert is installed.
|
||||
# Pls update the `dashboard` annotation according to your setup.
|
||||
- name: vmalert
|
||||
interval: 5s
|
||||
rules:
|
||||
- alert: ConfigurationReloadFailure
|
||||
expr: vmalert_config_last_reload_successful != 1
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "Configuration reload failed for vmalert instance {{ $labels.instance }}"
|
||||
description: "Configuration hot-reload failed for vmalert on instance {{ $labels.instance }}.
|
||||
Check vmalert's logs for detailed error message."
|
||||
|
||||
- alert: AlertingRulesError
|
||||
expr: sum(vmalert_alerting_rules_error) by(job, instance, group) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
|
||||
summary: "Alerting rules are failing for vmalert instance {{ $labels.instance }}"
|
||||
description: "Alerting rules execution is failing for group \"{{ $labels.group }}\".
|
||||
Check vmalert's logs for detailed error message."
|
||||
|
||||
- alert: RecordingRulesError
|
||||
expr: sum(vmalert_recording_rules_error) by(job, instance, group) > 0
|
||||
for: 5m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
|
||||
summary: "Recording rules are failing for vmalert instance {{ $labels.instance }}"
|
||||
description: "Recording rules execution is failing for group \"{{ $labels.group }}\".
|
||||
Check vmalert's logs for detailed error message."
|
||||
|
||||
- alert: RecordingRulesNoData
|
||||
expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording) < 1
|
||||
for: 30m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=33&var-group={{ $labels.group }}"
|
||||
summary: "Recording rule {{ $labels.recording }} ({ $labels.group }}) produces no data"
|
||||
description: "Recording rule \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\"
|
||||
produces 0 samples over the last 30min. It might be caused by a misconfiguration
|
||||
or incorrect query expression."
|
||||
|
||||
- alert: RemoteWriteErrors
|
||||
expr: sum(increase(vmalert_remotewrite_errors_total[5m])) by(job, instance) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "vmalert instance {{ $labels.instance }} is failing to push metrics to remote write URL"
|
||||
description: "vmalert instance {{ $labels.instance }} is failing to push metrics generated via alerting
|
||||
or recording rules to the configured remote write URL. Check vmalert's logs for detailed error message."
|
||||
|
||||
- alert: AlertmanagerErrors
|
||||
expr: sum(increase(vmalert_alerts_send_errors_total[5m])) by(job, instance, addr) > 0
|
||||
for: 15m
|
||||
labels:
|
||||
severity: warning
|
||||
annotations:
|
||||
summary: "vmalert instance {{ $labels.instance }} is failing to send notifications to Alertmanager"
|
||||
description: "vmalert instance {{ $labels.instance }} is failing to send alert notifications to \"{{ $labels.addr }}\".
|
||||
Check vmalert's logs for detailed error message."
|
@ -91,6 +91,7 @@ services:
|
||||
- ./alerts-cluster.yml:/etc/alerts/alerts.yml
|
||||
- ./alerts-health.yml:/etc/alerts/alerts-health.yml
|
||||
- ./alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
|
||||
- ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
|
||||
command:
|
||||
- '--datasource.url=http://vmselect:8481/select/0/prometheus'
|
||||
- '--remoteRead.url=http://vmselect:8481/select/0/prometheus'
|
||||
|
@ -66,6 +66,7 @@ services:
|
||||
- ./alerts.yml:/etc/alerts/alerts.yml
|
||||
- ./alerts-health.yml:/etc/alerts/alerts-health.yml
|
||||
- ./alerts-vmagent.yml:/etc/alerts/alerts-vmagent.yml
|
||||
- ./alerts-vmalert.yml:/etc/alerts/alerts-vmalert.yml
|
||||
command:
|
||||
- "--datasource.url=http://victoriametrics:8428/"
|
||||
- "--remoteRead.url=http://victoriametrics:8428/"
|
||||
|
@ -21,6 +21,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add the ability to upload/paste JSON to investigate the trace. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3308) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3310).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): reduce JS bundle size from 200Kb to 100Kb. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3298).
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add the ability to hide results of a particular query by clicking the `eye` icon. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3359).
|
||||
* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add default alert list for vmalert's metrics. See [alerts-vmalert.yml](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmalert.yml).
|
||||
|
||||
* BUGFIX: [vmui](https://docs.victoriametrics.com/#vmui): properly display the requested graph on the requested time range when navigating from Prometheus URL in Grafana.
|
||||
* BUGFIX: reduce CPU usage spikes and memory usage spikes under high data ingestion rate introduced in [v1.83.0](https://docs.victoriametrics.com/CHANGELOG.html#v1830). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3343).
|
||||
|
@ -677,10 +677,12 @@ See full description for these flags in `./vmalert -help`.
|
||||
## Monitoring
|
||||
|
||||
`vmalert` exports various metrics in Prometheus exposition format at `http://vmalert-host:8880/metrics` page.
|
||||
The default list of alerting rules for these metric can be found [here](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker).
|
||||
We recommend setting up regular scraping of this page either through `vmagent` or by Prometheus so that the exported
|
||||
metrics may be analyzed later.
|
||||
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview. Graphs on this dashboard contain useful hints - hover the `i` icon at the top left corner of each graph in order to read it.
|
||||
Use the official [Grafana dashboard](https://grafana.com/grafana/dashboards/14950) for `vmalert` overview.
|
||||
Graphs on this dashboard contain useful hints - hover the `i` icon in the top left corner of each graph in order to read it.
|
||||
If you have suggestions for improvements or have found a bug - please open an issue on github or add
|
||||
a review to the dashboard.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user