VictoriaMetrics/deployment/docker/alerts-vmalert.yml
Roman Khavronenko 67c60dbf86
vmalert: bump alerting rules evaluation interval to reasonable 30s (#3374)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-11-21 16:48:09 +02:00

73 lines
3.7 KiB
YAML

# File contains default list of alerts for мьфдуке service.
# The alerts below are just recommendations and may require some updates
# and threshold calibration according to every specific setup.
groups:
# Alerts group for vmalert assumes that Grafana dashboard
# https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert is installed.
# Pls update the `dashboard` annotation according to your setup.
- name: vmalert
interval: 30s
rules:
- alert: ConfigurationReloadFailure
expr: vmalert_config_last_reload_successful != 1
labels:
severity: warning
annotations:
summary: "Configuration reload failed for vmalert instance {{ $labels.instance }}"
description: "Configuration hot-reload failed for vmalert on instance {{ $labels.instance }}.
Check vmalert's logs for detailed error message."
- alert: AlertingRulesError
expr: sum(vmalert_alerting_rules_error) by(job, instance, group) > 0
for: 5m
labels:
severity: warning
annotations:
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
summary: "Alerting rules are failing for vmalert instance {{ $labels.instance }}"
description: "Alerting rules execution is failing for group \"{{ $labels.group }}\".
Check vmalert's logs for detailed error message."
- alert: RecordingRulesError
expr: sum(vmalert_recording_rules_error) by(job, instance, group) > 0
for: 5m
labels:
severity: warning
annotations:
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
summary: "Recording rules are failing for vmalert instance {{ $labels.instance }}"
description: "Recording rules execution is failing for group \"{{ $labels.group }}\".
Check vmalert's logs for detailed error message."
- alert: RecordingRulesNoData
expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording) < 1
for: 30m
labels:
severity: warning
annotations:
dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=33&var-group={{ $labels.group }}"
summary: "Recording rule {{ $labels.recording }} ({ $labels.group }}) produces no data"
description: "Recording rule \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\"
produces 0 samples over the last 30min. It might be caused by a misconfiguration
or incorrect query expression."
- alert: RemoteWriteErrors
expr: sum(increase(vmalert_remotewrite_errors_total[5m])) by(job, instance) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "vmalert instance {{ $labels.instance }} is failing to push metrics to remote write URL"
description: "vmalert instance {{ $labels.instance }} is failing to push metrics generated via alerting
or recording rules to the configured remote write URL. Check vmalert's logs for detailed error message."
- alert: AlertmanagerErrors
expr: sum(increase(vmalert_alerts_send_errors_total[5m])) by(job, instance, addr) > 0
for: 15m
labels:
severity: warning
annotations:
summary: "vmalert instance {{ $labels.instance }} is failing to send notifications to Alertmanager"
description: "vmalert instance {{ $labels.instance }} is failing to send alert notifications to \"{{ $labels.addr }}\".
Check vmalert's logs for detailed error message."