# File contains default list of alerts for vmalert service.
# The alerts below are just recommendations and may require some updates
# and threshold calibration according to every specific setup.
groups:
  # Alerts group for vmalert assumes that Grafana dashboard
  # https://grafana.com/grafana/dashboards/14950-victoriametrics-vmalert/ is installed.
  # Please update the `dashboard` annotation according to your setup.
  - name: vmalert
    # Evaluation interval applied to every rule in this group.
    interval: 30s
    rules:
      # Fires as soon as the last-reload-successful gauge is anything but 1
      # (no `for` delay is configured for this rule).
      - alert: ConfigurationReloadFailure
        expr: vmalert_config_last_reload_successful != 1
        labels:
          severity: warning
        annotations:
          summary: "Configuration reload failed for vmalert instance {{ $labels.instance }}"
          description: "Configuration hot-reload failed for vmalert on instance {{ $labels.instance }}.
            Check vmalert's logs for detailed error message."

      # Fires when a group's alerting-rule error metric stays above 0 for 5m,
      # aggregated per job/instance/group.
      - alert: AlertingRulesError
        expr: sum(vmalert_alerting_rules_error) by(job, instance, group) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=13&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
          summary: "Alerting rules are failing for vmalert instance {{ $labels.instance }}"
          description: "Alerting rules execution is failing for group \"{{ $labels.group }}\".
            Check vmalert's logs for detailed error message."

      # Same check as AlertingRulesError, but for recording rules.
      - alert: RecordingRulesError
        expr: sum(vmalert_recording_rules_error) by(job, instance, group) > 0
        for: 5m
        labels:
          severity: warning
        annotations:
          dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=30&var-instance={{ $labels.instance }}&var-group={{ $labels.group }}"
          summary: "Recording rules are failing for vmalert instance {{ $labels.instance }}"
          description: "Recording rules execution is failing for group \"{{ $labels.group }}\".
            Check vmalert's logs for detailed error message."

      # Informational: a recording rule whose last evaluation produced fewer
      # than 1 sample for 30m, aggregated per job/group/recording.
      - alert: RecordingRulesNoData
        expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording) < 1
        for: 30m
        labels:
          severity: info
        annotations:
          dashboard: "http://localhost:3000/d/LzldHAVnz?viewPanel=33&var-group={{ $labels.group }}"
          # NOTE: both template actions need the full `{{ ... }}` delimiters;
          # a single `{` is rendered as literal text.
          summary: "Recording rule {{ $labels.recording }} ({{ $labels.group }}) produces no data"
          description: "Recording rule \"{{ $labels.recording }}\" from group \"{{ $labels.group }}\"
            produces 0 samples over the last 30min. It might be caused by a misconfiguration
            or incorrect query expression."

      # Fires when the remote-write error counter keeps increasing
      # (5m windows) for 15m, per job/instance.
      - alert: RemoteWriteErrors
        expr: sum(increase(vmalert_remotewrite_errors_total[5m])) by(job, instance) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "vmalert instance {{ $labels.instance }} is failing to push metrics to remote write URL"
          description: "vmalert instance {{ $labels.instance }} is failing to push metrics generated via alerting
            or recording rules to the configured remote write URL. Check vmalert's logs for detailed error message."

      # Fires when the alert-send error counter keeps increasing
      # (5m windows) for 15m, per job/instance/addr.
      - alert: AlertmanagerErrors
        expr: sum(increase(vmalert_alerts_send_errors_total[5m])) by(job, instance, addr) > 0
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "vmalert instance {{ $labels.instance }} is failing to send notifications to Alertmanager"
          description: "vmalert instance {{ $labels.instance }} is failing to send alert notifications to \"{{ $labels.addr }}\".
            Check vmalert's logs for detailed error message."