From b5254199c61002908fd8ebacdcc86c27ba9c01cf Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Thu, 2 Nov 2023 16:01:31 +0100 Subject: [PATCH] app/vmalert: add label `file` pointing to the group's filename to metrics (#5281) The filename should help identify alerting rules belonging to specific groups with identical names but different filenames. https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5267 Signed-off-by: hagen1778 --- app/vmalert/rule/alerting.go | 2 +- app/vmalert/rule/recording.go | 2 +- deployment/docker/alerts-vmalert.yml | 8 ++++---- docs/CHANGELOG.md | 1 + 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/app/vmalert/rule/alerting.go b/app/vmalert/rule/alerting.go index 614755773..ae10f4fe3 100644 --- a/app/vmalert/rule/alerting.go +++ b/app/vmalert/rule/alerting.go @@ -91,7 +91,7 @@ func NewAlertingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rule entries: make([]StateEntry, entrySize), } - labels := fmt.Sprintf(`alertname=%q, group=%q, id="%d"`, ar.Name, group.Name, ar.ID()) + labels := fmt.Sprintf(`alertname=%q, group=%q, file=%q, id="%d"`, ar.Name, group.Name, group.File, ar.ID()) ar.metrics.pending = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_alerts_pending{%s}`, labels), func() float64 { ar.alertsMu.RLock() diff --git a/app/vmalert/rule/recording.go b/app/vmalert/rule/recording.go index fc25c903b..e74d1b830 100644 --- a/app/vmalert/rule/recording.go +++ b/app/vmalert/rule/recording.go @@ -78,7 +78,7 @@ func NewRecordingRule(qb datasource.QuerierBuilder, group *Group, cfg config.Rul entries: make([]StateEntry, entrySize), } - labels := fmt.Sprintf(`recording=%q, group=%q, id="%d"`, rr.Name, group.Name, rr.ID()) + labels := fmt.Sprintf(`recording=%q, group=%q, file=%q, id="%d"`, rr.Name, group.Name, group.File, rr.ID()) rr.metrics.errors = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_recording_rules_error{%s}`, labels), func() float64 { e := rr.state.getLast() diff --git 
a/deployment/docker/alerts-vmalert.yml b/deployment/docker/alerts-vmalert.yml index 267ada71c..ef6a5a7d3 100644 --- a/deployment/docker/alerts-vmalert.yml +++ b/deployment/docker/alerts-vmalert.yml @@ -18,7 +18,7 @@ groups: Check vmalert's logs for detailed error message." - alert: AlertingRulesError - expr: sum(vmalert_alerting_rules_error) by(job, instance, group) > 0 + expr: sum(vmalert_alerting_rules_error) by(job, instance, group, file) > 0 for: 5m labels: severity: warning @@ -29,7 +29,7 @@ groups: Check vmalert's logs for detailed error message." - alert: RecordingRulesError - expr: sum(vmalert_recording_rules_error) by(job, instance, group) > 0 + expr: sum(vmalert_recording_rules_error) by(job, instance, group, file) > 0 for: 5m labels: severity: warning @@ -40,7 +40,7 @@ groups: Check vmalert's logs for detailed error message." - alert: RecordingRulesNoData - expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording) < 1 + expr: sum(vmalert_recording_rules_last_evaluation_samples) by(job, group, recording, file) < 1 for: 30m labels: severity: info @@ -52,7 +52,7 @@ groups: or incorrect query expression." - alert: TooManyMissedIterations - expr: sum(increase(vmalert_iteration_missed_total[5m])) by(job, instance, group) > 0 + expr: sum(increase(vmalert_iteration_missed_total[5m])) by(job, instance, group, file) > 0 for: 15m labels: severity: warning diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index a52b290b3..e70544611 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -51,6 +51,7 @@ The sandbox cluster installation is running under the constant load generated by * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add `-rule.evalDelay` flag and `eval_delay` attribute for [Groups](https://docs.victoriametrics.com/vmalert.html#groups). 
The new flag and param can be used to adjust the `time` parameter for rule evaluation requests to match [intentional query delay](https://docs.victoriametrics.com/keyConcepts.html#query-latency) from the datasource. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): allow specifying full url in notifier static_configs target address, like `http://alertmanager:9093/test/api/v2/alerts`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5184). * FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): reduce the number of queries for restoring alerts state on start-up. The change should speed up the restore process and reduce pressure on `remoteRead.url`. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5265). +* FEATURE: [vmalert](https://docs.victoriametrics.com/vmalert.html): add label `file` pointing to the group's filename to metrics `vmalert_recording_.*` and `vmalert_alerts_.*`. The filename should help identify alerting rules belonging to specific groups with identical names but different filenames. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5267). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): support data ingestion from [NewRelic infrastructure agent](https://docs.newrelic.com/docs/infrastructure/install-infrastructure-agent). See [these docs](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#how-to-send-data-from-newrelic-agent), [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3520) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/4712). 
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.labels` command-line flag, which can be used for specifying a list of labels for sharding outgoing samples among the configured `-remoteWrite.url` destinations if `-remoteWrite.shardByURL` command-line flag is set. See [these docs](https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4942) for details. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): do not exit on startup when [scrape_configs](https://docs.victoriametrics.com/sd_configs.html#scrape_configs) refer to non-existing or invalid files with auth configs, since these files may appear / updated later. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4959) and [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5153).