2023-10-13 13:54:33 +02:00
|
|
|
package rule
|
2020-04-27 23:19:27 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2020-11-09 23:27:32 +01:00
|
|
|
"errors"
|
2023-12-06 19:39:35 +01:00
|
|
|
"fmt"
|
2020-12-19 13:10:59 +01:00
|
|
|
"reflect"
|
2022-03-16 16:26:33 +01:00
|
|
|
"sort"
|
2020-11-09 23:27:32 +01:00
|
|
|
"strings"
|
2023-02-04 04:46:13 +01:00
|
|
|
"sync"
|
2020-04-27 23:19:27 +02:00
|
|
|
"testing"
|
|
|
|
"time"
|
|
|
|
|
2023-02-04 04:46:13 +01:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
|
2020-04-27 23:19:27 +02:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
2023-12-06 19:39:35 +01:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
|
2020-04-27 23:19:27 +02:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
2023-02-04 04:46:13 +01:00
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
|
2020-04-27 23:19:27 +02:00
|
|
|
)
|
|
|
|
|
2020-06-01 12:46:37 +02:00
|
|
|
func TestAlertingRule_ToTimeSeries(t *testing.T) {
|
2020-04-27 23:19:27 +02:00
|
|
|
timestamp := time.Now()
|
|
|
|
testCases := []struct {
|
2020-06-01 12:46:37 +02:00
|
|
|
rule *AlertingRule
|
2020-04-27 23:19:27 +02:00
|
|
|
alert *notifier.Alert
|
|
|
|
expTS []prompbmarshal.TimeSeries
|
|
|
|
}{
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("instant", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
¬ifier.Alert{State: notifier.StateFiring},
|
|
|
|
[]prompbmarshal.TimeSeries{
|
2021-06-09 11:20:38 +02:00
|
|
|
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
2020-04-27 23:19:27 +02:00
|
|
|
"__name__": alertMetricName,
|
|
|
|
alertStateLabel: notifier.StateFiring.String(),
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("instant extra labels", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
|
|
|
"job": "foo",
|
|
|
|
"instance": "bar",
|
|
|
|
}},
|
|
|
|
[]prompbmarshal.TimeSeries{
|
2021-06-09 11:20:38 +02:00
|
|
|
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
2020-04-27 23:19:27 +02:00
|
|
|
"__name__": alertMetricName,
|
|
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
|
|
"job": "foo",
|
|
|
|
"instance": "bar",
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("instant labels override", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
|
|
|
alertStateLabel: "foo",
|
|
|
|
"__name__": "bar",
|
|
|
|
}},
|
|
|
|
[]prompbmarshal.TimeSeries{
|
2021-06-09 11:20:38 +02:00
|
|
|
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
2020-04-27 23:19:27 +02:00
|
|
|
"__name__": alertMetricName,
|
|
|
|
alertStateLabel: notifier.StateFiring.String(),
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("for", time.Second),
|
2022-03-29 15:09:07 +02:00
|
|
|
¬ifier.Alert{State: notifier.StateFiring, ActiveAt: timestamp.Add(time.Second)},
|
2020-04-27 23:19:27 +02:00
|
|
|
[]prompbmarshal.TimeSeries{
|
2021-06-09 11:20:38 +02:00
|
|
|
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
2020-04-27 23:19:27 +02:00
|
|
|
"__name__": alertMetricName,
|
|
|
|
alertStateLabel: notifier.StateFiring.String(),
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
|
|
|
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
|
|
|
[]int64{timestamp.UnixNano()},
|
|
|
|
map[string]string{
|
2021-10-22 11:30:38 +02:00
|
|
|
"__name__": alertForStateMetricName,
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("for pending", 10*time.Second),
|
2022-03-29 15:09:07 +02:00
|
|
|
¬ifier.Alert{State: notifier.StatePending, ActiveAt: timestamp.Add(time.Second)},
|
2020-04-27 23:19:27 +02:00
|
|
|
[]prompbmarshal.TimeSeries{
|
2021-06-09 11:20:38 +02:00
|
|
|
newTimeSeries([]float64{1}, []int64{timestamp.UnixNano()}, map[string]string{
|
2020-04-27 23:19:27 +02:00
|
|
|
"__name__": alertMetricName,
|
|
|
|
alertStateLabel: notifier.StatePending.String(),
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
|
|
|
newTimeSeries([]float64{float64(timestamp.Add(time.Second).Unix())},
|
|
|
|
[]int64{timestamp.UnixNano()},
|
|
|
|
map[string]string{
|
2021-10-22 11:30:38 +02:00
|
|
|
"__name__": alertForStateMetricName,
|
2021-06-09 11:20:38 +02:00
|
|
|
}),
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
2020-06-01 12:46:37 +02:00
|
|
|
tc.rule.alerts[tc.alert.ID] = tc.alert
|
2021-06-09 11:20:38 +02:00
|
|
|
tss := tc.rule.toTimeSeries(timestamp.Unix())
|
2020-06-01 12:46:37 +02:00
|
|
|
if err := compareTimeSeries(t, tc.expTS, tss); err != nil {
|
|
|
|
t.Fatalf("timeseries missmatch: %s", err)
|
2020-04-27 23:19:27 +02:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-01 12:46:37 +02:00
|
|
|
func TestAlertingRule_Exec(t *testing.T) {
|
2020-06-09 14:21:20 +02:00
|
|
|
const defaultStep = 5 * time.Millisecond
|
2021-10-22 11:30:38 +02:00
|
|
|
type testAlert struct {
|
|
|
|
labels []string
|
|
|
|
alert *notifier.Alert
|
|
|
|
}
|
2020-04-27 23:19:27 +02:00
|
|
|
testCases := []struct {
|
2020-06-01 12:46:37 +02:00
|
|
|
rule *AlertingRule
|
2020-04-27 23:19:27 +02:00
|
|
|
steps [][]datasource.Metric
|
2023-07-27 15:13:13 +02:00
|
|
|
expAlerts map[int][]testAlert
|
2020-04-27 23:19:27 +02:00
|
|
|
}{
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("empty", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{},
|
2021-10-22 11:30:38 +02:00
|
|
|
nil,
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2020-05-04 23:51:22 +02:00
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("empty labels", 0),
|
2020-05-04 23:51:22 +02:00
|
|
|
[][]datasource.Metric{
|
2021-06-09 11:20:38 +02:00
|
|
|
{datasource.Metric{Values: []float64{1}, Timestamps: []int64{1}}},
|
2020-05-04 23:51:22 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2022-03-29 15:09:07 +02:00
|
|
|
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
|
|
|
{},
|
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
2: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
3: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
4: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
2022-03-29 15:09:07 +02:00
|
|
|
},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
{
|
2023-07-27 15:13:13 +02:00
|
|
|
newTestAlertingRule("single-firing=>inactive=>firing=>inactive=>inactive=>firing", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
|
|
|
{},
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
2: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
3: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
4: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
5: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("multiple-firing", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
|
|
|
{
|
2020-05-04 23:51:22 +02:00
|
|
|
metricWithLabels(t, "name", "foo"),
|
|
|
|
metricWithLabels(t, "name", "foo1"),
|
|
|
|
metricWithLabels(t, "name", "foo2"),
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {
|
|
|
|
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
|
|
|
{labels: []string{"name", "foo1"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
|
|
|
{labels: []string{"name", "foo2"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
|
|
|
},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("multiple-steps-firing", 0),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo1")},
|
|
|
|
{metricWithLabels(t, "name", "foo2")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
// 1: fire first alert
|
|
|
|
// 2: fire second alert, set first inactive
|
2022-03-29 15:09:07 +02:00
|
|
|
// 3: fire third alert, set second inactive
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {
|
|
|
|
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
|
|
|
},
|
|
|
|
1: {
|
|
|
|
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
|
|
|
{labels: []string{"name", "foo1"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
|
|
|
},
|
|
|
|
2: {
|
|
|
|
{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
|
|
|
{labels: []string{"name", "foo1"}, alert: ¬ifier.Alert{State: notifier.StateInactive}},
|
|
|
|
{labels: []string{"name", "foo2"}, alert: ¬ifier.Alert{State: notifier.StateFiring}},
|
|
|
|
},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("for-pending", time.Minute),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-09 14:21:20 +02:00
|
|
|
newTestAlertingRule("for-fired", defaultStep),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2020-06-01 12:46:37 +02:00
|
|
|
newTestAlertingRule("for-pending=>empty", time.Second),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2023-07-27 15:13:13 +02:00
|
|
|
// empty step to delete pending alerts
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
2: {},
|
|
|
|
},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
{
|
2023-07-27 15:13:13 +02:00
|
|
|
newTestAlertingRule("for-pending=>firing=>inactive=>pending=>firing", defaultStep),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2023-07-27 15:13:13 +02:00
|
|
|
// empty step to set alert inactive
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
2023-07-27 15:13:13 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
2: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
3: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
4: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2023-07-27 15:13:13 +02:00
|
|
|
newTestAlertingRuleWithKeepFiring("for-pending=>firing=>keepfiring=>firing", defaultStep, defaultStep),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-06-09 14:21:20 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2023-07-27 15:13:13 +02:00
|
|
|
// empty step to keep firing
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
2: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
3: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
2023-07-27 15:13:13 +02:00
|
|
|
newTestAlertingRuleWithKeepFiring("for-pending=>firing=>keepfiring=>keepfiring=>inactive=>pending=>firing", defaultStep, 2*defaultStep),
|
2020-04-27 23:19:27 +02:00
|
|
|
[][]datasource.Metric{
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2023-07-27 15:13:13 +02:00
|
|
|
// empty step to keep firing
|
|
|
|
{},
|
|
|
|
// another empty step to keep firing
|
|
|
|
{},
|
|
|
|
// empty step to set alert inactive
|
2020-04-27 23:19:27 +02:00
|
|
|
{},
|
2020-05-04 23:51:22 +02:00
|
|
|
{metricWithLabels(t, "name", "foo")},
|
|
|
|
{metricWithLabels(t, "name", "foo")},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
2023-07-27 15:13:13 +02:00
|
|
|
map[int][]testAlert{
|
|
|
|
0: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
1: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
2: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
3: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
|
|
|
4: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateInactive}}},
|
|
|
|
5: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StatePending}}},
|
|
|
|
6: {{labels: []string{"name", "foo"}, alert: ¬ifier.Alert{State: notifier.StateFiring}}},
|
2020-04-27 23:19:27 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
}
|
2020-05-04 23:51:22 +02:00
|
|
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
2020-04-27 23:19:27 +02:00
|
|
|
for _, tc := range testCases {
|
|
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
2023-10-13 13:54:33 +02:00
|
|
|
fq := &datasource.FakeQuerier{}
|
2021-04-28 22:41:15 +02:00
|
|
|
tc.rule.q = fq
|
2020-06-01 12:46:37 +02:00
|
|
|
tc.rule.GroupID = fakeGroup.ID()
|
2023-07-27 15:13:13 +02:00
|
|
|
for i, step := range tc.steps {
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.Reset()
|
|
|
|
fq.Add(step...)
|
|
|
|
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
2020-04-27 23:19:27 +02:00
|
|
|
t.Fatalf("unexpected err: %s", err)
|
|
|
|
}
|
|
|
|
// artificial delay between applying steps
|
2020-06-09 14:21:20 +02:00
|
|
|
time.Sleep(defaultStep)
|
2023-07-27 15:13:13 +02:00
|
|
|
if _, ok := tc.expAlerts[i]; !ok {
|
|
|
|
continue
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
}
|
2023-07-27 15:13:13 +02:00
|
|
|
if len(tc.rule.alerts) != len(tc.expAlerts[i]) {
|
|
|
|
t.Fatalf("evalIndex %d: expected %d alerts; got %d", i, len(tc.expAlerts[i]), len(tc.rule.alerts))
|
2020-04-27 23:19:27 +02:00
|
|
|
}
|
2023-07-27 15:13:13 +02:00
|
|
|
expAlerts := make(map[uint64]*notifier.Alert)
|
|
|
|
for _, ta := range tc.expAlerts[i] {
|
|
|
|
labels := make(map[string]string)
|
|
|
|
for i := 0; i < len(ta.labels); i += 2 {
|
|
|
|
k, v := ta.labels[i], ta.labels[i+1]
|
|
|
|
labels[k] = v
|
|
|
|
}
|
|
|
|
labels[alertNameLabel] = tc.rule.Name
|
|
|
|
h := hash(labels)
|
|
|
|
expAlerts[h] = ta.alert
|
|
|
|
}
|
|
|
|
for key, exp := range expAlerts {
|
|
|
|
got, ok := tc.rule.alerts[key]
|
|
|
|
if !ok {
|
|
|
|
t.Fatalf("evalIndex %d: expected to have key %d", i, key)
|
|
|
|
}
|
|
|
|
if got.State != exp.State {
|
|
|
|
t.Fatalf("evalIndex %d: expected state %d; got %d", i, exp.State, got.State)
|
|
|
|
}
|
2020-04-27 23:19:27 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-09 11:20:38 +02:00
|
|
|
func TestAlertingRule_ExecRange(t *testing.T) {
|
2023-10-30 13:54:18 +01:00
|
|
|
fakeGroup := Group{Name: "TestRule_ExecRange"}
|
2021-06-09 11:20:38 +02:00
|
|
|
testCases := []struct {
|
2023-10-30 13:54:18 +01:00
|
|
|
rule *AlertingRule
|
|
|
|
data []datasource.Metric
|
|
|
|
expAlerts []*notifier.Alert
|
|
|
|
expHoldAlertStateAlerts map[uint64]*notifier.Alert
|
2021-06-09 11:20:38 +02:00
|
|
|
}{
|
|
|
|
{
|
|
|
|
newTestAlertingRule("empty", 0),
|
|
|
|
[]datasource.Metric{},
|
|
|
|
nil,
|
2023-10-30 13:54:18 +01:00
|
|
|
nil,
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("empty labels", 0),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1}, Timestamps: []int64{1}},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
|
|
|
{State: notifier.StateFiring},
|
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
nil,
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("single-firing", 0),
|
|
|
|
[]datasource.Metric{
|
|
|
|
metricWithLabels(t, "name", "foo"),
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
|
|
|
{
|
|
|
|
Labels: map[string]string{"name": "foo"},
|
|
|
|
State: notifier.StateFiring,
|
|
|
|
},
|
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
nil,
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("single-firing-on-range", 0),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1, 1}, Timestamps: []int64{1e3, 2e3, 3e3}},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
|
|
|
{State: notifier.StateFiring},
|
|
|
|
{State: notifier.StateFiring},
|
|
|
|
{State: notifier.StateFiring},
|
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
nil,
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("for-pending", time.Second),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
2022-03-29 15:09:07 +02:00
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(3, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-pending"}): {
|
|
|
|
GroupID: fakeGroup.ID(),
|
|
|
|
Name: "for-pending",
|
|
|
|
Labels: map[string]string{"alertname": "for-pending"},
|
|
|
|
Annotations: map[string]string{},
|
|
|
|
State: notifier.StatePending,
|
|
|
|
ActiveAt: time.Unix(5, 0),
|
|
|
|
Value: 1,
|
|
|
|
For: time.Second,
|
|
|
|
}},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("for-firing", 3*time.Second),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
2022-03-29 15:09:07 +02:00
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-firing"}): {
|
|
|
|
GroupID: fakeGroup.ID(),
|
|
|
|
Name: "for-firing",
|
|
|
|
Labels: map[string]string{"alertname": "for-firing"},
|
|
|
|
Annotations: map[string]string{},
|
|
|
|
State: notifier.StateFiring,
|
|
|
|
ActiveAt: time.Unix(1, 0),
|
|
|
|
Start: time.Unix(5, 0),
|
|
|
|
Value: 1,
|
|
|
|
For: 3 * time.Second,
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("for-hold-pending", time.Second),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 2, 5}},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
|
|
|
},
|
|
|
|
map[uint64]*notifier.Alert{hash(map[string]string{"alertname": "for-hold-pending"}): {
|
|
|
|
GroupID: fakeGroup.ID(),
|
|
|
|
Name: "for-hold-pending",
|
|
|
|
Labels: map[string]string{"alertname": "for-hold-pending"},
|
|
|
|
Annotations: map[string]string{},
|
|
|
|
State: notifier.StatePending,
|
|
|
|
ActiveAt: time.Unix(5, 0),
|
|
|
|
Value: 1,
|
|
|
|
For: time.Second,
|
|
|
|
}},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestAlertingRule("for=>pending=>firing=>pending=>firing=>pending", time.Second),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1, 1, 1, 1}, Timestamps: []int64{1, 2, 5, 6, 20}},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
2022-03-29 15:09:07 +02:00
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(5, 0)},
|
|
|
|
{State: notifier.StateFiring, ActiveAt: time.Unix(5, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(20, 0)},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
nil,
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
2023-10-30 13:54:18 +01:00
|
|
|
newTestAlertingRule("multi-series", 3*time.Second),
|
2021-06-09 11:20:38 +02:00
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1, 1}, Timestamps: []int64{1, 3, 5}},
|
2023-09-06 16:29:59 +02:00
|
|
|
{
|
|
|
|
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
2021-06-09 11:20:38 +02:00
|
|
|
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
2022-03-29 15:09:07 +02:00
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StatePending, ActiveAt: time.Unix(1, 0)},
|
|
|
|
{State: notifier.StateFiring, ActiveAt: time.Unix(1, 0)},
|
2023-09-06 16:29:59 +02:00
|
|
|
{
|
|
|
|
State: notifier.StatePending, ActiveAt: time.Unix(1, 0),
|
2021-06-09 11:20:38 +02:00
|
|
|
Labels: map[string]string{
|
|
|
|
"foo": "bar",
|
2023-09-06 16:29:59 +02:00
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
State: notifier.StatePending, ActiveAt: time.Unix(5, 0),
|
2021-06-09 11:20:38 +02:00
|
|
|
Labels: map[string]string{
|
|
|
|
"foo": "bar",
|
2023-09-06 16:29:59 +02:00
|
|
|
},
|
|
|
|
},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{"alertname": "multi-series"}): {
|
|
|
|
GroupID: fakeGroup.ID(),
|
|
|
|
Name: "multi-series",
|
|
|
|
Labels: map[string]string{"alertname": "multi-series"},
|
|
|
|
Annotations: map[string]string{},
|
|
|
|
State: notifier.StateFiring,
|
|
|
|
ActiveAt: time.Unix(1, 0),
|
|
|
|
Start: time.Unix(5, 0),
|
|
|
|
Value: 1,
|
|
|
|
For: 3 * time.Second,
|
|
|
|
},
|
|
|
|
hash(map[string]string{"alertname": "multi-series", "foo": "bar"}): {
|
|
|
|
GroupID: fakeGroup.ID(),
|
|
|
|
Name: "multi-series",
|
|
|
|
Labels: map[string]string{"alertname": "multi-series", "foo": "bar"},
|
|
|
|
Annotations: map[string]string{},
|
|
|
|
State: notifier.StatePending,
|
|
|
|
ActiveAt: time.Unix(5, 0),
|
|
|
|
Value: 1,
|
|
|
|
For: 3 * time.Second,
|
|
|
|
},
|
|
|
|
},
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
{
|
|
|
|
newTestRuleWithLabels("multi-series-firing", "source", "vm"),
|
|
|
|
[]datasource.Metric{
|
|
|
|
{Values: []float64{1, 1}, Timestamps: []int64{1, 100}},
|
2023-09-06 16:29:59 +02:00
|
|
|
{
|
|
|
|
Values: []float64{1, 1}, Timestamps: []int64{1, 5},
|
2021-06-09 11:20:38 +02:00
|
|
|
Labels: []datasource.Label{{Name: "foo", Value: "bar"}},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
[]*notifier.Alert{
|
|
|
|
{State: notifier.StateFiring, Labels: map[string]string{
|
|
|
|
"source": "vm",
|
|
|
|
}},
|
|
|
|
{State: notifier.StateFiring, Labels: map[string]string{
|
|
|
|
"source": "vm",
|
|
|
|
}},
|
|
|
|
//
|
|
|
|
{State: notifier.StateFiring, Labels: map[string]string{
|
|
|
|
"foo": "bar",
|
|
|
|
"source": "vm",
|
|
|
|
}},
|
|
|
|
{State: notifier.StateFiring, Labels: map[string]string{
|
|
|
|
"foo": "bar",
|
|
|
|
"source": "vm",
|
|
|
|
}},
|
|
|
|
},
|
2023-10-30 13:54:18 +01:00
|
|
|
nil,
|
2021-06-09 11:20:38 +02:00
|
|
|
},
|
|
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
2023-10-13 13:54:33 +02:00
|
|
|
fq := &datasource.FakeQuerier{}
|
2021-06-09 11:20:38 +02:00
|
|
|
tc.rule.q = fq
|
|
|
|
tc.rule.GroupID = fakeGroup.ID()
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.Add(tc.data...)
|
2023-10-30 13:54:18 +01:00
|
|
|
gotTS, err := tc.rule.execRange(context.TODO(), time.Unix(1, 0), time.Unix(5, 0))
|
2021-06-09 11:20:38 +02:00
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("unexpected err: %s", err)
|
|
|
|
}
|
|
|
|
var expTS []prompbmarshal.TimeSeries
|
|
|
|
var j int
|
|
|
|
for _, series := range tc.data {
|
|
|
|
for _, timestamp := range series.Timestamps {
|
2021-10-22 11:30:38 +02:00
|
|
|
a := tc.expAlerts[j]
|
|
|
|
if a.Labels == nil {
|
|
|
|
a.Labels = make(map[string]string)
|
|
|
|
}
|
|
|
|
a.Labels[alertNameLabel] = tc.rule.Name
|
2022-03-29 15:09:07 +02:00
|
|
|
expTS = append(expTS, tc.rule.alertToTimeSeries(a, timestamp)...)
|
2021-06-09 11:20:38 +02:00
|
|
|
j++
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(gotTS) != len(expTS) {
|
|
|
|
t.Fatalf("expected %d time series; got %d", len(expTS), len(gotTS))
|
|
|
|
}
|
|
|
|
for i := range expTS {
|
|
|
|
got, exp := gotTS[i], expTS[i]
|
|
|
|
if !reflect.DeepEqual(got, exp) {
|
|
|
|
t.Fatalf("%d: expected \n%v but got \n%v", i, exp, got)
|
|
|
|
}
|
|
|
|
}
|
2023-10-30 13:54:18 +01:00
|
|
|
if tc.expHoldAlertStateAlerts != nil {
|
|
|
|
if !reflect.DeepEqual(tc.expHoldAlertStateAlerts, tc.rule.alerts) {
|
|
|
|
t.Fatalf("expected hold alerts state: \n%v but got \n%v", tc.expHoldAlertStateAlerts, tc.rule.alerts)
|
|
|
|
}
|
|
|
|
}
|
2021-06-09 11:20:38 +02:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-02-04 04:46:13 +01:00
|
|
|
func TestGroup_Restore(t *testing.T) {
|
|
|
|
defaultTS := time.Now()
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr := &datasource.FakeQuerierWithRegistry{}
|
2023-02-04 04:46:13 +01:00
|
|
|
fn := func(rules []config.Rule, expAlerts map[uint64]*notifier.Alert) {
|
|
|
|
t.Helper()
|
2023-10-13 13:54:33 +02:00
|
|
|
defer fqr.Reset()
|
2023-02-04 04:46:13 +01:00
|
|
|
|
|
|
|
for _, r := range rules {
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(r.Expr, metricWithValueAndLabels(t, 0, "__name__", r.Alert))
|
2023-02-04 04:46:13 +01:00
|
|
|
}
|
|
|
|
|
2023-10-13 13:54:33 +02:00
|
|
|
fg := NewGroup(config.Group{Name: "TestRestore", Rules: rules}, fqr, time.Second, nil)
|
2023-02-04 04:46:13 +01:00
|
|
|
wg := sync.WaitGroup{}
|
|
|
|
wg.Add(1)
|
|
|
|
go func() {
|
2023-10-13 13:54:33 +02:00
|
|
|
nts := func() []notifier.Notifier { return []notifier.Notifier{¬ifier.FakeNotifier{}} }
|
|
|
|
fg.Start(context.Background(), nts, nil, fqr)
|
2023-02-04 04:46:13 +01:00
|
|
|
wg.Done()
|
|
|
|
}()
|
2023-10-13 13:54:33 +02:00
|
|
|
fg.Close()
|
2023-02-04 04:46:13 +01:00
|
|
|
wg.Wait()
|
|
|
|
|
|
|
|
gotAlerts := make(map[uint64]*notifier.Alert)
|
|
|
|
for _, rs := range fg.Rules {
|
|
|
|
alerts := rs.(*AlertingRule).alerts
|
|
|
|
for k, v := range alerts {
|
|
|
|
if !v.Restored {
|
|
|
|
// set not restored alerts to predictable timestamp
|
|
|
|
v.ActiveAt = defaultTS
|
|
|
|
}
|
|
|
|
gotAlerts[k] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(gotAlerts) != len(expAlerts) {
|
|
|
|
t.Fatalf("expected %d alerts; got %d", len(expAlerts), len(gotAlerts))
|
|
|
|
}
|
|
|
|
for key, exp := range expAlerts {
|
|
|
|
got, ok := gotAlerts[key]
|
|
|
|
if !ok {
|
|
|
|
t.Fatalf("expected to have key %d", key)
|
|
|
|
}
|
|
|
|
if got.State != notifier.StatePending {
|
|
|
|
t.Fatalf("expected state %d; got %d", notifier.StatePending, got.State)
|
|
|
|
}
|
|
|
|
if got.ActiveAt != exp.ActiveAt {
|
|
|
|
t.Fatalf("expected ActiveAt %v; got %v", exp.ActiveAt, got.ActiveAt)
|
|
|
|
}
|
2023-11-02 15:28:39 +01:00
|
|
|
if got.Name != exp.Name {
|
|
|
|
t.Fatalf("expected alertname %q; got %q", exp.Name, got.Name)
|
|
|
|
}
|
2023-02-04 04:46:13 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
stateMetric := func(name string, value time.Time, labels ...string) datasource.Metric {
|
|
|
|
labels = append(labels, "__name__", alertForStateMetricName)
|
|
|
|
labels = append(labels, alertNameLabel, name)
|
|
|
|
labels = append(labels, alertGroupNameLabel, "TestRestore")
|
|
|
|
return metricWithValueAndLabels(t, float64(value.Unix()), labels...)
|
|
|
|
}
|
|
|
|
|
|
|
|
// one active alert, no previous state
|
|
|
|
fn(
|
|
|
|
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-02-04 04:46:13 +01:00
|
|
|
ActiveAt: defaultTS,
|
2020-05-04 23:51:22 +02:00
|
|
|
},
|
2023-02-04 04:46:13 +01:00
|
|
|
})
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Reset()
|
2023-02-04 04:46:13 +01:00
|
|
|
|
|
|
|
// one active alert with state restore
|
|
|
|
ts := time.Now().Truncate(time.Hour)
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
2023-02-04 04:46:13 +01:00
|
|
|
stateMetric("foo", ts))
|
|
|
|
fn(
|
|
|
|
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-09-06 16:29:59 +02:00
|
|
|
ActiveAt: ts,
|
|
|
|
},
|
2023-02-04 04:46:13 +01:00
|
|
|
})
|
|
|
|
|
|
|
|
// two rules, two active alerts, one with state restored
|
|
|
|
ts = time.Now().Truncate(time.Hour)
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
2023-11-02 15:22:13 +01:00
|
|
|
stateMetric("bar", ts))
|
2023-02-04 04:46:13 +01:00
|
|
|
fn(
|
|
|
|
[]config.Rule{
|
|
|
|
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
|
|
|
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
|
|
|
},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-02-04 04:46:13 +01:00
|
|
|
ActiveAt: defaultTS,
|
|
|
|
},
|
|
|
|
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "bar",
|
2023-09-06 16:29:59 +02:00
|
|
|
ActiveAt: ts,
|
|
|
|
},
|
2023-02-04 04:46:13 +01:00
|
|
|
})
|
|
|
|
|
|
|
|
// two rules, two active alerts, two with state restored
|
|
|
|
ts = time.Now().Truncate(time.Hour)
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo"}[3600s])`,
|
2023-02-04 04:46:13 +01:00
|
|
|
stateMetric("foo", ts))
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="bar"}[3600s])`,
|
2023-02-04 04:46:13 +01:00
|
|
|
stateMetric("bar", ts))
|
|
|
|
fn(
|
|
|
|
[]config.Rule{
|
|
|
|
{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)},
|
|
|
|
{Alert: "bar", Expr: "bar", For: promutils.NewDuration(time.Second)},
|
|
|
|
},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-02-04 04:46:13 +01:00
|
|
|
ActiveAt: ts,
|
|
|
|
},
|
|
|
|
hash(map[string]string{alertNameLabel: "bar", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "bar",
|
2023-09-06 16:29:59 +02:00
|
|
|
ActiveAt: ts,
|
|
|
|
},
|
2023-02-04 04:46:13 +01:00
|
|
|
})
|
|
|
|
|
|
|
|
// one active alert but wrong state restore
|
|
|
|
ts = time.Now().Truncate(time.Hour)
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertname="bar",alertgroup="TestRestore"}[3600s])`,
|
2023-02-04 04:46:13 +01:00
|
|
|
stateMetric("wrong alert", ts))
|
|
|
|
fn(
|
|
|
|
[]config.Rule{{Alert: "foo", Expr: "foo", For: promutils.NewDuration(time.Second)}},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-02-04 04:46:13 +01:00
|
|
|
ActiveAt: defaultTS,
|
2020-05-04 23:51:22 +02:00
|
|
|
},
|
2023-02-04 04:46:13 +01:00
|
|
|
})
|
|
|
|
|
|
|
|
// one active alert with labels
|
|
|
|
ts = time.Now().Truncate(time.Hour)
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
2023-02-04 04:46:13 +01:00
|
|
|
stateMetric("foo", ts, "env", "dev"))
|
|
|
|
fn(
|
|
|
|
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-02-04 04:46:13 +01:00
|
|
|
ActiveAt: ts,
|
2020-05-04 23:51:22 +02:00
|
|
|
},
|
2023-02-04 04:46:13 +01:00
|
|
|
})
|
|
|
|
|
|
|
|
// one active alert with restore labels mismatch
|
|
|
|
ts = time.Now().Truncate(time.Hour)
|
2023-10-13 13:54:33 +02:00
|
|
|
fqr.Set(`last_over_time(ALERTS_FOR_STATE{alertgroup="TestRestore",alertname="foo",env="dev"}[3600s])`,
|
2023-02-04 04:46:13 +01:00
|
|
|
stateMetric("foo", ts, "env", "dev", "team", "foo"))
|
|
|
|
fn(
|
|
|
|
[]config.Rule{{Alert: "foo", Expr: "foo", Labels: map[string]string{"env": "dev"}, For: promutils.NewDuration(time.Second)}},
|
|
|
|
map[uint64]*notifier.Alert{
|
|
|
|
hash(map[string]string{alertNameLabel: "foo", alertGroupNameLabel: "TestRestore", "env": "dev"}): {
|
2023-11-02 15:28:39 +01:00
|
|
|
Name: "foo",
|
2023-02-04 04:46:13 +01:00
|
|
|
ActiveAt: defaultTS,
|
2020-05-04 23:51:22 +02:00
|
|
|
},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2020-11-09 23:27:32 +01:00
|
|
|
func TestAlertingRule_Exec_Negative(t *testing.T) {
|
2023-10-13 13:54:33 +02:00
|
|
|
fq := &datasource.FakeQuerier{}
|
2020-11-09 23:27:32 +01:00
|
|
|
ar := newTestAlertingRule("test", 0)
|
|
|
|
ar.Labels = map[string]string{"job": "test"}
|
2021-04-28 22:41:15 +02:00
|
|
|
ar.q = fq
|
2020-11-09 23:27:32 +01:00
|
|
|
|
|
|
|
// successful attempt
|
2023-12-22 16:07:47 +01:00
|
|
|
// label `job` will be overridden by rule extra label, the original value will be reserved by "exported_job"
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
2023-12-22 16:07:47 +01:00
|
|
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "baz"))
|
2023-10-13 13:54:33 +02:00
|
|
|
_, err := ar.exec(context.TODO(), time.Now(), 0)
|
2020-11-09 23:27:32 +01:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
|
2023-12-22 16:07:47 +01:00
|
|
|
// label `__name__` will be omitted and get duplicated results here
|
|
|
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo_1", "job", "bar"))
|
2023-10-13 13:54:33 +02:00
|
|
|
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
2020-11-09 23:27:32 +01:00
|
|
|
if !errors.Is(err, errDuplicate) {
|
|
|
|
t.Fatalf("expected to have %s error; got %s", errDuplicate, err)
|
|
|
|
}
|
|
|
|
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.Reset()
|
2020-11-09 23:27:32 +01:00
|
|
|
|
|
|
|
expErr := "connection reset by peer"
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.SetErr(errors.New(expErr))
|
|
|
|
_, err = ar.exec(context.TODO(), time.Now(), 0)
|
2020-11-09 23:27:32 +01:00
|
|
|
if err == nil {
|
|
|
|
t.Fatalf("expected to get err; got nil")
|
|
|
|
}
|
|
|
|
if !strings.Contains(err.Error(), expErr) {
|
|
|
|
t.Fatalf("expected to get err %q; got %q insterad", expErr, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-09 08:21:30 +02:00
|
|
|
func TestAlertingRuleLimit(t *testing.T) {
|
2023-10-13 13:54:33 +02:00
|
|
|
fq := &datasource.FakeQuerier{}
|
2022-06-09 08:21:30 +02:00
|
|
|
ar := newTestAlertingRule("test", 0)
|
|
|
|
ar.Labels = map[string]string{"job": "test"}
|
|
|
|
ar.q = fq
|
|
|
|
ar.For = time.Minute
|
|
|
|
testCases := []struct {
|
|
|
|
limit int
|
|
|
|
err string
|
|
|
|
tssNum int
|
|
|
|
}{
|
|
|
|
{
|
|
|
|
limit: 0,
|
|
|
|
tssNum: 4,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
limit: -1,
|
|
|
|
tssNum: 4,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
limit: 1,
|
|
|
|
err: "exec exceeded limit of 1 with 2 alerts",
|
|
|
|
tssNum: 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
limit: 4,
|
|
|
|
tssNum: 4,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
var (
|
|
|
|
err error
|
|
|
|
timestamp = time.Now()
|
|
|
|
)
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "job", "bar"))
|
|
|
|
fq.Add(metricWithValueAndLabels(t, 1, "__name__", "foo", "bar", "job"))
|
2022-06-09 08:21:30 +02:00
|
|
|
for _, testCase := range testCases {
|
2023-10-13 13:54:33 +02:00
|
|
|
_, err = ar.exec(context.TODO(), timestamp, testCase.limit)
|
2022-06-09 08:21:30 +02:00
|
|
|
if err != nil && !strings.EqualFold(err.Error(), testCase.err) {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
}
|
2023-10-13 13:54:33 +02:00
|
|
|
fq.Reset()
|
2022-06-09 08:21:30 +02:00
|
|
|
}
|
|
|
|
|
2020-12-19 13:10:59 +01:00
|
|
|
func TestAlertingRule_Template(t *testing.T) {
|
|
|
|
testCases := []struct {
|
|
|
|
rule *AlertingRule
|
|
|
|
metrics []datasource.Metric
|
|
|
|
expAlerts map[uint64]*notifier.Alert
|
|
|
|
}{
|
|
|
|
{
|
2022-09-29 18:22:50 +02:00
|
|
|
&AlertingRule{
|
|
|
|
Name: "common",
|
|
|
|
Labels: map[string]string{
|
|
|
|
"region": "east",
|
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
|
|
|
"summary": `{{ $labels.alertname }}: Too high connection number for "{{ $labels.instance }}"`,
|
|
|
|
},
|
|
|
|
alerts: make(map[uint64]*notifier.Alert),
|
|
|
|
},
|
2020-12-19 13:10:59 +01:00
|
|
|
[]datasource.Metric{
|
|
|
|
metricWithValueAndLabels(t, 1, "instance", "foo"),
|
|
|
|
metricWithValueAndLabels(t, 1, "instance", "bar"),
|
|
|
|
},
|
|
|
|
map[uint64]*notifier.Alert{
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "foo"}): {
|
2022-09-29 18:22:50 +02:00
|
|
|
Annotations: map[string]string{
|
|
|
|
"summary": `common: Too high connection number for "foo"`,
|
|
|
|
},
|
2020-12-19 13:10:59 +01:00
|
|
|
Labels: map[string]string{
|
2021-10-22 11:30:38 +02:00
|
|
|
alertNameLabel: "common",
|
|
|
|
"region": "east",
|
|
|
|
"instance": "foo",
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
},
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
hash(map[string]string{alertNameLabel: "common", "region": "east", "instance": "bar"}): {
|
2022-09-29 18:22:50 +02:00
|
|
|
Annotations: map[string]string{
|
|
|
|
"summary": `common: Too high connection number for "bar"`,
|
|
|
|
},
|
2020-12-19 13:10:59 +01:00
|
|
|
Labels: map[string]string{
|
2021-10-22 11:30:38 +02:00
|
|
|
alertNameLabel: "common",
|
|
|
|
"region": "east",
|
|
|
|
"instance": "bar",
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
{
|
|
|
|
&AlertingRule{
|
|
|
|
Name: "override label",
|
|
|
|
Labels: map[string]string{
|
|
|
|
"instance": "{{ $labels.instance }}",
|
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
2022-06-27 09:57:56 +02:00
|
|
|
"summary": `{{ $labels.__name__ }}: Too high connection number for "{{ $labels.instance }}"`,
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
"description": `{{ $labels.alertname}}: It is {{ $value }} connections for "{{ $labels.instance }}"`,
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
alerts: make(map[uint64]*notifier.Alert),
|
|
|
|
},
|
|
|
|
[]datasource.Metric{
|
2022-06-27 09:57:56 +02:00
|
|
|
metricWithValueAndLabels(t, 2, "__name__", "first", "instance", "foo", alertNameLabel, "override"),
|
|
|
|
metricWithValueAndLabels(t, 10, "__name__", "second", "instance", "bar", alertNameLabel, "override"),
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
map[uint64]*notifier.Alert{
|
2023-12-22 16:07:47 +01:00
|
|
|
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "foo"}): {
|
2020-12-19 13:10:59 +01:00
|
|
|
Labels: map[string]string{
|
2023-12-22 16:07:47 +01:00
|
|
|
alertNameLabel: "override label",
|
|
|
|
"exported_alertname": "override",
|
|
|
|
"instance": "foo",
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
2022-06-27 09:57:56 +02:00
|
|
|
"summary": `first: Too high connection number for "foo"`,
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
"description": `override: It is 2 connections for "foo"`,
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
},
|
2023-12-22 16:07:47 +01:00
|
|
|
hash(map[string]string{alertNameLabel: "override label", "exported_alertname": "override", "instance": "bar"}): {
|
2020-12-19 13:10:59 +01:00
|
|
|
Labels: map[string]string{
|
2023-12-22 16:07:47 +01:00
|
|
|
alertNameLabel: "override label",
|
|
|
|
"exported_alertname": "override",
|
|
|
|
"instance": "bar",
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
2022-06-27 09:57:56 +02:00
|
|
|
"summary": `second: Too high connection number for "bar"`,
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
"description": `override: It is 10 connections for "bar"`,
|
2020-12-19 13:10:59 +01:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
2021-12-10 11:10:26 +01:00
|
|
|
{
|
|
|
|
&AlertingRule{
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
Name: "OriginLabels",
|
2021-12-10 11:10:26 +01:00
|
|
|
GroupName: "Testing",
|
|
|
|
Labels: map[string]string{
|
|
|
|
"instance": "{{ $labels.instance }}",
|
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
"summary": `Alert "{{ $labels.alertname }}({{ $labels.alertgroup }})" for instance {{ $labels.instance }}`,
|
2021-12-10 11:10:26 +01:00
|
|
|
},
|
|
|
|
alerts: make(map[uint64]*notifier.Alert),
|
|
|
|
},
|
|
|
|
[]datasource.Metric{
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
metricWithValueAndLabels(t, 1,
|
|
|
|
alertNameLabel, "originAlertname",
|
|
|
|
alertGroupNameLabel, "originGroupname",
|
|
|
|
"instance", "foo"),
|
2021-12-10 11:10:26 +01:00
|
|
|
},
|
|
|
|
map[uint64]*notifier.Alert{
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
hash(map[string]string{
|
2023-12-22 16:07:47 +01:00
|
|
|
alertNameLabel: "OriginLabels",
|
|
|
|
"exported_alertname": "originAlertname",
|
|
|
|
alertGroupNameLabel: "Testing",
|
|
|
|
"exported_alertgroup": "originGroupname",
|
|
|
|
"instance": "foo",
|
2023-09-06 16:29:59 +02:00
|
|
|
}): {
|
2021-12-10 11:10:26 +01:00
|
|
|
Labels: map[string]string{
|
2023-12-22 16:07:47 +01:00
|
|
|
alertNameLabel: "OriginLabels",
|
|
|
|
"exported_alertname": "originAlertname",
|
|
|
|
alertGroupNameLabel: "Testing",
|
|
|
|
"exported_alertgroup": "originGroupname",
|
|
|
|
"instance": "foo",
|
2021-12-10 11:10:26 +01:00
|
|
|
},
|
|
|
|
Annotations: map[string]string{
|
vmalert: fix labels and annotations processing for alerts (#2403)
To improve compatibility with Prometheus alerting the order of
templates processing has changed.
Before, vmalert did all labels processing beforehand. It meant
all extra labels (such as `alertname`, `alertgroup` or rule labels)
were available in templating. All collisions were resolved in favour
of extra labels.
In Prometheus, only labels from the received metric are available in
templating, so no collisions are possible.
This change makes vmalert's behaviour similar to Prometheus.
For example, consider alerting rule which is triggered by time series
with `alertname` label. In vmalert, this label would be overriden
by alerting rule's name everywhere: for alert labels, for annotations, etc.
In Prometheus, it would be overriden for alert's labels only, but in annotations
the original label value would be available.
See more details here https://github.com/prometheus/compliance/issues/80
Signed-off-by: hagen1778 <roman@victoriametrics.com>
2022-04-06 20:24:45 +02:00
|
|
|
"summary": `Alert "originAlertname(originGroupname)" for instance foo`,
|
2021-12-10 11:10:26 +01:00
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
2020-12-19 13:10:59 +01:00
|
|
|
}
|
|
|
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
|
|
|
for _, tc := range testCases {
|
|
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
2023-10-13 13:54:33 +02:00
|
|
|
fq := &datasource.FakeQuerier{}
|
2020-12-19 13:10:59 +01:00
|
|
|
tc.rule.GroupID = fakeGroup.ID()
|
2021-04-28 22:41:15 +02:00
|
|
|
tc.rule.q = fq
|
2023-10-13 13:54:33 +02:00
|
|
|
tc.rule.state = &ruleState{entries: make([]StateEntry, 10)}
|
|
|
|
fq.Add(tc.metrics...)
|
|
|
|
if _, err := tc.rule.exec(context.TODO(), time.Now(), 0); err != nil {
|
2020-12-19 13:10:59 +01:00
|
|
|
t.Fatalf("unexpected err: %s", err)
|
|
|
|
}
|
|
|
|
for hash, expAlert := range tc.expAlerts {
|
|
|
|
gotAlert := tc.rule.alerts[hash]
|
|
|
|
if gotAlert == nil {
|
2023-07-13 20:40:41 +02:00
|
|
|
t.Fatalf("alert %d is missing; labels: %v; annotations: %v", hash, expAlert.Labels, expAlert.Annotations)
|
2020-12-19 13:10:59 +01:00
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(expAlert.Annotations, gotAlert.Annotations) {
|
|
|
|
t.Fatalf("expected to have annotations %#v; got %#v", expAlert.Annotations, gotAlert.Annotations)
|
|
|
|
}
|
|
|
|
if !reflect.DeepEqual(expAlert.Labels, gotAlert.Labels) {
|
|
|
|
t.Fatalf("expected to have labels %#v; got %#v", expAlert.Labels, gotAlert.Labels)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-03-16 16:26:33 +01:00
|
|
|
func TestAlertsToSend(t *testing.T) {
|
|
|
|
ts := time.Now()
|
|
|
|
f := func(alerts, expAlerts []*notifier.Alert, resolveDuration, resendDelay time.Duration) {
|
|
|
|
t.Helper()
|
|
|
|
ar := &AlertingRule{alerts: make(map[uint64]*notifier.Alert)}
|
|
|
|
for i, a := range alerts {
|
|
|
|
ar.alerts[uint64(i)] = a
|
|
|
|
}
|
|
|
|
gotAlerts := ar.alertsToSend(ts, resolveDuration, resendDelay)
|
|
|
|
if gotAlerts == nil && expAlerts == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if len(gotAlerts) != len(expAlerts) {
|
|
|
|
t.Fatalf("expected to get %d alerts; got %d instead",
|
|
|
|
len(expAlerts), len(gotAlerts))
|
|
|
|
}
|
|
|
|
sort.Slice(expAlerts, func(i, j int) bool {
|
|
|
|
return expAlerts[i].Name < expAlerts[j].Name
|
|
|
|
})
|
|
|
|
sort.Slice(gotAlerts, func(i, j int) bool {
|
|
|
|
return gotAlerts[i].Name < gotAlerts[j].Name
|
|
|
|
})
|
|
|
|
for i, exp := range expAlerts {
|
|
|
|
got := gotAlerts[i]
|
|
|
|
if got.LastSent != exp.LastSent {
|
|
|
|
t.Fatalf("expected LastSent to be %v; got %v", exp.LastSent, got.LastSent)
|
|
|
|
}
|
|
|
|
if got.End != exp.End {
|
|
|
|
t.Fatalf("expected End to be %v; got %v", exp.End, got.End)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
f( // send firing alert with custom resolve time
|
|
|
|
[]*notifier.Alert{{State: notifier.StateFiring}},
|
|
|
|
[]*notifier.Alert{{LastSent: ts, End: ts.Add(5 * time.Minute)}},
|
|
|
|
5*time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // resolve inactive alert at the current timestamp
|
2022-03-29 15:09:07 +02:00
|
|
|
[]*notifier.Alert{{State: notifier.StateInactive, ResolvedAt: ts}},
|
2022-03-16 16:26:33 +01:00
|
|
|
[]*notifier.Alert{{LastSent: ts, End: ts}},
|
|
|
|
time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // mixed case of firing and resolved alerts. Names are added for deterministic sorting
|
2022-03-29 15:09:07 +02:00
|
|
|
[]*notifier.Alert{{Name: "a", State: notifier.StateFiring}, {Name: "b", State: notifier.StateInactive, ResolvedAt: ts}},
|
2022-03-16 16:26:33 +01:00
|
|
|
[]*notifier.Alert{{Name: "a", LastSent: ts, End: ts.Add(5 * time.Minute)}, {Name: "b", LastSent: ts, End: ts}},
|
|
|
|
5*time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // mixed case of pending and resolved alerts. Names are added for deterministic sorting
|
2022-03-29 15:09:07 +02:00
|
|
|
[]*notifier.Alert{{Name: "a", State: notifier.StatePending}, {Name: "b", State: notifier.StateInactive, ResolvedAt: ts}},
|
2022-03-16 16:26:33 +01:00
|
|
|
[]*notifier.Alert{{Name: "b", LastSent: ts, End: ts}},
|
|
|
|
5*time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // attempt to send alert that was already sent in the resendDelay interval
|
|
|
|
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-time.Second)}},
|
|
|
|
nil,
|
|
|
|
time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // attempt to send alert that was sent out of the resendDelay interval
|
|
|
|
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-2 * time.Minute)}},
|
|
|
|
[]*notifier.Alert{{LastSent: ts, End: ts.Add(time.Minute)}},
|
|
|
|
time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // alert must be sent even if resendDelay interval is 0
|
|
|
|
[]*notifier.Alert{{State: notifier.StateFiring, LastSent: ts.Add(-time.Second)}},
|
|
|
|
[]*notifier.Alert{{LastSent: ts, End: ts.Add(time.Minute)}},
|
|
|
|
time.Minute, 0,
|
|
|
|
)
|
2022-03-29 15:09:07 +02:00
|
|
|
f( // inactive alert which has been sent already
|
|
|
|
[]*notifier.Alert{{State: notifier.StateInactive, LastSent: ts.Add(-time.Second), ResolvedAt: ts.Add(-2 * time.Second)}},
|
|
|
|
nil,
|
|
|
|
time.Minute, time.Minute,
|
|
|
|
)
|
|
|
|
f( // inactive alert which has been resolved after last send
|
|
|
|
[]*notifier.Alert{{State: notifier.StateInactive, LastSent: ts.Add(-time.Second), ResolvedAt: ts}},
|
|
|
|
[]*notifier.Alert{{LastSent: ts, End: ts}},
|
|
|
|
time.Minute, time.Minute,
|
|
|
|
)
|
2022-03-16 16:26:33 +01:00
|
|
|
}
|
|
|
|
|
2020-06-01 12:46:37 +02:00
|
|
|
func newTestRuleWithLabels(name string, labels ...string) *AlertingRule {
|
|
|
|
r := newTestAlertingRule(name, 0)
|
2020-05-04 23:51:22 +02:00
|
|
|
r.Labels = make(map[string]string)
|
|
|
|
for i := 0; i < len(labels); i += 2 {
|
|
|
|
r.Labels[labels[i]] = labels[i+1]
|
|
|
|
}
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
2020-06-01 12:46:37 +02:00
|
|
|
func newTestAlertingRule(name string, waitFor time.Duration) *AlertingRule {
|
2023-07-27 15:13:13 +02:00
|
|
|
rule := AlertingRule{
|
2022-09-14 14:04:24 +02:00
|
|
|
Name: name,
|
|
|
|
For: waitFor,
|
|
|
|
EvalInterval: waitFor,
|
|
|
|
alerts: make(map[uint64]*notifier.Alert),
|
2023-10-13 13:54:33 +02:00
|
|
|
state: &ruleState{entries: make([]StateEntry, 10)},
|
2023-12-06 19:39:35 +01:00
|
|
|
metrics: &alertingRuleMetrics{
|
|
|
|
errors: utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_alerting_rules_errors_total{alertname=%q}`, name)),
|
|
|
|
},
|
2022-09-14 14:04:24 +02:00
|
|
|
}
|
2023-07-27 15:13:13 +02:00
|
|
|
return &rule
|
|
|
|
}
|
|
|
|
|
|
|
|
func newTestAlertingRuleWithKeepFiring(name string, waitFor, keepFiringFor time.Duration) *AlertingRule {
|
|
|
|
rule := newTestAlertingRule(name, waitFor)
|
|
|
|
rule.KeepFiringFor = keepFiringFor
|
|
|
|
return rule
|
2020-05-04 23:51:22 +02:00
|
|
|
}
|
2023-12-22 16:07:47 +01:00
|
|
|
|
|
|
|
func TestAlertingRule_ToLabels(t *testing.T) {
|
|
|
|
metric := datasource.Metric{
|
|
|
|
Labels: []datasource.Label{
|
|
|
|
{Name: "instance", Value: "0.0.0.0:8800"},
|
|
|
|
{Name: "group", Value: "vmalert"},
|
|
|
|
{Name: "alertname", Value: "ConfigurationReloadFailure"},
|
|
|
|
},
|
|
|
|
Values: []float64{1},
|
|
|
|
Timestamps: []int64{time.Now().UnixNano()},
|
|
|
|
}
|
|
|
|
|
|
|
|
ar := &AlertingRule{
|
|
|
|
Labels: map[string]string{
|
|
|
|
"instance": "override", // this should override instance with new value
|
|
|
|
"group": "vmalert", // this shouldn't have effect since value in metric is equal
|
|
|
|
},
|
|
|
|
Expr: "sum(vmalert_alerting_rules_error) by(instance, group, alertname) > 0",
|
|
|
|
Name: "AlertingRulesError",
|
|
|
|
GroupName: "vmalert",
|
|
|
|
}
|
|
|
|
|
|
|
|
expectedOriginLabels := map[string]string{
|
|
|
|
"instance": "0.0.0.0:8800",
|
|
|
|
"group": "vmalert",
|
|
|
|
"alertname": "ConfigurationReloadFailure",
|
|
|
|
"alertgroup": "vmalert",
|
|
|
|
}
|
|
|
|
|
|
|
|
expectedProcessedLabels := map[string]string{
|
|
|
|
"instance": "override",
|
|
|
|
"exported_instance": "0.0.0.0:8800",
|
|
|
|
"alertname": "AlertingRulesError",
|
|
|
|
"exported_alertname": "ConfigurationReloadFailure",
|
|
|
|
"group": "vmalert",
|
|
|
|
"alertgroup": "vmalert",
|
|
|
|
}
|
|
|
|
|
|
|
|
ls, err := ar.toLabels(metric, nil)
|
|
|
|
if err != nil {
|
|
|
|
t.Fatalf("unexpected error: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if !reflect.DeepEqual(ls.origin, expectedOriginLabels) {
|
|
|
|
t.Errorf("origin labels mismatch, got: %v, want: %v", ls.origin, expectedOriginLabels)
|
|
|
|
}
|
|
|
|
|
|
|
|
if !reflect.DeepEqual(ls.processed, expectedProcessedLabels) {
|
|
|
|
t.Errorf("processed labels mismatch, got: %v, want: %v", ls.processed, expectedProcessedLabels)
|
|
|
|
}
|
|
|
|
}
|