mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-15 00:13:30 +01:00
c7f3e58032
Before the change we were sending notifications to notifier if following conditions are met: * alert is in Fire state * alert is in Inactive state We were sending Inactive notifications to resolve alert ASAP. Unfortunately, we were sending resolves for Pending alerts that become Inactive, which is wrong. In this change we delete alert from the active list if it was Pending and become Inactive. In this way we now have Inactive alerts only if they were in state Fire before. See test change for example.
544 lines
15 KiB
Go
544 lines
15 KiB
Go
package main
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
|
|
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
|
)
|
|
|
|
func TestRule_Validate(t *testing.T) {
|
|
if err := (&Rule{}).Validate(); err == nil {
|
|
t.Errorf("exptected empty name error")
|
|
}
|
|
if err := (&Rule{Name: "alert"}).Validate(); err == nil {
|
|
t.Errorf("exptected empty expr error")
|
|
}
|
|
if err := (&Rule{Name: "alert", Expr: "test{"}).Validate(); err == nil {
|
|
t.Errorf("exptected invalid expr error")
|
|
}
|
|
if err := (&Rule{Name: "alert", Expr: "test>0"}).Validate(); err != nil {
|
|
t.Errorf("exptected valid rule got %s", err)
|
|
}
|
|
}
|
|
|
|
func TestRule_AlertToTimeSeries(t *testing.T) {
|
|
timestamp := time.Now()
|
|
testCases := []struct {
|
|
rule *Rule
|
|
alert *notifier.Alert
|
|
expTS []prompbmarshal.TimeSeries
|
|
}{
|
|
{
|
|
newTestRule("instant", 0),
|
|
¬ifier.Alert{State: notifier.StateFiring},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "instant",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("instant extra labels", 0),
|
|
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
|
"job": "foo",
|
|
"instance": "bar",
|
|
}},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "instant extra labels",
|
|
"job": "foo",
|
|
"instance": "bar",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("instant labels override", 0),
|
|
¬ifier.Alert{State: notifier.StateFiring, Labels: map[string]string{
|
|
alertStateLabel: "foo",
|
|
"__name__": "bar",
|
|
}},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "instant labels override",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for", time.Second),
|
|
¬ifier.Alert{State: notifier.StateFiring, Start: timestamp.Add(time.Second)},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StateFiring.String(),
|
|
alertNameLabel: "for",
|
|
}, timestamp),
|
|
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
|
|
"__name__": alertForStateMetricName,
|
|
alertNameLabel: "for",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for pending", 10*time.Second),
|
|
¬ifier.Alert{State: notifier.StatePending, Start: timestamp.Add(time.Second)},
|
|
[]prompbmarshal.TimeSeries{
|
|
newTimeSeries(1, map[string]string{
|
|
"__name__": alertMetricName,
|
|
alertStateLabel: notifier.StatePending.String(),
|
|
alertNameLabel: "for pending",
|
|
}, timestamp),
|
|
newTimeSeries(float64(timestamp.Add(time.Second).Unix()), map[string]string{
|
|
"__name__": alertForStateMetricName,
|
|
alertNameLabel: "for pending",
|
|
}, timestamp),
|
|
},
|
|
},
|
|
}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
|
tss := tc.rule.AlertToTimeSeries(tc.alert, timestamp)
|
|
if len(tc.expTS) != len(tss) {
|
|
t.Fatalf("expected number of timeseries %d; got %d", len(tc.expTS), len(tss))
|
|
}
|
|
for i := range tc.expTS {
|
|
expTS, gotTS := tc.expTS[i], tss[i]
|
|
if len(expTS.Samples) != len(gotTS.Samples) {
|
|
t.Fatalf("expected number of samples %d; got %d", len(expTS.Samples), len(gotTS.Samples))
|
|
}
|
|
for i, exp := range expTS.Samples {
|
|
got := gotTS.Samples[i]
|
|
if got.Value != exp.Value {
|
|
t.Errorf("expected value %.2f; got %.2f", exp.Value, got.Value)
|
|
}
|
|
if got.Timestamp != exp.Timestamp {
|
|
t.Errorf("expected timestamp %d; got %d", exp.Timestamp, got.Timestamp)
|
|
}
|
|
}
|
|
if len(expTS.Labels) != len(gotTS.Labels) {
|
|
t.Fatalf("expected number of labels %d; got %d", len(expTS.Labels), len(gotTS.Labels))
|
|
}
|
|
for i, exp := range expTS.Labels {
|
|
got := gotTS.Labels[i]
|
|
if got.Name != exp.Name {
|
|
t.Errorf("expected label name %q; got %q", exp.Name, got.Name)
|
|
}
|
|
if got.Value != exp.Value {
|
|
t.Errorf("expected label value %q; got %q", exp.Value, got.Value)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func newTestRule(name string, waitFor time.Duration) *Rule {
|
|
return &Rule{Name: name, alerts: make(map[uint64]*notifier.Alert), For: waitFor}
|
|
}
|
|
|
|
func TestRule_Exec(t *testing.T) {
|
|
testCases := []struct {
|
|
rule *Rule
|
|
steps [][]datasource.Metric
|
|
expAlerts map[uint64]*notifier.Alert
|
|
}{
|
|
{
|
|
newTestRule("empty", 0),
|
|
[][]datasource.Metric{},
|
|
map[uint64]*notifier.Alert{},
|
|
},
|
|
{
|
|
newTestRule("empty labels", 0),
|
|
[][]datasource.Metric{
|
|
{datasource.Metric{}},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(datasource.Metric{}): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing=>inactive", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing=>inactive=>empty", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{},
|
|
},
|
|
{
|
|
newTestRule("single-firing=>inactive=>firing=>inactive=>empty=>firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{},
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("multiple-firing", 0),
|
|
[][]datasource.Metric{
|
|
{
|
|
metricWithLabels(t, "name", "foo"),
|
|
metricWithLabels(t, "name", "foo1"),
|
|
metricWithLabels(t, "name", "foo2"),
|
|
},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateFiring},
|
|
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("multiple-steps-firing", 0),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo1")},
|
|
{metricWithLabels(t, "name", "foo2")},
|
|
},
|
|
// 1: fire first alert
|
|
// 2: fire second alert, set first inactive
|
|
// 3: fire third alert, set second inactive, delete first one
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo1")): {State: notifier.StateInactive},
|
|
hash(metricWithLabels(t, "name", "foo2")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("duplicate", 0),
|
|
[][]datasource.Metric{
|
|
{
|
|
// metrics with the same labelset should result in one alert
|
|
metricWithLabels(t, "name", "foo", "type", "bar"),
|
|
metricWithLabels(t, "type", "bar", "name", "foo"),
|
|
},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo", "type", "bar")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending", time.Minute),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-fired", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>empty", time.Second),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset and delete pending alerts
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>firing=>inactive", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateInactive},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>firing=>inactive=>pending", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StatePending},
|
|
},
|
|
},
|
|
{
|
|
newTestRule("for-pending=>firing=>inactive=>pending=>firing", time.Millisecond),
|
|
[][]datasource.Metric{
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
// empty step to reset pending alerts
|
|
{},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
{metricWithLabels(t, "name", "foo")},
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "name", "foo")): {State: notifier.StateFiring},
|
|
},
|
|
},
|
|
}
|
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
|
fq := &fakeQuerier{}
|
|
tc.rule.group = fakeGroup
|
|
for _, step := range tc.steps {
|
|
fq.reset()
|
|
fq.add(step...)
|
|
if err := tc.rule.Exec(context.TODO(), fq); err != nil {
|
|
t.Fatalf("unexpected err: %s", err)
|
|
}
|
|
// artificial delay between applying steps
|
|
time.Sleep(time.Millisecond)
|
|
}
|
|
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
|
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
|
}
|
|
for key, exp := range tc.expAlerts {
|
|
got, ok := tc.rule.alerts[key]
|
|
if !ok {
|
|
t.Fatalf("expected to have key %d", key)
|
|
}
|
|
if got.State != exp.State {
|
|
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func metricWithLabels(t *testing.T, labels ...string) datasource.Metric {
|
|
t.Helper()
|
|
if len(labels) == 0 || len(labels)%2 != 0 {
|
|
t.Fatalf("expected to get even number of labels")
|
|
}
|
|
m := datasource.Metric{}
|
|
for i := 0; i < len(labels); i += 2 {
|
|
m.Labels = append(m.Labels, datasource.Label{
|
|
Name: labels[i],
|
|
Value: labels[i+1],
|
|
})
|
|
}
|
|
return m
|
|
}
|
|
|
|
type fakeQuerier struct {
|
|
sync.Mutex
|
|
metrics []datasource.Metric
|
|
}
|
|
|
|
func (fq *fakeQuerier) reset() {
|
|
fq.Lock()
|
|
fq.metrics = fq.metrics[:0]
|
|
fq.Unlock()
|
|
}
|
|
|
|
func (fq *fakeQuerier) add(metrics ...datasource.Metric) {
|
|
fq.Lock()
|
|
fq.metrics = append(fq.metrics, metrics...)
|
|
fq.Unlock()
|
|
}
|
|
|
|
func (fq *fakeQuerier) Query(_ context.Context, _ string) ([]datasource.Metric, error) {
|
|
fq.Lock()
|
|
cpy := make([]datasource.Metric, len(fq.metrics))
|
|
copy(cpy, fq.metrics)
|
|
fq.Unlock()
|
|
return cpy, nil
|
|
}
|
|
|
|
func TestRule_Restore(t *testing.T) {
|
|
testCases := []struct {
|
|
rule *Rule
|
|
metrics []datasource.Metric
|
|
expAlerts map[uint64]*notifier.Alert
|
|
}{
|
|
{
|
|
newTestRuleWithLabels("no extra labels"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
alertNameLabel, "",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(datasource.Metric{}): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
},
|
|
},
|
|
{
|
|
newTestRuleWithLabels("metric labels"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
alertNameLabel, "",
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t,
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
)): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
},
|
|
},
|
|
{
|
|
newTestRuleWithLabels("rule labels", "source", "vm"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
alertNameLabel, "",
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
// following pair supposed to be dropped
|
|
"source", "vm",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t,
|
|
"foo", "bar",
|
|
"namespace", "baz",
|
|
)): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
},
|
|
},
|
|
{
|
|
newTestRuleWithLabels("multiple alerts"),
|
|
[]datasource.Metric{
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
"host", "localhost-1",
|
|
),
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(2*time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
"host", "localhost-2",
|
|
),
|
|
metricWithValueAndLabels(t, float64(time.Now().Truncate(3*time.Hour).Unix()),
|
|
"__name__", alertForStateMetricName,
|
|
"host", "localhost-3",
|
|
),
|
|
},
|
|
map[uint64]*notifier.Alert{
|
|
hash(metricWithLabels(t, "host", "localhost-1")): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(time.Hour)},
|
|
hash(metricWithLabels(t, "host", "localhost-2")): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(2 * time.Hour)},
|
|
hash(metricWithLabels(t, "host", "localhost-3")): {State: notifier.StatePending,
|
|
Start: time.Now().Truncate(3 * time.Hour)},
|
|
},
|
|
},
|
|
}
|
|
fakeGroup := Group{Name: "TestRule_Exec"}
|
|
for _, tc := range testCases {
|
|
t.Run(tc.rule.Name, func(t *testing.T) {
|
|
fq := &fakeQuerier{}
|
|
tc.rule.group = fakeGroup
|
|
fq.add(tc.metrics...)
|
|
if err := tc.rule.Restore(context.TODO(), fq, time.Hour); err != nil {
|
|
t.Fatalf("unexpected err: %s", err)
|
|
}
|
|
if len(tc.rule.alerts) != len(tc.expAlerts) {
|
|
t.Fatalf("expected %d alerts; got %d", len(tc.expAlerts), len(tc.rule.alerts))
|
|
}
|
|
for key, exp := range tc.expAlerts {
|
|
got, ok := tc.rule.alerts[key]
|
|
if !ok {
|
|
t.Fatalf("expected to have key %d", key)
|
|
}
|
|
if got.State != exp.State {
|
|
t.Fatalf("expected state %d; got %d", exp.State, got.State)
|
|
}
|
|
if got.Start != exp.Start {
|
|
t.Fatalf("expected Start %v; got %v", exp.Start, got.Start)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func newTestRuleWithLabels(name string, labels ...string) *Rule {
|
|
r := newTestRule(name, 0)
|
|
r.Labels = make(map[string]string)
|
|
for i := 0; i < len(labels); i += 2 {
|
|
r.Labels[labels[i]] = labels[i+1]
|
|
}
|
|
return r
|
|
}
|
|
|
|
func metricWithValueAndLabels(t *testing.T, value float64, labels ...string) datasource.Metric {
|
|
t.Helper()
|
|
m := metricWithLabels(t, labels...)
|
|
m.Value = value
|
|
return m
|
|
}
|