From 852a895b700f34a05d19faf4de38a444ecab12db Mon Sep 17 00:00:00 2001 From: Roman Khavronenko Date: Tue, 30 Nov 2021 01:18:48 +0200 Subject: [PATCH] vmalert: make notifier.Addr optional (#1870) For a long time notifier.Addr flag was required. The assumption was that vmalert will be always used for alerting. However, practice shows that some users need only recording rules. In this case, requirement of notifier.Addr is ambigious. The change verifies if loaded config contains recording or alerting rules and if there are corresponding flags set. This is true for initial config load and hot reload. Signed-off-by: hagen1778 --- app/vmalert/README.md | 8 ++-- app/vmalert/main_test.go | 3 ++ app/vmalert/manager.go | 19 +++++++++ app/vmalert/manager_test.go | 80 +++++++++++++++++++++++++++++++++++- app/vmalert/notifier/init.go | 6 +-- 5 files changed, 106 insertions(+), 10 deletions(-) diff --git a/app/vmalert/README.md b/app/vmalert/README.md index 3bbe6e4562..3da3abbac8 100644 --- a/app/vmalert/README.md +++ b/app/vmalert/README.md @@ -38,7 +38,7 @@ The build binary will be placed to `VictoriaMetrics/bin` folder. To start using `vmalert` you will need the following things: * list of rules - PromQL/MetricsQL expressions to execute; * datasource address - reachable MetricsQL endpoint to run queries against; -* notifier address - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing, +* notifier address [optional] - reachable [Alert Manager](https://github.com/prometheus/alertmanager) instance for processing, aggregating alerts and sending notifications. * remote write address [optional] - [remote write](https://prometheus.io/docs/prometheus/latest/storage/#remote-storage-integrations) compatible storage to persist rules and alerts state info; @@ -48,9 +48,9 @@ Then configure `vmalert` accordingly: ``` ./bin/vmalert -rule=alert.rules \ # Path to the file with rules configuration. Supports wildcard -datasource.url=http://localhost:8428 \ # PromQL compatible datasource - -notifier.url=http://localhost:9093 \ # AlertManager URL + -notifier.url=http://localhost:9093 \ # AlertManager URL (required if alerting rules are used) -notifier.url=http://127.0.0.1:9093 \ # AlertManager replica URL - -remoteWrite.url=http://localhost:8428 \ # Remote write compatible storage to persist rules and alerts state info + -remoteWrite.url=http://localhost:8428 \ # Remote write compatible storage to persist rules and alerts state info (required if recording rules are used) -remoteRead.url=http://localhost:8428 \ # MetricsQL compatible datasource to restore alerts state from -external.label=cluster=east-1 \ # External label to be applied for each rule -external.label=replica=a # Multiple external labels may be set @@ -484,7 +484,7 @@ The shortlist of configuration flags is the following: Optional TLS server name to use for connections to -notifier.url. By default the server name from -notifier.url is used Supports an array of values separated by comma or specified via multiple flags. -notifier.url array - Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093 + Prometheus alertmanager URL, e.g. http://127.0.0.1:9093 Supports an array of values separated by comma or specified via multiple flags. -pprofAuthKey string Auth key for /debug/pprof. It overrides httpAuth settings diff --git a/app/vmalert/main_test.go b/app/vmalert/main_test.go index 7b927805a5..a52576421e 100644 --- a/app/vmalert/main_test.go +++ b/app/vmalert/main_test.go @@ -10,6 +10,7 @@ import ( "time" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier" + "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite" "github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil" ) @@ -99,6 +100,8 @@ groups: querierBuilder: &fakeQuerier{}, groups: make(map[uint64]*Group), labels: map[string]string{}, + notifiers: []notifier.Notifier{&fakeNotifier{}}, + rw: &remotewrite.Client{}, } syncCh := make(chan struct{}) diff --git a/app/vmalert/manager.go b/app/vmalert/manager.go index 630a9a2715..dff4a5550a 100644 --- a/app/vmalert/manager.go +++ b/app/vmalert/manager.go @@ -85,12 +85,31 @@ func (m *manager) startGroup(ctx context.Context, group *Group, restore bool) er } func (m *manager) update(ctx context.Context, groupsCfg []config.Group, restore bool) error { + var rrPresent, arPresent bool groupsRegistry := make(map[uint64]*Group) for _, cfg := range groupsCfg { + for _, r := range cfg.Rules { + if rrPresent && arPresent { + continue + } + if r.Record != "" { + rrPresent = true + } + if r.Alert != "" { + arPresent = true + } + } ng := newGroup(cfg, m.querierBuilder, *evaluationInterval, m.labels) groupsRegistry[ng.ID()] = ng } + if rrPresent && m.rw == nil { + return fmt.Errorf("config contains recording rules but `-remoteWrite.url` isn't set") + } + if arPresent && m.notifiers == nil { + return fmt.Errorf("config contains alerting rules but `-notifier.url` isn't set") + } + type updateItem struct { old *Group new *Group diff --git a/app/vmalert/manager_test.go b/app/vmalert/manager_test.go index 503cd9e53a..cb21906dde 100644 --- a/app/vmalert/manager_test.go +++ b/app/vmalert/manager_test.go @@ -5,6 +5,7 @@ import ( "math/rand" "net/url" "os" + "strings" "sync" "testing" "time" @@ -12,6 +13,7 @@ import ( "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource" "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier" + "github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite" ) func TestMain(m *testing.M) { @@ -218,7 +220,11 @@ func TestManagerUpdate(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) - m := &manager{groups: make(map[uint64]*Group), querierBuilder: &fakeQuerier{}} + m := &manager{ + groups: make(map[uint64]*Group), + querierBuilder: &fakeQuerier{}, + notifiers: []notifier.Notifier{&fakeNotifier{}}, + } cfgInit := loadCfg(t, []string{tc.initPath}, true, true) if err := m.update(ctx, cfgInit, false); err != nil { @@ -247,6 +253,78 @@ func TestManagerUpdate(t *testing.T) { } } +func TestManagerUpdateNegative(t *testing.T) { + testCases := []struct { + notifiers []notifier.Notifier + rw *remotewrite.Client + cfg config.Group + expErr string + }{ + { + nil, + nil, + config.Group{Name: "Recording rule only", + Rules: []config.Rule{ + {Record: "record", Expr: "max(up)"}, + }, + }, + "contains recording rules", + }, + { + nil, + nil, + config.Group{Name: "Alerting rule only", + Rules: []config.Rule{ + {Alert: "alert", Expr: "up > 0"}, + }, + }, + "contains alerting rules", + }, + { + []notifier.Notifier{&fakeNotifier{}}, + nil, + config.Group{Name: "Recording and alerting rules", + Rules: []config.Rule{ + {Alert: "alert1", Expr: "up > 0"}, + {Alert: "alert2", Expr: "up > 0"}, + {Record: "record", Expr: "max(up)"}, + }, + }, + "contains recording rules", + }, + { + nil, + &remotewrite.Client{}, + config.Group{Name: "Recording and alerting rules", + Rules: []config.Rule{ + {Record: "record1", Expr: "max(up)"}, + {Record: "record2", Expr: "max(up)"}, + {Alert: "alert", Expr: "up > 0"}, + }, + }, + "contains alerting rules", + }, + } + + for _, tc := range testCases { + t.Run(tc.cfg.Name, func(t *testing.T) { + m := &manager{ + groups: make(map[uint64]*Group), + querierBuilder: &fakeQuerier{}, + notifiers: tc.notifiers, + rw: tc.rw, + } + err := m.update(context.Background(), []config.Group{tc.cfg}, false) + if err == nil { + t.Fatalf("expected to get error; got nil") + } + if !strings.Contains(err.Error(), tc.expErr) { + t.Fatalf("expected err to contain %q; got %q", tc.expErr, err) + } + }) + } +} + func loadCfg(t *testing.T, path []string, validateAnnotations, validateExpressions bool) []config.Group { t.Helper() cfg, err := config.Parse(path, validateAnnotations, validateExpressions) diff --git a/app/vmalert/notifier/init.go b/app/vmalert/notifier/init.go index 69554b8c0b..0263e7e100 100644 --- a/app/vmalert/notifier/init.go +++ b/app/vmalert/notifier/init.go @@ -9,7 +9,7 @@ import ( ) var ( - addrs = flagutil.NewArray("notifier.url", "Prometheus alertmanager URL. Required parameter. e.g. http://127.0.0.1:9093") + addrs = flagutil.NewArray("notifier.url", "Prometheus alertmanager URL, e.g. http://127.0.0.1:9093") basicAuthUsername = flagutil.NewArray("notifier.basicAuth.username", "Optional basic auth username for -notifier.url") basicAuthPassword = flagutil.NewArray("notifier.basicAuth.password", "Optional basic auth password for -notifier.url") @@ -24,10 +24,6 @@ var ( // Init creates a Notifier object based on provided flags. func Init(gen AlertURLGenerator) ([]Notifier, error) { - if len(*addrs) == 0 { - return nil, fmt.Errorf("at least one `-notifier.url` must be set") - } - var notifiers []Notifier for i, addr := range *addrs { cert, key := tlsCertFile.GetOptionalArg(i), tlsKeyFile.GetOptionalArg(i)