app/vmalert: support type param for filtering /api/v1/rules response by rule type (#5749)

Co-authored-by: Hui Wang <haley@victoriametrics.com> (cherry picked from commit 62e5e2a4c8)
2024-12-14 16:12:15 +01:00 · 2024-02-09 05:02:35 -03:00 · 2024-02-09 05:02:35 -03:00 · 56b1d8e9ed
commit 56b1d8e9ed
parent 46de5d7eb1
4 changed files with 133 additions and 7 deletions
--- a/app/vmalert/web.go
+++ b/app/vmalert/web.go
@ -87,7 +87,10 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
 		WriteRuleDetails(w, r, rule)
 		return true
 	case "/vmalert/groups":
-		WriteListGroups(w, r, rh.groups())
+		var data []apiGroup
+		ruleType := r.URL.Query().Get("type")
+		data = rh.groups(ruleType)
+		WriteListGroups(w, r, data)
 		return true
 	case "/vmalert/notifiers":
 		WriteListTargets(w, r, notifier.GetTargets())
@ -98,12 +101,20 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
 	case "/rules":
 		// Grafana makes an extra request to `/rules`
 		// handler in addition to `/api/v1/rules` calls in alerts UI,
-		WriteListGroups(w, r, rh.groups())
+		var data []apiGroup
+		ruleType := r.URL.Query().Get("type")
+		data = rh.groups(ruleType)
+		WriteListGroups(w, r, data)
 		return true

 	case "/vmalert/api/v1/rules", "/api/v1/rules":
 		// path used by Grafana for ng alerting
-		data, err := rh.listGroups()
+		var data []byte
+		var err error
+
+		ruleType := r.URL.Query().Get("type")
+		data, err = rh.listGroups(ruleType)
+
 		if err != nil {
 			httpserver.Errorf(w, r, "%s", err)
 			return true
@ -111,6 +122,7 @@ func (rh *requestHandler) handler(w http.ResponseWriter, r *http.Request) bool {
 		w.Header().Set("Content-Type", "application/json")
 		w.Write(data)
 		return true
+
 	case "/vmalert/api/v1/alerts", "/api/v1/alerts":
 		// path used by Grafana for ng alerting
 		data, err := rh.listAlerts()
@ -207,12 +219,28 @@ type listGroupsResponse struct {
 	} `json:"data"`
 }

-func (rh *requestHandler) groups() []apiGroup {
+func (rh *requestHandler) groups(ruleType string) []apiGroup {
 	rh.m.groupsMu.RLock()
 	defer rh.m.groupsMu.RUnlock()

 	groups := make([]apiGroup, 0)
 	for _, g := range rh.m.groups {
+		g = g.DeepCopy()
+		var matchedRules []rule.Rule
+		if ruleType == "alert" || ruleType == "record" {
+			for _, r := range g.Rules {
+				if _, ok := r.(*rule.AlertingRule); ok && ruleType == "alert" {
+					matchedRules = append(matchedRules, r)
+				}
+				if _, ok := r.(*rule.RecordingRule); ok && ruleType == "record" {
+					matchedRules = append(matchedRules, r)
+				}
+			}
+			if len(matchedRules) == 0 {
+				continue
+			}
+			g.Rules = matchedRules
+		}
 		groups = append(groups, groupToAPI(g))
 	}

@ -224,9 +252,9 @@ func (rh *requestHandler) groups() []apiGroup {
 	return groups
 }

-func (rh *requestHandler) listGroups() ([]byte, error) {
+func (rh *requestHandler) listGroups(ruleType string) ([]byte, error) {
 	lr := listGroupsResponse{Status: "success"}
-	lr.Data.Groups = rh.groups()
+	lr.Data.Groups = rh.groups(ruleType)
 	b, err := json.Marshal(lr)
 	if err != nil {
 		return nil, &httpserver.ErrorWithStatusCode{
--- a/app/vmalert/web_test.go
+++ b/app/vmalert/web_test.go
@ -143,6 +143,103 @@ func TestHandler(t *testing.T) {
 			t.Errorf("expected 1 group got %d", length)
 		}
 	})
+
+	t.Run("/api/v1/rules?type=alert", func(t *testing.T) {
+		vmRuleType := "alerting"
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/api/v1/rules?type=alert", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+
+		for _, g := range lr.Data.Groups {
+			if length := len(g.Rules); length != 1 {
+				t.Errorf("expected 1 valid alert got %d", length)
+			}
+			for _, r := range g.Rules {
+				if r.Type != vmRuleType {
+					t.Errorf("expected only alerts here got %s", r.Type)
+				}
+			}
+		}
+	})
+
+	t.Run("/api/v1/rules?type=record", func(t *testing.T) {
+		vmRuleType := "recording"
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/api/v1/rules?type=record", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+
+		for _, g := range lr.Data.Groups {
+			if length := len(g.Rules); length != 1 {
+				t.Errorf("expected 1 valid recording got %d", length)
+			}
+			for _, r := range g.Rules {
+				if r.Type != vmRuleType {
+					t.Errorf("expected only records here got %s", r.Type)
+				}
+			}
+		}
+	})
+
+	t.Run("ignore bad params /api/v1/rules?type=badParam", func(t *testing.T) {
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/api/v1/rules?type=badParam", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+	})
+
+	t.Run("/vmalert/api/v1/rules?type=alert", func(t *testing.T) {
+		vmRuleType := "alerting"
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/vmalert/api/v1/rules?type=alert", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+
+		for _, g := range lr.Data.Groups {
+			if length := len(g.Rules); length != 1 {
+				t.Errorf("expected 1 valid alert got %d", length)
+			}
+			for _, r := range g.Rules {
+				if r.Type != vmRuleType {
+					t.Errorf("expected only alerts here got %s", r.Type)
+				}
+			}
+		}
+	})
+
+	t.Run("/vmalert/api/v1/rules?type=record", func(t *testing.T) {
+		vmRuleType := "recording"
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/vmalert/api/v1/rules?type=record", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+
+		for _, g := range lr.Data.Groups {
+			if length := len(g.Rules); length != 1 {
+				t.Errorf("expected 1 valid recording got %d", length)
+			}
+			for _, r := range g.Rules {
+				if r.Type != vmRuleType {
+					t.Errorf("expected only records here got %s", r.Type)
+				}
+			}
+		}
+	})
+
+	t.Run("ignore bad params /vmalert/api/v1/rules?type=badParam", func(t *testing.T) {
+		lr := listGroupsResponse{}
+		getResp(ts.URL+"/vmalert/api/v1/rules?type=badParam", &lr, 200)
+		if length := len(lr.Data.Groups); length != 1 {
+			t.Errorf("expected 1 group got %d", length)
+		}
+	})
+
 	t.Run("/api/v1/rule?ruleID&groupID", func(t *testing.T) {
 		expRule := ruleToAPI(ar)
 		gotRule := apiRule{}
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@ -39,6 +39,7 @@ The sandbox cluster installation is running under the constant load generated by
 * FEATURE: add `-search.resetRollupResultCacheOnStartup` command-line flag for resetting [query cache](https://docs.victoriametrics.com/#rollup-result-cache) on startup. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/834).
 * FEATURE: [dashboards/vmagent](https://grafana.com/grafana/dashboards/12683): add `Targets scraped/s` stat panel showing the number of targets scraped by the vmagent per-second.
 * FEATURE: [dashboards/all](https://grafana.com/orgs/victoriametrics): add new panel `CPU spent on GC`. It should help identifying cases when too much CPU is spent on garbage collection, and advice users on how this can be addressed.
+* FEATURE: [vmalert](https://docs.victoriametrics.com/#vmalert): support filtering of alerting and recording rules via the `type` query arg (`alert` or `record`) at the `/vmalert/groups`, `/rules`, `/vmalert/api/v1/rules` and `/api/v1/rules` API endpoints. See [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5749) by @victoramsantos.
 * FEATURE: [vmbackup](https://docs.victoriametrics.com/vmbackup.html): support client-side TLS configuration for creating and deleting snapshots via `-snapshot.tls*` cmd-line flags. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5724). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5738).

 * BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly propagate [label filters](https://docs.victoriametrics.com/keyconcepts/#filtering) from multiple arguments passed to [aggregate functions](https://docs.victoriametrics.com/metricsql/#aggregate-functions). For example, `sum({job="foo"}, {job="bar"}) by (job) + a` was improperly optimized to `sum({job="foo"}, {job="bar"}) by (job) + a{job="foo"}` before being executed. This could lead to unexpected results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5604).
--- a/docs/vmalert.md
+++ b/docs/vmalert.md
@ -655,7 +655,7 @@ or time series modification via [relabeling](https://docs.victoriametrics.com/vm
 `vmalert` runs a web-server (`-httpListenAddr`) for serving metrics and alerts endpoints:

 * `http://<vmalert-addr>` - UI;
-* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules;
+* `http://<vmalert-addr>/api/v1/rules` - list of all loaded groups and rules. Pass the optional `type` query arg to return only alerting (`type=alert`) or only recording (`type=record`) rules; any other value is ignored and all rules are returned;
 * `http://<vmalert-addr>/api/v1/alerts` - list of all active alerts;
 * `http://<vmalert-addr>/vmalert/api/v1/alert?group_id=<group_id>&alert_id=<alert_id>` - get alert status in JSON format.
  Used as alert source in AlertManager.