2020-04-27 23:19:27 +02:00
package main
import (
"context"
2020-11-09 23:27:32 +01:00
"errors"
2022-09-14 14:04:24 +02:00
"sync"
2021-06-09 11:20:38 +02:00
"time"
2022-03-29 15:09:07 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
2020-04-27 23:19:27 +02:00
)
2020-06-01 12:46:37 +02:00
// Rule represents alerting or recording rule
// that has unique ID, can be Executed and
// updated with other Rule.
type Rule interface {
2021-06-09 11:20:38 +02:00
// ID returns unique ID that may be used for
2020-06-01 12:46:37 +02:00
// identifying this Rule among others.
ID ( ) uint64
2022-06-09 08:21:30 +02:00
// Exec executes the rule with given context at the given timestamp and limit.
// returns an err if number of resulting time series exceeds the limit.
Exec ( ctx context . Context , ts time . Time , limit int ) ( [ ] prompbmarshal . TimeSeries , error )
2022-06-09 08:58:25 +02:00
// ExecRange executes the rule on the given time range.
ExecRange ( ctx context . Context , start , end time . Time ) ( [ ] prompbmarshal . TimeSeries , error )
2020-06-01 12:46:37 +02:00
// UpdateWith performs modification of current Rule
// with fields of the given Rule.
UpdateWith ( Rule ) error
2022-03-15 12:54:53 +01:00
// ToAPI converts Rule into APIRule
ToAPI ( ) APIRule
app/vmalert: extend metrics set exported by `vmalert` #573 (#654)
* app/vmalert: extend metrics set exported by `vmalert` #573
New metrics were added to improve observability:
+ vmalert_alerts_pending{alertname, group} - number of pending alerts per group
per alert;
+ vmalert_alerts_acitve{alertname, group} - number of active alerts per group
per alert;
+ vmalert_alerts_error{alertname, group} - is 1 if alertname ended up with error
during prev execution, is 0 if no errors happened;
+ vmalert_recording_rules_error{recording, group} - is 1 if recording rule
ended up with error during prev execution, is 0 if no errors happened;
* vmalert_iteration_total{group, file} - now contains group and file name labels.
This should improve control over specific groups;
* vmalert_iteration_duration_seconds{group, file} - now contains group and file name labels. This should improve control over specific groups;
Some collisions for alerts and recording rules are possible, because neither
group name nor alert/recording rule name are unique for compatibility reasons.
Commit contains list of TODOs for Unregistering metrics since groups and rules
are ephemeral and could be removed without application restart. In order to
unlock Unregistering feature corresponding PR was filed - https://github.com/VictoriaMetrics/metrics/pull/13
* app/vmalert: extend metrics set exported by `vmalert` #573
The changes are following:
* add an ID label to rules metrics, since `name` collisions within one group is
a common case - see the k8s example alerts;
* supports metrics unregistering on rule updates. Consider the case when one rule
was added or removed from the group, or the whole group was added or removed.
The change depends on https://github.com/VictoriaMetrics/metrics/pull/16
where race condition for Unregister method was fixed.
2020-08-09 08:41:29 +02:00
// Close performs the shutdown procedures for rule
// such as metrics unregister
Close ( )
2020-05-04 23:51:22 +02:00
}
2020-11-09 23:27:32 +01:00
2023-06-13 17:03:53 +02:00
// errDuplicate is the sentinel error for a result that contains several
// series sharing one labelset once rule labels have been applied.
var errDuplicate = errors.New("result contains metrics with the same labelset after applying rule labels. See https://docs.victoriametrics.com/vmalert.html#series-with-the-same-labelset for details")
2022-09-14 14:04:24 +02:00
// ruleState keeps a fixed-size ring of ruleStateEntry values.
// The embedded RWMutex guards both fields below.
type ruleState struct {
	sync.RWMutex

	// entries is the ring storage; its length is set once at construction.
	entries []ruleStateEntry
	// cur is the index in entries of the most recently written slot.
	cur int
}
// ruleStateEntry describes the outcome of a single rule.Exec call.
type ruleStateEntry struct {
	// stores last moment of time rule.Exec was called
	time time.Time
	// stores the timestamp with which rule.Exec was called
	at time.Time
	// stores the duration of the last rule.Exec call
	duration time.Duration
	// stores last error that happened in Exec func
	// resets on every successful Exec
	// may be used as Health ruleState
	err error
	// stores the number of samples returned during
	// the last evaluation
	samples int
	// stores the number of time series fetched during
	// the last evaluation.
	// Is supported by VictoriaMetrics only, starting from v1.90.0
	// If seriesFetched == nil, then this attribute was missing in
	// datasource response (unsupported).
	seriesFetched *int
	// stores the curl command reflecting the HTTP request used during rule.Exec
	curl string
}
2022-12-29 12:36:44 +01:00
func newRuleState ( size int ) * ruleState {
if size < 1 {
2023-02-08 14:34:03 +01:00
size = 1
2022-12-29 12:36:44 +01:00
}
2022-09-14 14:04:24 +02:00
return & ruleState {
2022-12-29 12:36:44 +01:00
entries : make ( [ ] ruleStateEntry , size ) ,
2022-09-14 14:04:24 +02:00
}
}
// getLast returns a copy of the entry stored at the current cursor position.
// The read lock is held while the entry is read.
func (s *ruleState) getLast() ruleStateEntry {
	s.RLock()
	defer s.RUnlock()

	last := s.entries[s.cur]
	return last
}
2022-12-29 12:36:44 +01:00
// size reports how many slots the ring has, i.e. len(s.entries),
// read under the read lock.
func (s *ruleState) size() int {
	s.RLock()
	n := len(s.entries)
	s.RUnlock()
	return n
}
2022-09-14 14:04:24 +02:00
// getAll returns copies of all populated entries, starting with the one at
// the current cursor and walking backwards through the ring (wrapping from
// index 0 to the last index). An entry counts as populated when either its
// time or its at field is non-zero.
//
// The returned slice is never nil; it is empty when the state has no
// entries or none of them is populated.
func (s *ruleState) getAll() []ruleStateEntry {
	s.RLock()
	defer s.RUnlock()

	// The ring is bounded by the slice length, not its capacity: indexing up
	// to cap-1 would go out of range whenever cap exceeds len.
	n := len(s.entries)
	entries := make([]ruleStateEntry, 0, n)
	for i := 0; i < n; i++ {
		idx := s.cur - i
		if idx < 0 {
			// wrap around to the tail of the ring
			idx += n
		}
		e := s.entries[idx]
		if !e.time.IsZero() || !e.at.IsZero() {
			entries = append(entries, e)
		}
	}
	return entries
}
// add advances the cursor to the next ring slot (wrapping to index 0 past
// the last slot) and stores e there, overwriting whatever the slot held.
//
// If the state has no slots at all (e.g. a zero-value ruleState), a single
// slot is allocated first, matching the minimum size newRuleState enforces.
func (s *ruleState) add(e ruleStateEntry) {
	s.Lock()
	defer s.Unlock()

	if len(s.entries) == 0 {
		s.entries = make([]ruleStateEntry, 1)
	}

	s.cur++
	// Wrap on the slice length, not its capacity: an index between len and
	// cap-1 would be out of range.
	if s.cur >= len(s.entries) {
		s.cur = 0
	}
	s.entries[s.cur] = e
}