2020-06-01 12:46:37 +02:00
package config
import (
2024-10-08 14:28:32 +02:00
"bytes"
2020-09-11 21:14:30 +02:00
"crypto/md5"
2024-10-29 16:30:39 +01:00
"flag"
2020-06-01 12:46:37 +02:00
"fmt"
2020-06-15 21:15:47 +02:00
"hash/fnv"
2024-10-08 14:28:32 +02:00
"io"
2021-12-02 13:45:08 +01:00
"net/url"
2020-06-15 21:15:47 +02:00
"sort"
2020-06-01 12:46:37 +02:00
"strings"
2023-03-20 16:08:30 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config/log"
2020-10-20 09:15:21 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/utils"
2020-08-13 15:43:55 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envtemplate"
2022-02-11 15:17:00 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/promutils"
2024-10-08 14:28:32 +02:00
"gopkg.in/yaml.v2"
2020-06-01 12:46:37 +02:00
)
2024-10-29 16:30:39 +01:00
// defaultRuleType is the value of the `-rule.defaultRuleType` flag.
// Group.UnmarshalYAML applies it to groups that do not set their own type.
var defaultRuleType = flag.String(
	"rule.defaultRuleType",
	"prometheus",
	` Default type for rule expressions, can be overridden via "type" parameter on the group level, see https://docs.victoriametrics.com/vmalert/#groups. Supported values: "graphite", "prometheus" and "vlogs". `,
)
2020-06-01 12:46:37 +02:00
// Group contains list of Rules grouped into
// entity with one name and evaluation interval
type Group struct {
2023-10-25 11:54:18 +02:00
Type Type ` yaml:"type,omitempty" `
File string
Name string ` yaml:"name" `
Interval * promutils . Duration ` yaml:"interval,omitempty" `
EvalOffset * promutils . Duration ` yaml:"eval_offset,omitempty" `
// EvalDelay will adjust the `time` parameter of rule evaluation requests to compensate intentional query delay from datasource.
// see https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5155
EvalDelay * promutils . Duration ` yaml:"eval_delay,omitempty" `
2022-06-09 08:21:30 +02:00
Limit int ` yaml:"limit,omitempty" `
2022-04-16 13:25:54 +02:00
Rules [ ] Rule ` yaml:"rules" `
Concurrency int ` yaml:"concurrency" `
2021-08-31 13:52:34 +02:00
// Labels is a set of label value pairs, that will be added to every rule.
// It has priority over the external labels.
Labels map [ string ] string ` yaml:"labels" `
2020-09-11 21:14:30 +02:00
// Checksum stores the hash of yaml definition for this group.
// May be used to detect any changes like rules re-ordering etc.
Checksum string
2021-12-02 13:45:08 +01:00
// Optional HTTP URL parameters added to each rule request
Params url . Values ` yaml:"params" `
2022-07-21 15:59:55 +02:00
// Headers contains optional HTTP headers added to each rule request
2022-07-22 10:44:55 +02:00
Headers [ ] Header ` yaml:"headers,omitempty" `
2023-04-27 13:02:21 +02:00
// NotifierHeaders contains optional HTTP headers sent to notifiers for generated notifications
2023-04-27 12:17:26 +02:00
NotifierHeaders [ ] Header ` yaml:"notifier_headers,omitempty" `
2023-10-10 12:41:19 +02:00
// EvalAlignment will make the timestamp of group query requests be aligned with interval
EvalAlignment * bool ` yaml:"eval_alignment,omitempty" `
2020-06-01 12:46:37 +02:00
// Catches all undefined fields and must be empty after parsing.
2024-07-10 00:14:15 +02:00
XXX map [ string ] any ` yaml:",inline" `
2020-06-01 12:46:37 +02:00
}
2020-09-11 21:14:30 +02:00
// UnmarshalYAML implements the yaml.Unmarshaler interface.
2024-07-10 00:14:15 +02:00
func ( g * Group ) UnmarshalYAML ( unmarshal func ( any ) error ) error {
2020-09-11 21:14:30 +02:00
type group Group
if err := unmarshal ( ( * group ) ( g ) ) ; err != nil {
return err
}
b , err := yaml . Marshal ( g )
if err != nil {
2020-09-23 21:46:24 +02:00
return fmt . Errorf ( "failed to marshal group configuration for checksum: %w" , err )
2020-09-11 21:14:30 +02:00
}
2021-02-01 14:02:44 +01:00
if g . Type . Get ( ) == "" {
2024-10-29 16:30:39 +01:00
g . Type = NewRawType ( * defaultRuleType )
2021-02-01 14:02:44 +01:00
}
2020-09-11 21:14:30 +02:00
h := md5 . New ( )
h . Write ( b )
g . Checksum = fmt . Sprintf ( "%x" , h . Sum ( nil ) )
return nil
}
2023-09-06 16:29:59 +02:00
// Validate checks configuration errors for group and internal rules
2022-07-22 13:50:41 +02:00
func ( g * Group ) Validate ( validateTplFn ValidateTplFn , validateExpressions bool ) error {
2020-06-01 12:46:37 +02:00
if g . Name == "" {
return fmt . Errorf ( "group name must be set" )
}
2023-09-06 16:29:59 +02:00
if g . Interval . Duration ( ) < 0 {
return fmt . Errorf ( "interval shouldn't be lower than 0" )
}
if g . EvalOffset . Duration ( ) < 0 {
return fmt . Errorf ( "eval_offset shouldn't be lower than 0" )
}
// if `eval_offset` is set, interval won't use global evaluationInterval flag and must bigger than offset.
if g . EvalOffset . Duration ( ) > g . Interval . Duration ( ) {
return fmt . Errorf ( "eval_offset should be smaller than interval; now eval_offset: %v, interval: %v" , g . EvalOffset . Duration ( ) , g . Interval . Duration ( ) )
}
if g . Limit < 0 {
return fmt . Errorf ( "invalid limit %d, shouldn't be less than 0" , g . Limit )
}
if g . Concurrency < 0 {
return fmt . Errorf ( "invalid concurrency %d, shouldn't be less than 0" , g . Concurrency )
}
2021-02-01 14:02:44 +01:00
2020-06-15 21:15:47 +02:00
uniqueRules := map [ uint64 ] struct { } { }
2020-06-01 12:46:37 +02:00
for _ , r := range g . Rules {
ruleName := r . Record
if r . Alert != "" {
ruleName = r . Alert
}
2020-06-15 21:15:47 +02:00
if _ , ok := uniqueRules [ r . ID ] ; ok {
2023-09-06 16:29:59 +02:00
return fmt . Errorf ( "%q is a duplicate in group" , r . String ( ) )
2020-06-01 12:46:37 +02:00
}
2020-06-15 21:15:47 +02:00
uniqueRules [ r . ID ] = struct { } { }
2020-06-01 12:46:37 +02:00
if err := r . Validate ( ) ; err != nil {
2023-09-06 16:29:59 +02:00
return fmt . Errorf ( "invalid rule %q: %w" , ruleName , err )
2020-06-01 12:46:37 +02:00
}
2020-06-06 22:27:09 +02:00
if validateExpressions {
2021-02-01 14:02:44 +01:00
// its needed only for tests.
// because correct types must be inherited after unmarshalling.
exprValidator := g . Type . ValidateExpr
if err := exprValidator ( r . Expr ) ; err != nil {
2023-09-06 16:29:59 +02:00
return fmt . Errorf ( "invalid expression for rule %q: %w" , ruleName , err )
2020-06-06 22:27:09 +02:00
}
2020-06-01 12:46:37 +02:00
}
2022-07-22 13:50:41 +02:00
if validateTplFn != nil {
if err := validateTplFn ( r . Annotations ) ; err != nil {
2023-09-06 16:29:59 +02:00
return fmt . Errorf ( "invalid annotations for rule %q: %w" , ruleName , err )
2020-06-06 22:27:09 +02:00
}
2022-07-22 13:50:41 +02:00
if err := validateTplFn ( r . Labels ) ; err != nil {
2023-09-06 16:29:59 +02:00
return fmt . Errorf ( "invalid labels for rule %q: %w" , ruleName , err )
2020-06-06 22:27:09 +02:00
}
2020-06-01 12:46:37 +02:00
}
}
return checkOverflow ( g . XXX , fmt . Sprintf ( "group %q" , g . Name ) )
}
// Rule describes entity that represent either
// recording rule or alerting rule.
type Rule struct {
2023-07-27 15:13:13 +02:00
ID uint64
Record string ` yaml:"record,omitempty" `
Alert string ` yaml:"alert,omitempty" `
Expr string ` yaml:"expr" `
For * promutils . Duration ` yaml:"for,omitempty" `
// Alert will continue firing for this long even when the alerting expression no longer has results.
KeepFiringFor * promutils . Duration ` yaml:"keep_firing_for,omitempty" `
Labels map [ string ] string ` yaml:"labels,omitempty" `
Annotations map [ string ] string ` yaml:"annotations,omitempty" `
Debug bool ` yaml:"debug,omitempty" `
2022-12-29 12:36:44 +01:00
// UpdateEntriesLimit defines max number of rule's state updates stored in memory.
// Overrides `-rule.updateEntriesLimit`.
UpdateEntriesLimit * int ` yaml:"update_entries_limit,omitempty" `
2020-06-15 21:15:47 +02:00
// Catches all undefined fields and must be empty after parsing.
2024-07-10 00:14:15 +02:00
XXX map [ string ] any ` yaml:",inline" `
2020-06-15 21:15:47 +02:00
}
// UnmarshalYAML implements the yaml.Unmarshaler interface.
2024-07-10 00:14:15 +02:00
func ( r * Rule ) UnmarshalYAML ( unmarshal func ( any ) error ) error {
2020-06-15 21:15:47 +02:00
type rule Rule
if err := unmarshal ( ( * rule ) ( r ) ) ; err != nil {
return err
}
r . ID = HashRule ( * r )
return nil
}
2020-07-28 13:20:31 +02:00
// Name returns the rule name: Record when it is set, Alert otherwise.
func (r *Rule) Name() string {
	if r.Record == "" {
		return r.Alert
	}
	return r.Record
}
2022-09-20 12:52:46 +02:00
// String implements Stringer interface.
//
// The result contains the rule kind ("alerting" when Alert is set,
// "recording" otherwise), the quoted name and expression, and, when the rule
// has labels, the comma-separated `key=value` pairs sorted by key.
func (r *Rule) String() string {
	kind := "recording"
	if r.Alert != "" {
		kind = "alerting"
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "%s rule %q", kind, r.Name())
	fmt.Fprintf(&sb, "; expr: %q", r.Expr)

	labels := sortMap(r.Labels)
	if len(labels) > 0 {
		sb.WriteString("; labels:")
	}
	for i, l := range labels {
		// The separator goes in front of every pair except the first one.
		if i > 0 {
			sb.WriteString(",")
		}
		sb.WriteString(" ")
		sb.WriteString(l.key)
		sb.WriteString("=")
		sb.WriteString(l.value)
	}
	return sb.String()
}
2020-06-15 21:15:47 +02:00
// HashRule hashes significant Rule fields into
2020-09-11 21:14:30 +02:00
// unique hash that supposed to define Rule uniqueness
2020-06-15 21:15:47 +02:00
func HashRule ( r Rule ) uint64 {
h := fnv . New64a ( )
h . Write ( [ ] byte ( r . Expr ) )
if r . Record != "" {
h . Write ( [ ] byte ( "recording" ) )
h . Write ( [ ] byte ( r . Record ) )
} else {
h . Write ( [ ] byte ( "alerting" ) )
h . Write ( [ ] byte ( r . Alert ) )
}
2020-09-11 21:14:30 +02:00
kv := sortMap ( r . Labels )
2020-06-15 21:15:47 +02:00
for _ , i := range kv {
h . Write ( [ ] byte ( i . key ) )
h . Write ( [ ] byte ( i . value ) )
h . Write ( [ ] byte ( "\xff" ) )
}
return h . Sum64 ( )
2020-06-01 12:46:37 +02:00
}
// Validate check for Rule configuration errors
func ( r * Rule ) Validate ( ) error {
if ( r . Record == "" && r . Alert == "" ) || ( r . Record != "" && r . Alert != "" ) {
return fmt . Errorf ( "either `record` or `alert` must be set" )
}
if r . Expr == "" {
return fmt . Errorf ( "expression can't be empty" )
}
2020-06-15 21:15:47 +02:00
return checkOverflow ( r . XXX , "rule" )
2020-06-01 12:46:37 +02:00
}
2022-07-25 08:22:09 +02:00
// ValidateTplFn must validate the given set of key-value pairs.
// Group.Validate calls it with rule annotations and with rule labels.
type ValidateTplFn func(annotations map[string]string) error
2023-03-20 16:08:30 +01:00
// cLogger is the logger of the config package with support of logs
// suppressing. ParseSilent turns the suppression on while it runs,
// so logs emitted by the package are not printed.
var cLogger = new(log.Logger)
2023-03-09 14:46:19 +01:00
// ParseSilent parses rule configs from given file patterns without emitting logs
func ParseSilent ( pathPatterns [ ] string , validateTplFn ValidateTplFn , validateExpressions bool ) ( [ ] Group , error ) {
2023-03-20 16:08:30 +01:00
cLogger . Suppress ( true )
defer cLogger . Suppress ( false )
2023-05-08 13:31:54 +02:00
2024-06-18 14:14:30 +02:00
files , err := ReadFromFS ( pathPatterns )
2023-03-09 14:46:19 +01:00
if err != nil {
2023-10-25 21:24:01 +02:00
return nil , fmt . Errorf ( "failed to read from the config: %w" , err )
2023-03-09 14:46:19 +01:00
}
return parse ( files , validateTplFn , validateExpressions )
}
2020-06-01 12:46:37 +02:00
// Parse parses rule configs from given file patterns
2022-07-22 13:50:41 +02:00
func Parse ( pathPatterns [ ] string , validateTplFn ValidateTplFn , validateExpressions bool ) ( [ ] Group , error ) {
2024-06-18 14:14:30 +02:00
files , err := ReadFromFS ( pathPatterns )
2023-02-10 02:18:27 +01:00
if err != nil {
2023-10-25 21:24:01 +02:00
return nil , fmt . Errorf ( "failed to read from the config: %w" , err )
2020-06-01 12:46:37 +02:00
}
2023-03-09 14:46:19 +01:00
groups , err := parse ( files , validateTplFn , validateExpressions )
if err != nil {
2023-10-25 21:24:01 +02:00
return nil , fmt . Errorf ( "failed to parse %s: %w" , pathPatterns , err )
2023-03-09 14:46:19 +01:00
}
if len ( groups ) < 1 {
2023-03-20 16:08:30 +01:00
cLogger . Warnf ( "no groups found in %s" , strings . Join ( pathPatterns , ";" ) )
2023-03-09 14:46:19 +01:00
}
return groups , nil
}
func parse ( files map [ string ] [ ] byte , validateTplFn ValidateTplFn , validateExpressions bool ) ( [ ] Group , error ) {
2020-10-20 09:15:21 +02:00
errGroup := new ( utils . ErrGroup )
2020-06-01 12:46:37 +02:00
var groups [ ] Group
2023-02-10 02:18:27 +01:00
for file , data := range files {
2020-06-01 12:46:37 +02:00
uniqueGroups := map [ string ] struct { } { }
2023-02-10 02:18:27 +01:00
gr , err := parseConfig ( data )
2020-06-01 12:46:37 +02:00
if err != nil {
2020-10-20 09:15:21 +02:00
errGroup . Add ( fmt . Errorf ( "failed to parse file %q: %w" , file , err ) )
continue
2020-06-01 12:46:37 +02:00
}
for _ , g := range gr {
2022-07-22 13:50:41 +02:00
if err := g . Validate ( validateTplFn , validateExpressions ) ; err != nil {
2020-10-20 09:15:21 +02:00
errGroup . Add ( fmt . Errorf ( "invalid group %q in file %q: %w" , g . Name , file , err ) )
continue
2020-06-01 12:46:37 +02:00
}
if _ , ok := uniqueGroups [ g . Name ] ; ok {
2020-10-20 09:15:21 +02:00
errGroup . Add ( fmt . Errorf ( "group name %q duplicate in file %q" , g . Name , file ) )
continue
2020-06-01 12:46:37 +02:00
}
uniqueGroups [ g . Name ] = struct { } { }
g . File = file
groups = append ( groups , g )
}
}
2020-10-20 09:15:21 +02:00
if err := errGroup . Err ( ) ; err != nil {
return nil , err
}
2023-03-09 14:46:19 +01:00
sort . SliceStable ( groups , func ( i , j int ) bool {
if groups [ i ] . File != groups [ j ] . File {
return groups [ i ] . File < groups [ j ] . File
}
return groups [ i ] . Name < groups [ j ] . Name
} )
2020-06-01 12:46:37 +02:00
return groups , nil
}
2023-02-10 02:18:27 +01:00
func parseConfig ( data [ ] byte ) ( [ ] Group , error ) {
data , err := envtemplate . ReplaceBytes ( data )
2022-10-18 09:28:39 +02:00
if err != nil {
2023-02-10 02:18:27 +01:00
return nil , fmt . Errorf ( "cannot expand environment vars: %w" , err )
2020-06-01 12:46:37 +02:00
}
2024-10-08 14:28:32 +02:00
var result [ ] Group
type cfgFile struct {
2020-06-01 12:46:37 +02:00
Groups [ ] Group ` yaml:"groups" `
// Catches all undefined fields and must be empty after parsing.
2024-07-10 00:14:15 +02:00
XXX map [ string ] any ` yaml:",inline" `
2020-06-01 12:46:37 +02:00
}
2024-10-08 14:28:32 +02:00
decoder := yaml . NewDecoder ( bytes . NewReader ( data ) )
for {
var cf cfgFile
if err = decoder . Decode ( & cf ) ; err != nil {
if err == io . EOF { // EOF indicates no more documents to read
break
}
return nil , err
}
if err = checkOverflow ( cf . XXX , "config" ) ; err != nil {
return nil , err
}
result = append ( result , cf . Groups ... )
}
return result , nil
2020-06-01 12:46:37 +02:00
}
2024-07-10 00:14:15 +02:00
// checkOverflow returns an error listing the keys of m, which holds the YAML
// fields that did not match any known field of the parsed entity.
//
// ctx names the entity and is put into the error message. The keys are listed
// in sorted order, so the message does not depend on map iteration order.
// nil is returned when m is empty.
func checkOverflow(m map[string]any, ctx string) error {
	if len(m) == 0 {
		return nil
	}
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)
	return fmt.Errorf("unknown fields in %s: %s", ctx, strings.Join(keys, ", "))
}
2020-09-11 21:14:30 +02:00
// item is a single key-value pair of a map.
type item struct {
	key, value string
}

// sortMap returns the entries of m as a slice of items ordered by key.
// A nil slice is returned for an empty or nil map.
func sortMap(m map[string]string) []item {
	if len(m) == 0 {
		return nil
	}
	names := make([]string, 0, len(m))
	for name := range m {
		names = append(names, name)
	}
	sort.Strings(names)

	pairs := make([]item, len(names))
	for i, name := range names {
		pairs[i] = item{key: name, value: m[name]}
	}
	return pairs
}