2020-02-16 19:59:02 +01:00
package main
import (
2020-03-13 11:19:31 +01:00
"context"
2020-02-16 19:59:02 +01:00
"flag"
2020-03-13 11:19:31 +01:00
"fmt"
2020-04-01 17:17:53 +02:00
"net/url"
"os"
2020-06-21 12:32:46 +02:00
"strconv"
2020-03-13 11:19:31 +01:00
"strings"
2023-07-03 14:59:52 +02:00
"sync"
2020-03-13 11:19:31 +01:00
"time"
2020-02-16 19:59:02 +01:00
2023-04-26 19:20:22 +02:00
"github.com/VictoriaMetrics/metrics"
2020-10-20 09:15:21 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
2020-02-16 19:59:02 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
2020-04-06 13:44:03 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/notifier"
2020-06-28 13:26:22 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remoteread"
2020-04-27 23:18:02 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
2022-05-14 11:38:44 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/templates"
2020-02-16 19:59:02 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/buildinfo"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/envflag"
2020-05-14 21:01:51 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/fasttime"
2020-03-29 00:48:30 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/flagutil"
2020-02-16 19:59:02 +01:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/httpserver"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/procutil"
2022-07-21 18:58:22 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/pushmetrics"
2020-02-16 19:59:02 +01:00
)
// Command-line flags of vmalert. Flags are registered via the standard flag
// package and flagutil; the variables are dereferenced elsewhere in this file
// after envflag.Parse() is called in main.
var (
2023-05-08 09:52:57 +02:00
// rulePath is the array-valued -rule flag listing the sources of rule files.
rulePath = flagutil . NewArrayString ( "rule" , ` Path to the files or http url with alerting and / or recording rules .
2023-02-10 02:51:00 +01:00
Supports hierarchical patterns and regexpes .
2020-03-29 00:48:30 +01:00
Examples :
2023-05-08 13:31:54 +02:00
- rule = "/path/to/file" . Path to a single file with alerting rules .
- rule = "http://<some-server-addr>/path/to/rules" . HTTP URL to a page with alerting rules .
2023-02-10 02:18:27 +01:00
- rule = "dir/*.yaml" - rule = "/*.yaml" - rule = "gcs://vmalert-rules/tenant_%{TENANT_ID}/prod" .
2023-05-09 01:21:42 +02:00
- rule = "dir/**/*.yaml" . Includes all the . yaml files in "dir" subfolders recursively .
2023-02-10 02:51:00 +01:00
Rule files may contain % { ENV_VAR } placeholders , which are substituted by the corresponding env vars .
Enterprise version of vmalert supports S3 and GCS paths to rules .
For example : gs : //bucket/path/to/rules, s3://bucket/path/to/rules
S3 and GCS paths support only matching by prefix , e . g . s3 : //bucket/dir/rule_ matches
all files with prefix rule_ in folder dir .
See https : //docs.victoriametrics.com/vmalert.html#reading-rules-from-object-storage
2023-02-10 02:18:27 +01:00
` )
2020-06-06 22:27:09 +02:00
2022-10-01 17:26:05 +02:00
// ruleTemplatesPath is the array-valued -rule.templates flag; its value is
// passed to templates.Load on startup and on every config reload.
ruleTemplatesPath = flagutil . NewArrayString ( "rule.templates" , ` Path or glob pattern to location with go template definitions
2022-05-14 11:38:44 +02:00
for rules annotations templating . Flag can be specified multiple times .
Examples :
- rule . templates = "/path/to/file" . Path to a single file with go templates
- rule . templates = "dir/*.tpl" - rule . templates = "/*.tpl" . Relative path to all . tpl files in "dir" folder ,
2023-04-26 19:20:22 +02:00
absolute path to all . tpl files in root .
2023-05-09 01:21:42 +02:00
- rule . templates = "dir/**/*.tpl" . Includes all the . tpl files in "dir" subfolders recursively .
2023-04-26 19:20:22 +02:00
` )
2022-05-14 11:38:44 +02:00
2022-02-02 13:11:41 +01:00
// configCheckInterval drives the periodic reload ticker in configReload;
// the default of 0 leaves the ticker disabled, so only SIGHUP triggers reloads.
configCheckInterval = flag . Duration ( "configCheckInterval" , 0 , "Interval for checking for changes in '-rule' or '-notifier.config' files. " +
2023-05-10 09:50:41 +02:00
"By default, the checking is disabled. Send SIGHUP signal in order to force config check for changes." )
2021-05-25 15:27:22 +02:00
2023-01-27 08:08:35 +01:00
// httpListenAddr and useProxyProtocol are handed to httpserver.Serve in main.
// httpListenAddr also supplies the port of the default external URL.
httpListenAddr = flag . String ( "httpListenAddr" , ":8880" , "Address to listen for http connections. See also -httpListenAddr.useProxyProtocol" )
useProxyProtocol = flag . Bool ( "httpListenAddr.useProxyProtocol" , false , "Whether to use proxy protocol for connections accepted at -httpListenAddr . " +
2023-03-08 10:26:53 +01:00
"See https://www.haproxy.org/download/1.8/doc/proxy-protocol.txt . " +
"With enabled proxy protocol http server cannot serve regular /metrics endpoint. Use -pushmetrics.url for metrics pushing" )
2020-06-28 13:26:22 +02:00
// Rule evaluation and validation settings. validateTemplates and
// validateExpressions control how config.Parse is invoked outside dry-run mode.
evaluationInterval = flag . Duration ( "evaluationInterval" , time . Minute , "How often to evaluate the rules" )
2020-06-06 22:27:09 +02:00
validateTemplates = flag . Bool ( "rule.validateTemplates" , true , "Whether to validate annotation and label templates" )
validateExpressions = flag . Bool ( "rule.validateExpressions" , true , "Whether to validate rules expressions via MetricsQL engine" )
2021-09-13 14:48:18 +02:00
maxResolveDuration = flag . Duration ( "rule.maxResolveDuration" , 0 , "Limits the maximum duration for automatic alert expiration, " +
2023-02-16 19:26:17 +01:00
"which by default is 4 times evaluationInterval of the parent group." )
2022-12-29 12:36:44 +01:00
resendDelay = flag . Duration ( "rule.resendDelay" , 0 , "Minimum amount of time to wait before resending an alert to notifier" )
ruleUpdateEntriesLimit = flag . Int ( "rule.updateEntriesLimit" , 20 , "Defines the max number of rule's state updates stored in-memory. " +
2023-05-10 09:50:41 +02:00
"Rule's updates are available on rule's Details page and are used for debugging purposes. The number of stored updates can be overridden per rule via update_entries_limit param." )
2022-03-16 16:26:33 +01:00
2023-05-30 12:33:17 +02:00
// externalURL and externalAlertSource shape the source link attached to alerts;
// see getExternalURL and getAlertURLGenerator.
externalURL = flag . String ( "external.url" , "" , "External URL is used as alert's source for sent alerts to the notifier. By default, hostname is used as address." )
2022-10-05 21:52:30 +02:00
externalAlertSource = flag . String ( "external.alert.source" , "" , ` External Alert Source allows to override the Source link for alerts sent to AlertManager ` +
` for cases where you want to build a custom link to Grafana, Prometheus or any other service. ` +
` Supports templating - see https://docs.victoriametrics.com/vmalert.html#templating . ` +
2023-04-04 13:25:29 +02:00
` For example, link to Grafana: -external.alert.source='explore?orgId=1&left= { "datasource":"VictoriaMetrics","queries":[ { "expr": {{ $expr | jsonEscape | queryEscape }} ,"refId":"A"}],"range": { "from":"now-1h","to":"now"}}'. ` +
2023-03-24 09:40:55 +01:00
` Link to VMUI: -external.alert.source='vmui/#/?g0.expr= {{ .Expr | queryEscape }} '. ` +
// NOTE(review): the commit-message text below, like the timestamp lines throughout
// this file, looks like version-control blame residue rather than Go source - confirm.
app/vmalert/templates: properly escape all the special chars in `quotesEscape` function
Previously the `quotesEscape` function was escaping only double quotes.
This wasn't enough, since the input string could contain other special chars,
which must be escaped when put inside JSON string. For example, carriage return and line feed chars (\n\r),
backslash char, etc. This led to the following issues, which were improperly fixed:
- https://github.com/VictoriaMetrics/VictoriaMetrics/issues/890 - this issue
was "fixed" by introducing the `crlfEscape` function, which led to unnecessary
complications in user templates, while not fixing various corner cases
such as backslash chars in the input string.
See https://github.com/VictoriaMetrics/VictoriaMetrics/commit/1de15ad490dbde84ad2a657f3b65a6311991f372
- https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3139 - this issue
was "fixed" by urlencoding the whole string passed to -external.alert.source
command-line flag. This led to invalid urls, which couldn't be parsed by Grafana.
See https://github.com/VictoriaMetrics/VictoriaMetrics/commit/00c838353d1246495fd7c7546f3d71095e855eab
and https://github.com/VictoriaMetrics/VictoriaMetrics/commit/4bd024459931a0671dee4abae4bc3556795ee398
This commit properly encodes the input string passed to `quotesEscape`, so it can be safely embedded inside JSON strings.
This commit deprecates crlfEscape template function and adds the following new template functions:
- strvalue and stripDomain - these functions are supported by Prometheus, so they were added
for compatibility purposes.
- jsonEscape and htmlEscape for converting the input string to valid quoted JSON string
and for html-escaping the input string, so it could be safely embedded as a plaintext
into html.
This commit also documents all supported template functions at https://docs.victoriametrics.com/vmalert.html#template-functions
The deprecated crlfEscape function isn't documented on purpose, since its usefulness is negative in general case.
2022-10-27 22:38:19 +02:00
` If empty 'vmalert/alert?group_id= {{ .GroupID }} &alert_id= {{ .AlertID }} ' is used. ` )
2022-10-01 17:26:05 +02:00
// externalLabels holds 'Name=value' pairs which newManager parses into a label map.
// NOTE(review): the help text mentions "-label" while the flag is registered
// as "external.label".
externalLabels = flagutil . NewArrayString ( "external.label" , "Optional label in the form 'Name=value' to add to all generated recording rules and alerts. " +
2020-07-28 13:20:31 +02:00
"Pass multiple -label flags in order to add multiple label sets." )
2020-06-28 13:26:22 +02:00
// Remote read settings. remoteReadIgnoreRestoreErrors is deprecated: main only
// logs a warning when it is set to false.
remoteReadLookBack = flag . Duration ( "remoteRead.lookback" , time . Hour , "Lookback defines how far to look into past for alerts timeseries." +
" For example, if lookback=1h then range from now() to now()-1h will be scanned." )
2023-02-06 09:51:30 +01:00
remoteReadIgnoreRestoreErrors = flag . Bool ( "remoteRead.ignoreRestoreErrors" , true , "Whether to ignore errors from remote storage when restoring alerts state on startup. DEPRECATED - this flag has no effect and will be removed in the next releases." )
2020-10-20 09:15:21 +02:00
2022-02-02 13:11:41 +01:00
disableAlertGroupLabel = flag . Bool ( "disableAlertgroupLabel" , false , "Whether to disable adding group's Name as label to generated alerts and time series." )
2021-08-21 19:08:55 +02:00
2023-07-28 10:42:02 +02:00
// dryRun makes main parse and validate the rule files and return without
// starting the manager or the HTTP server.
dryRun = flag . Bool ( "dryRun" , false , "Whether to check only config files without running vmalert. The rules file are validated. The -rule flag must be specified." )
2020-02-16 19:59:02 +01:00
)
2021-10-13 14:25:11 +02:00
// alertURLGeneratorFn produces the source link for an alert. It is assigned in
// main from the result of getAlertURLGenerator and is later passed to
// notifier.Init by newManager.
var alertURLGeneratorFn notifier . AlertURLGenerator
2020-02-16 19:59:02 +01:00
func main ( ) {
2020-05-16 10:59:30 +02:00
// Write flags and help message to stdout, since it is easier to grep or pipe.
flag . CommandLine . SetOutput ( os . Stdout )
2020-06-05 09:42:56 +02:00
flag . Usage = usage
2020-02-16 19:59:02 +01:00
envflag . Parse ( )
2022-08-11 09:56:40 +02:00
remoteread . InitSecretFlags ( )
remotewrite . InitSecretFlags ( )
datasource . InitSecretFlags ( )
2020-02-16 19:59:02 +01:00
buildinfo . Init ( )
logger . Init ( )
2022-07-22 12:35:58 +02:00
pushmetrics . Init ( )
2023-02-04 04:46:13 +01:00
if ! * remoteReadIgnoreRestoreErrors {
2023-02-06 09:51:30 +01:00
logger . Warnf ( "flag `remoteRead.ignoreRestoreErrors` is deprecated and will be removed in next releases." )
2023-02-04 04:46:13 +01:00
}
2022-05-14 11:38:44 +02:00
err := templates . Load ( * ruleTemplatesPath , true )
if err != nil {
logger . Fatalf ( "failed to parse %q: %s" , * ruleTemplatesPath , err )
}
2020-06-23 21:45:45 +02:00
2020-10-20 09:15:21 +02:00
if * dryRun {
2022-07-22 13:50:41 +02:00
groups , err := config . Parse ( * rulePath , notifier . ValidateTemplates , true )
2020-10-20 09:15:21 +02:00
if err != nil {
2021-06-09 11:20:38 +02:00
logger . Fatalf ( "failed to parse %q: %s" , * rulePath , err )
2020-10-20 09:15:21 +02:00
}
if len ( groups ) == 0 {
logger . Fatalf ( "No rules for validation. Please specify path to file(s) with alerting and/or recording rules using `-rule` flag" )
}
return
}
2021-10-13 14:25:11 +02:00
eu , err := getExternalURL ( * externalURL , * httpListenAddr , httpserver . IsTLS ( ) )
if err != nil {
logger . Fatalf ( "failed to init `external.url`: %s" , err )
}
2022-05-14 11:38:44 +02:00
2021-10-13 14:25:11 +02:00
alertURLGeneratorFn , err = getAlertURLGenerator ( eu , * externalAlertSource , * validateTemplates )
if err != nil {
logger . Fatalf ( "failed to init `external.alert.source`: %s" , err )
}
2022-07-22 13:50:41 +02:00
var validateTplFn config . ValidateTplFn
if * validateTemplates {
validateTplFn = notifier . ValidateTemplates
}
2021-06-09 11:20:38 +02:00
if * replayFrom != "" || * replayTo != "" {
rw , err := remotewrite . Init ( context . Background ( ) )
if err != nil {
logger . Fatalf ( "failed to init remoteWrite: %s" , err )
}
2021-12-21 19:25:47 +01:00
if rw == nil {
logger . Fatalf ( "remoteWrite.url can't be empty in replay mode" )
}
2022-07-22 13:50:41 +02:00
groupsCfg , err := config . Parse ( * rulePath , validateTplFn , * validateExpressions )
2021-06-09 11:20:38 +02:00
if err != nil {
logger . Fatalf ( "cannot parse configuration file: %s" , err )
}
2021-08-31 13:57:47 +02:00
// prevent queries from caching and boundaries aligning
// when querying VictoriaMetrics datasource.
2021-12-02 13:45:08 +01:00
q , err := datasource . Init ( url . Values { "nocache" : { "1" } } )
2021-06-09 11:20:38 +02:00
if err != nil {
logger . Fatalf ( "failed to init datasource: %s" , err )
}
if err := replay ( groupsCfg , q , rw ) ; err != nil {
logger . Fatalf ( "replay failed: %s" , err )
}
return
}
2020-06-28 13:26:22 +02:00
ctx , cancel := context . WithCancel ( context . Background ( ) )
manager , err := newManager ( ctx )
2020-06-23 21:45:45 +02:00
if err != nil {
2020-06-28 13:26:22 +02:00
logger . Fatalf ( "failed to init: %s" , err )
2020-04-27 23:18:02 +02:00
}
2021-05-25 15:27:22 +02:00
logger . Infof ( "reading rules configuration file from %q" , strings . Join ( * rulePath , ";" ) )
2022-07-22 13:50:41 +02:00
groupsCfg , err := config . Parse ( * rulePath , validateTplFn , * validateExpressions )
2021-05-25 15:27:22 +02:00
if err != nil {
logger . Fatalf ( "cannot parse configuration file: %s" , err )
}
2021-05-21 15:34:03 +02:00
2021-10-19 15:35:27 +02:00
// Register SIGHUP handler for config re-read just before manager.start call.
// This guarantees that the config will be re-read if the signal arrives during manager.start call.
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1240
sighupCh := procutil . NewSighupChan ( )
2021-05-25 15:27:22 +02:00
if err := manager . start ( ctx , groupsCfg ) ; err != nil {
2020-05-10 18:58:17 +02:00
logger . Fatalf ( "failed to start: %s" , err )
}
2020-05-09 11:32:12 +02:00
2021-10-19 15:35:27 +02:00
go configReload ( ctx , manager , groupsCfg , sighupCh )
2020-05-09 11:32:12 +02:00
2020-05-10 18:58:17 +02:00
rh := & requestHandler { m : manager }
2023-01-27 08:08:35 +01:00
go httpserver . Serve ( * httpListenAddr , * useProxyProtocol , rh . handler )
2020-04-06 13:44:03 +02:00
2020-02-16 19:59:02 +01:00
sig := procutil . WaitForSigterm ( )
logger . Infof ( "service received signal %s" , sig )
2020-02-21 22:15:05 +01:00
if err := httpserver . Stop ( * httpListenAddr ) ; err != nil {
logger . Fatalf ( "cannot stop the webservice: %s" , err )
}
2020-03-13 11:19:31 +01:00
cancel ( )
2020-05-10 18:58:17 +02:00
manager . close ( )
2020-03-13 11:19:31 +01:00
}
2020-04-11 21:42:01 +02:00
// Metrics describing the outcome of config reloads performed by configReload.
var (
2020-05-10 18:58:17 +02:00
// configReloads is incremented when a reload is triggered by SIGHUP.
configReloads = metrics . NewCounter ( ` vmalert_config_last_reload_total ` )
// configReloadErrors is incremented by setConfigError.
configReloadErrors = metrics . NewCounter ( ` vmalert_config_last_reload_errors_total ` )
// configSuccess is set to 1 by setConfigSuccess and to 0 by setConfigError.
configSuccess = metrics . NewCounter ( ` vmalert_config_last_reload_successful ` )
// configTimestamp stores the timestamp passed to setConfigSuccess.
configTimestamp = metrics . NewCounter ( ` vmalert_config_last_reload_success_timestamp_seconds ` )
2020-04-11 21:42:01 +02:00
)
2020-06-28 13:26:22 +02:00
func newManager ( ctx context . Context ) ( * manager , error ) {
2021-08-31 13:57:47 +02:00
q , err := datasource . Init ( nil )
2020-06-28 13:26:22 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "failed to init datasource: %w" , err )
2020-06-28 13:26:22 +02:00
}
2022-02-15 14:59:45 +01:00
2022-05-09 10:11:56 +02:00
labels := make ( map [ string ] string )
2022-02-15 14:59:45 +01:00
for _ , s := range * externalLabels {
if len ( s ) == 0 {
continue
}
n := strings . IndexByte ( s , '=' )
if n < 0 {
return nil , fmt . Errorf ( "missing '=' in `-label`. It must contain label in the form `Name=value`; got %q" , s )
}
labels [ s [ : n ] ] = s [ n + 1 : ]
}
nts , err := notifier . Init ( alertURLGeneratorFn , labels , * externalURL )
2020-06-28 13:26:22 +02:00
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "failed to init notifier: %w" , err )
2020-06-28 13:26:22 +02:00
}
manager := & manager {
2021-04-28 22:41:15 +02:00
groups : make ( map [ uint64 ] * Group ) ,
querierBuilder : q ,
notifiers : nts ,
2022-02-15 14:59:45 +01:00
labels : labels ,
2020-06-28 13:26:22 +02:00
}
rw , err := remotewrite . Init ( ctx )
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "failed to init remoteWrite: %w" , err )
2020-06-28 13:26:22 +02:00
}
manager . rw = rw
rr , err := remoteread . Init ( )
if err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "failed to init remoteRead: %w" , err )
2020-06-28 13:26:22 +02:00
}
manager . rr = rr
2020-07-28 13:20:31 +02:00
2020-06-28 13:26:22 +02:00
return manager , nil
}
2020-04-01 17:17:53 +02:00
// getExternalURL returns the URL under which vmalert is reachable from outside.
//
// A non-empty externalURL is parsed and returned as is. Otherwise the URL is
// built from the OS hostname, the port of httpListenAddr (if it has one) and
// the "https" scheme when isSecure is set, "http" otherwise.
func getExternalURL(externalURL, httpListenAddr string, isSecure bool) (*url.URL, error) {
	if externalURL != "" {
		return url.Parse(externalURL)
	}
	hname, err := os.Hostname()
	if err != nil {
		return nil, err
	}
	// The port is whatever follows the last ':'. Splitting at the first ':'
	// breaks on IPv6 listen addresses such as "[::1]:8880". A ']' after the
	// last ':' means that colon belongs to a bracketed IPv6 host without a port.
	port := ""
	if n := strings.LastIndexByte(httpListenAddr, ':'); n >= 0 && !strings.Contains(httpListenAddr[n:], "]") {
		port = httpListenAddr[n:]
	}
	schema := "http://"
	if isSecure {
		schema = "https://"
	}
	return url.Parse(fmt.Sprintf("%s%s%s", schema, hname, port))
}
2020-06-21 12:32:46 +02:00
func getAlertURLGenerator ( externalURL * url . URL , externalAlertSource string , validateTemplate bool ) ( notifier . AlertURLGenerator , error ) {
if externalAlertSource == "" {
2022-07-08 10:26:13 +02:00
return func ( a notifier . Alert ) string {
gID , aID := strconv . FormatUint ( a . GroupID , 10 ) , strconv . FormatUint ( a . ID , 10 )
2022-08-17 14:46:28 +02:00
return fmt . Sprintf ( "%s/vmalert/alert?%s=%s&%s=%s" , externalURL , paramGroupID , gID , paramAlertID , aID )
2020-06-21 12:32:46 +02:00
} , nil
}
if validateTemplate {
if err := notifier . ValidateTemplates ( map [ string ] string {
"tpl" : externalAlertSource ,
} ) ; err != nil {
2020-06-30 21:58:18 +02:00
return nil , fmt . Errorf ( "error validating source template %s: %w" , externalAlertSource , err )
2020-06-21 12:32:46 +02:00
}
}
m := map [ string ] string {
"tpl" : externalAlertSource ,
}
return func ( alert notifier . Alert ) string {
2023-04-26 15:31:14 +02:00
qFn := func ( query string ) ( [ ] datasource . Metric , error ) {
return nil , fmt . Errorf ( "`query` template isn't supported for alert source template" )
}
templated , err := alert . ExecTemplate ( qFn , alert . Labels , m )
2020-06-21 12:32:46 +02:00
if err != nil {
logger . Errorf ( "can not exec source template %s" , err )
}
2022-10-27 21:30:27 +02:00
return fmt . Sprintf ( "%s/%s" , externalURL , templated [ "tpl" ] )
2020-06-21 12:32:46 +02:00
} , nil
}
2020-06-05 09:42:56 +02:00
// usage is installed as flag.Usage by main. It passes a short description of
// vmalert, together with a link to the docs, to flagutil.Usage.
func usage ( ) {
const s = `
vmalert processes alerts and recording rules .
2021-04-20 19:16:17 +02:00
See the docs at https : //docs.victoriametrics.com/vmalert.html .
2020-06-05 09:42:56 +02:00
`
2020-12-03 20:40:30 +01:00
flagutil . Usage ( s )
2020-06-05 09:42:56 +02:00
}
2021-05-25 15:27:22 +02:00
2021-10-19 15:35:27 +02:00
func configReload ( ctx context . Context , m * manager , groupsCfg [ ] config . Group , sighupCh <- chan os . Signal ) {
2021-05-25 15:27:22 +02:00
var configCheckCh <- chan time . Time
2022-02-02 13:11:41 +01:00
checkInterval := * configCheckInterval
if checkInterval > 0 {
ticker := time . NewTicker ( checkInterval )
2021-05-25 15:27:22 +02:00
configCheckCh = ticker . C
defer ticker . Stop ( )
}
2022-07-22 13:50:41 +02:00
var validateTplFn config . ValidateTplFn
if * validateTemplates {
validateTplFn = notifier . ValidateTemplates
}
2021-05-25 15:27:22 +02:00
// init reload metrics with positive values to improve alerting conditions
2023-07-03 14:59:52 +02:00
setConfigSuccess ( fasttime . UnixTimestamp ( ) )
2023-03-20 16:08:30 +01:00
parseFn := config . Parse
2021-05-25 15:27:22 +02:00
for {
select {
case <- ctx . Done ( ) :
return
case <- sighupCh :
2022-05-14 11:38:44 +02:00
tmplMsg := ""
if len ( * ruleTemplatesPath ) > 0 {
tmplMsg = fmt . Sprintf ( "and templates %q " , * ruleTemplatesPath )
}
logger . Infof ( "SIGHUP received. Going to reload rules %q %s..." , * rulePath , tmplMsg )
2021-05-25 15:27:22 +02:00
configReloads . Inc ( )
2023-03-20 16:08:30 +01:00
// allow logs emitting during manual config reload
parseFn = config . Parse
2021-05-25 15:27:22 +02:00
case <- configCheckCh :
2023-03-20 16:08:30 +01:00
// disable logs emitting during per-interval config reload
parseFn = config . ParseSilent
2021-05-25 15:27:22 +02:00
}
2022-02-02 13:11:41 +01:00
if err := notifier . Reload ( ) ; err != nil {
2023-07-03 14:59:52 +02:00
setConfigError ( err )
2022-02-02 13:11:41 +01:00
logger . Errorf ( "failed to reload notifier config: %s" , err )
continue
}
2022-05-14 11:38:44 +02:00
err := templates . Load ( * ruleTemplatesPath , false )
if err != nil {
2023-07-03 14:59:52 +02:00
setConfigError ( err )
2022-05-14 11:38:44 +02:00
logger . Errorf ( "failed to load new templates: %s" , err )
continue
}
2023-03-20 16:08:30 +01:00
newGroupsCfg , err := parseFn ( * rulePath , validateTplFn , * validateExpressions )
2021-05-25 15:27:22 +02:00
if err != nil {
2023-07-03 14:59:52 +02:00
setConfigError ( err )
2021-05-25 15:27:22 +02:00
logger . Errorf ( "cannot parse configuration file: %s" , err )
continue
}
if configsEqual ( newGroupsCfg , groupsCfg ) {
2022-05-14 11:38:44 +02:00
templates . Reload ( )
2021-08-31 11:28:02 +02:00
// set success to 1 since previous reload
// could have been unsuccessful
configSuccess . Set ( 1 )
2023-07-03 14:59:52 +02:00
setConfigError ( nil )
2021-05-25 15:27:22 +02:00
// config didn't change - skip it
continue
}
2021-11-30 00:23:49 +01:00
if err := m . update ( ctx , newGroupsCfg , false ) ; err != nil {
2023-07-03 14:59:52 +02:00
setConfigError ( err )
2021-05-25 15:27:22 +02:00
logger . Errorf ( "error while reloading rules: %s" , err )
continue
}
2022-05-14 11:38:44 +02:00
templates . Reload ( )
2021-11-30 00:23:49 +01:00
groupsCfg = newGroupsCfg
2023-07-03 14:59:52 +02:00
setConfigSuccess ( fasttime . UnixTimestamp ( ) )
2021-05-25 15:27:22 +02:00
logger . Infof ( "Rules reloaded successfully from %q" , * rulePath )
}
}
// configsEqual reports whether a and b have the same length and the groups at
// every position carry identical checksums.
func configsEqual(a, b []config.Group) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	for idx := 0; idx < n; idx++ {
		if a[idx].Checksum == b[idx].Checksum {
			continue
		}
		return false
	}
	return true
}
2023-07-03 14:59:52 +02:00
// setConfigSuccess sets config reload status to 1.
func setConfigSuccess ( at uint64 ) {
configSuccess . Set ( 1 )
2023-07-28 20:15:58 +02:00
configTimestamp . Set ( at )
2023-07-03 14:59:52 +02:00
// reset the error if any
setConfigErr ( nil )
}
// setConfigError records a failed config reload: the error counter is
// incremented, the success metric is set to 0 and err is stored as the
// last reload error.
func setConfigError(err error) {
	configReloadErrors.Inc()
	configSuccess.Set(0)
	setConfigErr(err)
}
var (
	// configErrMu guards configErr.
	configErrMu sync.RWMutex
	// configErr is the error stored by the most recent call to setConfigErr;
	// it is nil when no error is currently recorded.
	configErr error
)

// setConfigErr stores err as the last config reload error.
// Passing nil clears the stored error.
func setConfigErr(err error) {
	configErrMu.Lock()
	defer configErrMu.Unlock()
	configErr = err
}

// configError returns the last config reload error, or nil if none is stored.
func configError() error {
	configErrMu.RLock()
	lastErr := configErr
	configErrMu.RUnlock()
	return lastErr
}