2021-06-09 11:20:38 +02:00
package main
import (
"flag"
"fmt"
"strings"
"time"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/config"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/datasource"
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/remotewrite"
2023-10-13 13:54:33 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/app/vmalert/rule"
2021-06-09 11:20:38 +02:00
"github.com/VictoriaMetrics/VictoriaMetrics/lib/logger"
)
var (
replayFrom = flag . String ( "replay.timeFrom" , "" ,
2024-07-05 09:27:34 +02:00
"The time filter in RFC3339 format to start the replay from. E.g. '2020-01-01T20:07:00Z'" )
2021-06-09 11:20:38 +02:00
replayTo = flag . String ( "replay.timeTo" , "" ,
2024-07-05 09:27:34 +02:00
"The time filter in RFC3339 format to finish the replay by. E.g. '2020-01-01T20:07:00Z'. " +
"By default, is set to the current time." )
2021-06-09 11:20:38 +02:00
replayRulesDelay = flag . Duration ( "replay.rulesDelay" , time . Second ,
2023-05-10 09:50:41 +02:00
"Delay between rules evaluation within the group. Could be important if there are chained rules inside the group " +
2021-06-09 11:20:38 +02:00
"and processing need to wait for previous rule results to be persisted by remote storage before evaluating the next rule." +
"Keep it equal or bigger than -remoteWrite.flushInterval." )
replayMaxDatapoints = flag . Int ( "replay.maxDatapointsPerQuery" , 1e3 ,
2023-02-06 09:51:30 +01:00
"Max number of data points expected in one request. It affects the max time range for every `/query_range` request during the replay. The higher the value, the less requests will be made during replay." )
2021-06-09 11:20:38 +02:00
replayRuleRetryAttempts = flag . Int ( "replay.ruleRetryAttempts" , 5 ,
"Defines how many retries to make before giving up on rule if request for it returns an error." )
2022-05-02 10:08:24 +02:00
disableProgressBar = flag . Bool ( "replay.disableProgressBar" , false , "Whether to disable rendering progress bars during the replay. " +
"Progress bar rendering might be verbose or break the logs parsing, so it is recommended to be disabled when not used in interactive mode." )
2021-06-09 11:20:38 +02:00
)
2023-10-13 13:54:33 +02:00
func replay ( groupsCfg [ ] config . Group , qb datasource . QuerierBuilder , rw remotewrite . RWClient ) error {
2021-06-09 11:20:38 +02:00
if * replayMaxDatapoints < 1 {
return fmt . Errorf ( "replay.maxDatapointsPerQuery can't be lower than 1" )
}
tFrom , err := time . Parse ( time . RFC3339 , * replayFrom )
if err != nil {
2024-07-05 09:27:34 +02:00
return fmt . Errorf ( "failed to parse replay.timeFrom=%q: %w" , * replayFrom , err )
2021-06-09 11:20:38 +02:00
}
2024-07-05 09:27:34 +02:00
// use tFrom location for default value, otherwise filters could have different locations
tTo := time . Now ( ) . In ( tFrom . Location ( ) )
if * replayTo != "" {
tTo , err = time . Parse ( time . RFC3339 , * replayTo )
if err != nil {
return fmt . Errorf ( "failed to parse replay.timeTo=%q: %w" , * replayTo , err )
}
2021-06-09 11:20:38 +02:00
}
2024-07-05 09:27:34 +02:00
2021-06-09 11:20:38 +02:00
if ! tTo . After ( tFrom ) {
2024-07-05 09:27:34 +02:00
return fmt . Errorf ( "replay.timeTo=%v must be bigger than replay.timeFrom=%v" , tTo , tFrom )
2021-06-09 11:20:38 +02:00
}
labels := make ( map [ string ] string )
for _ , s := range * externalLabels {
if len ( s ) == 0 {
continue
}
n := strings . IndexByte ( s , '=' )
if n < 0 {
return fmt . Errorf ( "missing '=' in `-label`. It must contain label in the form `name=value`; got %q" , s )
}
labels [ s [ : n ] ] = s [ n + 1 : ]
}
fmt . Printf ( "Replay mode:" +
"\nfrom: \t%v " +
"\nto: \t%v " +
"\nmax data points per request: %d\n" ,
tFrom , tTo , * replayMaxDatapoints )
var total int
for _ , cfg := range groupsCfg {
2023-10-13 13:54:33 +02:00
ng := rule . NewGroup ( cfg , qb , * evaluationInterval , labels )
total += ng . Replay ( tFrom , tTo , rw , * replayMaxDatapoints , * replayRuleRetryAttempts , * replayRulesDelay , * disableProgressBar )
2021-06-09 11:20:38 +02:00
}
2024-06-20 13:20:40 +02:00
logger . Infof ( "replay evaluation finished, generated %d samples" , total )
2024-06-20 15:15:34 +02:00
if err := rw . Close ( ) ; err != nil {
return err
2024-06-20 15:12:53 +02:00
}
droppedRows := remotewrite . GetDroppedRows ( )
if droppedRows > 0 {
2024-06-20 13:20:40 +02:00
return fmt . Errorf ( "failed to push all generated samples to remote write url, dropped %d samples out of %d" , droppedRows , total )
2021-06-09 11:20:38 +02:00
}
2024-06-20 15:12:53 +02:00
return nil
2021-06-09 11:20:38 +02:00
}