vmalert: add new metric vmalert_iteration_interval_seconds (#2623)

The new metric shows the configured evaluation interval per group.
The metric updates its value when a group's interval is changed during
hot reload.
The new metric can be used to estimate how close a group
is to missing evaluation rounds. The following query
shows the percentage of the interval that the group spends evaluating
all of its rules before the next round:
```
(max(vmalert_iteration_duration_seconds{quantile="0.99"}) / vmalert_iteration_interval_seconds) * 100
```

https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2618
Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-05-20 17:31:16 +02:00 committed by GitHub
parent ac55ca052c
commit 2cf586da78
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -49,14 +49,21 @@ type groupMetrics struct {
iterationTotal *utils.Counter iterationTotal *utils.Counter
iterationDuration *utils.Summary iterationDuration *utils.Summary
iterationMissed *utils.Counter iterationMissed *utils.Counter
iterationInterval *utils.Gauge
} }
func newGroupMetrics(name, file string) *groupMetrics { func newGroupMetrics(g *Group) *groupMetrics {
m := &groupMetrics{} m := &groupMetrics{}
labels := fmt.Sprintf(`group=%q, file=%q`, name, file) labels := fmt.Sprintf(`group=%q, file=%q`, g.Name, g.File)
m.iterationTotal = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels)) m.iterationTotal = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_total{%s}`, labels))
m.iterationDuration = utils.GetOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels)) m.iterationDuration = utils.GetOrCreateSummary(fmt.Sprintf(`vmalert_iteration_duration_seconds{%s}`, labels))
m.iterationMissed = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels)) m.iterationMissed = utils.GetOrCreateCounter(fmt.Sprintf(`vmalert_iteration_missed_total{%s}`, labels))
m.iterationInterval = utils.GetOrCreateGauge(fmt.Sprintf(`vmalert_iteration_interval_seconds{%s}`, labels), func() float64 {
g.mu.RLock()
i := g.Interval.Seconds()
g.mu.RUnlock()
return i
})
return m return m
} }
@ -92,13 +99,13 @@ func newGroup(cfg config.Group, qb datasource.QuerierBuilder, defaultInterval ti
finishedCh: make(chan struct{}), finishedCh: make(chan struct{}),
updateCh: make(chan *Group), updateCh: make(chan *Group),
} }
g.metrics = newGroupMetrics(g.Name, g.File)
if g.Interval == 0 { if g.Interval == 0 {
g.Interval = defaultInterval g.Interval = defaultInterval
} }
if g.Concurrency < 1 { if g.Concurrency < 1 {
g.Concurrency = 1 g.Concurrency = 1
} }
g.metrics = newGroupMetrics(g)
rules := make([]Rule, len(cfg.Rules)) rules := make([]Rule, len(cfg.Rules))
for i, r := range cfg.Rules { for i, r := range cfg.Rules {
var extraLabels map[string]string var extraLabels map[string]string
@ -222,6 +229,8 @@ func (g *Group) close() {
g.metrics.iterationDuration.Unregister() g.metrics.iterationDuration.Unregister()
g.metrics.iterationTotal.Unregister() g.metrics.iterationTotal.Unregister()
g.metrics.iterationMissed.Unregister()
g.metrics.iterationInterval.Unregister()
for _, rule := range g.Rules { for _, rule := range g.Rules {
rule.Close() rule.Close()
} }