mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-15 00:13:30 +01:00
app/vmselect/promql: properly handle partial counter resets in rate(), irate(), increase() and remove_resets() functions
Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787
This commit is contained in:
parent
1e6b0a1f54
commit
fa08220d27
@ -6568,7 +6568,7 @@ func TestExecSuccess(t *testing.T) {
|
|||||||
q := `rate((2000-time())[100s:100s])`
|
q := `rate((2000-time())[100s:100s])`
|
||||||
r := netstorage.Result{
|
r := netstorage.Result{
|
||||||
MetricName: metricNameExpected,
|
MetricName: metricNameExpected,
|
||||||
Values: []float64{5.5, 4.5, 6.5, 4.5, 2.5, 0.5},
|
Values: []float64{0, 0, 6.5, 4.5, 2.5, 0.5},
|
||||||
Timestamps: timestampsExpected,
|
Timestamps: timestampsExpected,
|
||||||
}
|
}
|
||||||
resultExpected := []netstorage.Result{r}
|
resultExpected := []netstorage.Result{r}
|
||||||
@ -6579,7 +6579,7 @@ func TestExecSuccess(t *testing.T) {
|
|||||||
q := `rate((2000-time())[100s:100s] offset 100s)`
|
q := `rate((2000-time())[100s:100s] offset 100s)`
|
||||||
r := netstorage.Result{
|
r := netstorage.Result{
|
||||||
MetricName: metricNameExpected,
|
MetricName: metricNameExpected,
|
||||||
Values: []float64{6, 5, 7.5, 5.5, 3.5, 1.5},
|
Values: []float64{0, 0, 3.5, 5.5, 3.5, 1.5},
|
||||||
Timestamps: timestampsExpected,
|
Timestamps: timestampsExpected,
|
||||||
}
|
}
|
||||||
resultExpected := []netstorage.Result{r}
|
resultExpected := []netstorage.Result{r}
|
||||||
@ -6590,7 +6590,7 @@ func TestExecSuccess(t *testing.T) {
|
|||||||
q := `rate((2000-time())[100s:100s] offset 100s)[:] offset 100s`
|
q := `rate((2000-time())[100s:100s] offset 100s)[:] offset 100s`
|
||||||
r := netstorage.Result{
|
r := netstorage.Result{
|
||||||
MetricName: metricNameExpected,
|
MetricName: metricNameExpected,
|
||||||
Values: []float64{7, 6, 5, 7.5, 5.5, 3.5},
|
Values: []float64{0, 0, 0, 3.5, 5.5, 3.5},
|
||||||
Timestamps: timestampsExpected,
|
Timestamps: timestampsExpected,
|
||||||
}
|
}
|
||||||
resultExpected := []netstorage.Result{r}
|
resultExpected := []netstorage.Result{r}
|
||||||
@ -6746,7 +6746,7 @@ func TestExecSuccess(t *testing.T) {
|
|||||||
})
|
})
|
||||||
t.Run(`remove_resets()`, func(t *testing.T) {
|
t.Run(`remove_resets()`, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
q := `remove_resets( abs(1500-time()) )`
|
q := `remove_resets(abs(1500-time()))`
|
||||||
r := netstorage.Result{
|
r := netstorage.Result{
|
||||||
MetricName: metricNameExpected,
|
MetricName: metricNameExpected,
|
||||||
Values: []float64{500, 800, 900, 900, 1100, 1300},
|
Values: []float64{500, 800, 900, 900, 1100, 1300},
|
||||||
@ -6755,6 +6755,20 @@ func TestExecSuccess(t *testing.T) {
|
|||||||
resultExpected := []netstorage.Result{r}
|
resultExpected := []netstorage.Result{r}
|
||||||
f(q, resultExpected)
|
f(q, resultExpected)
|
||||||
})
|
})
|
||||||
|
t.Run(`remove_resets(sum)`, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
q := `remove_resets(sum(
|
||||||
|
alias(time(), "full"),
|
||||||
|
alias(time()/5 < 300, "partial"),
|
||||||
|
))`
|
||||||
|
r := netstorage.Result{
|
||||||
|
MetricName: metricNameExpected,
|
||||||
|
Values: []float64{1200, 1440, 1680, 1680, 1880, 2080},
|
||||||
|
Timestamps: timestampsExpected,
|
||||||
|
}
|
||||||
|
resultExpected := []netstorage.Result{r}
|
||||||
|
f(q, resultExpected)
|
||||||
|
})
|
||||||
t.Run(`range_avg(time())`, func(t *testing.T) {
|
t.Run(`range_avg(time())`, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
q := `range_avg(time())`
|
q := `range_avg(time())`
|
||||||
@ -6945,10 +6959,10 @@ func TestExecSuccess(t *testing.T) {
|
|||||||
})
|
})
|
||||||
t.Run(`aggr_over_time(single-func)`, func(t *testing.T) {
|
t.Run(`aggr_over_time(single-func)`, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
q := `aggr_over_time("increase", rand(0)[:10s])`
|
q := `round(aggr_over_time("increase", rand(0)[:10s]),0.01)`
|
||||||
r1 := netstorage.Result{
|
r1 := netstorage.Result{
|
||||||
MetricName: metricNameExpected,
|
MetricName: metricNameExpected,
|
||||||
Values: []float64{5.465672601448873, 6.642207999066246, 6.8400051805114295, 7.182425481980655, 5.1677922402706, 6.594060518641982},
|
Values: []float64{5.47, 6.64, 6.84, 7.24, 5.17, 6.59},
|
||||||
Timestamps: timestampsExpected,
|
Timestamps: timestampsExpected,
|
||||||
}
|
}
|
||||||
r1.MetricName.Tags = []storage.Tag{{
|
r1.MetricName.Tags = []storage.Tag{{
|
||||||
|
@ -704,9 +704,9 @@ func removeCounterResets(values []float64) {
|
|||||||
d := v - prevValue
|
d := v - prevValue
|
||||||
if d < 0 {
|
if d < 0 {
|
||||||
if (-d * 8) < prevValue {
|
if (-d * 8) < prevValue {
|
||||||
// This is likely jitter from `Prometheus HA pairs`.
|
// This is likely a partial counter reset.
|
||||||
// Just substitute v with prevValue.
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787
|
||||||
v = prevValue
|
correction += prevValue - v
|
||||||
} else {
|
} else {
|
||||||
correction += prevValue
|
correction += prevValue
|
||||||
}
|
}
|
||||||
|
@ -100,10 +100,11 @@ func TestRemoveCounterResets(t *testing.T) {
|
|||||||
timestampsExpected := []int64{0, 1, 2, 3}
|
timestampsExpected := []int64{0, 1, 2, 3}
|
||||||
testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected)
|
testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected)
|
||||||
|
|
||||||
// verify how jitter from `Prometheus HA pairs` is handled
|
// verify how partial counter reset is handled.
|
||||||
values = []float64{100, 95, 120, 140, 137, 50}
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787
|
||||||
|
values = []float64{100, 95, 120, 119, 139, 50}
|
||||||
removeCounterResets(values)
|
removeCounterResets(values)
|
||||||
valuesExpected = []float64{100, 100, 120, 140, 140, 190}
|
valuesExpected = []float64{100, 100, 125, 125, 145, 195}
|
||||||
timestampsExpected = []int64{0, 1, 2, 3, 4, 5}
|
timestampsExpected = []int64{0, 1, 2, 3, 4, 5}
|
||||||
testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected)
|
testRowsEqual(t, values, timestampsExpected, valuesExpected, timestampsExpected)
|
||||||
}
|
}
|
||||||
|
@ -2329,9 +2329,9 @@ func removeCounterResetsMaybeNaNs(values []float64) {
|
|||||||
d := v - prevValue
|
d := v - prevValue
|
||||||
if d < 0 {
|
if d < 0 {
|
||||||
if (-d * 8) < prevValue {
|
if (-d * 8) < prevValue {
|
||||||
// This is likely jitter from `Prometheus HA pairs`.
|
// This is likely a partial counter reset.
|
||||||
// Just substitute v with prevValue.
|
// See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787
|
||||||
v = prevValue
|
correction += prevValue - v
|
||||||
} else {
|
} else {
|
||||||
correction += prevValue
|
correction += prevValue
|
||||||
}
|
}
|
||||||
|
@ -35,6 +35,7 @@ scrape_configs:
|
|||||||
|
|
||||||
* FEATURE: [query tracing](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#query-tracing): show timestamps in query traces in human-readable format (aka `RFC3339` in UTC timezone) instead of milliseconds since Unix epoch. For example, `2022-06-27T10:32:54.506Z` instead of `1656325974506`. This improves traces' readability.
|
* FEATURE: [query tracing](https://docs.victoriametrics.com/Single-server-VictoriaMetrics.html#query-tracing): show timestamps in query traces in human-readable format (aka `RFC3339` in UTC timezone) instead of milliseconds since Unix epoch. For example, `2022-06-27T10:32:54.506Z` instead of `1656325974506`. This improves traces' readability.
|
||||||
* FEATURE: improve performance of [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers) requests, which return a big number of time series.
|
* FEATURE: improve performance of [/api/v1/series](https://prometheus.io/docs/prometheus/latest/querying/api/#finding-series-by-label-matchers) requests, which return a big number of time series.
|
||||||
|
* FEATURE: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets in [remove_resets](https://docs.victoriametrics.com/MetricsQL.html#remove_resets) function. Now `remove_resets(sum(m))` should return the expected increasing line when some time series matching `m` disappear on the selected time range. Previously such a query would return a horizontal line after the disappeared series.
|
||||||
* FEATURE: expose additional histogram metrics at `http://victoriametrics:8428/metrics`, which may help understanding query workload:
|
* FEATURE: expose additional histogram metrics at `http://victoriametrics:8428/metrics`, which may help understanding query workload:
|
||||||
|
|
||||||
* `vm_rows_read_per_query` - the number of raw samples read per query.
|
* `vm_rows_read_per_query` - the number of raw samples read per query.
|
||||||
@ -51,6 +52,7 @@ scrape_configs:
|
|||||||
{% endraw %}
|
{% endraw %}
|
||||||
|
|
||||||
* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple gigabytes). Now the max cache size for parsed regexps is limited to a few megabytes.
|
* BUGFIX: limit max memory occupied by the cache, which stores parsed regular expressions. Previously too long regular expressions passed in [MetricsQL queries](https://docs.victoriametrics.com/MetricsQL.html) could result in big amounts of used memory (e.g. multiple gigabytes). Now the max cache size for parsed regexps is limited to a few megabytes.
|
||||||
|
* BUGFIX: [MetricsQL](https://docs.victoriametrics.com/MetricsQL.html): properly handle partial counter resets when calculating [rate](https://docs.victoriametrics.com/MetricsQL.html#rate), [irate](https://docs.victoriametrics.com/MetricsQL.html#irate) and [increase](https://docs.victoriametrics.com/MetricsQL.html#increase) functions. Previously these functions could return zero values after partial counter resets until the counter increases to the last value before partial counter reset. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2787).
|
||||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when an unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes.
|
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): make sure that [stale markers](https://docs.victoriametrics.com/vmagent.html#prometheus-staleness-markers) are generated with the actual timestamp when an unsuccessful scrape occurs. This should prevent possible time series overlap on scrape target restart in dynamic environments such as Kubernetes.
|
||||||
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix.
|
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): properly reload changed `-promscrape.config` file when `-promscrape.configCheckInterval` option is set. The changed config file wasn't reloaded in this case since [v1.69.0](#v1690). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/2786). Thanks to @ttyv for the fix.
|
||||||
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).
|
* BUGFIX: [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html): assume that the response is complete if `-search.denyPartialResponse` is enabled and up to `-replicationFactor - 1` `vmstorage` nodes are unavailable. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1767).
|
||||||
|
Loading…
Reference in New Issue
Block a user