(app|lib)/vmstorage: do not increment vm_rows_ignored_total on NaNs (#7166)

The `vm_rows_ignored_total` metric exists to signal ingestion issues to
users, such as a bad timestamp or a parsing error.
In commit
a5424e95b3
this metric started to increment each time vmstorage received a NaN. But NaN
is a valid value in the Prometheus data model and in the Prometheus metrics
exposition format. Exporters from the Prometheus ecosystem may expose NaNs
as metric values, and these values are delivered to vmstorage
and increment the metric.
Since there is nothing the user can do about this, as opposed to parsing
errors or bad timestamps, there is not much sense in incrementing this
metric. So this commit rolls back the `reason="nan_value"` increments.

### Describe Your Changes

Please provide a brief description of the changes you made. Be as
specific as possible to help others understand the purpose and impact of
your modifications.

### Checklist

The following checks are **mandatory**:

- [ ] My change adheres to the [VictoriaMetrics contributing
guidelines](https://docs.victoriametrics.com/contributing/).

Signed-off-by: hagen1778 <roman@victoriametrics.com>

(cherry picked from commit 0d4f4b8f7d)
Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2024-10-02 12:37:27 +02:00 committed by hagen1778
parent 608f364ff7
commit dfb2ad4ab4
No known key found for this signature in database
GPG Key ID: 3BF75F3741CA9640
3 changed files with 0 additions and 25 deletions

View File

@ -451,7 +451,6 @@ func writeStorageMetrics(w io.Writer, strg *storage.Storage) {
metrics.WriteCounterUint64(w, `vm_deduplicated_samples_total{type="merge"}`, m.DedupsDuringMerge) metrics.WriteCounterUint64(w, `vm_deduplicated_samples_total{type="merge"}`, m.DedupsDuringMerge)
metrics.WriteGaugeUint64(w, `vm_snapshots`, m.SnapshotsCount) metrics.WriteGaugeUint64(w, `vm_snapshots`, m.SnapshotsCount)
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="nan_value"}`, m.NaNValueRows)
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="big_timestamp"}`, m.TooBigTimestampRows) metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="big_timestamp"}`, m.TooBigTimestampRows)
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="small_timestamp"}`, m.TooSmallTimestampRows) metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="small_timestamp"}`, m.TooSmallTimestampRows)
metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="invalid_raw_metric_name"}`, m.InvalidRawMetricNames) metrics.WriteCounterUint64(w, `vm_rows_ignored_total{reason="invalid_raw_metric_name"}`, m.InvalidRawMetricNames)

View File

@ -42,7 +42,6 @@ const (
type Storage struct { type Storage struct {
rowsReceivedTotal atomic.Uint64 rowsReceivedTotal atomic.Uint64
rowsAddedTotal atomic.Uint64 rowsAddedTotal atomic.Uint64
naNValueRows atomic.Uint64
tooSmallTimestampRows atomic.Uint64 tooSmallTimestampRows atomic.Uint64
tooBigTimestampRows atomic.Uint64 tooBigTimestampRows atomic.Uint64
@ -505,7 +504,6 @@ type Metrics struct {
DedupsDuringMerge uint64 DedupsDuringMerge uint64
SnapshotsCount uint64 SnapshotsCount uint64
NaNValueRows uint64
TooSmallTimestampRows uint64 TooSmallTimestampRows uint64
TooBigTimestampRows uint64 TooBigTimestampRows uint64
InvalidRawMetricNames uint64 InvalidRawMetricNames uint64
@ -581,7 +579,6 @@ func (s *Storage) UpdateMetrics(m *Metrics) {
m.DedupsDuringMerge = dedupsDuringMerge.Load() m.DedupsDuringMerge = dedupsDuringMerge.Load()
m.SnapshotsCount += uint64(s.mustGetSnapshotsCount()) m.SnapshotsCount += uint64(s.mustGetSnapshotsCount())
m.NaNValueRows += s.naNValueRows.Load()
m.TooSmallTimestampRows += s.tooSmallTimestampRows.Load() m.TooSmallTimestampRows += s.tooSmallTimestampRows.Load()
m.TooBigTimestampRows += s.tooBigTimestampRows.Load() m.TooBigTimestampRows += s.tooBigTimestampRows.Load()
m.InvalidRawMetricNames += s.invalidRawMetricNames.Load() m.InvalidRawMetricNames += s.invalidRawMetricNames.Load()
@ -1946,7 +1943,6 @@ func (s *Storage) add(rows []rawRow, dstMrs []*MetricRow, mrs []MetricRow, preci
if !decimal.IsStaleNaN(mr.Value) { if !decimal.IsStaleNaN(mr.Value) {
// Skip NaNs other than Prometheus staleness marker, since the underlying encoding // Skip NaNs other than Prometheus staleness marker, since the underlying encoding
// doesn't know how to work with them. // doesn't know how to work with them.
s.naNValueRows.Add(1)
continue continue
} }
} }

View File

@ -2,7 +2,6 @@ package storage
import ( import (
"fmt" "fmt"
"math"
"math/rand" "math/rand"
"os" "os"
"path/filepath" "path/filepath"
@ -1591,9 +1590,6 @@ func TestStorageRowsNotAdded(t *testing.T) {
if got, want := gotMetrics.RowsAddedTotal, opts.wantMetrics.RowsAddedTotal; got != want { if got, want := gotMetrics.RowsAddedTotal, opts.wantMetrics.RowsAddedTotal; got != want {
t.Fatalf("unexpected Metrics.RowsAddedTotal: got %d, want %d", got, want) t.Fatalf("unexpected Metrics.RowsAddedTotal: got %d, want %d", got, want)
} }
if got, want := gotMetrics.NaNValueRows, opts.wantMetrics.NaNValueRows; got != want {
t.Fatalf("unexpected Metrics.NaNValueRows: got %d, want %d", got, want)
}
if got, want := gotMetrics.InvalidRawMetricNames, opts.wantMetrics.InvalidRawMetricNames; got != want { if got, want := gotMetrics.InvalidRawMetricNames, opts.wantMetrics.InvalidRawMetricNames; got != want {
t.Fatalf("unexpected Metrics.InvalidRawMetricNames: got %d, want %d", got, want) t.Fatalf("unexpected Metrics.InvalidRawMetricNames: got %d, want %d", got, want)
} }
@ -1649,22 +1645,6 @@ func TestStorageRowsNotAdded(t *testing.T) {
}, },
}) })
minTimestamp = time.Now().UnixMilli()
maxTimestamp = minTimestamp + 1000
mrs = testGenerateMetricRowsForTenant(accountID, projectID, rng, numRows, minTimestamp, maxTimestamp)
for i := range numRows {
mrs[i].Value = math.NaN()
}
f(&options{
name: "NaN",
mrs: mrs,
tr: TimeRange{minTimestamp, maxTimestamp},
wantMetrics: &Metrics{
RowsReceivedTotal: numRows,
NaNValueRows: numRows,
},
})
minTimestamp = time.Now().UnixMilli() minTimestamp = time.Now().UnixMilli()
maxTimestamp = minTimestamp + 1000 maxTimestamp = minTimestamp + 1000
mrs = testGenerateMetricRowsForTenant(accountID, projectID, rng, numRows, minTimestamp, maxTimestamp) mrs = testGenerateMetricRowsForTenant(accountID, projectID, rng, numRows, minTimestamp, maxTimestamp)