From 03002f1fe19f77f39f09b50c2f954a2f8b40e41f Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 25 Nov 2020 14:41:02 +0200 Subject: [PATCH] lib/storage: log metric name plus all its labels when the metric timestamp is outside the configured retention This should simplify debugging when the source of the metric with unexpected timestamp must be found. --- docs/CHANGELOG.md | 1 + lib/storage/metric_name.go | 16 ++++++++-------- lib/storage/metric_name_test.go | 26 ++++++++++++++++++++++++++ lib/storage/storage.go | 19 ++++++++++++++----- 4 files changed, 49 insertions(+), 13 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 550672a275..abbb0c8b51 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -19,6 +19,7 @@ See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/851 * FEATURE: add remoteAddr to slow query log in order to simplify identifying the client that sends slow queries to VictoriaMetrics. Slow query logging is controlled with `-search.logSlowQueryDuration` command-line flag. * FEATURE: add `/tags/delSeries` handler from Graphite Tags API. See https://victoriametrics.github.io/#graphite-tags-api-usage +* FEATURE: log metric name plus all its labels when the metric timestamp is out of the configured retention. This should simplify detecting the source of metrics with unexpected timestamps. * BUGFIX: properly parse Prometheus metrics with [exemplars](https://github.com/OpenObservability/OpenMetrics/blob/master/OpenMetrics.md#exemplars-1) such as `foo 123 # {bar="baz"} 1`. * BUGFIX: properly parse "infinity" values in [OpenMetrics format](https://github.com/OpenObservability/OpenMetrics/blob/master/OpenMetrics.md#abnf). diff --git a/lib/storage/metric_name.go b/lib/storage/metric_name.go index 9ebf70a868..f03735eb33 100644 --- a/lib/storage/metric_name.go +++ b/lib/storage/metric_name.go @@ -350,17 +350,17 @@ func hasTag(tags []string, key []byte) bool { } // String returns user-readable representation of the metric name. -// -// Use this function only for debug logging. func (mn *MetricName) String() string { - mn.sortTags() + var mnCopy MetricName + mnCopy.CopyFrom(mn) + mnCopy.sortTags() var tags []string - for i := range mn.Tags { - t := &mn.Tags[i] - tags = append(tags, fmt.Sprintf("%q=%q", t.Key, t.Value)) + for i := range mnCopy.Tags { + t := &mnCopy.Tags[i] + tags = append(tags, fmt.Sprintf("%s=%q", t.Key, t.Value)) } - tagsStr := strings.Join(tags, ", ") - return fmt.Sprintf("AccountID=%d, ProjectID=%d, MetricGroup=%q, tags=[%s]", mn.AccountID, mn.ProjectID, mn.MetricGroup, tagsStr) + tagsStr := strings.Join(tags, ",") + return fmt.Sprintf("AccountID=%d, ProjectID=%d, %s{%s}", mnCopy.AccountID, mnCopy.ProjectID, mnCopy.MetricGroup, tagsStr) } // Marshal appends marshaled mn to dst and returns the result. diff --git a/lib/storage/metric_name_test.go b/lib/storage/metric_name_test.go index 2765217c7b..4935f78fa7 100644 --- a/lib/storage/metric_name_test.go +++ b/lib/storage/metric_name_test.go @@ -6,6 +6,32 @@ import ( "testing" ) +func TestMetricNameString(t *testing.T) { + f := func(mn *MetricName, resultExpected string) { + t.Helper() + result := mn.String() + if result != resultExpected { + t.Fatalf("unexpected result\ngot\n%s\nwant\n%s", result, resultExpected) + } + } + f(&MetricName{ + MetricGroup: []byte("foobar"), + }, "foobar{}") + f(&MetricName{ + MetricGroup: []byte("abc"), + Tags: []Tag{ + { + Key: []byte("foo"), + Value: []byte("bar"), + }, + { + Key: []byte("baz"), + Value: []byte("123"), + }, + }, + }, `abc{baz="123",foo="bar"}`) +} + func TestMetricNameSortTags(t *testing.T) { testMetricNameSortTags(t, []string{}, []string{}) testMetricNameSortTags(t, []string{"foo"}, []string{"foo"}) diff --git a/lib/storage/storage.go b/lib/storage/storage.go index b4144e2724..c4ea66624c 100644 --- a/lib/storage/storage.go +++ b/lib/storage/storage.go @@ -1324,9 +1324,10 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra if mr.Timestamp < minTimestamp { // Skip rows with too small timestamps outside the retention. if firstWarn == nil { + metricName := getUserReadableMetricName(mr.MetricNameRaw) firstWarn = fmt.Errorf("cannot insert row with too small timestamp %d outside the retention; minimum allowed timestamp is %d; "+ - "probably you need updating -retentionPeriod command-line flag", - mr.Timestamp, minTimestamp) + "probably you need updating -retentionPeriod command-line flag; metricName: %s", + mr.Timestamp, minTimestamp, metricName) } atomic.AddUint64(&s.tooSmallTimestampRows, 1) continue @@ -1334,9 +1335,9 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra if mr.Timestamp > maxTimestamp { // Skip rows with too big timestamps significantly exceeding the current time. if firstWarn == nil { - firstWarn = fmt.Errorf("cannot insert row with too big timestamp %d exceeding the current time; maximum allowed timestamp is %d; "+ - "propbably you need updating -retentionPeriod command-line flag", - mr.Timestamp, maxTimestamp) + metricName := getUserReadableMetricName(mr.MetricNameRaw) + firstWarn = fmt.Errorf("cannot insert row with too big timestamp %d exceeding the current time; maximum allowed timestamp is %d; metricName: %s", + mr.Timestamp, maxTimestamp, metricName) } atomic.AddUint64(&s.tooBigTimestampRows, 1) continue @@ -1445,6 +1446,14 @@ func (s *Storage) add(rows []rawRow, mrs []MetricRow, precisionBits uint8) ([]ra return rows, nil } +func getUserReadableMetricName(metricNameRaw []byte) string { + var mn MetricName + if err := mn.unmarshalRaw(metricNameRaw); err != nil { + return fmt.Sprintf("cannot unmarshal metricNameRaw %q: %s", metricNameRaw, err) + } + return mn.String() +} + type pendingMetricRow struct { MetricName []byte mr MetricRow