From 74affa3aecd769ef94cc4f50774c1759518d8619 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 17 Jul 2024 12:37:07 +0200 Subject: [PATCH] lib/protoparser/graphite: follow-up for 476faf5578dee90e87296eda3bfdce4d14da3ad6 - Clarify the description of -graphite.sanitizeMetricName command-line flag at README.md - Do not sanitize tag values - only metric names and tag names must be sanitized, since they are treated specially by Grafana. Grafana doesn't apply any restrictions on tag values. - Properly replace more than two consecutive dots with a single dot. - Disallow unicode letters in metric names and tag names, since neither Prometheus nor Grafana do not support them. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6489 Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6077 --- README.md | 16 ++--- docs/CHANGELOG.md | 2 +- docs/README.md | 16 ++--- docs/Single-server-VictoriaMetrics.md | 16 ++--- lib/protoparser/graphite/parser.go | 24 +++---- lib/protoparser/graphite/parser_test.go | 92 +++++++++++++++---------- 6 files changed, 89 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 4404ffad7..b654f8ad6 100644 --- a/README.md +++ b/README.md @@ -699,23 +699,21 @@ Example for writing data with Graphite plaintext protocol to local VictoriaMetri echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 ``` -To sanitize ingested metric names and labels according to Prometheus naming convention enable -`-graphite.sanitizeMetricName` cmd-line flag. When enabled, VictoriaMetrics will apply the following modifications: -- replace `/`,`@`,`*` with `_`; -- drop `\`; -- remove redundant dots, e.g: `metric..name` => `metric.name`; -- replace characters not matching the expression `^a-zA-Z0-9:._` with `_`. +The ingested metrics can be sanitized according to Prometheus naming convention by passing `-graphite.sanitizeMetricName` command-line flag +to VictoriaMetrics. The following modifications are applied to the ingested samples when this flag is passed to VictoriaMetrics: + +- remove redundant dots, e.g: `metric..name` => `metric.name` +- replace characters not matching `a-zA-Z0-9:_.` chars with `_` + +VictoriaMetrics sets the current time to the ingested samples if the timestamp is omitted. -VictoriaMetrics sets the current time if the timestamp is omitted. An arbitrary number of lines delimited by `\n` (aka newline char) can be sent in one go. After that the data may be read via [/api/v1/export](#how-to-export-data-in-json-line-format) endpoint: - ```sh curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz' ``` - The `/api/v1/export` endpoint should return the following response: ```json diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6039b275d..5ae03cd38 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -42,7 +42,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [vmauth](https://docs.victoriametrics.com/vmauth/): allow disabling request body caching with `-maxRequestBodySizeToRetry=0`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6445). Thanks to @shichanglin5 for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/6533). * FEATURE: [dashboards](https://grafana.com/orgs/victoriametrics): add [Grafana dashboard](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/dashboards/vmauth.json) and [alerting rules](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-vmauth.yml) for [vmauth](https://docs.victoriametrics.com/vmauth/) dashboard. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4313) for details. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/): [`yandexcloud_sd_configs`](https://docs.victoriametrics.com/sd_configs/#yandexcloud_sd_configs): add support for obtaining IAM token in [GCE format](https://yandex.cloud/en-ru/docs/compute/operations/vm-connect/auth-inside-vm#auth-inside-vm) additionally to the [deprecated Amazon EC2 IMDSv1 format](https://yandex.cloud/en/docs/security/standard/authentication#aws-token). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5513). -* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): add `-graphite.sanitizeMetricName` cmd-line flag for sanitizing metrics ingested via [Graphite protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6077). +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent/) and [Single-node VictoriaMetrics](https://docs.victoriametrics.com/): add `-graphite.sanitizeMetricName` command-line flag for sanitizing metrics ingested via [Graphite protocol](https://docs.victoriametrics.com/#how-to-send-data-from-graphite-compatible-agents-such-as-statsd). See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/6077). * FEATURE: [streaming aggregation](https://docs.victoriametrics.com/stream-aggregation/): expose the following metrics at `/metrics` page of [vmagent](https://docs.victoriametrics.com/vmagent/) and [single-node VictoriaMetrics](https://docs.victoriametrics.com/): * `vm_streamaggr_matched_samples_total` - the number of input samples matched by the corresponding aggregation rule * `vm_streamaggr_output_samples_total` - the number of output samples produced by the corresponding aggregation rule diff --git a/docs/README.md b/docs/README.md index 6510b8015..514174aa8 100644 --- a/docs/README.md +++ b/docs/README.md @@ -702,23 +702,21 @@ Example for writing data with Graphite plaintext protocol to local VictoriaMetri echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 ``` -To sanitize ingested metric names and labels according to Prometheus naming convention enable -`-graphite.sanitizeMetricName` cmd-line flag. When enabled, VictoriaMetrics will apply the following modifications: -- replace `/`,`@`,`*` with `_`; -- drop `\`; -- remove redundant dots, e.g: `metric..name` => `metric.name`; -- replace characters not matching the expression `^a-zA-Z0-9:._` with `_`. +The ingested metrics can be sanitized according to Prometheus naming convention by passing `-graphite.sanitizeMetricName` command-line flag +to VictoriaMetrics. The following modifications are applied to the ingested samples when this flag is passed to VictoriaMetrics: + +- remove redundant dots, e.g: `metric..name` => `metric.name` +- replace characters not matching `a-zA-Z0-9:_.` chars with `_` + +VictoriaMetrics sets the current time to the ingested samples if the timestamp is omitted. -VictoriaMetrics sets the current time if the timestamp is omitted. An arbitrary number of lines delimited by `\n` (aka newline char) can be sent in one go. After that the data may be read via [/api/v1/export](#how-to-export-data-in-json-line-format) endpoint: - ```sh curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz' ``` - The `/api/v1/export` endpoint should return the following response: ```json diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 060702d7b..778538816 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -710,23 +710,21 @@ Example for writing data with Graphite plaintext protocol to local VictoriaMetri echo "foo.bar.baz;tag1=value1;tag2=value2 123 `date +%s`" | nc -N localhost 2003 ``` -To sanitize ingested metric names and labels according to Prometheus naming convention enable -`-graphite.sanitizeMetricName` cmd-line flag. When enabled, VictoriaMetrics will apply the following modifications: -- replace `/`,`@`,`*` with `_`; -- drop `\`; -- remove redundant dots, e.g: `metric..name` => `metric.name`; -- replace characters not matching the expression `^a-zA-Z0-9:._` with `_`. +The ingested metrics can be sanitized according to Prometheus naming convention by passing `-graphite.sanitizeMetricName` command-line flag +to VictoriaMetrics. The following modifications are applied to the ingested samples when this flag is passed to VictoriaMetrics: + +- remove redundant dots, e.g: `metric..name` => `metric.name` +- replace characters not matching `a-zA-Z0-9:_.` chars with `_` + +VictoriaMetrics sets the current time to the ingested samples if the timestamp is omitted. -VictoriaMetrics sets the current time if the timestamp is omitted. An arbitrary number of lines delimited by `\n` (aka newline char) can be sent in one go. After that the data may be read via [/api/v1/export](#how-to-export-data-in-json-line-format) endpoint: - ```sh curl -G 'http://localhost:8428/api/v1/export' -d 'match=foo.bar.baz' ``` - The `/api/v1/export` endpoint should return the following response: ```json diff --git a/lib/protoparser/graphite/parser.go b/lib/protoparser/graphite/parser.go index 1bf524239..b046955c7 100644 --- a/lib/protoparser/graphite/parser.go +++ b/lib/protoparser/graphite/parser.go @@ -81,12 +81,12 @@ func (r *Row) UnmarshalMetricAndTags(s string, tagsPool []Tag) ([]Tag, error) { tags := tagsPool[tagsStart:] r.Tags = tags[:len(tags):len(tags)] } - if len(r.Metric) == 0 { - return tagsPool, fmt.Errorf("metric cannot be empty") - } if *sanitizeMetricName { r.Metric = sanitizer.Transform(r.Metric) } + if len(r.Metric) == 0 { + return tagsPool, fmt.Errorf("metric cannot be empty") + } return tagsPool, nil } @@ -126,7 +126,7 @@ func (r *Row) unmarshal(s string, tagsPool []Tag) ([]Tag, error) { } v, err := fastfloat.Parse(valueStr) if err != nil { - return tagsPool, fmt.Errorf("cannot unmarshal value from %q: %w; original line: %q", valueStr, err, sOrig) + return tagsPool, fmt.Errorf("cannot unmarshal metric value from %q: %w; original line: %q", valueStr, err, sOrig) } r.Value = v return tagsPool, nil @@ -213,9 +213,6 @@ func (t *Tag) reset() { func (t *Tag) unmarshal(s string) { t.reset() - if *sanitizeMetricName { - s = sanitizer.Transform(s) - } n := strings.IndexByte(s, '=') if n < 0 { // Empty tag value. @@ -226,6 +223,9 @@ func (t *Tag) unmarshal(s string) { t.Key = s[:n] t.Value = s[n+1:] } + if *sanitizeMetricName { + t.Key = sanitizer.Transform(t.Key) + } } func stripTrailingWhitespace(s string) string { @@ -257,15 +257,13 @@ func stripLeadingWhitespace(s string) string { var sanitizer = bytesutil.NewFastStringTransformer(func(s string) string { // Apply rule to drop some chars to preserve backwards compatibility - s = dropChars.Replace(s) + s = repeatedDots.ReplaceAllString(s, ".") + // Replace any remaining illegal chars return allowedChars.ReplaceAllLiteralString(s, "_") }) var ( - dropChars = strings.NewReplacer( - `\`, "", - "..", ".", - ) - allowedChars = regexp.MustCompile(`[^a-zA-Z0-9:._=\p{L}]`) + repeatedDots = regexp.MustCompile(`[.]+`) + allowedChars = regexp.MustCompile(`[^a-zA-Z0-9:_.]`) ) diff --git a/lib/protoparser/graphite/parser_test.go b/lib/protoparser/graphite/parser_test.go index f93fc7889..a999a2392 100644 --- a/lib/protoparser/graphite/parser_test.go +++ b/lib/protoparser/graphite/parser_test.go @@ -5,7 +5,7 @@ import ( "testing" ) -func TestUnmarshalMetricAndTagsFailure(t *testing.T) { +func TestUnmarshalMetricAndTags_Failure(t *testing.T) { f := func(s string) { t.Helper() var r Row @@ -18,9 +18,7 @@ func TestUnmarshalMetricAndTagsFailure(t *testing.T) { f(";foo=bar") } -func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { - sanitizeFlagValue := *sanitizeMetricName - *sanitizeMetricName = true +func TestUnmarshalMetricAndTags_Success(t *testing.T) { f := func(s string, rExpected *Row) { t.Helper() var r Row @@ -33,10 +31,10 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { } } f(" ", &Row{ - Metric: "_", + Metric: " ", }) f("foo ;bar=baz", &Row{ - Metric: "foo_", + Metric: "foo ", Tags: []Tag{ { Key: "bar", @@ -45,7 +43,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { }, }) f("f oo;bar=baz", &Row{ - Metric: "f_oo", + Metric: "f oo", Tags: []Tag{ { Key: "bar", @@ -58,7 +56,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Tags: []Tag{ { Key: "bar", - Value: "baz___", + Value: "baz ", }, }, }) @@ -67,7 +65,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Tags: []Tag{ { Key: "bar", - Value: "_baz", + Value: " baz", }, }, }) @@ -76,7 +74,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Tags: []Tag{ { Key: "bar", - Value: "b_az", + Value: "b az", }, }, }) @@ -84,7 +82,7 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { Metric: "foo", Tags: []Tag{ { - Key: "b_ar", + Key: "b ar", Value: "baz", }, }, @@ -105,25 +103,9 @@ func TestUnmarshalMetricAndTagsSuccess(t *testing.T) { }, }, }) - f("foo..bar;bar=123;baz=aa=bb", &Row{ - Metric: "foo.bar", - Tags: []Tag{ - { - Key: "bar", - Value: "123", - }, - { - Key: "baz", - Value: "aa=bb", - }, - }, - }) - *sanitizeMetricName = sanitizeFlagValue } -func TestRowsUnmarshalFailure(t *testing.T) { - sanitizeFlagValue := *sanitizeMetricName - *sanitizeMetricName = true +func TestRowsUnmarshal_Failure(t *testing.T) { f := func(s string) { t.Helper() var rows Rows @@ -147,12 +129,51 @@ func TestRowsUnmarshalFailure(t *testing.T) { // invalid timestamp f("aa 123 bar") - *sanitizeMetricName = sanitizeFlagValue } -func TestRowsUnmarshalSuccess(t *testing.T) { - sanitizeFlagValue := *sanitizeMetricName - *sanitizeMetricName = true +func TestRowsUnmarshal_SanitizeMetricNamesSuccess(t *testing.T) { + f := func(s string, rowsExpected *Rows) { + t.Helper() + + sanitizeMetricNameOrig := *sanitizeMetricName + *sanitizeMetricName = true + defer func() { + *sanitizeMetricName = sanitizeMetricNameOrig + }() + + var rows Rows + rows.Unmarshal(s) + if !reflect.DeepEqual(rows.Rows, rowsExpected.Rows) { + t.Fatalf("unexpected rows;\ngot\n%+v;\nwant\n%+v", rows.Rows, rowsExpected.Rows) + } + } + + // Only metric name. + f(`foo...b..a.r\a--baz 123`, &Rows{ + Rows: []Row{{ + Metric: `foo.b.a.r_a__baz`, + Value: 123, + }}, + }) + + // Metric name with tags. + // Tag values shouldn't be sanitized. + f(`s a;ta g..1=a-b..c;tag2 123 456`, &Rows{ + Rows: []Row{{ + Metric: `s_a`, + Value: 123, + Timestamp: 456, + Tags: []Tag{ + { + Key: "ta_g.1", + Value: "a-b..c", + }, + }, + }}, + }) +} + +func TestRowsUnmarshal_Success(t *testing.T) { f := func(s string, rowsExpected *Rows) { t.Helper() var rows Rows @@ -205,17 +226,17 @@ func TestRowsUnmarshalSuccess(t *testing.T) { // See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3102 f("s a;ta g1=aaa1;tag2=bb b2;tag3 1 23", &Rows{ Rows: []Row{{ - Metric: "s_a", + Metric: "s a", Value: 1, Timestamp: 23, Tags: []Tag{ { - Key: "ta_g1", + Key: "ta g1", Value: "aaa1", }, { Key: "tag2", - Value: "bb_b2", + Value: "bb b2", }, }, }}, @@ -400,5 +421,4 @@ func TestRowsUnmarshalSuccess(t *testing.T) { Timestamp: 1789, }}, }) - *sanitizeMetricName = sanitizeFlagValue }