From dbc20091b1aab481aaab045ad16750f51254b057 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 26 Sep 2022 13:57:20 +0300 Subject: [PATCH] lib/protoparser/datadog: sanitize metric names by default in the same way as DataDog does This commit is based on the pull request https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105 Thanks to @PerGon for the idea and initial implementation. --- app/vmagent/README.md | 2 ++ docs/CHANGELOG.md | 1 + docs/Cluster-VictoriaMetrics.md | 2 ++ docs/README.md | 2 ++ docs/Single-server-VictoriaMetrics.md | 2 ++ docs/vmagent.md | 2 ++ lib/protoparser/datadog/streamparser.go | 35 ++++++++++++++++++-- lib/protoparser/datadog/streamparser_test.go | 23 +++++++++++++ 8 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 lib/protoparser/datadog/streamparser_test.go diff --git a/app/vmagent/README.md b/app/vmagent/README.md index b228776caf..8b136e910d 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -915,6 +915,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . -datadog.maxInsertRequestSize size The maximum size in bytes of a single DataDog POST request to /api/v1/series Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) + -datadog.sanitizeMetricName + Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true) -denyQueryTracing Whether to disable the ability to trace queries. 
See https://docs.victoriametrics.com/#query-tracing -dryRun diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index f50a49f29c..b6e4ddb6ce 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -19,6 +19,7 @@ The following tip changes can be tested by building VictoriaMetrics components f **Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) changes default value for command-line flag `-datasource.queryStep` from `0s` to `5m`. The change supposed to improve reliability of the rules evaluation when evaluation interval is lower than scraping interval. +* FEATURE: sanitize metric names for data ingested via [DataDog protocol](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) according to [DataDog metric naming](https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics). The behaviour can be disabled by passing `-datadog.sanitizeMetricName=false` command-line flag. Thanks to @PerGon for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105). * FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113). * FEATURE: check the correctess of raw sample timestamps stored on disk when reading them. This reduces the probability of possible silent corruption of the data stored on disk. 
This should help [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2998) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3011). * FEATURE: atomically delete directories with snapshots, parts and partitions at [storage level](https://docs.victoriametrics.com/#storage). Previously such directories can be left in partially deleted state when the deletion operation was interrupted by unclean shutdown. This may result in `cannot open file ...: no such file or directory` error on the next start. The probability of this error was quite high when NFS or EFS was used as persistent storage for VictoriaMetrics data. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3038). diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 1abd6fdfa8..afbbbfc376 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -639,6 +639,8 @@ Below is the output for `/path/to/vminsert -help`: -datadog.maxInsertRequestSize size The maximum size in bytes of a single DataDog POST request to /api/v1/series Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) + -datadog.sanitizeMetricName + Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true) -denyQueryTracing Whether to disable the ability to trace queries. 
See https://docs.victoriametrics.com/#query-tracing -disableRerouting diff --git a/docs/README.md b/docs/README.md index b220ac9cb2..fc6757a7f0 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1997,6 +1997,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li -datadog.maxInsertRequestSize size The maximum size in bytes of a single DataDog POST request to /api/v1/series Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) + -datadog.sanitizeMetricName + Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true) -dedup.minScrapeInterval duration Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling -deleteAuthKey string diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index a5b5a46e66..f5f530607a 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -2001,6 +2001,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li -datadog.maxInsertRequestSize size The maximum size in bytes of a single DataDog POST request to /api/v1/series Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) + -datadog.sanitizeMetricName + Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true) -dedup.minScrapeInterval duration Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. 
See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling -deleteAuthKey string diff --git a/docs/vmagent.md b/docs/vmagent.md index 6747d3a04c..7747e55f2f 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -919,6 +919,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . -datadog.maxInsertRequestSize size The maximum size in bytes of a single DataDog POST request to /api/v1/series Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) + -datadog.sanitizeMetricName + Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true) -denyQueryTracing Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing -dryRun diff --git a/lib/protoparser/datadog/streamparser.go b/lib/protoparser/datadog/streamparser.go index ace0165b0c..24cc557aed 100644 --- a/lib/protoparser/datadog/streamparser.go +++ b/lib/protoparser/datadog/streamparser.go @@ -2,8 +2,10 @@ package datadog import ( "bufio" + "flag" "fmt" "io" + "regexp" "sync" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" @@ -14,8 +16,18 @@ import ( "github.com/VictoriaMetrics/metrics" ) -// The maximum request size is defined at https://docs.datadoghq.com/api/latest/metrics/#submit-metrics -var maxInsertRequestSize = flagutil.NewBytes("datadog.maxInsertRequestSize", 64*1024*1024, "The maximum size in bytes of a single DataDog POST request to /api/v1/series") +var ( + // The maximum request size is defined at https://docs.datadoghq.com/api/latest/metrics/#submit-metrics + maxInsertRequestSize = flagutil.NewBytes("datadog.maxInsertRequestSize", 64*1024*1024, "The maximum size in bytes of a single DataDog POST request to /api/v1/series") + + // If all metrics in Datadog have the same naming schema as custom metrics, then the following rules 
apply: + // https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics + // But there's some hidden behaviour. In addition to what the docs state, the following is also done: + // - Consecutive underscores are replaced with just one underscore + // - Underscores immediately before or after a dot are removed + sanitizeMetricName = flag.Bool("datadog.sanitizeMetricName", true, "Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at "+ + "https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics") +) // ParseStream parses DataDog POST request for /api/v1/series from reader and calls callback for the parsed request. // @@ -52,6 +64,9 @@ func ParseStream(r io.Reader, contentEncoding string, callback func(series []Ser series := req.Series for i := range series { rows += len(series[i].Points) + if *sanitizeMetricName { + series[i].Metric = sanitizeName(series[i].Metric) + } } rowsRead.Add(rows) @@ -136,3 +151,19 @@ func putRequest(req *Request) { } var requestPool sync.Pool + +// sanitizeName performs DataDog-compatible sanitizing for metric names +// +// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics +func sanitizeName(s string) string { + s = unsupportedDatadogChars.ReplaceAllString(s, "_") + s = multiUnderscores.ReplaceAllString(s, "_") + s = underscoresWithDots.ReplaceAllString(s, ".") + return s +} + +var ( + unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`) + multiUnderscores = regexp.MustCompile(`_+`) + underscoresWithDots = regexp.MustCompile(`_?\._?`) +) diff --git a/lib/protoparser/datadog/streamparser_test.go b/lib/protoparser/datadog/streamparser_test.go new file mode 100644 index 0000000000..670be59d4a --- /dev/null +++ b/lib/protoparser/datadog/streamparser_test.go @@ -0,0 +1,23 @@ +package datadog + +import ( + "testing" +) + +func TestSanitizeName(t *testing.T) { + f := func(s, resultExpected string) { + t.Helper() + result := 
sanitizeName(s) + if result != resultExpected { + t.Fatalf("unexpected result for sanitizeName(%q); got\n%q\nwant\n%q", s, result, resultExpected) + } + } + f("before.dot.metric!.name", "before.dot.metric.name") + f("after.dot.metric.!name", "after.dot.metric.name") + f("in.the.middle.met!ric.name", "in.the.middle.met_ric.name") + f("before.and.after.and.middle.met!ric!.!name", "before.and.after.and.middle.met_ric.name") + f("many.consecutive.met!!!!ric!!.!!name", "many.consecutive.met_ric.name") + f("many.non.consecutive.m!e!t!r!i!c!.!name", "many.non.consecutive.m_e_t_r_i_c.name") + f("how.about.underscores_.!_metric!_!.__!!name", "how.about.underscores.metric.name") + f("how.about.underscores.middle.met!_!_ric.name", "how.about.underscores.middle.met_ric.name") +}