lib/protoparser/datadog: sanitize metric names by default in the same way as DataDog does

This commit is based on the pull request https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105

Thanks to @PerGon for the idea and initial implementation.
This commit is contained in:
Aliaksandr Valialkin 2022-09-26 13:57:20 +03:00
parent 6a6cf9c590
commit dbc20091b1
No known key found for this signature in database
GPG Key ID: A72BEC6CD3D0DED1
8 changed files with 67 additions and 2 deletions

View File

@ -915,6 +915,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-datadog.maxInsertRequestSize size -datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-denyQueryTracing -denyQueryTracing
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
-dryRun -dryRun

View File

@ -19,6 +19,7 @@ The following tip changes can be tested by building VictoriaMetrics components f
**Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) changes default value for command-line flag `-datasource.queryStep` from `0s` to `5m`. The change supposed to improve reliability of the rules evaluation when evaluation interval is lower than scraping interval. **Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) changes default value for command-line flag `-datasource.queryStep` from `0s` to `5m`. The change supposed to improve reliability of the rules evaluation when evaluation interval is lower than scraping interval.
* FEATURE: sanitize metric names for data ingested via [DataDog protocol](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) according to [DataDog metric naming](https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics). The behaviour can be disabled by passing `-datadog.sanitizeMetricName=false` command-line flag. Thanks to @PerGon for [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3105).
* FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113). * FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113).
* FEATURE: check the correctess of raw sample timestamps stored on disk when reading them. This reduces the probability of possible silent corruption of the data stored on disk. This should help [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2998) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3011). * FEATURE: check the correctess of raw sample timestamps stored on disk when reading them. This reduces the probability of possible silent corruption of the data stored on disk. This should help [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2998) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3011).
* FEATURE: atomically delete directories with snapshots, parts and partitions at [storage level](https://docs.victoriametrics.com/#storage). Previously such directories can be left in partially deleted state when the deletion operation was interrupted by unclean shutdown. This may result in `cannot open file ...: no such file or directory` error on the next start. The probability of this error was quite high when NFS or EFS was used as persistent storage for VictoriaMetrics data. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3038). * FEATURE: atomically delete directories with snapshots, parts and partitions at [storage level](https://docs.victoriametrics.com/#storage). Previously such directories can be left in partially deleted state when the deletion operation was interrupted by unclean shutdown. This may result in `cannot open file ...: no such file or directory` error on the next start. The probability of this error was quite high when NFS or EFS was used as persistent storage for VictoriaMetrics data. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3038).

View File

@ -639,6 +639,8 @@ Below is the output for `/path/to/vminsert -help`:
-datadog.maxInsertRequestSize size -datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-denyQueryTracing -denyQueryTracing
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
-disableRerouting -disableRerouting

View File

@ -1997,6 +1997,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-datadog.maxInsertRequestSize size -datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-dedup.minScrapeInterval duration -dedup.minScrapeInterval duration
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling
-deleteAuthKey string -deleteAuthKey string

View File

@ -2001,6 +2001,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li
-datadog.maxInsertRequestSize size -datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-dedup.minScrapeInterval duration -dedup.minScrapeInterval duration
Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling Leave only the last sample in every time series per each discrete interval equal to -dedup.minScrapeInterval > 0. See https://docs.victoriametrics.com/#deduplication and https://docs.victoriametrics.com/#downsampling
-deleteAuthKey string -deleteAuthKey string

View File

@ -919,6 +919,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
-datadog.maxInsertRequestSize size -datadog.maxInsertRequestSize size
The maximum size in bytes of a single DataDog POST request to /api/v1/series The maximum size in bytes of a single DataDog POST request to /api/v1/series
Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864) Supports the following optional suffixes for size values: KB, MB, GB, KiB, MiB, GiB (default 67108864)
-datadog.sanitizeMetricName
Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics (default true)
-denyQueryTracing -denyQueryTracing
Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing Whether to disable the ability to trace queries. See https://docs.victoriametrics.com/#query-tracing
-dryRun -dryRun

View File

@ -2,8 +2,10 @@ package datadog
import ( import (
"bufio" "bufio"
"flag"
"fmt" "fmt"
"io" "io"
"regexp"
"sync" "sync"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil"
@ -14,8 +16,18 @@ import (
"github.com/VictoriaMetrics/metrics" "github.com/VictoriaMetrics/metrics"
) )
// The maximum request size is defined at https://docs.datadoghq.com/api/latest/metrics/#submit-metrics var (
var maxInsertRequestSize = flagutil.NewBytes("datadog.maxInsertRequestSize", 64*1024*1024, "The maximum size in bytes of a single DataDog POST request to /api/v1/series") // The maximum request size is defined at https://docs.datadoghq.com/api/latest/metrics/#submit-metrics
maxInsertRequestSize = flagutil.NewBytes("datadog.maxInsertRequestSize", 64*1024*1024, "The maximum size in bytes of a single DataDog POST request to /api/v1/series")
// If all metrics in Datadog have the same naming schema as custom metrics, then the following rules apply:
// https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
// But there's some hidden behaviour. In addition to what it states in the docs, the following is also done:
// - Consecutive underscores are replaced with just one underscore
// - Underscore immediately before or after a dot are removed
sanitizeMetricName = flag.Bool("datadog.sanitizeMetricName", true, "Sanitize metric names for the ingested DataDog data to comply with DataDog behaviour described at "+
"https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics")
)
// ParseStream parses DataDog POST request for /api/v1/series from reader and calls callback for the parsed request. // ParseStream parses DataDog POST request for /api/v1/series from reader and calls callback for the parsed request.
// //
@ -52,6 +64,9 @@ func ParseStream(r io.Reader, contentEncoding string, callback func(series []Ser
series := req.Series series := req.Series
for i := range series { for i := range series {
rows += len(series[i].Points) rows += len(series[i].Points)
if *sanitizeMetricName {
series[i].Metric = sanitizeName(series[i].Metric)
}
} }
rowsRead.Add(rows) rowsRead.Add(rows)
@ -136,3 +151,19 @@ func putRequest(req *Request) {
} }
var requestPool sync.Pool var requestPool sync.Pool
// sanitizeName performs DataDog-compatible santizing for metric names
//
// See https://docs.datadoghq.com/metrics/custom_metrics/#naming-custom-metrics
func sanitizeName(s string) string {
s = unsupportedDatadogChars.ReplaceAllString(s, "_")
s = multiUnderscores.ReplaceAllString(s, "_")
s = underscoresWithDots.ReplaceAllString(s, ".")
return s
}
var (
unsupportedDatadogChars = regexp.MustCompile(`[^0-9a-zA-Z_\.]+`)
multiUnderscores = regexp.MustCompile(`_+`)
underscoresWithDots = regexp.MustCompile(`_?\._?`)
)

View File

@ -0,0 +1,23 @@
package datadog
import (
"testing"
)
func TestSanitizeName(t *testing.T) {
f := func(s, resultExpected string) {
t.Helper()
result := sanitizeName(s)
if result != resultExpected {
t.Fatalf("unexpected result for sanitizeName(%q); got\n%q\nwant\n%q", s, result, resultExpected)
}
}
f("before.dot.metric!.name", "before.dot.metric.name")
f("after.dot.metric.!name", "after.dot.metric.name")
f("in.the.middle.met!ric.name", "in.the.middle.met_ric.name")
f("before.and.after.and.middle.met!ric!.!name", "before.and.after.and.middle.met_ric.name")
f("many.consecutive.met!!!!ric!!.!!name", "many.consecutive.met_ric.name")
f("many.non.consecutive.m!e!t!r!i!c!.!name", "many.non.consecutive.m_e_t_r_i_c.name")
f("how.about.underscores_.!_metric!_!.__!!name", "how.about.underscores.metric.name")
f("how.about.underscores.middle.met!_!_ric.name", "how.about.underscores.middle.met_ric.name")
}