From fcffdba9dcbc6a696d328cc8f4268349d54218e3 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Mon, 26 Sep 2022 13:11:37 +0300 Subject: [PATCH] app/{vmagent,vminsert}: add `-usePromCompatibleNaming` command-line flag for normalizing metric names and label names in the ingested samples Updates https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113 Thanks to @erkexzcx for the idea and the initial pull request at https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3146 --- README.md | 5 ++++ app/vmagent/README.md | 2 ++ app/vmagent/remotewrite/relabel.go | 20 ++++++++++++++ app/vminsert/relabel/relabel.go | 39 +++++++++++++++++++++------ docs/CHANGELOG.md | 1 + docs/Cluster-VictoriaMetrics.md | 2 ++ docs/README.md | 5 ++++ docs/Single-server-VictoriaMetrics.md | 5 ++++ docs/vmagent.md | 2 ++ 9 files changed, 73 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index cd285f1cd..b220ac9cb 100644 --- a/README.md +++ b/README.md @@ -465,6 +465,9 @@ VictoriaMetrics performs the following transformations to the ingested InfluxDB * Field names are mapped to time series names prefixed with `{measurement}{separator}` value, where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag. See also `-influxSkipSingleField` command-line flag. If `{measurement}` is empty or if `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names. * Field values are mapped to time series values. * Tags are mapped to Prometheus labels as-is. +* If `-usePromCompatibleNaming` command-line flag is set, then all the metric names and label names + are normalized to [Prometheus-compatible naming](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels) by replacing unsupported chars with `_`. + For example, `foo.bar-baz/1` metric name or label name is substituted with `foo_bar_baz_1`. For example, the following InfluxDB line: @@ -2333,6 +2336,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Supports an array of values separated by comma or specified via multiple flags. -tlsKeyFile string Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated + -usePromCompatibleNaming + Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -version Show VictoriaMetrics version -vmalert.proxyURL string diff --git a/app/vmagent/README.md b/app/vmagent/README.md index 0db26b0d9..b228776ca 100644 --- a/app/vmagent/README.md +++ b/app/vmagent/README.md @@ -1267,6 +1267,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Supports an array of values separated by comma or specified via multiple flags. -tlsKeyFile string Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated + -usePromCompatibleNaming + Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -version Show VictoriaMetrics version ``` diff --git a/app/vmagent/remotewrite/relabel.go b/app/vmagent/remotewrite/relabel.go index 76e140532..abcdbb0de 100644 --- a/app/vmagent/remotewrite/relabel.go +++ b/app/vmagent/remotewrite/relabel.go @@ -3,6 +3,7 @@ package remotewrite import ( "flag" "fmt" + "regexp" "strings" "sync" @@ -25,6 +26,10 @@ var ( relabelDebug = flagutil.NewArrayBool("remoteWrite.urlRelabelDebug", "Whether to log metrics before and after relabeling with -remoteWrite.urlRelabelConfig. "+ "If the -remoteWrite.urlRelabelDebug is enabled, then the metrics aren't sent to the corresponding -remoteWrite.url. "+ "This is useful for debugging the relabeling configs") + + usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+ + "in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+ + "See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels") ) var labelsGlobal []prompbmarshal.Label @@ -107,6 +112,18 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab labels = append(labels, *extraLabel) } } + if *usePromCompatibleNaming { + // Replace unsupported Prometheus chars in label names and metric names with underscores. + tmpLabels := labels[labelsLen:] + for j := range tmpLabels { + label := &tmpLabels[j] + if label.Name == "__name__" { + label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_") + } else { + label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_") + } + } + } labels = pcs.Apply(labels, labelsLen, true) if len(labels) == labelsLen { // Drop the current time series, since relabeling removed all the labels. @@ -121,6 +138,9 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, extraLab return tssDst } +// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels +var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`) + type relabelCtx struct { // pool for labels, which are used during the relabeling. labels []prompbmarshal.Label diff --git a/app/vminsert/relabel/relabel.go b/app/vminsert/relabel/relabel.go index ac17d834a..bb02836c5 100644 --- a/app/vminsert/relabel/relabel.go +++ b/app/vminsert/relabel/relabel.go @@ -3,6 +3,7 @@ package relabel import ( "flag" "fmt" + "regexp" "sync/atomic" "github.com/VictoriaMetrics/VictoriaMetrics/lib/bytesutil" @@ -20,6 +21,10 @@ var ( "See https://docs.victoriametrics.com/#relabeling for details. The config is reloaded on SIGHUP signal") relabelDebug = flag.Bool("relabelDebug", false, "Whether to log metrics before and after relabeling with -relabelConfig. If the -relabelDebug is enabled, "+ "then the metrics aren't sent to storage. This is useful for debugging the relabeling configs") + + usePromCompatibleNaming = flag.Bool("usePromCompatibleNaming", false, "Whether to replace characters unsupported by Prometheus with underscores "+ + "in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. "+ + "See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels") ) // Init must be called after flag.Parse and before using the relabel package. @@ -67,7 +72,7 @@ func loadRelabelConfig() (*promrelabel.ParsedConfigs, error) { // HasRelabeling returns true if there is global relabeling. func HasRelabeling() bool { pcs := pcsGlobal.Load().(*promrelabel.ParsedConfigs) - return pcs.Len() > 0 + return pcs.Len() > 0 || *usePromCompatibleNaming } // Ctx holds relabeling context. @@ -87,11 +92,11 @@ func (ctx *Ctx) Reset() { // The returned labels are valid until the next call to ApplyRelabeling. func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label { pcs := pcsGlobal.Load().(*promrelabel.ParsedConfigs) - if pcs.Len() == 0 { + if pcs.Len() == 0 && !*usePromCompatibleNaming { // There are no relabeling rules. return labels } - // Convert src to prompbmarshal.Label format suitable for relabeling. + // Convert labels to prompbmarshal.Label format suitable for relabeling. tmpLabels := ctx.tmpLabels[:0] for _, label := range labels { name := bytesutil.ToUnsafeString(label.Name) @@ -105,13 +110,28 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label { }) } - // Apply relabeling - tmpLabels = pcs.Apply(tmpLabels, 0, true) - ctx.tmpLabels = tmpLabels - if len(tmpLabels) == 0 { - metricsDropped.Inc() + if *usePromCompatibleNaming { + // Replace unsupported Prometheus chars in label names and metric names with underscores. + for i := range tmpLabels { + label := &tmpLabels[i] + if label.Name == "__name__" { + label.Value = unsupportedPromChars.ReplaceAllString(label.Value, "_") + } else { + label.Name = unsupportedPromChars.ReplaceAllString(label.Name, "_") + } + } } + if pcs.Len() > 0 { + // Apply relabeling + tmpLabels = pcs.Apply(tmpLabels, 0, true) + if len(tmpLabels) == 0 { + metricsDropped.Inc() + } + } + + ctx.tmpLabels = tmpLabels + // Return back labels to the desired format. dst := labels[:0] for _, label := range tmpLabels { @@ -129,3 +149,6 @@ func (ctx *Ctx) ApplyRelabeling(labels []prompb.Label) []prompb.Label { } var metricsDropped = metrics.NewCounter(`vm_relabel_metrics_dropped_total`) + +// See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels +var unsupportedPromChars = regexp.MustCompile(`[^a-zA-Z0-9_:]`) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 3df6c7f2f..f50a49f29 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -19,6 +19,7 @@ The following tip changes can be tested by building VictoriaMetrics components f **Update note 2:** [vmalert](https://docs.victoriametrics.com/vmalert.html) changes default value for command-line flag `-datasource.queryStep` from `0s` to `5m`. The change supposed to improve reliability of the rules evaluation when evaluation interval is lower than scraping interval. +* FEATURE: add `-usePromCompatibleNaming` command-line flag to [vmagent](https://docs.victoriametrics.com/vmagent.html), to single-node VictoriaMetrics and to `vminsert` component of VictoriaMetrics cluster. This flag can be used for normalizing the ingested metric names and label names to [Prometheus-compatible form](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels). If this flag is set, then all the chars unsupported by Prometheus are replaced with `_` chars in metric names and labels of the ingested samples. See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3113). * FEATURE: check the correctess of raw sample timestamps stored on disk when reading them. This reduces the probability of possible silent corruption of the data stored on disk. This should help [this](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2998) and [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3011). * FEATURE: atomically delete directories with snapshots, parts and partitions at [storage level](https://docs.victoriametrics.com/#storage). Previously such directories can be left in partially deleted state when the deletion operation was interrupted by unclean shutdown. This may result in `cannot open file ...: no such file or directory` error on the next start. The probability of this error was quite high when NFS or EFS was used as persistent storage for VictoriaMetrics data. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3038). * FEATURE: set the `start` arg to `end - 5 minutes` if isn't passed explicitly to [/api/v1/labels](https://docs.victoriametrics.com/url-examples.html#apiv1labels) and [/api/v1/label/.../values](https://docs.victoriametrics.com/url-examples.html#apiv1labelvalues). See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/3052). diff --git a/docs/Cluster-VictoriaMetrics.md b/docs/Cluster-VictoriaMetrics.md index 9684386e3..1abd6fdfa 100644 --- a/docs/Cluster-VictoriaMetrics.md +++ b/docs/Cluster-VictoriaMetrics.md @@ -775,6 +775,8 @@ Below is the output for `/path/to/vminsert -help`: Supports an array of values separated by comma or specified via multiple flags. -tlsKeyFile string Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated + -usePromCompatibleNaming + Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -version Show VictoriaMetrics version -vmstorageDialTimeout duration diff --git a/docs/README.md b/docs/README.md index cd285f1cd..b220ac9cb 100644 --- a/docs/README.md +++ b/docs/README.md @@ -465,6 +465,9 @@ VictoriaMetrics performs the following transformations to the ingested InfluxDB * Field names are mapped to time series names prefixed with `{measurement}{separator}` value, where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag. See also `-influxSkipSingleField` command-line flag. If `{measurement}` is empty or if `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names. * Field values are mapped to time series values. * Tags are mapped to Prometheus labels as-is. +* If `-usePromCompatibleNaming` command-line flag is set, then all the metric names and label names + are normalized to [Prometheus-compatible naming](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels) by replacing unsupported chars with `_`. + For example, `foo.bar-baz/1` metric name or label name is substituted with `foo_bar_baz_1`. For example, the following InfluxDB line: @@ -2333,6 +2336,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Supports an array of values separated by comma or specified via multiple flags. -tlsKeyFile string Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated + -usePromCompatibleNaming + Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -version Show VictoriaMetrics version -vmalert.proxyURL string diff --git a/docs/Single-server-VictoriaMetrics.md b/docs/Single-server-VictoriaMetrics.md index 12d65ce82..a5b5a46e6 100644 --- a/docs/Single-server-VictoriaMetrics.md +++ b/docs/Single-server-VictoriaMetrics.md @@ -469,6 +469,9 @@ VictoriaMetrics performs the following transformations to the ingested InfluxDB * Field names are mapped to time series names prefixed with `{measurement}{separator}` value, where `{separator}` equals to `_` by default. It can be changed with `-influxMeasurementFieldSeparator` command-line flag. See also `-influxSkipSingleField` command-line flag. If `{measurement}` is empty or if `-influxSkipMeasurement` command-line flag is set, then time series names correspond to field names. * Field values are mapped to time series values. * Tags are mapped to Prometheus labels as-is. +* If `-usePromCompatibleNaming` command-line flag is set, then all the metric names and label names + are normalized to [Prometheus-compatible naming](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels) by replacing unsupported chars with `_`. + For example, `foo.bar-baz/1` metric name or label name is substituted with `foo_bar_baz_1`. For example, the following InfluxDB line: @@ -2337,6 +2340,8 @@ Pass `-help` to VictoriaMetrics in order to see the list of supported command-li Supports an array of values separated by comma or specified via multiple flags. -tlsKeyFile string Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated + -usePromCompatibleNaming + Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -version Show VictoriaMetrics version -vmalert.proxyURL string diff --git a/docs/vmagent.md b/docs/vmagent.md index 9a8e39a4d..6747d3a04 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -1271,6 +1271,8 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Supports an array of values separated by comma or specified via multiple flags. -tlsKeyFile string Path to file with TLS key if -tls is set. The provided key file is automatically re-read every second, so it can be dynamically updated + -usePromCompatibleNaming + Whether to replace characters unsupported by Prometheus with underscores in the ingested metric names and label names. For example, foo.bar{a.b='c'} is transformed into foo_bar{a_b='c'} during data ingestion if this flag is set. See https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels -version Show VictoriaMetrics version ```