From 3de865655199e89620a5fcfc2e46cb4c0dbef0fb Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Wed, 3 Apr 2024 00:36:32 +0300 Subject: [PATCH] app/vmagent/remotewrite: follow-up for 166b97b8d03e80830befd6512b872a3c1c28bc15 and b6bd9a97a3cf1da0da3e8df36dfcd2be343e21c4 - Make the configuration more clear by accepting the list of ignored labels during sharding via a dedicated command-line flag - -remoteWrite.shardByURL.ignoreLabels. This prevents from overloading the meaning of -remoteWrite.shardByURL.labels command-line flag. - Removed superfluous memory allocation per each processed sample if sharding by remote storage is enabled. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938 --- app/vmagent/remotewrite/remotewrite.go | 67 +++++++++++++-------- app/vmagent/remotewrite/remotewrite_test.go | 38 ------------ docs/CHANGELOG.md | 2 +- docs/vmagent.md | 21 ++++--- 4 files changed, 57 insertions(+), 71 deletions(-) delete mode 100644 app/vmagent/remotewrite/remotewrite_test.go diff --git a/app/vmagent/remotewrite/remotewrite.go b/app/vmagent/remotewrite/remotewrite.go index b338c0402..0c6c5f774 100644 --- a/app/vmagent/remotewrite/remotewrite.go +++ b/app/vmagent/remotewrite/remotewrite.go @@ -51,9 +51,11 @@ var ( "By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages") shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+ "among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+ - "even distribution of series over the specified -remoteWrite.url systems. 
See also -remoteWrite.shardByURL.inverseLabels.") - shardByURLLabelsInverse = flag.Bool("remoteWrite.shardByURL.inverseLabels", false, "Inverse the behavior of -remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in -remoteWrite.shardByURL.labels.") - tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+ + "even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.ignoreLabels") + shardByURLIgnoreLabels = flagutil.NewArrayString("remoteWrite.shardByURL.ignoreLabels", "Optional list of labels, which must be ignored when sharding outgoing samples "+ + "among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+ + "even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.labels") + tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+ "See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue") keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+ "Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.") @@ -150,7 +152,10 @@ func InitSecretFlags() { } } -var shardByURLLabelsMap map[string]struct{} +var ( + shardByURLLabelsMap map[string]struct{} + shardByURLIgnoreLabelsMap map[string]struct{} +) // Init initializes remotewrite. 
// @@ -189,13 +194,14 @@ func Init() { if *queues <= 0 { *queues = 1 } - if len(*shardByURLLabels) > 0 { - m := make(map[string]struct{}, len(*shardByURLLabels)) - for _, label := range *shardByURLLabels { - m[label] = struct{}{} - } - shardByURLLabelsMap = m + + if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 { + logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " + + "see https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages") } + shardByURLLabelsMap = newMapFromStrings(*shardByURLLabels) + shardByURLIgnoreLabelsMap = newMapFromStrings(*shardByURLIgnoreLabels) + initLabelsGlobal() // Register SIGHUP handler for config reload before loadRelabelConfigs. @@ -561,7 +567,24 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs)) tmpLabels := promutils.GetLabels() for _, ts := range tssBlock { - hashLabels := extractShardingLabels(tmpLabels.Labels, ts.Labels, *shardByURLLabelsInverse) + hashLabels := ts.Labels + if len(shardByURLLabelsMap) > 0 { + hashLabels = tmpLabels.Labels[:0] + for _, label := range ts.Labels { + if _, ok := shardByURLLabelsMap[label.Name]; ok { + hashLabels = append(hashLabels, label) + } + } + tmpLabels.Labels = hashLabels + } else if len(shardByURLIgnoreLabelsMap) > 0 { + hashLabels = tmpLabels.Labels[:0] + for _, label := range ts.Labels { + if _, ok := shardByURLIgnoreLabelsMap[label.Name]; !ok { + hashLabels = append(hashLabels, label) + } + } + tmpLabels.Labels = hashLabels + } h := getLabelsHash(hashLabels) idx := h % uint64(len(tssByURL)) tssByURL[idx] = append(tssByURL[idx], ts) @@ -607,20 +630,6 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar return !anyPushFailed.Load() } -func extractShardingLabels(dst, src []prompbmarshal.Label, inverse bool) []prompbmarshal.Label { - if len(shardByURLLabelsMap) < 1 { - return 
src - } - dst = dst[:0] - for _, label := range src { - _, ok := shardByURLLabelsMap[label.Name] - if ok && !inverse || !ok && inverse { - dst = append(dst, label) - } - } - return dst -} - // sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set. func sortLabelsIfNeeded(tss []prompbmarshal.TimeSeries) { if !*sortLabels { @@ -1022,3 +1031,11 @@ func CheckStreamAggrConfigs() error { } return nil } + +func newMapFromStrings(a []string) map[string]struct{} { + m := make(map[string]struct{}, len(a)) + for _, s := range a { + m[s] = struct{}{} + } + return m +} diff --git a/app/vmagent/remotewrite/remotewrite_test.go b/app/vmagent/remotewrite/remotewrite_test.go deleted file mode 100644 index b5634772a..000000000 --- a/app/vmagent/remotewrite/remotewrite_test.go +++ /dev/null @@ -1,38 +0,0 @@ -package remotewrite - -import ( - "reflect" - "testing" - - "github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal" -) - -func TestExtractShardingLabels(t *testing.T) { - shardByURLLabelsMap = make(map[string]struct{}) - shardByURLLabelsMap["instance"] = struct{}{} - shardByURLLabelsMap["job"] = struct{}{} - defer func() { - shardByURLLabelsMap = nil - }() - - f := func(in, exp []prompbmarshal.Label, inverse bool) { - t.Helper() - var got []prompbmarshal.Label - got = extractShardingLabels(got, in, inverse) - if !reflect.DeepEqual(got, exp) { - t.Fatalf("expected to get \n%#v; \ngot \n%#v instead", exp, got) - } - } - - f(nil, nil, true) - f(nil, nil, false) - - f([]prompbmarshal.Label{{Name: "foo"}}, nil, false) - f([]prompbmarshal.Label{{Name: "foo"}}, []prompbmarshal.Label{{Name: "foo"}}, true) - - f([]prompbmarshal.Label{{Name: "foo"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "job"}}, false) - f([]prompbmarshal.Label{{Name: "foo"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "foo"}}, true) - - f([]prompbmarshal.Label{{Name: "foo"}, {Name: "instance"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "instance"}, {Name: "job"}}, false) - 
f([]prompbmarshal.Label{{Name: "foo"}, {Name: "instance"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "foo"}}, true) -} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c6c75aa15..15da07791 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -55,7 +55,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve trace display for better visual separation of branches. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5926). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): ability to limit the ingestion rate via `-maxIngestionRate` command-line flag. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5900). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): use the provided `-remoteWrite.tlsServerName` as `Host` header in requests to `-remoteWrite.url`. This allows sending data to https remote storage by IP address instead of hostname. Thanks to @minor-fixes for initial idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5802). -* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.inverseLabels` cmd-line flag to enhance sharding logic across configured `-remoteWrite.url` URLs when `-remoteWrite.shardByURL.labels` is set. Thanks to @edma2 for the idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938). +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.ignoreLabels` command-line flag, which can be used for specifying the ignored list of labels when [sharding by `-remoteWrite.url` is enabled](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages). Thanks to @edma2 for the idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938). 
* FEATURE: optimize [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) when `match[]` filters contains metric name. For example, `/api/v1/label/instance/values?match[]=up` now works much faster than before. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [native protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-from-victoriametrics). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5824). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes. diff --git a/docs/vmagent.md b/docs/vmagent.md index 00277545a..088c1a0a0 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -187,12 +187,17 @@ and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) typ to the same second-level `vmagent` instance, so they are aggregated properly. If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding -among remote storage systems specified in `-remoteWrite.url`. 
Sometimes it may be needed to use only a particular -set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label +among remote storage systems specified in `-remoteWrite.url`. + +Sometimes it may be needed to use only a particular set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURL.labels` command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance` -label to the same `-remoteWrite.url`. The sharding logic can be inverted by specifying `-remoteWrite.shardByURL.inverseLabels`, -so that metrics are sharded using all labels except the ones specified in `-remoteWrite.shardByURL.labels`. +label to the same `-remoteWrite.url`. + +Sometimes it may be needed to ignore some labels when sharding samples across multiple `-remoteWrite.url` backends. +For example, it may be needed to route all the [raw samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) with the same set of labels +except for `instance` and `pod` labels to the same backend. In this case the list of ignored labels must be passed to +`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`. See also [how to scrape big number of targets](#scraping-big-number-of-targets). @@ -2106,10 +2111,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html . Empty values are set to default value. -remoteWrite.shardByURL Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . 
See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages - -remoteWrite.shardByURL.inverseLabels - Inverse the behavior of -remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in -remoteWrite.shardByURL.labels. + -remoteWrite.shardByURL.ignoreLabels array + Optional list of labels, which must be ignored when sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.labels + Supports an array of values separated by comma or specified via multiple flags. + Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -remoteWrite.shardByURL.labels array - Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.inverseLabels. + Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.ignoreLabels Supports an array of values separated by comma or specified via multiple flags. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. -remoteWrite.showURL