app/vmagent/remotewrite: follow-up for 166b97b8d0 and b6bd9a97a3

- Make the configuration clearer by accepting the list of labels to ignore during sharding
  via a dedicated command-line flag - -remoteWrite.shardByURL.ignoreLabels.
  This avoids overloading the meaning of the -remoteWrite.shardByURL.labels command-line flag.

- Removed a superfluous memory allocation per processed sample when sharding by remote storage is enabled.

Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938
This commit is contained in:
Aliaksandr Valialkin 2024-04-03 00:36:32 +03:00
parent 55bd43f28e
commit 3de8656551
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
4 changed files with 57 additions and 71 deletions

View File

@ -51,9 +51,11 @@ var (
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages") "By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+ shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+ "among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
"even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.inverseLabels.") "even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.ignoreLabels")
shardByURLLabelsInverse = flag.Bool("remoteWrite.shardByURL.inverseLabels", false, "Inverse the behavior of -remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in -remoteWrite.shardByURL.labels.") shardByURLIgnoreLabels = flagutil.NewArrayString("remoteWrite.shardByURL.ignoreLabels", "Optional list of labels, which must be ignored when sharding outgoing samples "+
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+ "among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
"even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.labels")
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue") "See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+ keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.") "Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
@ -150,7 +152,10 @@ func InitSecretFlags() {
} }
} }
var shardByURLLabelsMap map[string]struct{} var (
shardByURLLabelsMap map[string]struct{}
shardByURLIgnoreLabelsMap map[string]struct{}
)
// Init initializes remotewrite. // Init initializes remotewrite.
// //
@ -189,13 +194,14 @@ func Init() {
if *queues <= 0 { if *queues <= 0 {
*queues = 1 *queues = 1
} }
if len(*shardByURLLabels) > 0 {
m := make(map[string]struct{}, len(*shardByURLLabels)) if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
for _, label := range *shardByURLLabels { logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
m[label] = struct{}{} "see https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages")
}
shardByURLLabelsMap = m
} }
shardByURLLabelsMap = newMapFromStrings(*shardByURLLabels)
shardByURLIgnoreLabelsMap = newMapFromStrings(*shardByURLIgnoreLabels)
initLabelsGlobal() initLabelsGlobal()
// Register SIGHUP handler for config reload before loadRelabelConfigs. // Register SIGHUP handler for config reload before loadRelabelConfigs.
@ -561,7 +567,24 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs)) tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
tmpLabels := promutils.GetLabels() tmpLabels := promutils.GetLabels()
for _, ts := range tssBlock { for _, ts := range tssBlock {
hashLabels := extractShardingLabels(tmpLabels.Labels, ts.Labels, *shardByURLLabelsInverse) hashLabels := ts.Labels
if len(shardByURLLabelsMap) > 0 {
hashLabels = tmpLabels.Labels[:0]
for _, label := range ts.Labels {
if _, ok := shardByURLLabelsMap[label.Name]; ok {
hashLabels = append(hashLabels, label)
}
}
tmpLabels.Labels = hashLabels
} else if len(shardByURLIgnoreLabelsMap) > 0 {
hashLabels = tmpLabels.Labels[:0]
for _, label := range ts.Labels {
if _, ok := shardByURLIgnoreLabelsMap[label.Name]; !ok {
hashLabels = append(hashLabels, label)
}
}
tmpLabels.Labels = hashLabels
}
h := getLabelsHash(hashLabels) h := getLabelsHash(hashLabels)
idx := h % uint64(len(tssByURL)) idx := h % uint64(len(tssByURL))
tssByURL[idx] = append(tssByURL[idx], ts) tssByURL[idx] = append(tssByURL[idx], ts)
@ -607,20 +630,6 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar
return !anyPushFailed.Load() return !anyPushFailed.Load()
} }
// extractShardingLabels selects the labels from src that must be used for sharding.
//
// If shardByURLLabelsMap is empty, src is returned as is and dst is left untouched.
// Otherwise the selected labels are written into dst (reusing its backing array)
// and the resulting slice is returned:
//
//   - inverse == false: only labels whose names are present in shardByURLLabelsMap are kept;
//   - inverse == true: only labels whose names are absent from shardByURLLabelsMap are kept.
func extractShardingLabels(dst, src []prompbmarshal.Label, inverse bool) []prompbmarshal.Label {
	if len(shardByURLLabelsMap) == 0 {
		return src
	}

	result := dst[:0]
	for i := range src {
		_, listed := shardByURLLabelsMap[src[i].Name]
		// A label is kept when exactly one of (listed, inverse) is true.
		if listed == inverse {
			continue
		}
		result = append(result, src[i])
	}
	return result
}
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set. // sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
func sortLabelsIfNeeded(tss []prompbmarshal.TimeSeries) { func sortLabelsIfNeeded(tss []prompbmarshal.TimeSeries) {
if !*sortLabels { if !*sortLabels {
@ -1022,3 +1031,11 @@ func CheckStreamAggrConfigs() error {
} }
return nil return nil
} }
// newMapFromStrings returns a set containing every string from a.
//
// The returned map is never nil; it is empty when a is empty.
func newMapFromStrings(a []string) map[string]struct{} {
	set := make(map[string]struct{}, len(a))
	for i := range a {
		set[a[i]] = struct{}{}
	}
	return set
}

View File

@ -1,38 +0,0 @@
package remotewrite
import (
"reflect"
"testing"
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
)
// TestExtractShardingLabels checks which labels extractShardingLabels keeps
// when shardByURLLabelsMap lists `instance` and `job`, both in the regular
// and in the inverse mode.
func TestExtractShardingLabels(t *testing.T) {
	// Install the sharding label set for the duration of the test only.
	shardByURLLabelsMap = map[string]struct{}{
		"instance": {},
		"job":      {},
	}
	defer func() {
		shardByURLLabelsMap = nil
	}()

	// labels builds a label list from names; it returns nil when no names are given.
	labels := func(names ...string) []prompbmarshal.Label {
		var ls []prompbmarshal.Label
		for _, name := range names {
			ls = append(ls, prompbmarshal.Label{Name: name})
		}
		return ls
	}

	check := func(src, want []prompbmarshal.Label, inverse bool) {
		t.Helper()

		var dst []prompbmarshal.Label
		result := extractShardingLabels(dst, src, inverse)
		if reflect.DeepEqual(result, want) {
			return
		}
		t.Fatalf("expected to get \n%#v; \ngot \n%#v instead", want, result)
	}

	// No input labels.
	check(nil, nil, true)
	check(nil, nil, false)

	// A single label, which isn't in the sharding set.
	check(labels("foo"), nil, false)
	check(labels("foo"), labels("foo"), true)

	// One label from the sharding set.
	check(labels("foo", "job"), labels("job"), false)
	check(labels("foo", "job"), labels("foo"), true)

	// Both labels from the sharding set.
	check(labels("foo", "instance", "job"), labels("instance", "job"), false)
	check(labels("foo", "instance", "job"), labels("foo"), true)
}

View File

@ -55,7 +55,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve trace display for better visual separation of branches. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5926). * FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve trace display for better visual separation of branches. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5926).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): ability to limit the ingestion rate via `-maxIngestionRate` command-line flag. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5900). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): ability to limit the ingestion rate via `-maxIngestionRate` command-line flag. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5900).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): use the provided `-remoteWrite.tlsServerName` as `Host` header in requests to `-remoteWrite.url`. This allows sending data to https remote storage by IP address instead of hostname. Thanks to @minor-fixes for initial idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5802). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): use the provided `-remoteWrite.tlsServerName` as `Host` header in requests to `-remoteWrite.url`. This allows sending data to https remote storage by IP address instead of hostname. Thanks to @minor-fixes for initial idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5802).
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.inverseLabels` cmd-line flag to enhance sharding logic across configured `-remoteWrite.url` URLs when `-remoteWrite.shardByURL.labels` is set. Thanks to @edma2 for the idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.ignoreLabels` command-line flag, which can be used for specifying the ignored list of labels when [sharding by `-remoteWrite.url` is enabled](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages). Thanks to @edma2 for the idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938).
* FEATURE: optimize [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) when `match[]` filters contains metric name. For example, `/api/v1/label/instance/values?match[]=up` now works much faster than before. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055). * FEATURE: optimize [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) when `match[]` filters contains metric name. For example, `/api/v1/label/instance/values?match[]=up` now works much faster than before. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [native protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-from-victoriametrics). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5824). * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [native protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-from-victoriametrics). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5824).
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes. * FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes.

View File

@ -187,12 +187,17 @@ and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) typ
to the same second-level `vmagent` instance, so they are aggregated properly. to the same second-level `vmagent` instance, so they are aggregated properly.
If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding
among remote storage systems specified in `-remoteWrite.url`. Sometimes it may be needed to use only a particular among remote storage systems specified in `-remoteWrite.url`.
set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
Sometimes it may be needed to use only a particular set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURL.labels` to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURL.labels`
command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance` command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
label to the same `-remoteWrite.url`. The sharding logic can be inverted by specifying `-remoteWrite.shardByURL.inverseLabels`, label to the same `-remoteWrite.url`.
so that metrics are sharded using all labels except the ones specified in `-remoteWrite.shardByURL.labels`.
Sometimes it may be needed to ignore some labels when sharding samples across multiple `-remoteWrite.url` backends.
For example, all the [raw samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) with the same set of labels
except for the `instance` and `pod` labels may need to be routed to the same backend. In this case the list of ignored labels must be passed to
`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`.
See also [how to scrape big number of targets](#scraping-big-number-of-targets). See also [how to scrape big number of targets](#scraping-big-number-of-targets).
@ -2106,10 +2111,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
Empty values are set to default value. Empty values are set to default value.
-remoteWrite.shardByURL -remoteWrite.shardByURL
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
-remoteWrite.shardByURL.inverseLabels -remoteWrite.shardByURL.ignoreLabels array
Inverse the behavior of -remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in -remoteWrite.shardByURL.labels. Optional list of labels, which must be ignored when sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.labels
Supports an array of values separated by comma or specified via multiple flags.
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
-remoteWrite.shardByURL.labels array -remoteWrite.shardByURL.labels array
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.inverseLabels. Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.ignoreLabels
Supports an array of values separated by comma or specified via multiple flags. Supports an array of values separated by comma or specified via multiple flags.
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces. Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
-remoteWrite.showURL -remoteWrite.showURL