mirror of
https://github.com/VictoriaMetrics/VictoriaMetrics.git
synced 2024-12-13 13:11:37 +01:00
app/vmagent/remotewrite: follow-up for 166b97b8d0
and b6bd9a97a3
- Make the configuration clearer by accepting the list of ignored labels during sharding via a dedicated command-line flag - -remoteWrite.shardByURL.ignoreLabels. This avoids overloading the meaning of the -remoteWrite.shardByURL.labels command-line flag. - Removed a superfluous memory allocation for each processed sample when sharding by remote storage is enabled. Updates https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938
This commit is contained in:
parent
111a8e5fd0
commit
3f79e54a51
@ -50,9 +50,11 @@ var (
|
||||
"By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages")
|
||||
shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+
|
||||
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.inverseLabels.")
|
||||
shardByURLLabelsInverse = flag.Bool("remoteWrite.shardByURL.inverseLabels", false, "Inverse the behavior of -remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in -remoteWrite.shardByURL.labels.")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.ignoreLabels")
|
||||
shardByURLIgnoreLabels = flagutil.NewArrayString("remoteWrite.shardByURL.ignoreLabels", "Optional list of labels, which must be ignored when sharding outgoing samples "+
|
||||
"among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain "+
|
||||
"even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.labels")
|
||||
tmpDataPath = flag.String("remoteWrite.tmpDataPath", "vmagent-remotewrite-data", "Path to directory for storing pending data, which isn't sent to the configured -remoteWrite.url . "+
|
||||
"See also -remoteWrite.maxDiskUsagePerURL and -remoteWrite.disableOnDiskQueue")
|
||||
keepDanglingQueues = flag.Bool("remoteWrite.keepDanglingQueues", false, "Keep persistent queues contents at -remoteWrite.tmpDataPath in case there are no matching -remoteWrite.url. "+
|
||||
"Useful when -remoteWrite.url is changed temporarily and persistent queue files will be needed later on.")
|
||||
@ -149,7 +151,10 @@ func InitSecretFlags() {
|
||||
}
|
||||
}
|
||||
|
||||
var shardByURLLabelsMap map[string]struct{}
|
||||
var (
|
||||
// shardByURLLabelsMap is the set of label names built in Init from
// -remoteWrite.shardByURL.labels. When it is non-empty, only labels whose
// names are in this set are hashed for choosing the target -remoteWrite.url.
shardByURLLabelsMap map[string]struct{}
|
||||
// shardByURLIgnoreLabelsMap is the set of label names built in Init from
// -remoteWrite.shardByURL.ignoreLabels. It is consulted only when
// shardByURLLabelsMap is empty; labels whose names are in this set are then
// excluded from the hash used for choosing the target -remoteWrite.url.
shardByURLIgnoreLabelsMap map[string]struct{}
|
||||
)
|
||||
|
||||
// Init initializes remotewrite.
|
||||
//
|
||||
@ -193,13 +198,14 @@ func Init() {
|
||||
if *queues <= 0 {
|
||||
*queues = 1
|
||||
}
|
||||
if len(*shardByURLLabels) > 0 {
|
||||
m := make(map[string]struct{}, len(*shardByURLLabels))
|
||||
for _, label := range *shardByURLLabels {
|
||||
m[label] = struct{}{}
|
||||
}
|
||||
shardByURLLabelsMap = m
|
||||
|
||||
if len(*shardByURLLabels) > 0 && len(*shardByURLIgnoreLabels) > 0 {
|
||||
logger.Fatalf("-remoteWrite.shardByURL.labels and -remoteWrite.shardByURL.ignoreLabels cannot be set simultaneously; " +
|
||||
"see https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages")
|
||||
}
|
||||
shardByURLLabelsMap = newMapFromStrings(*shardByURLLabels)
|
||||
shardByURLIgnoreLabelsMap = newMapFromStrings(*shardByURLIgnoreLabels)
|
||||
|
||||
initLabelsGlobal()
|
||||
|
||||
// Register SIGHUP handler for config reload before loadRelabelConfigs.
|
||||
@ -539,7 +545,24 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar
|
||||
tssByURL := make([][]prompbmarshal.TimeSeries, len(rwctxs))
|
||||
tmpLabels := promutils.GetLabels()
|
||||
for _, ts := range tssBlock {
|
||||
hashLabels := extractShardingLabels(tmpLabels.Labels, ts.Labels, *shardByURLLabelsInverse)
|
||||
hashLabels := ts.Labels
|
||||
if len(shardByURLLabelsMap) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLLabelsMap[label.Name]; ok {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
tmpLabels.Labels = hashLabels
|
||||
} else if len(shardByURLIgnoreLabelsMap) > 0 {
|
||||
hashLabels = tmpLabels.Labels[:0]
|
||||
for _, label := range ts.Labels {
|
||||
if _, ok := shardByURLIgnoreLabelsMap[label.Name]; !ok {
|
||||
hashLabels = append(hashLabels, label)
|
||||
}
|
||||
}
|
||||
tmpLabels.Labels = hashLabels
|
||||
}
|
||||
h := getLabelsHash(hashLabels)
|
||||
idx := h % uint64(len(tssByURL))
|
||||
tssByURL[idx] = append(tssByURL[idx], ts)
|
||||
@ -585,20 +608,6 @@ func tryPushBlockToRemoteStorages(rwctxs []*remoteWriteCtx, tssBlock []prompbmar
|
||||
return !anyPushFailed.Load()
|
||||
}
|
||||
|
||||
// extractShardingLabels selects the labels from src which must participate in sharding.
//
// If shardByURLLabelsMap is empty, src is returned as is and dst is left untouched.
// Otherwise dst is truncated to zero length (its backing storage is reused) and
// filled with labels from src:
//   - when inverse is false, only labels whose names are present in shardByURLLabelsMap are kept;
//   - when inverse is true, only labels whose names are absent from shardByURLLabelsMap are kept.
//
// The filled dst is returned; the relative order of the kept labels matches src.
func extractShardingLabels(dst, src []prompbmarshal.Label, inverse bool) []prompbmarshal.Label {
|
||||
if len(shardByURLLabelsMap) < 1 {
|
||||
return src
|
||||
}
|
||||
dst = dst[:0]
|
||||
for _, label := range src {
|
||||
_, ok := shardByURLLabelsMap[label.Name]
|
||||
// Keep the label when map membership and the inverse flag differ (ok XOR inverse).
if ok && !inverse || !ok && inverse {
|
||||
dst = append(dst, label)
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
||||
|
||||
// sortLabelsIfNeeded sorts labels if -sortLabels command-line flag is set.
|
||||
func sortLabelsIfNeeded(tss []prompbmarshal.TimeSeries) {
|
||||
if !*sortLabels {
|
||||
@ -1004,3 +1013,11 @@ func CheckStreamAggrConfigs() error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newMapFromStrings returns a set containing every string from a.
//
// The set is represented as a map with empty-struct values and is pre-sized to len(a).
// Duplicate strings in a collapse into a single key. A non-nil empty map is returned
// when a is empty, so callers can check len() of the result to detect "nothing configured".
func newMapFromStrings(a []string) map[string]struct{} {
|
||||
m := make(map[string]struct{}, len(a))
|
||||
for _, s := range a {
|
||||
m[s] = struct{}{}
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
package remotewrite
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/VictoriaMetrics/VictoriaMetrics/lib/prompbmarshal"
|
||||
)
|
||||
|
||||
// TestExtractShardingLabels checks extractShardingLabels for both values of the inverse
// argument while the package-level shardByURLLabelsMap contains "instance" and "job".
func TestExtractShardingLabels(t *testing.T) {
|
||||
// Install the sharding labels used by all the cases below.
shardByURLLabelsMap = make(map[string]struct{})
|
||||
shardByURLLabelsMap["instance"] = struct{}{}
|
||||
shardByURLLabelsMap["job"] = struct{}{}
|
||||
// Reset the package-level map to nil when the test returns.
defer func() {
|
||||
shardByURLLabelsMap = nil
|
||||
}()
|
||||
|
||||
// f runs extractShardingLabels on in with a nil destination and fails the test
// if the result differs from exp.
f := func(in, exp []prompbmarshal.Label, inverse bool) {
|
||||
t.Helper()
|
||||
var got []prompbmarshal.Label
|
||||
got = extractShardingLabels(got, in, inverse)
|
||||
if !reflect.DeepEqual(got, exp) {
|
||||
t.Fatalf("expected to get \n%#v; \ngot \n%#v instead", exp, got)
|
||||
}
|
||||
}
|
||||
|
||||
// Empty input produces empty output regardless of inverse.
f(nil, nil, true)
|
||||
f(nil, nil, false)
|
||||
|
||||
// A label outside the map is dropped by default and kept when inverse is set.
f([]prompbmarshal.Label{{Name: "foo"}}, nil, false)
|
||||
f([]prompbmarshal.Label{{Name: "foo"}}, []prompbmarshal.Label{{Name: "foo"}}, true)
|
||||
|
||||
// One matching and one non-matching label.
f([]prompbmarshal.Label{{Name: "foo"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "job"}}, false)
|
||||
f([]prompbmarshal.Label{{Name: "foo"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "foo"}}, true)
|
||||
|
||||
// Both configured labels plus one non-matching label.
f([]prompbmarshal.Label{{Name: "foo"}, {Name: "instance"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "instance"}, {Name: "job"}}, false)
|
||||
f([]prompbmarshal.Label{{Name: "foo"}, {Name: "instance"}, {Name: "job"}}, []prompbmarshal.Label{{Name: "foo"}}, true)
|
||||
}
|
@ -54,7 +54,7 @@ See also [LTS releases](https://docs.victoriametrics.com/lts-releases/).
|
||||
* FEATURE: [stream aggregation](https://docs.victoriametrics.com/stream-aggregation/): expose `vm_streamaggr_flush_timeouts_total` and `vm_streamaggr_dedup_flush_timeouts_total` [counters](https://docs.victoriametrics.com/keyconcepts/#counter) at [`/metrics` page](https://docs.victoriametrics.com/#monitoring), which can be used for detecting flush timeouts for stream aggregation states. Expose also `vm_streamaggr_flush_duration_seconds` and `vm_streamaggr_dedup_flush_duration_seconds` [histograms](https://docs.victoriametrics.com/keyconcepts/#histogram) for monitoring the real flush durations of stream aggregation states.
|
||||
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): improve trace display for better visual separation of branches. See [this pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5926).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): use the provided `-remoteWrite.tlsServerName` as `Host` header in requests to `-remoteWrite.url`. This allows sending data to https remote storage by IP address instead of hostname. Thanks to @minor-fixes for initial idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5802).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.inverseLabels` cmd-line flag to enhance sharding logic across configured `-remoteWrite.url` URLs when `-remoteWrite.shardByURL.labels` is set. Thanks to @edma2 for the idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938).
|
||||
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.shardByURL.ignoreLabels` command-line flag, which can be used for specifying the ignored list of labels when [sharding by `-remoteWrite.url` is enabled](https://docs.victoriametrics.com/vmagent/#sharding-among-remote-storages). Thanks to @edma2 for the idea and [the pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5938).
|
||||
* FEATURE: optimize [`/api/v1/labels`](https://docs.victoriametrics.com/url-examples/#apiv1labels) and [`/api/v1/label/.../values`](https://docs.victoriametrics.com/url-examples/#apiv1labelvalues) when `match[]` filters contains metric name. For example, `/api/v1/label/instance/values?match[]=up` now works much faster than before. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5055).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for [native protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-from-victoriametrics). See [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/5748). Thanks to @khushijain21 for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5824).
|
||||
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): support client-side TLS configuration for VictoriaMetrics destination specified via `--vm-*` cmd-line flags used in [InfluxDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-influxdb-1x), [Remote Read protocol](https://docs.victoriametrics.com/vmctl/#migrating-data-by-remote-read-protocol), [OpenTSDB](https://docs.victoriametrics.com/vmctl/#migrating-data-from-opentsdb), [Prometheus](https://docs.victoriametrics.com/vmctl/#migrating-data-from-prometheus) and [Promscale](https://docs.victoriametrics.com/vmctl/#migrating-data-from-promscale) migration modes.
|
||||
|
@ -187,12 +187,17 @@ and [histogram](https://docs.victoriametrics.com/keyConcepts.html#histogram) typ
|
||||
to the same second-level `vmagent` instance, so they are aggregated properly.
|
||||
|
||||
If `-remoteWrite.shardByURL` command-line flag is set, then all the metric labels are used for even sharding
|
||||
among remote storage systems specified in `-remoteWrite.url`. Sometimes it may be needed to use only a particular
|
||||
set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
|
||||
among remote storage systems specified in `-remoteWrite.url`.
|
||||
|
||||
Sometimes it may be needed to use only a particular set of labels for sharding. For example, it may be needed to route all the metrics with the same `instance` label
|
||||
to the same `-remoteWrite.url`. In this case you can specify comma-separated list of these labels in the `-remoteWrite.shardByURL.labels`
|
||||
command-line flag. For example, `-remoteWrite.shardByURL.labels=instance,__name__` would shard metrics with the same name and `instance`
|
||||
label to the same `-remoteWrite.url`. The sharding logic can be inverted by specifying `-remoteWrite.shardByURL.inverseLabels`,
|
||||
so that metrics are sharded using all labels except the ones specified in `-remoteWrite.shardByURL.labels`.
|
||||
label to the same `-remoteWrite.url`.
|
||||
|
||||
Sometimes it may be needed to ignore some labels when sharding samples across multiple `-remoteWrite.url` backends.
|
||||
For example, it may be needed to route all the [raw samples](https://docs.victoriametrics.com/keyconcepts/#raw-samples) with the same set of labels
|
||||
except for the `instance` and `pod` labels to the same backend. In this case the list of ignored labels must be passed to
|
||||
`-remoteWrite.shardByURL.ignoreLabels` command-line flag: `-remoteWrite.shardByURL.ignoreLabels=instance,pod`.
|
||||
|
||||
See also [how to scrape big number of targets](#scraping-big-number-of-targets).
|
||||
|
||||
@ -2106,10 +2111,12 @@ See the docs at https://docs.victoriametrics.com/vmagent.html .
|
||||
Empty values are set to default value.
|
||||
-remoteWrite.shardByURL
|
||||
Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages
|
||||
-remoteWrite.shardByURL.inverseLabels
|
||||
Inverse the behavior of -remoteWrite.shardByURL.labels so that series are sharded using all labels except the ones specified in -remoteWrite.shardByURL.labels.
|
||||
-remoteWrite.shardByURL.ignoreLabels array
|
||||
Optional list of labels, which must be ignored when sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.labels
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.shardByURL.labels array
|
||||
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.inverseLabels.
|
||||
Optional list of labels, which must be used for sharding outgoing samples among remote storage systems if -remoteWrite.shardByURL command-line flag is set. By default all the labels are used for sharding in order to gain even distribution of series over the specified -remoteWrite.url systems. See also -remoteWrite.shardByURL.ignoreLabels
|
||||
Supports an array of values separated by comma or specified via multiple flags.
|
||||
Value can contain comma inside single-quoted or double-quoted string, {}, [] and () braces.
|
||||
-remoteWrite.showURL
|
||||
|
Loading…
Reference in New Issue
Block a user