diff --git a/app/vmagent/remotewrite/relabel.go b/app/vmagent/remotewrite/relabel.go index c51203d5d3..a0e0737b26 100644 --- a/app/vmagent/remotewrite/relabel.go +++ b/app/vmagent/remotewrite/relabel.go @@ -3,6 +3,7 @@ package remotewrite import ( "flag" "fmt" + "strconv" "strings" "sync" @@ -92,6 +93,7 @@ func (rctx *relabelCtx) applyRelabeling(tss []prompbmarshal.TimeSeries, pcs *pro // Nothing to change. return tss } + rctx.reset() tssDst := tss[:0] labels := rctx.labels[:0] for i := range tss { @@ -120,6 +122,7 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL if len(extraLabels) == 0 { return } + rctx.reset() labels := rctx.labels[:0] for i := range tss { ts := &tss[i] @@ -139,6 +142,34 @@ func (rctx *relabelCtx) appendExtraLabels(tss []prompbmarshal.TimeSeries, extraL rctx.labels = labels } +func (rctx *relabelCtx) tenantToLabels(tss []prompbmarshal.TimeSeries, accountID, projectID uint32) { + rctx.reset() + accountIDStr := strconv.FormatUint(uint64(accountID), 10) + projectIDStr := strconv.FormatUint(uint64(projectID), 10) + labels := rctx.labels[:0] + for i := range tss { + ts := &tss[i] + labelsLen := len(labels) + for _, label := range ts.Labels { + labelName := label.Name + if labelName == "vm_account_id" || labelName == "vm_project_id" { + continue + } + labels = append(labels, label) + } + labels = append(labels, prompbmarshal.Label{ + Name: "vm_account_id", + Value: accountIDStr, + }) + labels = append(labels, prompbmarshal.Label{ + Name: "vm_project_id", + Value: projectIDStr, + }) + ts.Labels = labels[labelsLen:] + } + rctx.labels = labels +} + type relabelCtx struct { // pool for labels, which are used during the relabeling. 
labels []prompbmarshal.Label @@ -160,7 +191,7 @@ func getRelabelCtx() *relabelCtx { } func putRelabelCtx(rctx *relabelCtx) { - rctx.labels = rctx.labels[:0] + rctx.reset() relabelCtxPool.Put(rctx) } diff --git a/app/vmagent/remotewrite/remotewrite.go b/app/vmagent/remotewrite/remotewrite.go index 7cb5e0c5c1..04d38a4aae 100644 --- a/app/vmagent/remotewrite/remotewrite.go +++ b/app/vmagent/remotewrite/remotewrite.go @@ -37,11 +37,15 @@ var ( remoteWriteURLs = flagutil.NewArrayString("remoteWrite.url", "Remote storage URL to write data to. It must support either VictoriaMetrics remote write protocol "+ "or Prometheus remote_write protocol. Example url: http://:8428/api/v1/write . "+ "Pass multiple -remoteWrite.url options in order to replicate the collected data to multiple remote storage systems. "+ - "The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set. "+ - "See also -remoteWrite.multitenantURL") + "The data can be sharded among the configured remote storage systems if -remoteWrite.shardByURL flag is set") remoteWriteMultitenantURLs = flagutil.NewArrayString("remoteWrite.multitenantURL", "Base path for multitenant remote storage URL to write data to. "+ "See https://docs.victoriametrics.com/vmagent.html#multitenancy for details. Example url: http://:8480 . "+ - "Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. See also -remoteWrite.url") + "Pass multiple -remoteWrite.multitenantURL flags in order to replicate data to multiple remote storage systems. "+ + "This flag is deprecated in favor of -enableMultitenantHandlers . See https://docs.victoriametrics.com/vmagent.html#multitenancy") + enableMultitenantHandlers = flag.Bool("enableMultitenantHandlers", false, "Whether to process incoming data via multitenant insert handlers according to "+ + "https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format . 
By default incoming data is processed via single-node insert handlers "+ + "according to https://docs.victoriametrics.com/#how-to-import-time-series-data . "+ + "See https://docs.victoriametrics.com/vmagent.html#multitenancy for details") shardByURL = flag.Bool("remoteWrite.shardByURL", false, "Whether to shard outgoing series across all the remote storage systems enumerated via -remoteWrite.url . "+ "By default the data is replicated across all the -remoteWrite.url . See https://docs.victoriametrics.com/vmagent.html#sharding-among-remote-storages") shardByURLLabels = flagutil.NewArrayString("remoteWrite.shardByURL.labels", "Optional list of labels, which must be used for sharding outgoing samples "+ @@ -114,9 +118,9 @@ var ( } ) -// MultitenancyEnabled returns true if -remoteWrite.multitenantURL is specified. +// MultitenancyEnabled returns true if -enableMultitenantHandlers or -remoteWrite.multitenantURL is specified. func MultitenancyEnabled() bool { - return len(*remoteWriteMultitenantURLs) > 0 + return *enableMultitenantHandlers || len(*remoteWriteMultitenantURLs) > 0 } // Contains the current relabelConfigs. @@ -384,17 +388,23 @@ func TryPush(at *auth.Token, wr *prompbmarshal.WriteRequest) bool { } func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailure bool) bool { - if at == nil && len(*remoteWriteMultitenantURLs) > 0 { - // Write data to default tenant if at isn't set while -remoteWrite.multitenantURL is set. + tss := wr.Timeseries + + if at == nil && MultitenancyEnabled() { + // Write data to default tenant if at isn't set when multitenancy is enabled. at = defaultAuthToken } + + var tenantRctx *relabelCtx var rwctxs []*remoteWriteCtx if at == nil { rwctxs = rwctxsDefault + } else if len(*remoteWriteMultitenantURLs) == 0 { + // Convert at to (vm_account_id, vm_project_id) labels. 
+ tenantRctx = getRelabelCtx() + defer putRelabelCtx(tenantRctx) + rwctxs = rwctxsDefault } else { - if len(*remoteWriteMultitenantURLs) == 0 { - logger.Panicf("BUG: -remoteWrite.multitenantURL command-line flag must be set when __tenant_id__=%q label is set", at) - } rwctxsMapLock.Lock() tenantID := tenantmetrics.TenantID{ AccountID: at.AccountID, @@ -408,7 +418,6 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur rwctxsMapLock.Unlock() } - tss := wr.Timeseries rowsCount := getRowsCount(tss) if *disableOnDiskQueue { @@ -433,10 +442,7 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur pcsGlobal := rcs.global if pcsGlobal.Len() > 0 { rctx = getRelabelCtx() - defer func() { - rctx.reset() - putRelabelCtx(rctx) - }() + defer putRelabelCtx(rctx) } globalRowsPushedBeforeRelabel.Add(rowsCount) maxSamplesPerBlock := *maxRowsPerBlock @@ -463,6 +469,9 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur } else { tss = nil } + if tenantRctx != nil { + tenantRctx.tenantToLabels(tssBlock, at.AccountID, at.ProjectID) + } if rctx != nil { rowsCountBeforeRelabel := getRowsCount(tssBlock) tssBlock = rctx.applyRelabeling(tssBlock, pcsGlobal) @@ -482,9 +491,6 @@ func tryPush(at *auth.Token, wr *prompbmarshal.WriteRequest, dropSamplesOnFailur } return false } - if rctx != nil { - rctx.reset() - } } return true } diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index e1da7470ad..eda6d99524 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -31,6 +31,7 @@ The sandbox cluster installation is running under the constant load generated by * SECURITY: upgrade base docker image (Alpine) from 3.18.4 to 3.18.5. See [alpine 3.18.5 release notes](https://www.alpinelinux.org/posts/Alpine-3.15.11-3.16.8-3.17.6-3.18.5-released.html). 
* FEATURE: `vmselect`: allow opening [vmui](https://docs.victoriametrics.com/#vmui) and investigating [Top queries](https://docs.victoriametrics.com/#top-queries) and [Active queries](https://docs.victoriametrics.com/#active-queries) when the `vmselect` is overloaded with concurrent queries (e.g. when more than `-search.maxConcurrentRequests` concurrent queries are executed). Previously an attempt to open `Top queries` or `Active queries` at `vmui` could result in `couldn't start executing the request in ... seconds, since -search.maxConcurrentRequests=... concurrent requests are executed` error, which could complicate debugging of overloaded `vmselect` or single-node VictoriaMetrics. +* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-enableMultitenantHandlers` command-line flag, which allows receiving data via [VictoriaMetrics cluster urls](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) at `vmagent` and converting [tenant ids](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) to (`vm_account_id`, `vm_project_id`) labels before sending the data to the configured `-remoteWrite.url`. See [these docs](https://docs.victoriametrics.com/vmagent.html#multitenancy) for details. * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add `-remoteWrite.disableOnDiskQueue` command-line flag, which can be used for disabling data queueing to disk when the remote storage cannot keep up with the data ingestion rate. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence) and [this feature request](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/2110). * FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for reading and writing samples via [Google PubSub](https://cloud.google.com/pubsub). See [these docs](https://docs.victoriametrics.com/vmagent.html#google-pubsub-integration). 
* FEATURE: [vmagent](https://docs.victoriametrics.com/vmagent.html): add support for Datadog `/api/v2/series` and `/api/beta/sketches` ingestion protocols to vmagent/vminsert components. See this [doc](https://docs.victoriametrics.com/#how-to-send-data-from-datadog-agent) for examples. Thanks to @AndrewChubatiuk for the [pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/5094). diff --git a/docs/vmagent.md b/docs/vmagent.md index 80052a0a65..0ffc5d77f5 100644 --- a/docs/vmagent.md +++ b/docs/vmagent.md @@ -250,41 +250,31 @@ by specifying `-remoteWrite.forcePromProto` command-line flag for the correspond ## Multitenancy -By default `vmagent` collects the data without tenant identifiers and routes it to the configured `-remoteWrite.url`. +By default `vmagent` collects the data without [tenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) identifiers +and routes it to the remote storage specified via `-remoteWrite.url` command-line flag. The `-remoteWrite.url` can point to `/insert/<tenant_id>/prometheus/api/v1/write` path +at `vminsert` according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format). In this case all the metrics are written to the given `<tenant_id>` tenant. -[VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) supports writing data to multiple tenants -specified via special labels - see [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels). -This allows specifying tenant ids via [relabeling](#relabeling) and writing multitenant data -to a single `-remoteWrite.url=http:///insert/multitenant/prometheus/api/v1/write`. 
+The easiest way to write data to multiple distinct tenants is to specify the needed tenants via `vm_account_id` and `vm_project_id` labels +and then to push metrics with these labels to [multitenant url at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels). +The `vm_account_id` and `vm_project_id` labels can be specified via [relabeling](#relabeling) before sending the metrics to `-remoteWrite.url`. -`vmagent` can accept data from the same multitenant endpoints as `vminsert` from [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) -does according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) and route the accepted data -to the corresponding [tenants](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) in VictoriaMetrics cluster -pointed by the `-remoteWrite.multitenantURL` command-line flag. For example, if `-remoteWrite.multitenantURL` is set to `http://vminsert-service`, -then `vmagent` would accept multitenant data at `http://vmagent:8429/insert//...` endpoints in the same way -as [VictoriaMetrics cluster does](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) and route -it to `http://vminsert-service/insert//prometheus/api/v1/write`. - -If multiple `-remoteWrite.multitenantURL` command-line options are set, then `vmagent` replicates the collected data across all the configured urls. -This allows using a single `vmagent` instance in front of multiple VictoriaMetrics clusters. - -If `-remoteWrite.multitenantURL` command-line flag is set and `vmagent` is configured to scrape Prometheus-compatible targets -(e.g. if `-promscrape.config` command-line flag is set) then `vmagent` reads tenantID from `__tenant_id__` label -for the discovered targets and routes all the metrics from this target to the given `__tenant_id__`, -e.g. 
to the url `<-remoteWrite.multitenantURL>/insert/<__tenant_id__>/prometheus/api/v1/write`. - -For example, the following relabeling rule instructs sending metrics to tenantID defined in the `prometheus.io/tenant` annotation of Kubernetes pod deployment: +For example, the following relabeling rule instructs sending metrics to `<account_id>:0` [tenant](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy) +defined in the `prometheus.io/account_id` annotation of Kubernetes pod deployment: ```yaml scrape_configs: - kubernetes_sd_configs: - role: pod relabel_configs: - - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_tenant] - target_label: __tenant_id__ + - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_account_id] + target_label: vm_account_id ``` -If the target has no associated `__tenant_id__` label, then its' metrics are routed to zero tenantID, e.g. to `<-remoteWrite.multitenantURL>/insert/0/prometheus/api/v1/write`. +`vmagent` can accept data via the same multitenant endpoints as `vminsert` at [VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html) +does according to [these docs](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#url-format) if `-enableMultitenantHandlers` command-line flag is set. +In this case it automatically converts tenant identifiers to `vm_account_id` and `vm_project_id` labels before applying [relabeling](#relabeling) specified via `-remoteWrite.relabelConfig` +and `-remoteWrite.urlRelabelConfig` command-line flags. Metrics with `vm_account_id` and `vm_project_id` labels can be routed to the corresponding tenants +when specifying `-remoteWrite.url` to [multitenant url at VictoriaMetrics cluster](https://docs.victoriametrics.com/Cluster-VictoriaMetrics.html#multitenancy-via-labels). 
## How to collect metrics in Prometheus format @@ -1190,6 +1180,8 @@ with the following config: In this case it may be useful to disable on-disk data persistence in order to prevent from unbounded growth of the on-disk queue. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence). +See also [how to write metrics to multiple distinct tenants](https://docs.victoriametrics.com/vmagent.html#multitenancy). + #### Consume metrics from multiple topics `vmagent` can read messages from different topics in different formats. For example, the following command starts `vmagent`, which reads plaintext @@ -1322,6 +1314,8 @@ data_format = "influx" In this case it may be useful to disable on-disk data persistence in order to prevent from unbounded growth of the on-disk queue. See [these docs](https://docs.victoriametrics.com/vmagent.html#disabling-on-disk-persistence). +See also [how to write metrics to multiple distinct tenants](https://docs.victoriametrics.com/vmagent.html#multitenancy). + #### Command-line flags for Kafka consumer These command-line flags are available only in [enterprise](https://docs.victoriametrics.com/enterprise.html) version of `vmagent`,