From 48920bdef8511b98a4c2c2492e3dbca098837491 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Sun, 15 Aug 2021 13:32:40 +0300 Subject: [PATCH] app/vmagent/remotewrite: expose vmagent_remotewrite_send_duration_seconds_total metric This metric can be used for determining high saturation of every connection to remote storage with an alerting query `rate(vmagent_remotewrite_send_duration_seconds_total) > 0.9s`. This query triggers when a connection is saturated by more than 90%. --- app/vmagent/remotewrite/client.go | 4 ++++ docs/CHANGELOG.md | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/app/vmagent/remotewrite/client.go b/app/vmagent/remotewrite/client.go index c1900b0399..667f5adbfc 100644 --- a/app/vmagent/remotewrite/client.go +++ b/app/vmagent/remotewrite/client.go @@ -78,6 +78,7 @@ type client struct { errorsCount *metrics.Counter packetsDropped *metrics.Counter retriesCount *metrics.Counter + sendDuration *metrics.FloatCounter wg sync.WaitGroup stopCh chan struct{} @@ -133,6 +134,7 @@ func newClient(argIdx int, remoteWriteURL, sanitizedURL string, fq *persistentqu c.errorsCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_errors_total{url=%q}`, c.sanitizedURL)) c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL)) c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL)) + c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL)) for i := 0; i < concurrency; i++ { c.wg.Add(1) go func() { @@ -204,7 +206,9 @@ func (c *client) runWorker() { return } go func() { + startTime := time.Now() ch <- c.sendBlock(block) + c.sendDuration.Add(time.Since(startTime).Seconds()) }() select { case ok := <-ch: diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6284398cc7..4775f68d0e 100644 --- 
a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -16,7 +16,8 @@ sort: 15 * FEATURE: add `-search.maxSamplesPerQuery` command-line flag for limiting the number of raw samples a single query can process across all the time series. This option can protect from heavy queries, which select too big number of raw samples. Thanks to @jiangxinlingdu for [the initial pull request](https://github.com/VictoriaMetrics/VictoriaMetrics/pull/1478). * FEATURE: improve performance for queries that process big number of time series and/or samples on systems with big number of CPU cores. * FEATURE: vmalert: expose `vmalert_alerting_rules_last_evaluation_samples` and `vmalert_recording_rules_last_evaluation_samples` metrics. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1494). -* FEATURE: vminsert: expose `vm_rpc_send_duration_seconds_total` counter, which can be used for determining high saturation of every `vminsert -> vmstorage` link with an alerting query `rate(vm_rpc_send_duration_seconds_total) > 0.9s`. This query triggers when the link is saturated by more than 90%. +* FEATURE: vminsert: expose `vm_rpc_send_duration_seconds_total` counter, which can be used for determining high saturation of every `vminsert -> vmstorage` link with an alerting query `rate(vm_rpc_send_duration_seconds_total) > 0.9s`. This query triggers when the link is saturated by more than 90%. This usually means that more `vminsert` or `vmstorage` nodes must be added to the cluster in order to increase the total number of `vminsert -> vmstorage` links. +* FEATURE: vmagent: expose `vmagent_remotewrite_send_duration_seconds_total` counter, which can be used for determining high saturation of every connection to remote storage with an alerting query `rate(vmagent_remotewrite_send_duration_seconds_total) > 0.9s`. This query triggers when a connection is saturated by more than 90%. 
This usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage. * BUGFIX: fix corner cases for queries on time ranges exceeding 40 days. Previously some series can be missing in query results. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1486). * BUGFIX: vmselect: return dummy response at `/rules` page in the same way as for `/api/v1/rules` page. The `/rules` page is requested by Grafana 8. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/1493) for details.