vmagent: expose metric vmagent_remotewrite_queues (#2871)

The new metric `vmagent_remotewrite_queues` exports a static value equal to
the number of configured remote write queues. This metric is useful for
calculating the total saturation of each configured URL given its number
of queues. See the corresponding changes to the vmagent alerts and dashboard.

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2022-07-18 13:31:35 +02:00 committed by GitHub
parent bf59511c96
commit 27f1c65074
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 40 deletions

View File

@ -154,6 +154,9 @@ func (c *client) init(argIdx, concurrency int, sanitizedURL string) {
c.packetsDropped = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_packets_dropped_total{url=%q}`, c.sanitizedURL))
c.retriesCount = metrics.GetOrCreateCounter(fmt.Sprintf(`vmagent_remotewrite_retries_count_total{url=%q}`, c.sanitizedURL))
c.sendDuration = metrics.GetOrCreateFloatCounter(fmt.Sprintf(`vmagent_remotewrite_send_duration_seconds_total{url=%q}`, c.sanitizedURL))
metrics.GetOrCreateGauge(fmt.Sprintf(`vmagent_remotewrite_queues{url=%q}`, c.sanitizedURL), func() float64 {
return float64(*queues)
})
for i := 0; i < concurrency; i++ {
c.wg.Add(1)
go func() {

View File

@ -6,7 +6,7 @@
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "8.5.3"
"version": "8.4.4"
},
{
"type": "panel",
@ -61,12 +61,12 @@
}
]
},
"description": "Overview for VictoriaMetrics vmagent v1.73.0 or higher",
"description": "Overview for VictoriaMetrics vmagent v1.80.0 or higher",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"iteration": 1656943336787,
"iteration": 1657810604530,
"links": [
{
"icon": "doc",
@ -154,7 +154,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"up\"})",
@ -218,7 +218,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(vm_promscrape_targets{job=~\"$job\", instance=~\"$instance\", status=\"down\"})",
@ -285,7 +285,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(increase(vm_log_messages_total{job=~\"$job\", instance=~\"$instance\", level!=\"info\"}[30m]))",
@ -344,7 +344,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"targets": [
{
"expr": "sum(vm_persistentqueue_bytes_pending{job=~\"$job\", instance=~\"$instance\"})",
@ -490,7 +490,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -589,7 +589,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -702,7 +702,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -805,7 +805,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -946,7 +946,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -1039,7 +1039,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -1138,7 +1138,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -1237,7 +1237,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -1344,7 +1344,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -2457,7 +2457,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 4
"y": 43
},
"hiddenSeries": false,
"id": 60,
@ -2480,7 +2480,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -2555,7 +2555,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 4
"y": 43
},
"hiddenSeries": false,
"id": 66,
@ -2578,7 +2578,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -2652,7 +2652,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 12
"y": 51
},
"hiddenSeries": false,
"id": 61,
@ -2675,7 +2675,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -2748,7 +2748,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 12
"y": 51
},
"hiddenSeries": false,
"id": 65,
@ -2771,7 +2771,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -2837,7 +2837,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 20
"y": 59
},
"heatmap": {},
"hideZeroBuckets": false,
@ -2881,9 +2881,10 @@
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Shows saturation of every connection to remote storage. If the threshold of 0.9sec is reached, then the connection is saturated by more than 90% and vmagent won't be able to keep up. This usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage.\n",
"description": "Shows saturation of every connection to remote storage. If the threshold of 90% is reached, then the connection is saturated (busy or slow) by more than 90%, so vmagent won't be able to keep up and can start buffering data. \n\nThis usually means that `-remoteWrite.queues` command-line flag must be increased in order to increase the number of connections per each remote storage.\n",
"fieldConfig": {
"defaults": {
"links": []
@ -2896,7 +2897,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 20
"y": 59
},
"hiddenSeries": false,
"id": 84,
@ -2919,7 +2920,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -2930,7 +2931,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum(rate(vmagent_remotewrite_send_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance, url)",
"expr": "sum(rate(vmagent_remotewrite_send_duration_seconds_total{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}[$__rate_interval])) by (instance, url)\n/\nmax(vmagent_remotewrite_queues{job=~\"$job\", instance=~\"$instance\", url=~\"$url\"}) by(instance, url)",
"interval": "",
"legendFormat": "",
"refId": "A"
@ -2943,7 +2944,7 @@
"fill": true,
"line": true,
"op": "gt",
"value": 0.9,
"value": 90,
"yaxis": "left"
}
],
@ -2963,7 +2964,7 @@
"yaxes": [
{
"$$hashKey": "object:662",
"format": "s",
"format": "percentunit",
"logBase": 1,
"min": "0",
"show": true
@ -2997,7 +2998,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 28
"y": 67
},
"heatmap": {},
"hideZeroBuckets": false,
@ -3053,7 +3054,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 28
"y": 67
},
"heatmap": {},
"hideZeroBuckets": false,
@ -3104,7 +3105,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 36
"y": 75
},
"hiddenSeries": false,
"id": 88,
@ -3124,7 +3125,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -3207,7 +3208,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 36
"y": 75
},
"hiddenSeries": false,
"id": 90,
@ -3227,7 +3228,7 @@
"alertThreshold": true
},
"percentage": false,
"pluginVersion": "8.5.3",
"pluginVersion": "8.4.4",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -4567,7 +4568,7 @@
}
],
"refresh": "",
"schemaVersion": 36,
"schemaVersion": 35,
"style": "dark",
"tags": [
"vmagent",
@ -4577,7 +4578,9 @@
"list": [
{
"current": {
"selected": false
"selected": true,
"text": "VM",
"value": "VM"
},
"hide": 0,
"includeAll": false,

View File

@ -270,7 +270,9 @@ groups:
Ensure that destination is up and reachable."
- alert: RemoteWriteConnectionIsSaturated
expr: rate(vmagent_remotewrite_send_duration_seconds_total[5m]) > 0.9
expr: |
sum(rate(vmagent_remotewrite_send_duration_seconds_total[5m])) by(job, instance, url)
> 0.9 * max(vmagent_remotewrite_queues) by(job, instance, url)
for: 15m
labels:
severity: warning