dashboards: add panels for absolute value of mem and cpu usage by vmalert

See https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4627

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
hagen1778 2023-08-03 11:14:14 +02:00
parent 64e24e9e2b
commit c47138e1b0
No known key found for this signature in database
GPG Key ID: 3BF75F3741CA9640
2 changed files with 276 additions and 43 deletions

View File

@ -6,7 +6,7 @@
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.2.6"
"version": "9.2.7"
},
{
"type": "datasource",
@ -204,7 +204,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -264,7 +264,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -324,7 +324,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -388,7 +388,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -452,7 +452,7 @@
"text": {},
"textMode": "auto"
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -546,7 +546,7 @@
},
"showHeader": true
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -1182,7 +1182,7 @@
}
]
},
"pluginVersion": "9.2.6",
"pluginVersion": "9.2.7",
"targets": [
{
"datasource": {
@ -1243,6 +1243,230 @@
},
"id": 43,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "The percentage of used RSS memory\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 33
},
"id": 37,
"links": [
{
"targetBlank": true,
"title": "Profiling",
"url": "https://docs.victoriametrics.com/vmalert.html#profiling"
}
],
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)",
"interval": "",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "Memory usage % ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"description": "Amount of used RSS memory\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 33
},
"id": 57,
"links": [
{
"targetBlank": true,
"title": "Profiling",
"url": "https://docs.victoriametrics.com/vmalert.html#profiling"
}
],
"options": {
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max"
],
"displayMode": "table",
"placement": "bottom",
"showLegend": true,
"sortBy": "Last *",
"sortDesc": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"pluginVersion": "9.2.6",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n) by(job)",
"interval": "",
"legendFormat": "{{job}}",
"range": true,
"refId": "A"
}
],
"title": "Memory usage ($instance)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
@ -1308,7 +1532,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 35
"y": 41
},
"id": 35,
"links": [
@ -1362,7 +1586,7 @@
"type": "prometheus",
"uid": "$ds"
},
"description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.",
"description": "Shows the max number of CPU cores used by a `job` and the corresponding limit.",
"fieldConfig": {
"defaults": {
"color": {
@ -1414,7 +1638,7 @@
}
]
},
"unit": "percentunit"
"unit": "short"
},
"overrides": []
},
@ -1422,9 +1646,9 @@
"h": 8,
"w": 12,
"x": 12,
"y": 35
"y": 41
},
"id": 37,
"id": 56,
"links": [
{
"targetBlank": true,
@ -1447,7 +1671,7 @@
},
"tooltip": {
"mode": "multi",
"sort": "none"
"sort": "desc"
}
},
"pluginVersion": "9.2.6",
@ -1459,14 +1683,32 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "max(\n max_over_time(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job\", instance=~\"$instance\"}\n) by(job)",
"expr": "max(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(job)",
"format": "time_series",
"interval": "",
"legendFormat": "__auto",
"intervalFactor": 1,
"legendFormat": "{{job}}",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"editorMode": "code",
"exemplar": false,
"expr": "min(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}) by(job)",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "limit ({{job}})",
"range": true,
"refId": "B"
}
],
"title": "Memory usage % ($instance)",
"title": "CPU usage ($instance)",
"type": "timeseries"
},
{
@ -1535,7 +1777,7 @@
"h": 8,
"w": 12,
"x": 0,
"y": 43
"y": 49
},
"id": 39,
"links": [],
@ -1641,7 +1883,7 @@
"h": 8,
"w": 12,
"x": 12,
"y": 43
"y": 49
},
"id": 41,
"links": [],
@ -1754,8 +1996,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1857,8 +2098,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -1960,8 +2200,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2064,8 +2303,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2164,8 +2402,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2292,8 +2529,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2395,8 +2631,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2497,8 +2732,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2620,8 +2854,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2713,8 +2946,7 @@
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
"color": "green"
},
{
"color": "red",
@ -2776,9 +3008,9 @@
"list": [
{
"current": {
"selected": true,
"text": "VictoriaMetrics",
"value": "VictoriaMetrics"
"selected": false,
"text": "VictoriaMetrics - cluster",
"value": "VictoriaMetrics - cluster"
},
"hide": 0,
"includeAll": false,
@ -2862,7 +3094,7 @@
},
{
"current": {
"selected": true,
"selected": false,
"text": "5",
"value": "5"
},

View File

@ -33,6 +33,7 @@ The following `tip` changes can be tested by building VictoriaMetrics components
* FEATURE: [vmctl](https://docs.victoriametrics.com/vmctl.html): do not add `/api/v1/read` suffix to remote read storage address defined by `--remote-read-src-addr` if a `--remote-read-disable-path-append` command-line flag is set. This allows overriding the path of the remote-read API via `--remote-read-src-addr`. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4655).
* FEATURE: [vmui](https://docs.victoriametrics.com/#vmui): add warning in query field of vmui for partial data responses. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4721).
* FEATURE: [Official Grafana dashboards for VictoriaMetrics](https://grafana.com/orgs/victoriametrics): add `Concurrent inserts` panel to vmagent's dashboard. The new panel is supposed to show whether the number of concurrent inserts processed by vmagent isn't reaching the limit.
* FEATURE: [Official Grafana dashboards for VictoriaMetrics](https://grafana.com/orgs/victoriametrics): add panels for absolute Mem and CPU usage by vmalert. See related issue [here](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4627).
* FEATURE: [Alerting rules for VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics/tree/master/deployment/docker#alerts): `ConcurrentFlushesHitTheLimit` alerting rule was moved from [single-server](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts.yml) and [cluster](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-cluster.yml) alerts to the [list of "health" alerts](https://github.com/VictoriaMetrics/VictoriaMetrics/blob/master/deployment/docker/alerts-health.yml) as it could be related to many VictoriaMetrics components.
* BUGFIX: [vmagent](https://docs.victoriametrics.com/vmagent.html): use local scrape timestamps for the scraped metrics unless `honor_timestamps: true` option is explicitly set at [scrape_config](https://docs.victoriametrics.com/sd_configs.html#scrape_configs). This fixes gaps for metrics collected from [cadvisor](https://github.com/google/cadvisor) or similar exporters, which export metrics with invalid timestamps. See [this issue](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697) and [this comment](https://github.com/VictoriaMetrics/VictoriaMetrics/issues/4697#issuecomment-1654614799) for details. The issue has been introduced in [v1.68.0](#v1680).