Dashboards upd (#3942)

* dashboards/cluser: use `quantile` since `median` isn't supported by PromQL

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* dashboards/*: add `restarts` annotation to show when there were restarts

The cluster's annotation query is aggregated `by job`,
while vmagent/vmalert are aggregated `by job, instance`.
This is because cluster dashboard can contains too many instances
and annotation could become too noisy.

Signed-off-by: hagen1778 <roman@victoriametrics.com>

* dashboards/*: support instance filter in Version annotation

Signed-off-by: hagen1778 <roman@victoriametrics.com>

---------

Signed-off-by: hagen1778 <roman@victoriametrics.com>
This commit is contained in:
Roman Khavronenko 2023-03-10 17:13:19 +01:00 committed by GitHub
parent f7a7eb8f3e
commit 3eebe52a06
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 10 deletions

View File

@ -76,12 +76,24 @@
"uid": "$ds" "uid": "$ds"
}, },
"enable": true, "enable": true,
"expr": "sum(vm_app_version{job=~\"$job\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\"} offset 20m) by(short_version))", "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(short_version))",
"hide": true, "hide": true,
"iconColor": "dark-blue", "iconColor": "dark-blue",
"name": "version change", "name": "version change",
"textFormat": "{{short_version}}", "textFormat": "{{short_version}}",
"titleFormat": "Version change" "titleFormat": "Version change"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"enable": true,
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"})) by(job)",
"hide": true,
"iconColor": "dark-yellow",
"name": "restarts",
"textFormat": "{{job}} restarted"
} }
] ]
}, },
@ -4642,7 +4654,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": false, "exemplar": false,
"expr": "median(\n rate(process_cpu_seconds_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_storage\", instance=~\"$instance\"}\n)", "expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_storage\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -4788,7 +4800,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "median(\n max_over_time(process_resident_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}\n)", "expr": "quantile(0.5,\n max_over_time(process_resident_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_storage\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -5381,7 +5393,7 @@
"uid": "$ds" "uid": "$ds"
}, },
"editorMode": "code", "editorMode": "code",
"expr": "median(\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)", "expr": "quantile(0.5,\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) /\n (\n sum(vm_free_disk_space_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance) +\n sum(vm_data_size_bytes{job=~\"$job\", instance=~\"$instance\"}) by(job, instance)\n ) \n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"intervalFactor": 1, "intervalFactor": 1,
@ -6255,7 +6267,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "median(\n rate(process_cpu_seconds_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_select\", instance=~\"$instance\"}\n)", "expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_select\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -6399,7 +6411,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "median(\n max_over_time(process_resident_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}\n)", "expr": "quantile(0.5,\n max_over_time(process_resident_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_select\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -7364,7 +7376,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "median(\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)", "expr": "quantile(0.5,\n rate(process_cpu_seconds_total{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n process_cpu_cores_available{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",
@ -7508,7 +7520,7 @@
}, },
"editorMode": "code", "editorMode": "code",
"exemplar": true, "exemplar": true,
"expr": "median(\n max_over_time(process_resident_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)", "expr": "quantile(0.5,\n max_over_time(process_resident_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}[$__rate_interval])\n /\n vm_available_memory_bytes{job=~\"$job_insert\", instance=~\"$instance\"}\n)",
"format": "time_series", "format": "time_series",
"hide": false, "hide": false,
"interval": "", "interval": "",

View File

@ -65,12 +65,24 @@
"uid": "$ds" "uid": "$ds"
}, },
"enable": true, "enable": true,
"expr": "sum(vm_app_version{job=~\"$job\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\"} offset 20m) by(short_version))", "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(short_version))",
"hide": true, "hide": true,
"iconColor": "dark-blue", "iconColor": "dark-blue",
"name": "version", "name": "version",
"textFormat": "{{short_version}}", "textFormat": "{{short_version}}",
"titleFormat": "Version change" "titleFormat": "Version change"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"enable": true,
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"})) by(job, instance)",
"hide": true,
"iconColor": "dark-yellow",
"name": "restarts",
"textFormat": "{{job}}:{{instance}} restarted"
} }
] ]
}, },

View File

@ -59,12 +59,24 @@
"uid": "$ds" "uid": "$ds"
}, },
"enable": true, "enable": true,
"expr": "sum(vm_app_version{job=~\"$job\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\"} offset 20m) by(short_version))", "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(short_version) unless (sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"} offset 20m) by(short_version))",
"hide": true, "hide": true,
"iconColor": "dark-blue", "iconColor": "dark-blue",
"name": "version", "name": "version",
"textFormat": "{{short_version}}", "textFormat": "{{short_version}}",
"titleFormat": "Version change" "titleFormat": "Version change"
},
{
"datasource": {
"type": "prometheus",
"uid": "$ds"
},
"enable": true,
"expr": "sum(changes(vm_app_start_timestamp{job=~\"$job\", instance=~\"$instance\"})) by(job, instance)",
"hide": true,
"iconColor": "dark-yellow",
"name": "restarts",
"textFormat": "{{job}}:{{instance}} restarted"
} }
] ]
}, },