{ "__inputs": [], "__requires": [ { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "9.0.3" }, { "type": "panel", "id": "graph", "name": "Graph (old)", "version": "" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }, { "type": "panel", "id": "stat", "name": "Stat", "version": "" }, { "type": "panel", "id": "table", "name": "Table", "version": "" }, { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "datasource", "uid": "grafana" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "target": { "limit": 100, "matchAny": false, "tags": [], "type": "dashboard" }, "type": "dashboard" } ] }, "description": "Overview for VictoriaMetrics vmalert v1.73.0 or higher", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, "id": null, "iteration": 1663341746917, "links": [ { "asDropdown": false, "icon": "external link", "includeVars": false, "keepTime": false, "tags": [], "targetBlank": true, "title": "vmalert docs", "tooltip": "", "type": "link", "url": "https://docs.victoriametrics.com/vmalert.html" }, { "asDropdown": false, "icon": "external link", "includeVars": false, "keepTime": false, "tags": [], "targetBlank": true, "title": "Found a bug?", "tooltip": "", "type": "link", "url": " https://github.com/VictoriaMetrics/VictoriaMetrics/issues" }, { "asDropdown": false, "icon": "external link", "includeVars": false, "keepTime": false, "tags": [], "targetBlank": true, "title": "New releases", "tooltip": "", "type": "link", "url": " https://github.com/VictoriaMetrics/VictoriaMetrics/releases" } ], "liveNow": false, "panels": [ { "collapsed": false, "datasource": { "type": "datasource", "uid": "grafana" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 11, "panels": [], "targets": [ { "datasource": { "type": "datasource", "uid": "grafana" }, "refId": "A" } ], "title": "General ($instance)", "type": "row" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows if the last configuration update was successful. \"Not Ok\" means there was an unsuccessful attempt to update the configuration due to some error. Check the log for details.", "fieldConfig": { "defaults": { "mappings": [ { "options": { "0": { "color": "green", "index": 0, "text": "Ok" } }, "type": "value" }, { "options": { "from": 1, "result": { "color": "red", "index": 1, "text": "Not Ok" }, "to": 999999 }, "type": "range" } ], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 3, "x": 0, "y": 1 }, "id": 6, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "9.0.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "count(vmalert_config_last_reload_successful{job=~\"$job\", instance=~\"$instance\"} < 1 ) or 0", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Config error", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the total number of errors generated by recording/alerting rules for selected instances and groups.", "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 1 } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 3, "y": 1 }, "id": 8, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "9.0.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "(sum(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) or vector(0)) + \n(sum(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) or vector(0))", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Errors", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the total number of loaded alerting rules across selected instances and groups.", "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 7, "y": 1 }, "id": 9, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "9.0.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "count(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"})", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Alerting rules", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the total number of loaded recording rules across selected instances and groups.", "fieldConfig": { "defaults": { "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] } }, "overrides": [] }, "gridPos": { "h": 3, "w": 4, "x": 11, "y": 1 }, "id": 7, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ "last" ], "fields": "", "values": false }, "text": {}, "textMode": "auto" }, "pluginVersion": "9.0.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "count(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"})", "interval": "", "legendFormat": "", "refId": "A" } ], "title": "Recording rules", "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "custom": { "align": "auto", "displayMode": "auto", "inspect": false, "minWidth": 50 }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] } }, "overrides": [ { "matcher": { "id": "byName", "options": "Time" }, "properties": [ { "id": "custom.hidden", "value": true } ] }, { "matcher": { "id": "byName", "options": "Value" }, "properties": [ { "id": "displayName", "value": "Count" } ] } ] }, "gridPos": { "h": 4, "w": 9, "x": 0, "y": 4 }, "id": 45, "options": { "footer": { "fields": "", "reducer": [ "sum" ], "show": false }, "showHeader": true }, "pluginVersion": "9.0.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "editorMode": "code", "exemplar": false, "expr": "sum(vm_app_version{job=~\"$job\", instance=~\"$instance\"}) by(job, short_version)", "format": "table", "instant": true, "range": false, "refId": "A" } ], "type": "table" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 4, "w": 15, "x": 9, "y": 4 }, "hiddenSeries": false, "id": 4, "legend": { "alignAsTable": true, "avg": false, "current": true, "hideEmpty": false, "hideZero": false, "max": false, "min": false, "rightSide": true, "show": true, "sort": "current", "sortDesc": false, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null as zero", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": true, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sort(sum(up{job=~\"$job\", instance=~\"$instance\"}) by (job, instance))", "format": "time_series", "instant": false, "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Uptime", "tooltip": { "shared": true, "sort": 1, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:170", "decimals": 0, "format": "short", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:171", "format": "short", "label": "", "logBase": 1, "show": true } ], "yaxis": { "align": false, "alignLevel": 2 } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the number of fired alerts by instance.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, "hiddenSeries": false, "id": 15, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(increase(vmalert_alerts_fired_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance)", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Alerts fired total", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:62", "format": "short", "logBase": 1, "show": true }, { "$$hashKey": "object:63", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Average evaluation duration by group. Basically means how long it takes to execute all the rules per each group.", "fieldConfig": { "defaults": { "unit": "s" }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, "hiddenSeries": false, "id": 23, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": false }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(rate(vmalert_iteration_duration_seconds_sum{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(group) / \nsum(rate(vmalert_iteration_duration_seconds_count{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(group)", "interval": "", "legendFormat": "{{group}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Groups avg evaluation duration ($group)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "s", "logBase": 1, "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows how many requests (executions) per second vmalert sends to the configured datasource.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, "hiddenSeries": false, "id": 24, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(rate(vmalert_execution_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Rules execution rate ($instance)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:182", "format": "short", "logBase": 1, "show": true }, { "$$hashKey": "object:183", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the error rate while executing configured rules. Non-zero value means there are some issues with existing rules. Check the logs to get more details.", "fill": 1, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, "hiddenSeries": false, "id": 25, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(increase(vmalert_execution_errors_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance) > 0", "interval": "", "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Rules execution errors ($instance)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:244", "format": "short", "logBase": 1, "show": true }, { "$$hashKey": "object:245", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "collapsed": true, "datasource": { "type": "datasource", "uid": "grafana" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 }, "id": 17, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the current active (firing) alerting rules per group.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 25 }, "hiddenSeries": false, "id": 14, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(vmalert_alerts_firing{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0", "interval": "", "legendFormat": "{{alertname}} ({{group}})", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Active ($group)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "logBase": 1, "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the events when rule execution resulted into an error. Check the logs for more details.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 25 }, "hiddenSeries": false, "id": 13, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(vmalert_alerting_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0", "interval": "", "legendFormat": "{{alertname}} ({{group}})", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Errors ($group)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "logBase": 1, "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the current pending alerting rules per group.\nBy pending means the rule which remains active less than configured `for` parameter.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 33 }, "hiddenSeries": false, "id": 20, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(vmalert_alerts_pending{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, alertname) > 0", "interval": "", "legendFormat": "{{alertname}} ({{group}})", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Pending ($group)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "logBase": 1, "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows how many alerts are sent to Alertmanager per second. Only active alerts are sent.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 33 }, "hiddenSeries": false, "id": 26, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(rate(vmalert_alerts_sent_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance, addr) > 0", "interval": "", "legendFormat": "{{instance}} => {{addr}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Requests rate to Alertmanager ($group)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:229", "format": "short", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:230", "format": "short", "logBase": 1, "min": "0", "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the error rate for the attempts to send alerts to Alertmanager. If not zero it means there issues on attempt to send notification to Alertmanager and some alerts may be not delivered properly. Check the logs for more details.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 41 }, "hiddenSeries": false, "id": 32, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(rate(vmalert_alerts_send_errors_total{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}[$__rate_interval])) by(instance, addr) > 0", "interval": "", "legendFormat": "{{instance}} => {{addr}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Errors rate to Alertmanager ($group)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:229", "format": "short", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:230", "format": "short", "logBase": 1, "min": "0", "show": true } ], "yaxis": { "align": false } } ], "targets": [ { "datasource": { "type": "datasource", "uid": "grafana" }, "refId": "A" } ], "title": "Alerting rules ($instance)", "type": "row" }, { "collapsed": true, "datasource": { "type": "datasource", "uid": "grafana" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 25 }, "id": 28, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the top 10 recording rules which generate the most of samples. Each generated sample is basically a time series which then ingested into configured remote storage. Rules with high numbers may cause the most pressure on the remote database and become a source of too high cardinality.", "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 50 }, "hiddenSeries": false, "id": 31, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "topk(10, sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) > 0)", "interval": "", "legendFormat": "{{recording}} ({{group}})", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Top 10 rules by produced samples ($group)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "logBase": 1, "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the rules which do not produce any samples during the evaluation. Usually it means that such rules are misconfigured, since they give no output during the evaluation.\nPlease check if rule's expression is correct and it is working as expected.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": true, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 50 }, "id": 33, "options": { "legend": { "calcs": [ "max", "lastNotNull", "mean" ], "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single", "sort": "none" } }, "pluginVersion": "8.0.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(vmalert_recording_rules_last_evaluation_samples{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) < 1", "interval": "", "legendFormat": "{{recording}} ({{group}})", "refId": "A" } ], "title": "Rules with 0 produced samples ($group)", "type": "timeseries" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 58 }, "hiddenSeries": false, "id": 30, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(vmalert_recording_rules_error{job=~\"$job\", instance=~\"$instance\", group=~\"$group\"}) by(group, recording) > 0", "interval": "", "legendFormat": "{{recording}} ({{group}})", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Errors ($group)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "short", "logBase": 1, "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } } ], "targets": [ { "datasource": { "type": "datasource", "uid": "grafana" }, "refId": "A" } ], "title": "Recording rules ($instance)", "type": "row" }, { "collapsed": true, "datasource": { "type": "datasource", "uid": "grafana" }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 26 }, "id": 43, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Shows the CPU usage percentage per vmalert instance. \nIf you think that usage is abnormal or unexpected pls file an issue and attach CPU profile if possible.", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 67 }, "hiddenSeries": false, "id": 35, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ { "targetBlank": true, "title": "Profiling", "url": "https://docs.victoriametrics.com/vmagent.html#profiling" } ], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(rate(process_cpu_seconds_total{job=~\"$job\", instance=~\"$instance\"}[$__rate_interval])) by(instance) / min(process_cpu_cores_available{job=~\"$job\", instance=~\"$instance\"}) by(instance)", "format": "time_series", "interval": "", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "CPU ($instance)", "tooltip": { "shared": true, "sort": 2, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:473", "format": "percentunit", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:474", "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Amount of used memory\n\nResident memory shows share which can be freed by OS when needed.\n\nAnonymous shows share for memory allocated by the process itself. This share cannot be freed by the OS, so it must be taken into account by OOM killer.\n\nIf you think that usage is abnormal or unexpected, please file an issue and attach memory profile if possible.", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 67 }, "hiddenSeries": false, "id": 37, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [ { "targetBlank": true, "title": "Profiling", "url": "https://docs.victoriametrics.com/vmagent.html#profiling" } ], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(process_resident_memory_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "interval": "", "legendFormat": "resident {{instance}}", "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "exemplar": false, "expr": "sum(process_resident_memory_anon_bytes{job=~\"$job\", instance=~\"$instance\"}) by (instance)", "hide": false, "interval": "", "legendFormat": "anonymous {{instance}}", "refId": "B" } ], "thresholds": [], "timeRegions": [], "title": "Memory usage ($instance)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "format": "bytes", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "description": "Panel shows the percentage of open file descriptors in the OS.\nReaching the limit of open files can cause various issues and must be prevented.\n\nSee how to change limits here https://medium.com/@muhammadtriwibowo/set-permanently-ulimit-n-open-files-in-ubuntu-4d61064429a", "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 75 }, "hiddenSeries": false, "id": 39, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "editorMode": "code", "exemplar": false, "expr": "sum(process_open_fds{job=~\"$job\", instance=~\"$instance\"}) by (instance) \n/\nmin(process_max_fds{job=~\"$job\", instance=~\"$instance\"}) by(instance)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "open {{instance}}", "range": true, "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Open FDs ($instance)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:540", "decimals": 3, "format": "percentunit", "logBase": 1, "min": "0", "show": true }, { "$$hashKey": "object:541", "format": "short", "logBase": 1, "min": "0", "show": true } ], "yaxis": { "align": false } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": { "type": "prometheus", "uid": "$ds" }, "fieldConfig": { "defaults": { "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 75 }, "hiddenSeries": false, "id": 41, "legend": { "alignAsTable": true, "avg": true, "current": true, "max": true, "min": false, "show": true, "sort": "current", "sortDesc": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "links": [], "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "9.0.3", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "datasource": { "type": "prometheus", "uid": "$ds" }, "expr": "sum(go_goroutines{job=~\"$job\", instance=~\"$instance\"}) by(instance)", "format": "time_series", "interval": "", "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [], "timeRegions": [], "title": "Goroutines ($instance)", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "mode": "time", "show": true, "values": [] }, "yaxes": [ { "decimals": 0, "format": "short", "logBase": 1, "min": "0", "show": true }, { "format": "short", "logBase": 1, "show": true } ], "yaxis": { "align": false } } ], "targets": [ { "datasource": { "type": "datasource", "uid": "grafana" }, "refId": "A" } ], "title": "Resource usage", "type": "row" } ], "refresh": false, "schemaVersion": 36, "style": "dark", "tags": [ "victoriametrics", "vmalert" ], "templating": { "list": [ { "current": { "selected": false, "text": "VictoriaMetrics", "value": "VictoriaMetrics" }, "hide": 0, "includeAll": false, "multi": false, "name": "ds", "options": [], "query": "prometheus", "queryValue": "", "refresh": 1, "regex": "", "skipUrlSync": false, "type": "datasource" }, { "current": {}, "datasource": { "type": "prometheus", "uid": "$ds" }, "definition": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)", "hide": 0, "includeAll": false, "multi": false, "name": "job", "options": [], "query": { "query": "label_values(vm_app_version{version=~\"^vmalert.*\"}, job)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" }, { "allValue": ".*", "current": {}, "datasource": { "type": "prometheus", "uid": "$ds" }, "definition": "label_values(vm_app_version{job=~\"$job\"}, instance)", "hide": 0, "includeAll": true, "multi": true, "name": "instance", "options": [], "query": { "query": "label_values(vm_app_version{job=~\"$job\"}, instance)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" }, { "allValue": ".*", "current": {}, "datasource": { "type": "prometheus", "uid": "$ds" }, "definition": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)", "hide": 0, "includeAll": true, "multi": true, "name": "group", "options": [], "query": { "query": "label_values(vmalert_iteration_duration_seconds{job=~\"$job\", instance=~\"$instance\"}, group)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" }, { "datasource": { "type": "prometheus", "uid": "$ds" }, "filters": [], "hide": 0, "name": "adhoc", "skipUrlSync": false, "type": "adhoc" } ] }, "time": { "from": "now-3h", "to": "now" }, "timepicker": {}, "timezone": "", "title": "VictoriaMetrics - vmalert", "uid": "LzldHAVnz", "version": 1, "weekStart": "" }