diff --git a/docs/node-mixin/dashboards/node.libsonnet b/docs/node-mixin/dashboards/node.libsonnet index ceaa3aec..5cec0fab 100644 --- a/docs/node-mixin/dashboards/node.libsonnet +++ b/docs/node-mixin/dashboards/node.libsonnet @@ -30,7 +30,7 @@ local gauge = promgrafonnet.gauge; ||| % $._config, legendFormat='{{cpu}}', intervalFactor=5, - interval='1m', + interval='5m', )); local systemLoad = @@ -101,17 +101,17 @@ local gauge = promgrafonnet.gauge; .addTarget(prometheus.target( 'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, legendFormat='{{device}} read', - interval='1m', + interval='5m', )) .addTarget(prometheus.target( 'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, legendFormat='{{device}} written', - interval='1m', + interval='5m', )) .addTarget(prometheus.target( 'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config, legendFormat='{{device}} io time', - interval='1m', + interval='5m', )) + { seriesOverrides: [ @@ -188,7 +188,7 @@ local gauge = promgrafonnet.gauge; .addTarget(prometheus.target( 'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, legendFormat='{{device}}', - interval='1m', + interval='5m', )); local networkTransmitted = @@ -203,7 +203,7 @@ local gauge = promgrafonnet.gauge; .addTarget(prometheus.target( 'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config, legendFormat='{{device}}', - interval='1m', + interval='5m', )); dashboard.new('Nodes', time_from='now-1h') diff --git a/docs/node-mixin/dashboards/use.libsonnet b/docs/node-mixin/dashboards/use.libsonnet index 02282a56..76eeed6e 100644 --- a/docs/node-mixin/dashboards/use.libsonnet +++ b/docs/node-mixin/dashboards/use.libsonnet @@ -12,7 +12,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('CPU Utilisation') + g.queryPanel(||| ( - instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s} + instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s} * instance:node_num_cpu:sum{%(nodeExporterSelector)s} ) @@ -47,7 +47,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('Memory Saturation (Major Page Faults)') + - g.queryPanel('instance:node_vmstat_pgmajfault:rate1m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + + g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) + g.stack + { yaxes: g.yaxes('rps') }, ) @@ -58,8 +58,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Utilisation (Bytes Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, - 'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, ], ['{{instance}} Receive', '{{instance}} Transmit'], legendLink, @@ -84,8 +84,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Saturation (Drops Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, - 'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, + 'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config, ], ['{{instance}} Receive', '{{instance}} Transmit'], legendLink, @@ -116,8 +116,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson // TODO: Does the partition by device make sense? Using the most utilized device per // instance might make more sense. g.queryPanel(||| - instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s})) + instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}} {{device}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -125,8 +125,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson .addPanel( g.panel('Disk IO Saturation') + g.queryPanel(||| - instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s} - / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s})) + instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s} + / scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s})) ||| % $._config, '{{instance}} {{device}}', legendLink) + g.stack + { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, @@ -156,7 +156,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.row('CPU') .addPanel( g.panel('CPU Utilisation') + - g.queryPanel('instance:node_cpu_utilisation:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + + g.queryPanel('instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') + { yaxes: g.yaxes('percentunit'), legend+: { show: false }, @@ -182,7 +182,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson ) .addPanel( g.panel('Memory Saturation (Major Page Faults)') + - g.queryPanel('instance:node_vmstat_pgmajfault:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') + + g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') + { yaxes: g.yaxes('short'), legend+: { show: false }, @@ -195,8 +195,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Utilisation (Bytes Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, - 'instance:node_network_transmit_bytes_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, ], ['Receive', 'Transmit'], ) + @@ -219,8 +219,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.panel('Net Saturation (Drops Receive/Transmit)') + g.queryPanel( [ - 'instance:node_network_receive_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, - 'instance:node_network_transmit_drop_excluding_lo:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, + 'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, ], ['Receive drops', 'Transmit drops'], ) + @@ -244,12 +244,12 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson g.row('Disk IO') .addPanel( g.panel('Disk IO Utilisation') + - g.queryPanel('instance_device:node_disk_io_time_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + + g.queryPanel('instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + { yaxes: g.yaxes('percentunit') }, ) .addPanel( g.panel('Disk IO Saturation') + - g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate1m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + + g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') + { yaxes: g.yaxes('percentunit') }, ) ) diff --git a/docs/node-mixin/rules/rules.libsonnet b/docs/node-mixin/rules/rules.libsonnet index 6b396e3b..dd9899af 100644 --- a/docs/node-mixin/rules/rules.libsonnet +++ b/docs/node-mixin/rules/rules.libsonnet @@ -17,10 +17,10 @@ }, { // CPU utilisation is % CPU is not idle. - record: 'instance:node_cpu_utilisation:rate1m', + record: 'instance:node_cpu_utilisation:rate5m', expr: ||| 1 - avg without (cpu, mode) ( - rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[1m]) + rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[5m]) ) ||| % $._config, }, @@ -50,55 +50,55 @@ ||| % $._config, }, { - record: 'instance:node_vmstat_pgmajfault:rate1m', + record: 'instance:node_vmstat_pgmajfault:rate5m', expr: ||| - rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[1m]) + rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m]) ||| % $._config, }, { // Disk utilisation (seconds spent, 1 second rate). - record: 'instance_device:node_disk_io_time_seconds:rate1m', + record: 'instance_device:node_disk_io_time_seconds:rate5m', expr: ||| - rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) + rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) ||| % $._config, }, { // Disk saturation (weighted seconds spent, 1 second rate). - record: 'instance_device:node_disk_io_time_weighted_seconds:rate1m', + record: 'instance_device:node_disk_io_time_weighted_seconds:rate5m', expr: ||| - rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[1m]) + rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m]) ||| % $._config, }, { - record: 'instance:node_network_receive_bytes_excluding_lo:rate1m', + record: 'instance:node_network_receive_bytes_excluding_lo:rate5m', expr: ||| sum without (device) ( - rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[1m]) + rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m]) ) ||| % $._config, }, { - record: 'instance:node_network_transmit_bytes_excluding_lo:rate1m', + record: 'instance:node_network_transmit_bytes_excluding_lo:rate5m', expr: ||| sum without (device) ( - rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[1m]) + rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m]) ) ||| % $._config, }, // TODO: Find out if those drops ever happen on modern switched networks. { - record: 'instance:node_network_receive_drop_excluding_lo:rate1m', + record: 'instance:node_network_receive_drop_excluding_lo:rate5m', expr: ||| sum without (device) ( - rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[1m]) + rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m]) ) ||| % $._config, }, { - record: 'instance:node_network_transmit_drop_excluding_lo:rate1m', + record: 'instance:node_network_transmit_drop_excluding_lo:rate5m', expr: ||| sum without (device) ( - rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[1m]) + rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m]) ) ||| % $._config, },