Make interval configurable

Signed-off-by: Johannes 'fish' Ziemke <github@freigeist.org>
This commit is contained in:
Johannes 'fish' Ziemke 2021-04-03 12:40:22 +02:00
parent 772335caa8
commit a5908bf82b
4 changed files with 42 additions and 40 deletions

View File

@ -53,5 +53,7 @@
fsSpaceAvailableWarningThreshold: 3,
grafana_prefix: '',
rateInterval: '5m',
},
}

View File

@ -30,7 +30,7 @@ local gauge = promgrafonnet.gauge;
||| % $._config,
legendFormat='{{cpu}}',
intervalFactor=5,
interval='5m',
interval='$__rate_interval',
));
local systemLoad =
@ -101,17 +101,17 @@ local gauge = promgrafonnet.gauge;
.addTarget(prometheus.target(
'rate(node_disk_read_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} read',
interval='5m',
interval='$__rate_interval',
))
.addTarget(prometheus.target(
'rate(node_disk_written_bytes_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} written',
interval='5m',
interval='$__rate_interval',
))
.addTarget(prometheus.target(
'rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, instance="$instance", %(diskDeviceSelector)s}[$__interval])' % $._config,
legendFormat='{{device}} io time',
interval='5m',
interval='$__rate_interval',
)) +
{
seriesOverrides: [
@ -188,7 +188,7 @@ local gauge = promgrafonnet.gauge;
.addTarget(prometheus.target(
'rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
legendFormat='{{device}}',
interval='5m',
interval='$__rate_interval',
));
local networkTransmitted =
@ -203,7 +203,7 @@ local gauge = promgrafonnet.gauge;
.addTarget(prometheus.target(
'rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, instance="$instance", device!="lo"}[$__interval])' % $._config,
legendFormat='{{device}}',
interval='5m',
interval='$__rate_interval',
));
dashboard.new('Nodes', time_from='now-1h')

View File

@ -12,7 +12,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('CPU Utilisation') +
g.queryPanel(|||
(
instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s}
instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s}
*
instance:node_num_cpu:sum{%(nodeExporterSelector)s}
)
@ -47,7 +47,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
)
.addPanel(
g.panel('Memory Saturation (Major Page Faults)') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
g.queryPanel('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config, '{{instance}}', legendLink) +
g.stack +
{ yaxes: g.yaxes('rps') },
)
@ -58,8 +58,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Utilisation (Bytes Receive/Transmit)') +
g.queryPanel(
[
'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
],
['{{instance}} Receive', '{{instance}} Transmit'],
legendLink,
@ -84,8 +84,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Saturation (Drops Receive/Transmit)') +
g.queryPanel(
[
'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s}' % $._config,
],
['{{instance}} Receive', '{{instance}} Transmit'],
legendLink,
@ -116,8 +116,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
// TODO: Does the partition by device make sense? Using the most utilized device per
// instance might make more sense.
g.queryPanel(|||
instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s}))
instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}} {{device}}', legendLink) +
g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -125,8 +125,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
.addPanel(
g.panel('Disk IO Saturation') +
g.queryPanel(|||
instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s}))
instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}
/ scalar(count(instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s}))
||| % $._config, '{{instance}} {{device}}', legendLink) +
g.stack +
{ yaxes: g.yaxes({ format: 'percentunit', max: 1 }) },
@ -156,7 +156,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.row('CPU')
.addPanel(
g.panel('CPU Utilisation') +
g.queryPanel('instance:node_cpu_utilisation:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
g.queryPanel('instance:node_cpu_utilisation:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Utilisation') +
{
yaxes: g.yaxes('percentunit'),
legend+: { show: false },
@ -182,7 +182,7 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
)
.addPanel(
g.panel('Memory Saturation (Major Page Faults)') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') +
g.queryPanel('instance:node_vmstat_pgmajfault:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, 'Major page faults') +
{
yaxes: g.yaxes('short'),
legend+: { show: false },
@ -195,8 +195,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Utilisation (Bytes Receive/Transmit)') +
g.queryPanel(
[
'instance:node_network_receive_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_transmit_bytes_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
],
['Receive', 'Transmit'],
) +
@ -219,8 +219,8 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.panel('Net Saturation (Drops Receive/Transmit)') +
g.queryPanel(
[
'instance:node_network_receive_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_transmit_drop_excluding_lo:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config,
],
['Receive drops', 'Transmit drops'],
) +
@ -244,12 +244,12 @@ local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libson
g.row('Disk IO')
.addPanel(
g.panel('Disk IO Utilisation') +
g.queryPanel('instance_device:node_disk_io_time_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
g.queryPanel('instance_device:node_disk_io_time_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
{ yaxes: g.yaxes('percentunit') },
)
.addPanel(
g.panel('Disk IO Saturation') +
g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate5m{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
g.queryPanel('instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s{%(nodeExporterSelector)s, instance="$instance"}' % $._config, '{{device}}') +
{ yaxes: g.yaxes('percentunit') },
)
)

View File

@ -17,10 +17,10 @@
},
{
// CPU utilisation is % CPU is not idle.
record: 'instance:node_cpu_utilisation:rate5m',
record: 'instance:node_cpu_utilisation:rate%(rateInterval)s' % $._config,
expr: |||
1 - avg without (cpu, mode) (
rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[5m])
rate(node_cpu_seconds_total{%(nodeExporterSelector)s, mode="idle"}[%(rateInterval)s])
)
||| % $._config,
},
@ -50,55 +50,55 @@
||| % $._config,
},
{
record: 'instance:node_vmstat_pgmajfault:rate5m',
record: 'instance:node_vmstat_pgmajfault:rate%(rateInterval)s' % $._config,
expr: |||
rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[5m])
rate(node_vmstat_pgmajfault{%(nodeExporterSelector)s}[%(rateInterval)s])
||| % $._config,
},
{
// Disk utilisation (seconds spent, 1 second rate).
record: 'instance_device:node_disk_io_time_seconds:rate5m',
record: 'instance_device:node_disk_io_time_seconds:rate%(rateInterval)s' % $._config,
expr: |||
rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m])
rate(node_disk_io_time_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s])
||| % $._config,
},
{
// Disk saturation (weighted seconds spent, 1 second rate).
record: 'instance_device:node_disk_io_time_weighted_seconds:rate5m',
record: 'instance_device:node_disk_io_time_weighted_seconds:rate%(rateInterval)s' % $._config,
expr: |||
rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[5m])
rate(node_disk_io_time_weighted_seconds_total{%(nodeExporterSelector)s, %(diskDeviceSelector)s}[%(rateInterval)s])
||| % $._config,
},
{
record: 'instance:node_network_receive_bytes_excluding_lo:rate5m',
record: 'instance:node_network_receive_bytes_excluding_lo:rate%(rateInterval)s' % $._config,
expr: |||
sum without (device) (
rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m])
rate(node_network_receive_bytes_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
)
||| % $._config,
},
{
record: 'instance:node_network_transmit_bytes_excluding_lo:rate5m',
record: 'instance:node_network_transmit_bytes_excluding_lo:rate%(rateInterval)s' % $._config,
expr: |||
sum without (device) (
rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[5m])
rate(node_network_transmit_bytes_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
)
||| % $._config,
},
// TODO: Find out if those drops ever happen on modern switched networks.
{
record: 'instance:node_network_receive_drop_excluding_lo:rate5m',
record: 'instance:node_network_receive_drop_excluding_lo:rate%(rateInterval)s' % $._config,
expr: |||
sum without (device) (
rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m])
rate(node_network_receive_drop_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
)
||| % $._config,
},
{
record: 'instance:node_network_transmit_drop_excluding_lo:rate5m',
record: 'instance:node_network_transmit_drop_excluding_lo:rate%(rateInterval)s' % $._config,
expr: |||
sum without (device) (
rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[5m])
rate(node_network_transmit_drop_total{%(nodeExporterSelector)s, device!="lo"}[%(rateInterval)s])
)
||| % $._config,
},