fix(docs): add node(Warning|Critical)WindowHours to node-mixin

Signed-off-by: Duologic <jeroen@simplistic.be>
This commit is contained in:
Duologic 2022-09-12 15:11:50 +02:00 committed by Johannes 'fish' Ziemke
parent d0c1d00d18
commit 2fccdf4e17
2 changed files with 18 additions and 8 deletions

View File

@ -10,7 +10,7 @@
( (
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d
and and
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[%(fsSpaceFillingUpPredictionWindow)s], 24*60*60) < 0 predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[%(fsSpaceFillingUpPredictionWindow)s], %(nodeWarningWindowHours)s*60*60) < 0
and and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
) )
@ -20,7 +20,7 @@
severity: 'warning', severity: 'warning',
}, },
annotations: { annotations: {
summary: 'Filesystem is predicted to run out of space within the next 24 hours.', summary: 'Filesystem is predicted to run out of space within the next %(nodeWarningWindowHours)s hours.' % $._config,
description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.', description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up.',
}, },
}, },
@ -30,7 +30,7 @@
( (
node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d
and and
predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0 predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], %(nodeCriticalWindowHours)s*60*60) < 0
and and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
) )
@ -40,7 +40,7 @@
severity: '%(nodeCriticalSeverity)s' % $._config, severity: '%(nodeCriticalSeverity)s' % $._config,
}, },
annotations: { annotations: {
summary: 'Filesystem is predicted to run out of space within the next 4 hours.', summary: 'Filesystem is predicted to run out of space within the next %(nodeCriticalWindowHours)s hours.' % $._config,
description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.', description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available space left and is filling up fast.',
}, },
}, },
@ -86,7 +86,7 @@
( (
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 40 node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 40
and and
predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 24*60*60) < 0 predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], %(nodeWarningWindowHours)s*60*60) < 0
and and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
) )
@ -96,7 +96,7 @@
severity: 'warning', severity: 'warning',
}, },
annotations: { annotations: {
summary: 'Filesystem is predicted to run out of inodes within the next 24 hours.', summary: 'Filesystem is predicted to run out of inodes within the next %(nodeWarningWindowHours)s hours.' % $._config,
description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.', description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up.',
}, },
}, },
@ -106,7 +106,7 @@
( (
node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 20 node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} / node_filesystem_files{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} * 100 < 20
and and
predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], 4*60*60) < 0 predict_linear(node_filesystem_files_free{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s}[6h], %(nodeCriticalWindowHours)s*60*60) < 0
and and
node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0 node_filesystem_readonly{%(nodeExporterSelector)s,%(fsSelector)s,%(fsMountpointSelector)s} == 0
) )
@ -116,7 +116,7 @@
severity: '%(nodeCriticalSeverity)s' % $._config, severity: '%(nodeCriticalSeverity)s' % $._config,
}, },
annotations: { annotations: {
summary: 'Filesystem is predicted to run out of inodes within the next 4 hours.', summary: 'Filesystem is predicted to run out of inodes within the next %(nodeCriticalWindowHours)s hours.' % $._config,
description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.', description: 'Filesystem on {{ $labels.device }}, mounted on {{ $labels.mountpoint }}, at {{ $labels.instance }} has only {{ printf "%.2f" $value }}% available inodes left and is filling up fast.',
}, },
}, },

View File

@ -50,6 +50,16 @@
// 'NodeSystemSaturation' alert. // 'NodeSystemSaturation' alert.
systemSaturationPerCoreThreshold: 2, systemSaturationPerCoreThreshold: 2,
// Some of the alerts use predict_linear() to fire ahead of time in order to
// prevent unrecoverable situations (e.g. no more disk space). However, the
// node may have automatic processes (cronjobs) in place that already prevent
// such situations within a certain time window, and that window may not
// align with the default time window of these alerts. This mismatch can
// cause the alerts to start flapping. Reducing the time window (in hours)
// gives the system more time to resolve the situation before an alert fires.
nodeWarningWindowHours: '24',
nodeCriticalWindowHours: '4',
// Available disk space (%) thresholds on which to trigger the // Available disk space (%) thresholds on which to trigger the
// 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk
// usage grows in a way that it is predicted to run out in 4h or 1d // usage grows in a way that it is predicted to run out in 4h or 1d