diff --git a/docs/node-mixin/alerts/alerts.libsonnet b/docs/node-mixin/alerts/alerts.libsonnet index 0cdc16e2..b95b1c58 100644 --- a/docs/node-mixin/alerts/alerts.libsonnet +++ b/docs/node-mixin/alerts/alerts.libsonnet @@ -8,7 +8,7 @@ alert: 'NodeFilesystemSpaceFillingUp', expr: ||| ( - node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 40 + node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpWarningThreshold)d and predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 24*60*60) < 0 and @@ -28,7 +28,7 @@ alert: 'NodeFilesystemSpaceFillingUp', expr: ||| ( - node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < 20 + node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s} / node_filesystem_size_bytes{%(nodeExporterSelector)s,%(fsSelector)s} * 100 < %(fsSpaceFillingUpCriticalThreshold)d and predict_linear(node_filesystem_avail_bytes{%(nodeExporterSelector)s,%(fsSelector)s}[6h], 4*60*60) < 0 and diff --git a/docs/node-mixin/config.libsonnet b/docs/node-mixin/config.libsonnet index fdea71d2..c06252cb 100644 --- a/docs/node-mixin/config.libsonnet +++ b/docs/node-mixin/config.libsonnet @@ -35,6 +35,18 @@ // just a warning for K8s nodes. nodeCriticalSeverity: 'critical', + // Available disk space (%) thresholds on which to trigger the + // 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk + // usage grows in a way that it is predicted to run out in 4h or 1d + // and if the provided thresholds have been reached right now. + // In some cases you'll want to adjust these, e.g. by default Kubernetes + // runs the image garbage collection when the disk usage reaches 85% + // of its available space. In that case, you'll want to reduce the + // critical threshold below to something like 14 or 15, otherwise + // the alert could fire under normal node usage. + fsSpaceFillingUpWarningThreshold: 40, + fsSpaceFillingUpCriticalThreshold: 20, + grafana_prefix: '', }, }