2018-05-08 12:10:29 +02:00
|
|
|
{
|
|
|
|
// Hidden `_config` field; the `+::` form merges these values into any
// inherited `_config` object instead of replacing it.
_config+:: {
|
|
|
|
// Selectors are inserted between {} in Prometheus queries.
|
2019-07-16 19:34:27 +02:00
|
|
|
|
2019-10-30 22:52:36 +01:00
|
|
|
// Select the metrics coming from the node exporter. Note that all
|
|
|
|
// the selected metrics are shown stacked on top of each other in
|
|
|
|
// the 'USE Method / Cluster' dashboard. Consider disabling that
|
|
|
|
// dashboard if mixing up all those metrics in the same dashboard
|
|
|
|
// doesn't make sense (e.g. because they are coming from different
|
|
|
|
// clusters).
|
2019-07-16 21:18:17 +02:00
|
|
|
nodeExporterSelector: 'job="node"',
|
2018-05-08 12:10:29 +02:00
|
|
|
|
2019-07-17 23:54:31 +02:00
|
|
|
// Select the fstype for filesystem-related queries. If left
|
|
|
|
// empty, all filesystems are selected. If you have unusual
|
|
|
|
// filesystems you don't want to include in dashboards and
|
|
|
|
// alerting, you can exclude them here, e.g. 'fstype!="tmpfs"'.
|
2019-09-12 13:57:19 +02:00
|
|
|
fsSelector: 'fstype!=""',
|
2018-05-08 12:10:29 +02:00
|
|
|
|
2022-10-20 13:06:31 +02:00
|
|
|
// Select the mountpoint for filesystem-related queries. If left
|
|
|
|
// empty, all mountpoints are selected. For example if you have a
|
|
|
|
// special purpose tmpfs instance that has a fixed size and will
|
|
|
|
// always be 100% full, but you still want alerts and dashboards for
|
|
|
|
// other tmpfs instances, you can exclude those by mountpoint prefix
|
|
|
|
// like so: 'mountpoint!~"/var/lib/foo.*"'.
|
|
|
|
fsMountpointSelector: 'mountpoint!=""',
|
|
|
|
|
2019-07-17 23:54:31 +02:00
|
|
|
// Select the device for disk-related queries. If left empty, all
|
|
|
|
// devices are selected. If you have unusual devices you don't
|
|
|
|
// want to include in dashboards and alerting, you can exclude
|
|
|
|
// them here, e.g. 'device!="tmpfs"'.
|
2019-09-12 13:57:19 +02:00
|
|
|
diskDeviceSelector: 'device!=""',
|
2019-07-16 19:34:27 +02:00
|
|
|
|
2019-08-14 22:24:24 +02:00
|
|
|
// Some of the alerts are meant to fire if a critical failure of a
|
|
|
|
// node is imminent (e.g. the disk is about to run full). In a
|
|
|
|
// true “cloud native” setup, failures of a single node should be
|
|
|
|
// tolerated. Hence, even imminent failure of a single node is no
|
|
|
|
// reason to create a paging alert. However, in practice there are
|
|
|
|
// still many situations where operators like to get paged in time
|
|
|
|
// before a node runs out of disk space. nodeCriticalSeverity can
|
|
|
|
// be set to the desired severity for this kind of alert. This
|
|
|
|
// can even be templated to depend on labels of the node, e.g. you
|
|
|
|
// could make this critical for traditional database masters but
|
|
|
|
// just a warning for K8s nodes.
|
|
|
|
nodeCriticalSeverity: 'critical',
|
|
|
|
|
2020-03-02 16:24:51 +01:00
|
|
|
// Available disk space (%) thresholds on which to trigger the
|
|
|
|
// 'NodeFilesystemSpaceFillingUp' alerts. These alerts fire if the disk
|
|
|
|
// usage grows in a way that it is predicted to run out in 4h or 1d
|
|
|
|
// and if the provided thresholds have been reached right now.
|
|
|
|
// In some cases you'll want to adjust these, e.g. by default Kubernetes
|
|
|
|
// runs the image garbage collection when the disk usage reaches 85%
|
|
|
|
// of its available space. In that case, you'll want to reduce the
|
|
|
|
// critical threshold below to something like 14 or 15, otherwise
|
|
|
|
// the alert could fire under normal node usage.
|
|
|
|
fsSpaceFillingUpWarningThreshold: 40,
|
|
|
|
fsSpaceFillingUpCriticalThreshold: 20,
|
|
|
|
|
2020-09-18 11:28:32 +02:00
|
|
|
// Available disk space (%) thresholds on which to trigger the
|
|
|
|
// 'NodeFilesystemAlmostOutOfSpace' alerts.
|
2022-05-10 14:50:20 +02:00
|
|
|
fsSpaceAvailableWarningThreshold: 5,
|
|
|
|
fsSpaceAvailableCriticalThreshold: 3,
|
2020-09-18 11:28:32 +02:00
|
|
|
|
2021-04-03 12:40:22 +02:00
|
|
|
// NOTE(review): presumably the range window used in rate()-style
|
// queries by the rules and dashboards — confirm against the consumers.
rateInterval: '5m',
|
2021-04-02 02:34:23 +02:00
|
|
|
// Opt-in for multi-cluster support.
|
|
|
|
showMultiCluster: false,
|
|
|
|
clusterLabel: 'cluster',
|
|
|
|
|
|
|
|
// NOTE(review): presumably the title prefix and the tag list applied to
// the generated dashboards — confirm against the dashboard definitions.
dashboardNamePrefix: 'Node Exporter / ',
|
|
|
|
dashboardTags: ['node-exporter-mixin'],
|
2018-05-08 12:10:29 +02:00
|
|
|
},
|
|
|
|
}
|