collector: add slab info

Co-authored-by: Ben Kochie <superq@gmail.com>
Signed-off-by: Nobuhiro MIKI <nmiki@yahoo-corp.jp>
This commit is contained in:
Nobuhiro MIKI 2022-05-13 16:19:19 +09:00 committed by Johannes 'fish' Ziemke
parent 69a3f73a9b
commit 3ed95908d6
7 changed files with 192 additions and 0 deletions

View File

@ -8,6 +8,7 @@
* [ENHANCEMENT] Add node_softirqs_total metric #2221 * [ENHANCEMENT] Add node_softirqs_total metric #2221
* [ENHANCEMENT] Add device filter flags to arp collector #2254 * [ENHANCEMENT] Add device filter flags to arp collector #2254
* [ENHANCEMENT] Add rapl zone name label option #2401 * [ENHANCEMENT] Add rapl zone name label option #2401
* [ENHANCEMENT] Add slabinfo collector #1799
* [BUGFIX] Sanitize rapl zone names #2299 * [BUGFIX] Sanitize rapl zone names #2299
## 1.3.1 / 2021-12-01 ## 1.3.1 / 2021-12-01

View File

@ -210,6 +210,7 @@ perf | Exposes perf based metrics (Warning: Metrics are dependent on kernel conf
processes | Exposes aggregate process statistics from `/proc`. | Linux processes | Exposes aggregate process statistics from `/proc`. | Linux
qdisc | Exposes [queuing discipline](https://en.wikipedia.org/wiki/Network_scheduler#Linux_kernel) statistics | Linux qdisc | Exposes [queuing discipline](https://en.wikipedia.org/wiki/Network_scheduler#Linux_kernel) statistics | Linux
runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_ runit | Exposes service status from [runit](http://smarden.org/runit/). | _any_
slabinfo | Exposes slab statistics from `/proc/slabinfo`. Note that `/proc/slabinfo` is usually readable only by root (mode 0400), so the exporter needs appropriate permissions to read it. | Linux
supervisord | Exposes service status from [supervisord](http://supervisord.org/). | _any_ supervisord | Exposes service status from [supervisord](http://supervisord.org/). | _any_
systemd | Exposes service and system status from [systemd](http://www.freedesktop.org/wiki/Software/systemd/). | Linux systemd | Exposes service and system status from [systemd](http://www.freedesktop.org/wiki/Software/systemd/). | Linux
tcpstat | Exposes TCP connection status information from `/proc/net/tcp` and `/proc/net/tcp6`. (Warning: the current version has potential performance issues in high load situations.) | Linux tcpstat | Exposes TCP connection status information from `/proc/net/tcp` and `/proc/net/tcp6`. (Warning: the current version has potential performance issues in high load situations.) | Linux

View File

@ -3013,6 +3013,7 @@ node_scrape_collector_success{collector="qdisc"} 1
node_scrape_collector_success{collector="rapl"} 1 node_scrape_collector_success{collector="rapl"} 1
node_scrape_collector_success{collector="schedstat"} 1 node_scrape_collector_success{collector="schedstat"} 1
node_scrape_collector_success{collector="selinux"} 1 node_scrape_collector_success{collector="selinux"} 1
node_scrape_collector_success{collector="slabinfo"} 1
node_scrape_collector_success{collector="sockstat"} 1 node_scrape_collector_success{collector="sockstat"} 1
node_scrape_collector_success{collector="softnet"} 1 node_scrape_collector_success{collector="softnet"} 1
node_scrape_collector_success{collector="stat"} 1 node_scrape_collector_success{collector="stat"} 1
@ -3029,6 +3030,36 @@ node_scrape_collector_success{collector="zoneinfo"} 1
# HELP node_selinux_enabled SELinux is enabled, 1 is true, 0 is false # HELP node_selinux_enabled SELinux is enabled, 1 is true, 0 is false
# TYPE node_selinux_enabled gauge # TYPE node_selinux_enabled gauge
node_selinux_enabled 0 node_selinux_enabled 0
# HELP node_slabinfo_active_objects The number of objects that are currently active (i.e., in use).
# TYPE node_slabinfo_active_objects gauge
node_slabinfo_active_objects{slab="dmaengine-unmap-128"} 1206
node_slabinfo_active_objects{slab="kmalloc-8192"} 132
node_slabinfo_active_objects{slab="kmem_cache"} 320
node_slabinfo_active_objects{slab="tw_sock_TCP"} 704
# HELP node_slabinfo_object_size_bytes The size of objects in this slab, in bytes.
# TYPE node_slabinfo_object_size_bytes gauge
node_slabinfo_object_size_bytes{slab="dmaengine-unmap-128"} 1088
node_slabinfo_object_size_bytes{slab="kmalloc-8192"} 8192
node_slabinfo_object_size_bytes{slab="kmem_cache"} 256
node_slabinfo_object_size_bytes{slab="tw_sock_TCP"} 256
# HELP node_slabinfo_objects The total number of allocated objects (i.e., objects that are both in use and not in use).
# TYPE node_slabinfo_objects gauge
node_slabinfo_objects{slab="dmaengine-unmap-128"} 1320
node_slabinfo_objects{slab="kmalloc-8192"} 148
node_slabinfo_objects{slab="kmem_cache"} 320
node_slabinfo_objects{slab="tw_sock_TCP"} 864
# HELP node_slabinfo_objects_per_slab The number of objects stored in each slab.
# TYPE node_slabinfo_objects_per_slab gauge
node_slabinfo_objects_per_slab{slab="dmaengine-unmap-128"} 30
node_slabinfo_objects_per_slab{slab="kmalloc-8192"} 4
node_slabinfo_objects_per_slab{slab="kmem_cache"} 32
node_slabinfo_objects_per_slab{slab="tw_sock_TCP"} 32
# HELP node_slabinfo_pages_per_slab The number of pages allocated for each slab.
# TYPE node_slabinfo_pages_per_slab gauge
node_slabinfo_pages_per_slab{slab="dmaengine-unmap-128"} 8
node_slabinfo_pages_per_slab{slab="kmalloc-8192"} 8
node_slabinfo_pages_per_slab{slab="kmem_cache"} 2
node_slabinfo_pages_per_slab{slab="tw_sock_TCP"} 2
# HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse. # HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse.
# TYPE node_sockstat_FRAG6_inuse gauge # TYPE node_sockstat_FRAG6_inuse gauge
node_sockstat_FRAG6_inuse 0 node_sockstat_FRAG6_inuse 0

View File

@ -3035,6 +3035,7 @@ node_scrape_collector_success{collector="qdisc"} 1
node_scrape_collector_success{collector="rapl"} 1 node_scrape_collector_success{collector="rapl"} 1
node_scrape_collector_success{collector="schedstat"} 1 node_scrape_collector_success{collector="schedstat"} 1
node_scrape_collector_success{collector="selinux"} 1 node_scrape_collector_success{collector="selinux"} 1
node_scrape_collector_success{collector="slabinfo"} 1
node_scrape_collector_success{collector="sockstat"} 1 node_scrape_collector_success{collector="sockstat"} 1
node_scrape_collector_success{collector="softnet"} 1 node_scrape_collector_success{collector="softnet"} 1
node_scrape_collector_success{collector="stat"} 1 node_scrape_collector_success{collector="stat"} 1
@ -3051,6 +3052,36 @@ node_scrape_collector_success{collector="zoneinfo"} 1
# HELP node_selinux_enabled SELinux is enabled, 1 is true, 0 is false # HELP node_selinux_enabled SELinux is enabled, 1 is true, 0 is false
# TYPE node_selinux_enabled gauge # TYPE node_selinux_enabled gauge
node_selinux_enabled 0 node_selinux_enabled 0
# HELP node_slabinfo_active_objects The number of objects that are currently active (i.e., in use).
# TYPE node_slabinfo_active_objects gauge
node_slabinfo_active_objects{slab="dmaengine-unmap-128"} 1206
node_slabinfo_active_objects{slab="kmalloc-8192"} 132
node_slabinfo_active_objects{slab="kmem_cache"} 320
node_slabinfo_active_objects{slab="tw_sock_TCP"} 704
# HELP node_slabinfo_object_size_bytes The size of objects in this slab, in bytes.
# TYPE node_slabinfo_object_size_bytes gauge
node_slabinfo_object_size_bytes{slab="dmaengine-unmap-128"} 1088
node_slabinfo_object_size_bytes{slab="kmalloc-8192"} 8192
node_slabinfo_object_size_bytes{slab="kmem_cache"} 256
node_slabinfo_object_size_bytes{slab="tw_sock_TCP"} 256
# HELP node_slabinfo_objects The total number of allocated objects (i.e., objects that are both in use and not in use).
# TYPE node_slabinfo_objects gauge
node_slabinfo_objects{slab="dmaengine-unmap-128"} 1320
node_slabinfo_objects{slab="kmalloc-8192"} 148
node_slabinfo_objects{slab="kmem_cache"} 320
node_slabinfo_objects{slab="tw_sock_TCP"} 864
# HELP node_slabinfo_objects_per_slab The number of objects stored in each slab.
# TYPE node_slabinfo_objects_per_slab gauge
node_slabinfo_objects_per_slab{slab="dmaengine-unmap-128"} 30
node_slabinfo_objects_per_slab{slab="kmalloc-8192"} 4
node_slabinfo_objects_per_slab{slab="kmem_cache"} 32
node_slabinfo_objects_per_slab{slab="tw_sock_TCP"} 32
# HELP node_slabinfo_pages_per_slab The number of pages allocated for each slab.
# TYPE node_slabinfo_pages_per_slab gauge
node_slabinfo_pages_per_slab{slab="dmaengine-unmap-128"} 8
node_slabinfo_pages_per_slab{slab="kmalloc-8192"} 8
node_slabinfo_pages_per_slab{slab="kmem_cache"} 2
node_slabinfo_pages_per_slab{slab="tw_sock_TCP"} 2
# HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse. # HELP node_sockstat_FRAG6_inuse Number of FRAG6 sockets in state inuse.
# TYPE node_sockstat_FRAG6_inuse gauge # TYPE node_sockstat_FRAG6_inuse gauge
node_sockstat_FRAG6_inuse 0 node_sockstat_FRAG6_inuse 0

View File

@ -0,0 +1,6 @@
slabinfo - version: 2.1
# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab> : tunables <limit> <batchcount> <sharedfactor> : slabdata <active_slabs> <num_slabs> <sharedavail>
tw_sock_TCP 704 864 256 32 2 : tunables 0 0 0 : slabdata 27 27 0
dmaengine-unmap-128 1206 1320 1088 30 8 : tunables 0 0 0 : slabdata 44 44 0
kmalloc-8192 132 148 8192 4 8 : tunables 0 0 0 : slabdata 37 37 0
kmem_cache 320 320 256 32 2 : tunables 0 0 0 : slabdata 10 10 0

121
collector/slabinfo_linux.go Normal file
View File

@ -0,0 +1,121 @@
// Copyright 2022 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build linux && !noslabinfo
// +build linux,!noslabinfo
package collector
import (
"fmt"
"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
)
type slabinfoCollector struct {
fs procfs.FS
logger log.Logger
subsystem string
labels []string
}
// init registers the slabinfo collector under its name with the
// defaultDisabled state, using NewSlabinfoCollector as its factory.
func init() {
	const collectorName = "slabinfo"
	registerCollector(collectorName, defaultDisabled, NewSlabinfoCollector)
}
// NewSlabinfoCollector returns a Collector exposing slab statistics read
// through a procfs handle rooted at *procPath. It returns an error when that
// procfs handle cannot be opened.
func NewSlabinfoCollector(logger log.Logger) (Collector, error) {
	procFS, err := procfs.NewFS(*procPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open procfs: %w", err)
	}

	collector := &slabinfoCollector{
		fs:        procFS,
		logger:    logger,
		subsystem: "slabinfo",
		labels:    []string{"slab"},
	}
	return collector, nil
}
// Update reads the current slab information through procfs and sends five
// gauges per slab on ch: active objects, total objects, object size,
// objects per slab and pages per slab. It returns a wrapped error when the
// slab information cannot be read.
func (c *slabinfoCollector) Update(ch chan<- prometheus.Metric) error {
	info, err := c.fs.SlabInfo()
	if err != nil {
		return fmt.Errorf("couldn't get %s: %w", c.subsystem, err)
	}

	for i := range info.Slabs {
		entry := info.Slabs[i]
		name := entry.Name

		// Metrics are emitted one at a time, in a fixed order per slab.
		ch <- c.activeObjects(name, entry.ObjActive)
		ch <- c.objects(name, entry.ObjNum)
		ch <- c.objectSizeBytes(name, entry.ObjSize)
		ch <- c.objectsPerSlab(name, entry.ObjPerSlab)
		ch <- c.pagesPerSlab(name, entry.PagesPerSlab)
	}

	return nil
}
// activeObjects builds the "active_objects" gauge for the slab named by
// label, carrying val converted to float64 as its value.
func (c *slabinfoCollector) activeObjects(label string, val int64) prometheus.Metric {
	const help = "The number of objects that are currently active (i.e., in use)."
	fqName := prometheus.BuildFQName(namespace, c.subsystem, "active_objects")

	return prometheus.MustNewConstMetric(
		prometheus.NewDesc(fqName, help, c.labels, nil),
		prometheus.GaugeValue,
		float64(val),
		label,
	)
}
// objects builds the "objects" gauge for the slab named by label, carrying
// val converted to float64 as its value.
func (c *slabinfoCollector) objects(label string, val int64) prometheus.Metric {
	const help = "The total number of allocated objects (i.e., objects that are both in use and not in use)."
	fqName := prometheus.BuildFQName(namespace, c.subsystem, "objects")

	return prometheus.MustNewConstMetric(
		prometheus.NewDesc(fqName, help, c.labels, nil),
		prometheus.GaugeValue,
		float64(val),
		label,
	)
}
// objectSizeBytes builds the "object_size_bytes" gauge for the slab named by
// label, carrying val converted to float64 as its value.
func (c *slabinfoCollector) objectSizeBytes(label string, val int64) prometheus.Metric {
	const help = "The size of objects in this slab, in bytes."
	fqName := prometheus.BuildFQName(namespace, c.subsystem, "object_size_bytes")

	return prometheus.MustNewConstMetric(
		prometheus.NewDesc(fqName, help, c.labels, nil),
		prometheus.GaugeValue,
		float64(val),
		label,
	)
}
// objectsPerSlab builds the "objects_per_slab" gauge for the slab named by
// label, carrying val converted to float64 as its value.
func (c *slabinfoCollector) objectsPerSlab(label string, val int64) prometheus.Metric {
	const help = "The number of objects stored in each slab."
	fqName := prometheus.BuildFQName(namespace, c.subsystem, "objects_per_slab")

	return prometheus.MustNewConstMetric(
		prometheus.NewDesc(fqName, help, c.labels, nil),
		prometheus.GaugeValue,
		float64(val),
		label,
	)
}
// pagesPerSlab builds the "pages_per_slab" gauge for the slab named by
// label, carrying val converted to float64 as its value.
func (c *slabinfoCollector) pagesPerSlab(label string, val int64) prometheus.Metric {
	const help = "The number of pages allocated for each slab."
	fqName := prometheus.BuildFQName(namespace, c.subsystem, "pages_per_slab")

	return prometheus.MustNewConstMetric(
		prometheus.NewDesc(fqName, help, c.labels, nil),
		prometheus.GaugeValue,
		float64(val),
		label,
	)
}

View File

@ -38,6 +38,7 @@ enabled_collectors=$(cat << COLLECTORS
rapl rapl
schedstat schedstat
selinux selinux
slabinfo
sockstat sockstat
stat stat
thermal_zone thermal_zone