2017-06-13 11:21:53 +02:00
|
|
|
// Copyright 2015 The Prometheus Authors
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
// +build !nocpu
|
|
|
|
|
|
|
|
package collector
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"path/filepath"
|
2018-02-27 19:43:15 +01:00
|
|
|
"strconv"
|
2017-06-13 11:21:53 +02:00
|
|
|
|
2019-12-31 17:19:37 +01:00
|
|
|
"github.com/go-kit/kit/log"
|
|
|
|
"github.com/go-kit/kit/log/level"
|
2017-06-13 11:21:53 +02:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/prometheus/procfs"
|
2019-12-31 17:19:37 +01:00
|
|
|
"gopkg.in/alecthomas/kingpin.v2"
|
2017-06-13 11:21:53 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
type cpuCollector struct {
|
2019-04-10 18:16:12 +02:00
|
|
|
fs procfs.FS
|
2017-06-13 11:21:53 +02:00
|
|
|
cpu *prometheus.Desc
|
2019-09-11 23:06:36 +02:00
|
|
|
cpuInfo *prometheus.Desc
|
2017-11-23 15:04:47 +01:00
|
|
|
cpuGuest *prometheus.Desc
|
2017-06-13 11:21:53 +02:00
|
|
|
cpuCoreThrottle *prometheus.Desc
|
|
|
|
cpuPackageThrottle *prometheus.Desc
|
2019-12-31 17:19:37 +01:00
|
|
|
logger log.Logger
|
2017-06-13 11:21:53 +02:00
|
|
|
}
|
|
|
|
|
2019-09-11 23:06:36 +02:00
|
|
|
var (
|
|
|
|
enableCPUInfo = kingpin.Flag("collector.cpu.info", "Enables metric cpu_info").Bool()
|
|
|
|
)
|
|
|
|
|
2017-06-13 11:21:53 +02:00
|
|
|
func init() {
|
2017-09-28 15:06:26 +02:00
|
|
|
registerCollector("cpu", defaultEnabled, NewCPUCollector)
|
2017-06-13 11:21:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// NewCPUCollector returns a new Collector exposing kernel/system statistics.
|
2019-12-31 17:19:37 +01:00
|
|
|
func NewCPUCollector(logger log.Logger) (Collector, error) {
|
2019-04-10 18:16:12 +02:00
|
|
|
fs, err := procfs.NewFS(*procPath)
|
|
|
|
if err != nil {
|
2019-11-29 14:51:31 +01:00
|
|
|
return nil, fmt.Errorf("failed to open procfs: %w", err)
|
2019-04-10 18:16:12 +02:00
|
|
|
}
|
2017-06-13 11:21:53 +02:00
|
|
|
return &cpuCollector{
|
2019-04-10 18:16:12 +02:00
|
|
|
fs: fs,
|
2018-04-29 14:34:47 +02:00
|
|
|
cpu: nodeCPUSecondsDesc,
|
2019-09-11 23:06:36 +02:00
|
|
|
cpuInfo: prometheus.NewDesc(
|
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"),
|
|
|
|
"CPU information from /proc/cpuinfo.",
|
|
|
|
[]string{"package", "core", "cpu", "vendor", "family", "model", "microcode", "cachesize"}, nil,
|
|
|
|
),
|
2017-11-23 15:04:47 +01:00
|
|
|
cpuGuest: prometheus.NewDesc(
|
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"),
|
|
|
|
"Seconds the cpus spent in guests (VMs) for each mode.",
|
|
|
|
[]string{"cpu", "mode"}, nil,
|
|
|
|
),
|
2017-06-13 11:21:53 +02:00
|
|
|
cpuCoreThrottle: prometheus.NewDesc(
|
2017-09-28 15:06:26 +02:00
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"),
|
2017-06-13 11:21:53 +02:00
|
|
|
"Number of times this cpu core has been throttled.",
|
2018-04-09 18:01:52 +02:00
|
|
|
[]string{"package", "core"}, nil,
|
2017-06-13 11:21:53 +02:00
|
|
|
),
|
|
|
|
cpuPackageThrottle: prometheus.NewDesc(
|
2017-09-28 15:06:26 +02:00
|
|
|
prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "package_throttles_total"),
|
2017-06-13 11:21:53 +02:00
|
|
|
"Number of times this cpu package has been throttled.",
|
2018-04-09 18:01:52 +02:00
|
|
|
[]string{"package"}, nil,
|
2017-06-13 11:21:53 +02:00
|
|
|
),
|
2019-12-31 17:19:37 +01:00
|
|
|
logger: logger,
|
2017-06-13 11:21:53 +02:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Update implements Collector and exposes cpu related metrics from /proc/stat and /sys/.../cpu/.
|
|
|
|
func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error {
|
2019-09-11 23:06:36 +02:00
|
|
|
if *enableCPUInfo {
|
|
|
|
if err := c.updateInfo(ch); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2017-06-13 11:21:53 +02:00
|
|
|
if err := c.updateStat(ch); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2018-10-18 17:28:19 +02:00
|
|
|
if err := c.updateThermalThrottle(ch); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2017-06-13 11:21:53 +02:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2019-09-11 23:06:36 +02:00
|
|
|
// updateInfo reads /proc/cpuinfo
|
|
|
|
func (c *cpuCollector) updateInfo(ch chan<- prometheus.Metric) error {
|
|
|
|
info, err := c.fs.CPUInfo()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for _, cpu := range info {
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpuInfo,
|
|
|
|
prometheus.GaugeValue,
|
|
|
|
1,
|
|
|
|
cpu.PhysicalID,
|
|
|
|
cpu.CoreID,
|
2020-02-19 14:34:05 +01:00
|
|
|
strconv.Itoa(int(cpu.Processor)),
|
2019-09-11 23:06:36 +02:00
|
|
|
cpu.VendorID,
|
|
|
|
cpu.CPUFamily,
|
|
|
|
cpu.Model,
|
|
|
|
cpu.Microcode,
|
|
|
|
cpu.CacheSize)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-10-18 17:28:19 +02:00
|
|
|
// updateThermalThrottle reads /sys/devices/system/cpu/cpu* and expose thermal throttle statistics.
|
|
|
|
func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error {
|
2018-04-09 18:01:52 +02:00
|
|
|
cpus, err := filepath.Glob(sysFilePath("devices/system/cpu/cpu[0-9]*"))
|
2017-06-13 11:21:53 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2018-04-09 18:01:52 +02:00
|
|
|
packageThrottles := make(map[uint64]uint64)
|
|
|
|
packageCoreThrottles := make(map[uint64]map[uint64]uint64)
|
2018-02-27 19:43:15 +01:00
|
|
|
|
2017-09-07 23:24:18 +02:00
|
|
|
// cpu loop
|
2017-06-13 11:21:53 +02:00
|
|
|
for _, cpu := range cpus {
|
2018-04-09 18:01:52 +02:00
|
|
|
// See
|
|
|
|
// https://www.kernel.org/doc/Documentation/x86/topology.txt
|
|
|
|
// https://www.kernel.org/doc/Documentation/cputopology.txt
|
|
|
|
// https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-devices-system-cpu
|
|
|
|
var err error
|
|
|
|
var physicalPackageID, coreID uint64
|
|
|
|
|
|
|
|
// topology/physical_package_id
|
|
|
|
if physicalPackageID, err = readUintFromFile(filepath.Join(cpu, "topology", "physical_package_id")); err != nil {
|
2019-12-31 17:19:37 +01:00
|
|
|
level.Debug(c.logger).Log("msg", "CPU is missing physical_package_id", "cpu", cpu)
|
2018-04-09 18:01:52 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
// topology/core_id
|
|
|
|
if coreID, err = readUintFromFile(filepath.Join(cpu, "topology", "core_id")); err != nil {
|
2019-12-31 17:19:37 +01:00
|
|
|
level.Debug(c.logger).Log("msg", "CPU is missing core_id", "cpu", cpu)
|
2017-09-07 23:24:18 +02:00
|
|
|
continue
|
|
|
|
}
|
2018-02-27 19:43:15 +01:00
|
|
|
|
2018-04-09 18:01:52 +02:00
|
|
|
// metric node_cpu_core_throttles_total
|
|
|
|
//
|
|
|
|
// We process this metric before the package throttles as there
|
|
|
|
// are cpu+kernel combinations that only present core throttles
|
|
|
|
// but no package throttles.
|
|
|
|
// Seen e.g. on an Intel Xeon E5472 system with RHEL 6.9 kernel.
|
|
|
|
if _, present := packageCoreThrottles[physicalPackageID]; !present {
|
|
|
|
packageCoreThrottles[physicalPackageID] = make(map[uint64]uint64)
|
|
|
|
}
|
|
|
|
if _, present := packageCoreThrottles[physicalPackageID][coreID]; !present {
|
|
|
|
// Read thermal_throttle/core_throttle_count only once
|
|
|
|
if coreThrottleCount, err := readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err == nil {
|
|
|
|
packageCoreThrottles[physicalPackageID][coreID] = coreThrottleCount
|
|
|
|
} else {
|
2019-12-31 17:19:37 +01:00
|
|
|
level.Debug(c.logger).Log("msg", "CPU is missing core_throttle_count", "cpu", cpu)
|
2018-02-27 19:43:15 +01:00
|
|
|
}
|
2017-09-07 23:24:18 +02:00
|
|
|
}
|
2018-02-27 19:43:15 +01:00
|
|
|
|
2018-04-09 18:01:52 +02:00
|
|
|
// metric node_cpu_package_throttles_total
|
|
|
|
if _, present := packageThrottles[physicalPackageID]; !present {
|
|
|
|
// Read thermal_throttle/package_throttle_count only once
|
|
|
|
if packageThrottleCount, err := readUintFromFile(filepath.Join(cpu, "thermal_throttle", "package_throttle_count")); err == nil {
|
|
|
|
packageThrottles[physicalPackageID] = packageThrottleCount
|
|
|
|
} else {
|
2019-12-31 17:19:37 +01:00
|
|
|
level.Debug(c.logger).Log("msg", "CPU is missing package_throttle_count", "cpu", cpu)
|
2018-04-09 18:01:52 +02:00
|
|
|
}
|
|
|
|
}
|
2017-09-07 23:24:18 +02:00
|
|
|
}
|
2017-06-20 07:51:26 +02:00
|
|
|
|
2018-04-09 18:01:52 +02:00
|
|
|
for physicalPackageID, packageThrottleCount := range packageThrottles {
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
float64(packageThrottleCount),
|
|
|
|
strconv.FormatUint(physicalPackageID, 10))
|
2017-09-07 23:24:18 +02:00
|
|
|
}
|
|
|
|
|
2018-04-29 14:34:47 +02:00
|
|
|
for physicalPackageID, coreMap := range packageCoreThrottles {
|
|
|
|
for coreID, coreThrottleCount := range coreMap {
|
2018-04-09 18:01:52 +02:00
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle,
|
|
|
|
prometheus.CounterValue,
|
|
|
|
float64(coreThrottleCount),
|
|
|
|
strconv.FormatUint(physicalPackageID, 10),
|
|
|
|
strconv.FormatUint(coreID, 10))
|
2017-09-07 23:24:18 +02:00
|
|
|
}
|
2017-06-13 11:21:53 +02:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// updateStat reads /proc/stat through procfs and exports cpu related metrics.
|
|
|
|
func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error {
|
2019-06-12 20:47:16 +02:00
|
|
|
stats, err := c.fs.Stat()
|
2017-06-13 11:21:53 +02:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
for cpuID, cpuStat := range stats.CPU {
|
2020-02-19 14:34:05 +01:00
|
|
|
cpuNum := strconv.Itoa(cpuID)
|
2018-02-01 18:42:20 +01:00
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal")
|
2017-11-23 15:04:47 +01:00
|
|
|
|
|
|
|
// Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics.
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user")
|
|
|
|
ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice")
|
2017-06-13 11:21:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|