Fix and simplify parsing of RAID metrics

Fixes the incorrect reporting of the active and total disk metrics for
inactive RAID arrays; both are now reported as 0. Also simplifies the
code and removes a couple of redundant comments.
This commit is contained in:
Tobias Schmidt 2017-03-18 14:36:26 -03:00
parent faa7483215
commit 0400e437be
3 changed files with 55 additions and 89 deletions

View File

@ -798,7 +798,7 @@ node_md_disks{device="md10"} 2
node_md_disks{device="md11"} 2 node_md_disks{device="md11"} 2
node_md_disks{device="md12"} 2 node_md_disks{device="md12"} 2
node_md_disks{device="md127"} 2 node_md_disks{device="md127"} 2
node_md_disks{device="md219"} 2 node_md_disks{device="md219"} 0
node_md_disks{device="md3"} 8 node_md_disks{device="md3"} 8
node_md_disks{device="md4"} 2 node_md_disks{device="md4"} 2
node_md_disks{device="md6"} 2 node_md_disks{device="md6"} 2
@ -813,7 +813,7 @@ node_md_disks_active{device="md10"} 2
node_md_disks_active{device="md11"} 2 node_md_disks_active{device="md11"} 2
node_md_disks_active{device="md12"} 2 node_md_disks_active{device="md12"} 2
node_md_disks_active{device="md127"} 2 node_md_disks_active{device="md127"} 2
node_md_disks_active{device="md219"} 2 node_md_disks_active{device="md219"} 0
node_md_disks_active{device="md3"} 8 node_md_disks_active{device="md3"} 8
node_md_disks_active{device="md4"} 2 node_md_disks_active{device="md4"} 2
node_md_disks_active{device="md6"} 1 node_md_disks_active{device="md6"} 1

View File

@ -36,8 +36,8 @@ var (
) )
type mdStatus struct { type mdStatus struct {
mdName string name string
isActive bool active bool
disksActive int64 disksActive int64
disksTotal int64 disksTotal int64
blocksTotal int64 blocksTotal int64
@ -136,97 +136,78 @@ func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) {
return []mdStatus{}, fmt.Errorf("error parsing mdstat: %s", err) return []mdStatus{}, fmt.Errorf("error parsing mdstat: %s", err)
} }
mdStatusFile := string(content) lines := strings.Split(string(content), "\n")
lines := strings.Split(mdStatusFile, "\n")
var (
currentMD string
personality string
active, total, size int64
)
// Each md has at least the deviceline, statusline and one empty line afterwards // Each md has at least the deviceline, statusline and one empty line afterwards
// so we will have probably something of the order len(lines)/3 devices // so we will have probably something of the order len(lines)/3 devices
// so we use that for preallocation. // so we use that for preallocation.
estimateMDs := len(lines) / 3 mdStates := make([]mdStatus, 0, len(lines)/3)
mdStates := make([]mdStatus, 0, estimateMDs) for i, line := range lines {
if line == "" {
for i, l := range lines { continue
if l == "" { }
// Skip entirely empty lines. if line[0] == ' ' || line[0] == '\t' {
// Lines starting with white space are not the beginning of a md-section.
continue
}
if strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") {
// These lines contain general information.
continue continue
} }
if l[0] == ' ' || l[0] == '\t' { mainLine := strings.Split(line, " ")
// Those lines are not the beginning of a md-section.
continue
}
if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") {
// We aren't interested in lines with general info.
continue
}
mainLine := strings.Split(l, " ")
if len(mainLine) < 4 { if len(mainLine) < 4 {
return mdStates, fmt.Errorf("error parsing mdline: %s", l) return mdStates, fmt.Errorf("error parsing mdline: %s", line)
} }
currentMD = mainLine[0] // The name of the md-device. md := mdStatus{
isActive := (mainLine[2] == "active") // The activity status of the md-device. name: mainLine[0],
personality = "" active: mainLine[2] == "active",
}
if len(lines) <= i+3 {
return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", md.name)
}
personality := ""
for _, possiblePersonality := range mainLine[3:] { for _, possiblePersonality := range mainLine[3:] {
if raidPersonalityRE.MatchString(possiblePersonality) { if raidPersonalityRE.MatchString(possiblePersonality) {
personality = possiblePersonality personality = possiblePersonality
break break
} }
} }
if len(lines) <= i+3 {
return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", currentMD)
}
switch { switch {
case personality == "raid0": case personality == "raid0":
active = int64(len(mainLine) - 4) // Get the number of devices from the main line. md.disksActive = int64(len(mainLine) - 4) // Get the number of devices from the main line.
total = active // Raid0 active and total is always the same if active. md.disksTotal = md.disksActive // Raid0 active and total is always the same if active.
size, err = evalRaid0line(lines[i+1]) // Parse statusline, always present. md.blocksTotal, err = evalRaid0line(lines[i+1])
case raidPersonalityRE.MatchString(personality): case raidPersonalityRE.MatchString(personality):
active, total, size, err = evalStatusline(lines[i+1]) // Parse statusline, always present. md.disksActive, md.disksTotal, md.blocksTotal, err = evalStatusline(lines[i+1])
default: default:
log.Infof("Personality unknown: %s\n", mainLine) log.Infof("Personality unknown: %s\n", mainLine)
size, err = evalUnknownPersonalitylineRE(lines[i+1]) // Parse statusline, always present. md.blocksTotal, err = evalUnknownPersonalitylineRE(lines[i+1])
} }
if err != nil { if err != nil {
return mdStates, fmt.Errorf("error parsing mdstat: %s", err) return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
} }
// Now get the number of synced blocks. syncLine := lines[i+2]
var syncedBlocks int64 if strings.Contains(syncLine, "bitmap") {
syncLine = lines[i+3]
// Get the line number of the syncing-line.
var j int
if strings.Contains(lines[i+2], "bitmap") { // then skip the bitmap line
j = i + 3
} else {
j = i + 2
} }
// If device is syncing at the moment, get the number of currently synced bytes, // If device is syncing at the moment, get the number of currently synced bytes,
// otherwise that number equals the size of the device. // otherwise that number equals the size of the device.
if strings.Contains(lines[j], "recovery") || if strings.Contains(syncLine, "recovery") ||
strings.Contains(lines[j], "resync") && strings.Contains(syncLine, "resync") &&
!strings.Contains(lines[j], "\tresync=") { !strings.Contains(syncLine, "\tresync=") {
syncedBlocks, err = evalBuildline(lines[j]) md.blocksSynced, err = evalBuildline(syncLine)
if err != nil { if err != nil {
return mdStates, fmt.Errorf("error parsing mdstat: %s", err) return mdStates, fmt.Errorf("error parsing mdstat: %s", err)
} }
} else { } else {
syncedBlocks = size md.blocksSynced = md.blocksTotal
} }
mdStates = append(mdStates, mdStatus{currentMD, isActive, active, total, size, syncedBlocks}) mdStates = append(mdStates, md)
} }
return mdStates, nil return mdStates, nil
@ -277,68 +258,55 @@ var (
func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error {
statusfile := procFilePath("mdstat") statusfile := procFilePath("mdstat")
if _, err := os.Stat(statusfile); err != nil { if _, err := os.Stat(statusfile); err != nil {
// Take care we don't crash on non-existent statusfiles.
if os.IsNotExist(err) { if os.IsNotExist(err) {
// no such file or directory, nothing to do, just return
log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile) log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile)
return nil return nil
} }
return err return err
} }
// First parse mdstat-file...
mdstate, err := parseMdstat(statusfile) mdstate, err := parseMdstat(statusfile)
if err != nil { if err != nil {
return fmt.Errorf("error parsing mdstatus: %s", err) return fmt.Errorf("error parsing mdstatus: %s", err)
} }
// ... and then plug the result into the metrics to be exported.
var isActiveFloat float64
for _, mds := range mdstate { for _, mds := range mdstate {
log.Debugf("collecting metrics for device %s", mds.name)
log.Debugf("collecting metrics for device %s", mds.mdName) var active float64
if mds.active {
if mds.isActive { active = 1
isActiveFloat = 1
} else {
isActiveFloat = 0
} }
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
isActiveDesc, isActiveDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
isActiveFloat, active,
mds.mdName, mds.name,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
disksActiveDesc, disksActiveDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(mds.disksActive), float64(mds.disksActive),
mds.mdName, mds.name,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
disksTotalDesc, disksTotalDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(mds.disksTotal), float64(mds.disksTotal),
mds.mdName, mds.name,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
blocksTotalDesc, blocksTotalDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(mds.blocksTotal), float64(mds.blocksTotal),
mds.mdName, mds.name,
) )
ch <- prometheus.MustNewConstMetric( ch <- prometheus.MustNewConstMetric(
blocksSyncedDesc, blocksSyncedDesc,
prometheus.GaugeValue, prometheus.GaugeValue,
float64(mds.blocksSynced), float64(mds.blocksSynced),
mds.mdName, mds.name,
) )
} }
return nil return nil

View File

@ -19,7 +19,6 @@ import (
func TestMdadm(t *testing.T) { func TestMdadm(t *testing.T) {
mdStates, err := parseMdstat("fixtures/proc/mdstat") mdStates, err := parseMdstat("fixtures/proc/mdstat")
if err != nil { if err != nil {
t.Fatalf("parsing of reference-file failed entirely: %s", err) t.Fatalf("parsing of reference-file failed entirely: %s", err)
} }
@ -37,13 +36,13 @@ func TestMdadm(t *testing.T) {
"md10": {"md10", true, 2, 2, 314159265, 314159265}, "md10": {"md10", true, 2, 2, 314159265, 314159265},
"md11": {"md11", true, 2, 2, 4190208, 4190208}, "md11": {"md11", true, 2, 2, 4190208, 4190208},
"md12": {"md12", true, 2, 2, 3886394368, 3886394368}, "md12": {"md12", true, 2, 2, 3886394368, 3886394368},
"md219": {"md219", false, 2, 2, 7932, 7932}, "md219": {"md219", false, 0, 0, 7932, 7932},
"md00": {"md00", true, 1, 1, 4186624, 4186624}, "md00": {"md00", true, 1, 1, 4186624, 4186624},
} }
for _, md := range mdStates { for _, md := range mdStates {
if md != refs[md.mdName] { if md != refs[md.name] {
t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.mdName, refs[md.mdName], md) t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.name, refs[md.name], md)
} }
} }
@ -54,7 +53,6 @@ func TestMdadm(t *testing.T) {
func TestInvalidMdstat(t *testing.T) { func TestInvalidMdstat(t *testing.T) {
_, err := parseMdstat("fixtures/proc/mdstat_invalid") _, err := parseMdstat("fixtures/proc/mdstat_invalid")
if err == nil { if err == nil {
t.Fatalf("parsing of invalid reference file did not find any errors") t.Fatalf("parsing of invalid reference file did not find any errors")
} }