From 0400e437be52d622db3050e6496fa53cb0830a28 Mon Sep 17 00:00:00 2001 From: Tobias Schmidt Date: Sat, 18 Mar 2017 14:36:26 -0300 Subject: [PATCH] Fix and simplify parsing of raid metrics Fixes the wrong reporting of active+total disk metrics for inactive raids. Also simplifies the code and removes a couple of redundant comments. --- collector/fixtures/e2e-output.txt | 4 +- collector/mdadm_linux.go | 132 +++++++++++------------------- collector/mdadm_linux_test.go | 8 +- 3 files changed, 55 insertions(+), 89 deletions(-) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index a6171cae..48df21b9 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -798,7 +798,7 @@ node_md_disks{device="md10"} 2 node_md_disks{device="md11"} 2 node_md_disks{device="md12"} 2 node_md_disks{device="md127"} 2 -node_md_disks{device="md219"} 2 +node_md_disks{device="md219"} 0 node_md_disks{device="md3"} 8 node_md_disks{device="md4"} 2 node_md_disks{device="md6"} 2 @@ -813,7 +813,7 @@ node_md_disks_active{device="md10"} 2 node_md_disks_active{device="md11"} 2 node_md_disks_active{device="md12"} 2 node_md_disks_active{device="md127"} 2 -node_md_disks_active{device="md219"} 2 +node_md_disks_active{device="md219"} 0 node_md_disks_active{device="md3"} 8 node_md_disks_active{device="md4"} 2 node_md_disks_active{device="md6"} 1 diff --git a/collector/mdadm_linux.go b/collector/mdadm_linux.go index c2069ffb..03f76fde 100644 --- a/collector/mdadm_linux.go +++ b/collector/mdadm_linux.go @@ -36,8 +36,8 @@ var ( ) type mdStatus struct { - mdName string - isActive bool + name string + active bool disksActive int64 disksTotal int64 blocksTotal int64 @@ -136,97 +136,78 @@ func parseMdstat(mdStatusFilePath string) ([]mdStatus, error) { return []mdStatus{}, fmt.Errorf("error parsing mdstat: %s", err) } - mdStatusFile := string(content) - - lines := strings.Split(mdStatusFile, "\n") - var ( - currentMD string - personality string - active, total, size int64 - ) - + lines := strings.Split(string(content), "\n") // Each md has at least the deviceline, statusline and one empty line afterwards // so we will have probably something of the order len(lines)/3 devices // so we use that for preallocation. - estimateMDs := len(lines) / 3 - mdStates := make([]mdStatus, 0, estimateMDs) - - for i, l := range lines { - if l == "" { - // Skip entirely empty lines. + mdStates := make([]mdStatus, 0, len(lines)/3) + for i, line := range lines { + if line == "" { + continue + } + if line[0] == ' ' || line[0] == '\t' { + // Lines starting with white space are not the beginning of a md-section. + continue + } + if strings.HasPrefix(line, "Personalities") || strings.HasPrefix(line, "unused") { + // These lines contain general information. continue } - if l[0] == ' ' || l[0] == '\t' { - // Those lines are not the beginning of a md-section. - continue - } - - if strings.HasPrefix(l, "Personalities") || strings.HasPrefix(l, "unused") { - // We aren't interested in lines with general info. - continue - } - - mainLine := strings.Split(l, " ") + mainLine := strings.Split(line, " ") if len(mainLine) < 4 { - return mdStates, fmt.Errorf("error parsing mdline: %s", l) + return mdStates, fmt.Errorf("error parsing mdline: %s", line) } - currentMD = mainLine[0] // The name of the md-device. - isActive := (mainLine[2] == "active") // The activity status of the md-device. - personality = "" + md := mdStatus{ + name: mainLine[0], + active: mainLine[2] == "active", + } + + if len(lines) <= i+3 { + return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", md.name) + } + + personality := "" for _, possiblePersonality := range mainLine[3:] { if raidPersonalityRE.MatchString(possiblePersonality) { personality = possiblePersonality break } } - - if len(lines) <= i+3 { - return mdStates, fmt.Errorf("error parsing mdstat: entry for %s has fewer lines than expected", currentMD) - } - switch { case personality == "raid0": - active = int64(len(mainLine) - 4) // Get the number of devices from the main line. - total = active // Raid0 active and total is always the same if active. - size, err = evalRaid0line(lines[i+1]) // Parse statusline, always present. + md.disksActive = int64(len(mainLine) - 4) // Get the number of devices from the main line. + md.disksTotal = md.disksActive // Raid0 active and total is always the same if active. + md.blocksTotal, err = evalRaid0line(lines[i+1]) case raidPersonalityRE.MatchString(personality): - active, total, size, err = evalStatusline(lines[i+1]) // Parse statusline, always present. + md.disksActive, md.disksTotal, md.blocksTotal, err = evalStatusline(lines[i+1]) default: log.Infof("Personality unknown: %s\n", mainLine) - size, err = evalUnknownPersonalitylineRE(lines[i+1]) // Parse statusline, always present. + md.blocksTotal, err = evalUnknownPersonalitylineRE(lines[i+1]) } - if err != nil { return mdStates, fmt.Errorf("error parsing mdstat: %s", err) } - // Now get the number of synced blocks. - var syncedBlocks int64 - - // Get the line number of the syncing-line. - var j int - if strings.Contains(lines[i+2], "bitmap") { // then skip the bitmap line - j = i + 3 - } else { - j = i + 2 + syncLine := lines[i+2] + if strings.Contains(syncLine, "bitmap") { + syncLine = lines[i+3] } // If device is syncing at the moment, get the number of currently synced bytes, // otherwise that number equals the size of the device. - if strings.Contains(lines[j], "recovery") || - strings.Contains(lines[j], "resync") && - !strings.Contains(lines[j], "\tresync=") { - syncedBlocks, err = evalBuildline(lines[j]) + if strings.Contains(syncLine, "recovery") || + strings.Contains(syncLine, "resync") && + !strings.Contains(syncLine, "\tresync=") { + md.blocksSynced, err = evalBuildline(syncLine) if err != nil { return mdStates, fmt.Errorf("error parsing mdstat: %s", err) } } else { - syncedBlocks = size + md.blocksSynced = md.blocksTotal } - mdStates = append(mdStates, mdStatus{currentMD, isActive, active, total, size, syncedBlocks}) - + mdStates = append(mdStates, md) } return mdStates, nil @@ -277,68 +258,55 @@ var ( func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { statusfile := procFilePath("mdstat") if _, err := os.Stat(statusfile); err != nil { - // Take care we don't crash on non-existent statusfiles. if os.IsNotExist(err) { - // no such file or directory, nothing to do, just return log.Debugf("Not collecting mdstat, file does not exist: %s", statusfile) return nil } return err } - // First parse mdstat-file... mdstate, err := parseMdstat(statusfile) if err != nil { return fmt.Errorf("error parsing mdstatus: %s", err) } - // ... and then plug the result into the metrics to be exported. - var isActiveFloat float64 for _, mds := range mdstate { + log.Debugf("collecting metrics for device %s", mds.name) - log.Debugf("collecting metrics for device %s", mds.mdName) - - if mds.isActive { - isActiveFloat = 1 - } else { - isActiveFloat = 0 + var active float64 + if mds.active { + active = 1 } - ch <- prometheus.MustNewConstMetric( isActiveDesc, prometheus.GaugeValue, - isActiveFloat, - mds.mdName, + active, + mds.name, ) - ch <- prometheus.MustNewConstMetric( disksActiveDesc, prometheus.GaugeValue, float64(mds.disksActive), - mds.mdName, + mds.name, ) - ch <- prometheus.MustNewConstMetric( disksTotalDesc, prometheus.GaugeValue, float64(mds.disksTotal), - mds.mdName, + mds.name, ) - ch <- prometheus.MustNewConstMetric( blocksTotalDesc, prometheus.GaugeValue, float64(mds.blocksTotal), - mds.mdName, + mds.name, ) - ch <- prometheus.MustNewConstMetric( blocksSyncedDesc, prometheus.GaugeValue, float64(mds.blocksSynced), - mds.mdName, + mds.name, ) - } return nil diff --git a/collector/mdadm_linux_test.go b/collector/mdadm_linux_test.go index 0e4e89ee..94e1ffe8 100644 --- a/collector/mdadm_linux_test.go +++ b/collector/mdadm_linux_test.go @@ -19,7 +19,6 @@ import ( func TestMdadm(t *testing.T) { mdStates, err := parseMdstat("fixtures/proc/mdstat") - if err != nil { t.Fatalf("parsing of reference-file failed entirely: %s", err) } @@ -37,13 +36,13 @@ func TestMdadm(t *testing.T) { "md10": {"md10", true, 2, 2, 314159265, 314159265}, "md11": {"md11", true, 2, 2, 4190208, 4190208}, "md12": {"md12", true, 2, 2, 3886394368, 3886394368}, - "md219": {"md219", false, 2, 2, 7932, 7932}, + "md219": {"md219", false, 0, 0, 7932, 7932}, "md00": {"md00", true, 1, 1, 4186624, 4186624}, } for _, md := range mdStates { - if md != refs[md.mdName] { - t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.mdName, refs[md.mdName], md) + if md != refs[md.name] { + t.Errorf("failed parsing md-device %s correctly: want %v, got %v", md.name, refs[md.name], md) } } @@ -54,7 +53,6 @@ func TestMdadm(t *testing.T) { func TestInvalidMdstat(t *testing.T) { _, err := parseMdstat("fixtures/proc/mdstat_invalid") - if err == nil { t.Fatalf("parsing of invalid reference file did not find any errors") }