diff --git a/collector/collector.go b/collector/collector.go new file mode 100644 index 00000000..5fb6f4fd --- /dev/null +++ b/collector/collector.go @@ -0,0 +1,21 @@ +// Package collector defines the Collector interface and the Factories used to collect and export system metrics. +package collector + +import ( + "github.com/prometheus/client_golang/prometheus" +) + +var Factories []func(Config, prometheus.Registry) (Collector, error) + +// Collector is the interface a collector has to implement. +type Collector interface { + // Update gets new metrics and exposes them via the prometheus registry. + Update() (n int, err error) + + // Name returns the name of the collector. + Name() string +} + +type Config struct { + Attributes map[string]string `json:"attributes"` +} diff --git a/collector/fixtures/diskstats b/collector/fixtures/diskstats new file mode 100644 index 00000000..a35ad5ce --- /dev/null +++ b/collector/fixtures/diskstats @@ -0,0 +1,38 @@ + 1 0 ram0 0 0 0 0 0 0 0 0 0 0 0 + 1 1 ram1 0 0 0 0 0 0 0 0 0 0 0 + 1 2 ram2 0 0 0 0 0 0 0 0 0 0 0 + 1 3 ram3 0 0 0 0 0 0 0 0 0 0 0 + 1 4 ram4 0 0 0 0 0 0 0 0 0 0 0 + 1 5 ram5 0 0 0 0 0 0 0 0 0 0 0 + 1 6 ram6 0 0 0 0 0 0 0 0 0 0 0 + 1 7 ram7 0 0 0 0 0 0 0 0 0 0 0 + 1 8 ram8 0 0 0 0 0 0 0 0 0 0 0 + 1 9 ram9 0 0 0 0 0 0 0 0 0 0 0 + 1 10 ram10 0 0 0 0 0 0 0 0 0 0 0 + 1 11 ram11 0 0 0 0 0 0 0 0 0 0 0 + 1 12 ram12 0 0 0 0 0 0 0 0 0 0 0 + 1 13 ram13 0 0 0 0 0 0 0 0 0 0 0 + 1 14 ram14 0 0 0 0 0 0 0 0 0 0 0 + 1 15 ram15 0 0 0 0 0 0 0 0 0 0 0 + 7 0 loop0 0 0 0 0 0 0 0 0 0 0 0 + 7 1 loop1 0 0 0 0 0 0 0 0 0 0 0 + 7 2 loop2 0 0 0 0 0 0 0 0 0 0 0 + 7 3 loop3 0 0 0 0 0 0 0 0 0 0 0 + 7 4 loop4 0 0 0 0 0 0 0 0 0 0 0 + 7 5 loop5 0 0 0 0 0 0 0 0 0 0 0 + 7 6 loop6 0 0 0 0 0 0 0 0 0 0 0 + 7 7 loop7 0 0 0 0 0 0 0 0 0 0 0 + 8 0 sda 25354637 34367663 1003346126 18492372 28444756 11134226 505697032 63877960 0 9653880 82621804 + 8 1 sda1 250 0 2000 36 0 0 0 0 0 36 36 + 8 2 sda2 246 0 1968 32 0 0 0 0 0 32 32 + 8 3 sda3 340 13 2818 52 11 8 152 8 0 56 60 + 8 4 sda4 25353629 34367650 1003337964 
18492232 27448755 11134218 505696880 61593380 0 7576432 80332428 + 252 0 dm-0 59910002 0 1003337218 46229572 39231014 0 505696880 1158557800 0 11325968 1206301256 + 252 1 dm-1 388 0 3104 84 74 0 592 0 0 76 84 + 252 2 dm-2 11571 0 308350 6536 153522 0 5093416 122884 0 65400 129416 + 252 3 dm-3 3870 0 3870 104 0 0 0 0 0 16 104 + 252 4 dm-4 392 0 1034 28 38 0 137 16 0 24 44 + 252 5 dm-5 3729 0 84279 924 98918 0 1151688 104684 0 58848 105632 + 179 0 mmcblk0 192 3 1560 156 0 0 0 0 0 136 156 + 179 1 mmcblk0p1 17 3 160 24 0 0 0 0 0 24 24 + 179 2 mmcblk0p2 95 0 760 68 0 0 0 0 0 68 68 diff --git a/collector/fixtures/interrupts b/collector/fixtures/interrupts new file mode 100644 index 00000000..300217c7 --- /dev/null +++ b/collector/fixtures/interrupts @@ -0,0 +1,31 @@ + CPU0 CPU1 CPU2 CPU3 + 0: 18 0 0 0 IR-IO-APIC-edge timer + 1: 17960 105 28 28 IR-IO-APIC-edge i8042 + 8: 1 0 0 0 IR-IO-APIC-edge rtc0 + 9: 398553 2320 824 863 IR-IO-APIC-fasteoi acpi + 12: 380847 1021 240 198 IR-IO-APIC-edge i8042 + 16: 328511 322879 293782 351412 IR-IO-APIC-fasteoi ehci_hcd:usb1, mmc0 + 23: 1451445 3333499 1092032 2644609 IR-IO-APIC-fasteoi ehci_hcd:usb2 + 40: 0 0 0 0 DMAR_MSI-edge dmar0 + 41: 0 0 0 0 DMAR_MSI-edge dmar1 + 42: 378324 1734637 440240 2434308 IR-PCI-MSI-edge xhci_hcd + 43: 7434032 8092205 6478877 7492252 IR-PCI-MSI-edge ahci + 44: 140636 226313 347 633 IR-PCI-MSI-edge i915 + 45: 4 22 0 0 IR-PCI-MSI-edge mei_me + 46: 43078464 130 460171 290 IR-PCI-MSI-edge iwlwifi + 47: 350 224 0 0 IR-PCI-MSI-edge snd_hda_intel +NMI: 47 5031 6211 4968 Non-maskable interrupts +LOC: 174326351 135776678 168393257 130980079 Local timer interrupts +SPU: 0 0 0 0 Spurious interrupts +PMI: 47 5031 6211 4968 Performance monitoring interrupts +IWI: 1509379 2411776 1512975 2428828 IRQ work interrupts +RTR: 0 0 0 0 APIC ICR read retries +RES: 10847134 9111507 15999335 7457260 Rescheduling interrupts +CAL: 148554 157441 142912 155528 Function call interrupts +TLB: 10460334 9918429 10494258 10345022 TLB 
shootdowns +TRM: 0 0 0 0 Thermal event interrupts +THR: 0 0 0 0 Threshold APIC interrupts +MCE: 0 0 0 0 Machine check exceptions +MCP: 2406 2399 2399 2399 Machine check polls +ERR: 0 +MIS: 0 diff --git a/collector/fixtures/loadavg b/collector/fixtures/loadavg new file mode 100644 index 00000000..8897b7ce --- /dev/null +++ b/collector/fixtures/loadavg @@ -0,0 +1 @@ +0.21 0.37 0.39 1/719 19737 diff --git a/collector/fixtures/meminfo b/collector/fixtures/meminfo new file mode 100644 index 00000000..9e97d448 --- /dev/null +++ b/collector/fixtures/meminfo @@ -0,0 +1,42 @@ +MemTotal: 3742148 kB +MemFree: 225472 kB +Buffers: 22040 kB +Cached: 930888 kB +SwapCached: 192504 kB +Active: 2233416 kB +Inactive: 1028728 kB +Active(anon): 2020004 kB +Inactive(anon): 883052 kB +Active(file): 213412 kB +Inactive(file): 145676 kB +Unevictable: 32 kB +Mlocked: 32 kB +SwapTotal: 4194300 kB +SwapFree: 3155360 kB +Dirty: 1052 kB +Writeback: 0 kB +AnonPages: 2244172 kB +Mapped: 239220 kB +Shmem: 593840 kB +Slab: 98932 kB +SReclaimable: 44772 kB +SUnreclaim: 54160 kB +KernelStack: 5800 kB +PageTables: 75212 kB +NFS_Unstable: 0 kB +Bounce: 0 kB +WritebackTmp: 0 kB +CommitLimit: 6065372 kB +Committed_AS: 7835436 kB +VmallocTotal: 34359738367 kB +VmallocUsed: 352840 kB +VmallocChunk: 34359338876 kB +HardwareCorrupted: 0 kB +AnonHugePages: 0 kB +HugePages_Total: 0 +HugePages_Free: 0 +HugePages_Rsvd: 0 +HugePages_Surp: 0 +Hugepagesize: 2048 kB +DirectMap4k: 185660 kB +DirectMap2M: 3698688 kB diff --git a/collector/fixtures/net-dev b/collector/fixtures/net-dev new file mode 100644 index 00000000..6209c16b --- /dev/null +++ b/collector/fixtures/net-dev @@ -0,0 +1,8 @@ +Inter-| Receive | Transmit + face |bytes packets errs drop fifo frame compressed multicast|bytes packets errs drop fifo colls carrier compressed + tun0: 1888 24 0 0 0 0 0 0 67120 934 0 0 0 0 0 0 +veth4B09XN: 648 8 0 0 0 0 0 0 1943284 10640 0 0 0 0 0 0 + lo: 435303245 1832522 0 0 0 0 0 0 435303245 1832522 0 0 0 0 0 0 +lxcbr0: 0 0 0 
0 0 0 0 0 2630299 28339 0 0 0 0 0 0 + wlan0: 10437182923 13899359 0 0 0 0 0 0 2851649360 11726200 0 0 0 0 0 0 +docker0: 64910168 1065585 0 0 0 0 0 0 2681662018 1929779 0 0 0 0 0 0 diff --git a/exporter/ganglia/format.go b/collector/ganglia/format.go similarity index 100% rename from exporter/ganglia/format.go rename to collector/ganglia/format.go diff --git a/exporter/gmond_collector.go b/collector/gmond_collector.go similarity index 91% rename from exporter/gmond_collector.go rename to collector/gmond_collector.go index 90d73afb..d9c299dc 100644 --- a/exporter/gmond_collector.go +++ b/collector/gmond_collector.go @@ -1,17 +1,18 @@ // +build ganglia -package exporter +package collector import ( "bufio" "encoding/xml" "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/node_exporter/exporter/ganglia" "io" "net" "regexp" "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/node_exporter/collector/ganglia" ) const ( @@ -23,18 +24,18 @@ const ( type gmondCollector struct { name string Metrics map[string]prometheus.Gauge - config config + config Config registry prometheus.Registry } func init() { - collectorFactories = append(collectorFactories, NewGmondCollector) + Factories = append(Factories, NewGmondCollector) } var illegalCharsRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) // Takes a config struct and prometheus registry and returns a new Collector scraping ganglia. 
-func NewGmondCollector(config config, registry prometheus.Registry) (Collector, error) { +func NewGmondCollector(config Config, registry prometheus.Registry) (Collector, error) { c := gmondCollector{ name: "gmond_collector", config: config, diff --git a/exporter/helper.go b/collector/helper.go similarity index 85% rename from exporter/helper.go rename to collector/helper.go index 6baa554f..00012a2d 100644 --- a/exporter/helper.go +++ b/collector/helper.go @@ -1,12 +1,15 @@ -package exporter +package collector import ( + "flag" "fmt" "log" "strconv" "strings" ) +var verbose = flag.Bool("verbose", false, "Verbose output.") + func debug(name string, format string, a ...interface{}) { if *verbose { f := fmt.Sprintf("%s: %s", name, format) diff --git a/exporter/native_collector.go b/collector/native_collector.go similarity index 89% rename from exporter/native_collector.go rename to collector/native_collector.go index 281a0e09..bc917f7e 100644 --- a/exporter/native_collector.go +++ b/collector/native_collector.go @@ -1,11 +1,10 @@ // +build !nonative -package exporter +package collector import ( "bufio" "fmt" - "github.com/prometheus/client_golang/prometheus" "io" "io/ioutil" "os" @@ -13,6 +12,8 @@ import ( "strconv" "strings" "time" + + "github.com/prometheus/client_golang/prometheus" ) const ( @@ -42,16 +43,16 @@ type nativeCollector struct { netStats prometheus.Counter diskStats prometheus.Counter name string - config config + config Config } func init() { - collectorFactories = append(collectorFactories, NewNativeCollector) + Factories = append(Factories, NewNativeCollector) } // Takes a config struct and prometheus registry and returns a new Collector exposing // load, seconds since last login and a list of tags as specified by config. 
-func NewNativeCollector(config config, registry prometheus.Registry) (Collector, error) { +func NewNativeCollector(config Config, registry prometheus.Registry) (Collector, error) { c := nativeCollector{ name: "native_collector", config: config, @@ -160,7 +161,7 @@ func (c *nativeCollector) Update() (updates int, err error) { updates++ fv, err := strconv.ParseFloat(value, 64) if err != nil { - return updates, fmt.Errorf("Invalid value in interrupts: %s", fv, err) + return updates, fmt.Errorf("Invalid value %s in interrupts: %s", value, err) } labels := map[string]string{ "CPU": strconv.Itoa(cpuNo), @@ -217,7 +218,11 @@ func getLoad() (float64, error) { if err != nil { return 0, err } - parts := strings.Fields(string(data)) + return parseLoad(string(data)) +} + +func parseLoad(data string) (float64, error) { + parts := strings.Fields(data) load, err := strconv.ParseFloat(parts[0], 64) if err != nil { return 0, fmt.Errorf("Could not parse load '%s': %s", parts[0], err) @@ -276,13 +281,17 @@ func getSecondsSinceLastLogin() (float64, error) { } func getMemInfo() (map[string]string, error) { - memInfo := map[string]string{} - fh, err := os.Open(procMemInfo) + file, err := os.Open(procMemInfo) if err != nil { return nil, err } - defer fh.Close() - scanner := bufio.NewScanner(fh) + return parseMemInfo(file) +} + +func parseMemInfo(r io.ReadCloser) (map[string]string, error) { + defer r.Close() + memInfo := map[string]string{} + scanner := bufio.NewScanner(r) for scanner.Scan() { line := scanner.Text() parts := strings.Fields(string(line)) @@ -308,13 +317,17 @@ type interrupt struct { } func getInterrupts() (map[string]interrupt, error) { - interrupts := map[string]interrupt{} - fh, err := os.Open(procInterrupts) + file, err := os.Open(procInterrupts) if err != nil { return nil, err } - defer fh.Close() - scanner := bufio.NewScanner(fh) + return parseInterrupts(file) +} + +func parseInterrupts(r io.ReadCloser) (map[string]interrupt, error) { + defer r.Close() + interrupts 
:= map[string]interrupt{} + scanner := bufio.NewScanner(r) if !scanner.Scan() { return nil, fmt.Errorf("%s empty", procInterrupts) } @@ -343,15 +356,20 @@ func getInterrupts() (map[string]interrupt, error) { } func getNetStats() (map[string]map[string]map[string]string, error) { - netStats := map[string]map[string]map[string]string{} - netStats["transmit"] = map[string]map[string]string{} - netStats["receive"] = map[string]map[string]string{} - fh, err := os.Open(procNetDev) + file, err := os.Open(procNetDev) if err != nil { return nil, err } - defer fh.Close() - scanner := bufio.NewScanner(fh) + return parseNetStats(file) +} + +func parseNetStats(r io.ReadCloser) (map[string]map[string]map[string]string, error) { + defer r.Close() + netStats := map[string]map[string]map[string]string{} + netStats["transmit"] = map[string]map[string]string{} + netStats["receive"] = map[string]map[string]string{} + + scanner := bufio.NewScanner(r) scanner.Scan() // skip first header scanner.Scan() parts := strings.Split(string(scanner.Text()), "|") @@ -392,13 +410,17 @@ func parseNetDevLine(parts []string, header []string) (map[string]string, error) } func getDiskStats() (map[string]map[string]string, error) { - diskStats := map[string]map[string]string{} - fh, err := os.Open(procDiskStats) + file, err := os.Open(procDiskStats) if err != nil { return nil, err } - defer fh.Close() - scanner := bufio.NewScanner(fh) + return parseDiskStats(file) +} + +func parseDiskStats(r io.ReadCloser) (map[string]map[string]string, error) { + defer r.Close() + diskStats := map[string]map[string]string{} + scanner := bufio.NewScanner(r) for scanner.Scan() { parts := strings.Fields(string(scanner.Text())) if len(parts) != len(diskStatsHeader)+3 { // we strip major, minor and dev diff --git a/collector/native_collector_test.go b/collector/native_collector_test.go new file mode 100644 index 00000000..22f452ea --- /dev/null +++ b/collector/native_collector_test.go @@ -0,0 +1,118 @@ +package collector + 
+import ( + "io/ioutil" + "os" + "testing" +) + +const ( + loadExpected = 0.21 + + memTotalExpected = "3742148" + memDirectMap2MExpected = "3698688" + + interruptsNmi1Expected = "5031" + + netReceiveWlan0Bytes = "10437182923" + netTransmitTun0Packages = "934" + + diskSda4ReadsCompleted = "25353629" + diskMmcIoTimeWeighted = "68" + + testProcLoad = "fixtures/loadavg" + testProcMemInfo = "fixtures/meminfo" + testProcInterrupts = "fixtures/interrupts" + testProcNetDev = "fixtures/net-dev" + testProcDiskStats = "fixtures/diskstats" +) + +func TestLoad(t *testing.T) { + data, err := ioutil.ReadFile(testProcLoad) + if err != nil { + t.Fatal(err) + } + load, err := parseLoad(string(data)) + if err != nil { + t.Fatal(err) + } + if load != loadExpected { + t.Fatalf("Unexpected load: %f != %f", load, loadExpected) + } +} + +func TestMemInfo(t *testing.T) { + file, err := os.Open(testProcMemInfo) + if err != nil { + t.Fatal(err) + } + + memInfo, err := parseMemInfo(file) + if err != nil { + t.Fatal(err) + } + if memInfo["MemTotal_kB"] != memTotalExpected { + t.Fatalf("Unexpected memory: %s != %s", memInfo["MemTotal_kB"], memTotalExpected) + } + if memInfo["DirectMap2M_kB"] != memDirectMap2MExpected { + t.Fatalf("Unexpected memory: %s != %s", memInfo["DirectMap2M_kB"], memDirectMap2MExpected) // report the field that actually failed + } + +} + +func TestInterrupts(t *testing.T) { + file, err := os.Open(testProcInterrupts) + if err != nil { + t.Fatal(err) + } + + interrupts, err := parseInterrupts(file) + if err != nil { + t.Fatal(err) + } + if interrupts["NMI"].values[1] != interruptsNmi1Expected { + t.Fatalf("Unexpected interrupts: %s != %s", interrupts["NMI"].values[1], + interruptsNmi1Expected) + } + +} + +func TestNetStats(t *testing.T) { + file, err := os.Open(testProcNetDev) + if err != nil { + t.Fatal(err) + } + netStats, err := parseNetStats(file) + if err != nil { + t.Fatal(err) + } + if netStats["receive"]["wlan0"]["bytes"] != netReceiveWlan0Bytes { + t.Fatalf("Unexpected netstats: %s != %s", 
netStats["receive"]["wlan0"]["bytes"], + netReceiveWlan0Bytes) + } + if netStats["transmit"]["tun0"]["packets"] != netTransmitTun0Packages { + t.Fatalf("Unexpected netstats: %s != %s", netStats["transmit"]["tun0"]["packets"], + netTransmitTun0Packages) + } +} + +func TestDiskStats(t *testing.T) { + file, err := os.Open(testProcDiskStats) + if err != nil { + t.Fatal(err) + } + diskStats, err := parseDiskStats(file) + if err != nil { + t.Fatal(err) + } + + if diskStats["sda4"]["reads_completed"] != diskSda4ReadsCompleted { + t.Fatalf("Unexpected diskstats: %s != %s", diskStats["sda4"]["reads_completed"], + diskSda4ReadsCompleted) + } + + if diskStats["mmcblk0p2"]["io_time_weighted"] != diskMmcIoTimeWeighted { + t.Fatalf("Unexpected diskstats: %s != %s", + diskStats["mmcblk0p2"]["io_time_weighted"], diskMmcIoTimeWeighted) + } +} diff --git a/exporter/runit_collector.go b/collector/runit_collector.go similarity index 91% rename from exporter/runit_collector.go rename to collector/runit_collector.go index 22f0d707..24b89731 100644 --- a/exporter/runit_collector.go +++ b/collector/runit_collector.go @@ -1,6 +1,6 @@ // +build runit -package exporter +package collector import ( "github.com/prometheus/client_golang/prometheus" @@ -9,17 +9,17 @@ import ( type runitCollector struct { name string - config config + config Config state prometheus.Gauge stateDesired prometheus.Gauge stateNormal prometheus.Gauge } func init() { - collectorFactories = append(collectorFactories, NewRunitCollector) + Factories = append(Factories, NewRunitCollector) } -func NewRunitCollector(config config, registry prometheus.Registry) (Collector, error) { +func NewRunitCollector(config Config, registry prometheus.Registry) (Collector, error) { c := runitCollector{ name: "runit_collector", config: config, diff --git a/exporter/exporter.go b/exporter/exporter.go deleted file mode 100644 index 2e0c4756..00000000 --- a/exporter/exporter.go +++ /dev/null @@ -1,167 +0,0 @@ -// Exporter is a prometheus 
exporter using multiple collectorFactories to collect and export system metrics. -package exporter - -import ( - "encoding/json" - "flag" - "fmt" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/exp" - "io/ioutil" - "log" - "net/http" - "os" - "os/signal" - "runtime/pprof" - "sync" - "syscall" - "time" -) - -var verbose = flag.Bool("verbose", false, "Verbose output.") -var collectorFactories []func(config, prometheus.Registry) (Collector, error) - -// Interface a collector has to implement. -type Collector interface { - // Get new metrics and expose them via prometheus registry. - Update() (n int, err error) - - // Returns the name of the collector - Name() string -} - -type config struct { - Attributes map[string]string `json:"attributes"` - ListeningAddress string `json:"listeningAddress"` - ScrapeInterval int `json:"scrapeInterval"` -} - -func (e *exporter) loadConfig() (err error) { - log.Printf("Reading config %s", e.configFile) - bytes, err := ioutil.ReadFile(e.configFile) - if err != nil { - return - } - - return json.Unmarshal(bytes, &e.config) // Make sure this is safe -} - -type exporter struct { - configFile string - listeningAddress string - scrapeInterval time.Duration - scrapeDurations prometheus.Histogram - metricsUpdated prometheus.Gauge - config config - registry prometheus.Registry - Collectors []Collector - MemProfile string -} - -// New takes the path to a config file and returns an exporter instance -func New(configFile string) (e exporter, err error) { - registry := prometheus.NewRegistry() - e = exporter{ - configFile: configFile, - scrapeDurations: prometheus.NewDefaultHistogram(), - metricsUpdated: prometheus.NewGauge(), - listeningAddress: ":8080", - scrapeInterval: 60 * time.Second, - registry: registry, - } - - err = e.loadConfig() - if err != nil { - return e, fmt.Errorf("Couldn't read config: %s", err) - } - for _, fn := range collectorFactories { - c, err := fn(e.config, e.registry) 
- if err != nil { - return e, err - } - e.Collectors = append(e.Collectors, c) - } - - if e.config.ListeningAddress != "" { - e.listeningAddress = e.config.ListeningAddress - } - if e.config.ScrapeInterval != 0 { - e.scrapeInterval = time.Duration(e.config.ScrapeInterval) * time.Second - } - - registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, e.scrapeDurations) - registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, e.metricsUpdated) - - return e, nil -} - -func (e *exporter) serveStatus() { - exp.Handle(prometheus.ExpositionResource, e.registry.Handler()) - http.ListenAndServe(e.listeningAddress, exp.DefaultCoarseMux) -} - -func (e *exporter) Execute(c Collector) { - begin := time.Now() - updates, err := c.Update() - duration := time.Since(begin) - - label := map[string]string{ - "collector": c.Name(), - } - if err != nil { - log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err) - label["result"] = "error" - } else { - log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds()) - label["result"] = "success" - } - e.scrapeDurations.Add(label, duration.Seconds()) - e.metricsUpdated.Set(label, float64(updates)) -} - -func (e *exporter) Loop() { - sigHup := make(chan os.Signal) - sigUsr1 := make(chan os.Signal) - signal.Notify(sigHup, syscall.SIGHUP) - signal.Notify(sigUsr1, syscall.SIGUSR1) - - go e.serveStatus() - - tick := time.Tick(e.scrapeInterval) - for { - select { - case <-sigHup: - err := e.loadConfig() - if err != nil { - log.Printf("Couldn't reload config: %s", err) - continue - } - log.Printf("Got new config") - tick = time.Tick(e.scrapeInterval) - - case <-tick: - log.Printf("Starting new scrape interval") - wg := sync.WaitGroup{} - wg.Add(len(e.Collectors)) - for _, c := range e.Collectors { - go func(c Collector) { - e.Execute(c) - wg.Done() - }(c) - } - wg.Wait() - - case 
<-sigUsr1: - log.Printf("got signal") - if e.MemProfile != "" { - log.Printf("Writing memory profile to %s", e.MemProfile) - f, err := os.Create(e.MemProfile) - if err != nil { - log.Fatal(err) - } - pprof.WriteHeapProfile(f) - f.Close() - } - } - } -} diff --git a/node_exporter.conf b/node_exporter.conf index 261d76b4..6800bac1 100644 --- a/node_exporter.conf +++ b/node_exporter.conf @@ -1,5 +1,4 @@ { - "scrapeInterval": 10, "attributes" : { "web-server" : "1", "zone" : "a", diff --git a/node_exporter.go b/node_exporter.go index 3d14e703..ee2d5017 100644 --- a/node_exporter.go +++ b/node_exporter.go @@ -1,27 +1,140 @@ package main import ( + "encoding/json" "flag" + "io/ioutil" "log" + "net/http" + "os" + "os/signal" + "runtime/pprof" + "sync" + "syscall" + "time" - "github.com/prometheus/node_exporter/exporter" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/exp" + "github.com/prometheus/node_exporter/collector" ) var ( - configFile = flag.String("config", "node_exporter.conf", "config file.") - memprofile = flag.String("memprofile", "", "write memory profile to this file") + configFile = flag.String("config", "node_exporter.conf", "config file.") + memProfile = flag.String("memprofile", "", "write memory profile to this file") + listeningAddress = flag.String("listen", ":8080", "address to listen on") + interval = flag.Duration("interval", 60*time.Second, "refresh interval") + scrapeDurations = prometheus.NewDefaultHistogram() + metricsUpdated = prometheus.NewGauge() ) func main() { flag.Parse() - - exporter, err := exporter.New(*configFile) + registry := prometheus.NewRegistry() + collectors, err := loadCollectors(*configFile, registry) if err != nil { - log.Fatalf("Couldn't instantiate exporter: %s", err) + log.Fatalf("Couldn't load config and collectors: %s", err) } + + registry.Register("node_exporter_scrape_duration_seconds", "node_exporter: Duration of a scrape job.", prometheus.NilLabels, 
scrapeDurations) + registry.Register("node_exporter_metrics_updated", "node_exporter: Number of metrics updated.", prometheus.NilLabels, metricsUpdated) + log.Printf("Registered collectors:") - for _, c := range exporter.Collectors { + for _, c := range collectors { log.Print(" - ", c.Name()) } - exporter.Loop() + + sigHup := make(chan os.Signal, 1) // buffered: signal.Notify never blocks, so an unbuffered channel drops signals while the loop is busy + sigUsr1 := make(chan os.Signal, 1) + signal.Notify(sigHup, syscall.SIGHUP) + signal.Notify(sigUsr1, syscall.SIGUSR1) + + go serveStatus(registry) + + tick := time.Tick(*interval) + for { + select { + case <-sigHup: + collectors, err = loadCollectors(*configFile, registry) + if err != nil { + log.Fatalf("Couldn't load config and collectors: %s", err) + } + log.Printf("Reloaded collectors and config") + tick = time.Tick(*interval) + + case <-tick: + log.Printf("Starting new interval") + wg := sync.WaitGroup{} + wg.Add(len(collectors)) + for _, c := range collectors { + go func(c collector.Collector) { + Execute(c) + wg.Done() + }(c) + } + wg.Wait() + + case <-sigUsr1: + log.Printf("got signal") + if *memProfile != "" { + log.Printf("Writing memory profile to %s", *memProfile) + f, err := os.Create(*memProfile) + if err != nil { + log.Fatal(err) + } + pprof.WriteHeapProfile(f) + f.Close() + } + } + } + +} + +func loadCollectors(file string, registry prometheus.Registry) ([]collector.Collector, error) { + collectors := []collector.Collector{} + config, err := getConfig(file) + if err != nil { + return nil, err // let the caller decide how to handle a bad config instead of exiting here + } + for _, fn := range collector.Factories { + c, err := fn(*config, registry) + if err != nil { + return nil, err + } + collectors = append(collectors, c) + } + return collectors, nil +} + +func getConfig(file string) (*collector.Config, error) { + config := &collector.Config{} + log.Printf("Reading config %s", file) + bytes, err := ioutil.ReadFile(file) // read the requested file, not the global flag value + if err != nil { + return nil, err + } + return config, json.Unmarshal(bytes, config) +} + +func 
serveStatus(registry prometheus.Registry) { + exp.Handle(prometheus.ExpositionResource, registry.Handler()) + log.Fatal(http.ListenAndServe(*listeningAddress, exp.DefaultCoarseMux)) // ListenAndServe only returns on failure; without it no metrics are exposed +} + +func Execute(c collector.Collector) { + begin := time.Now() + updates, err := c.Update() + duration := time.Since(begin) + + label := map[string]string{ + "collector": c.Name(), + } + if err != nil { + log.Printf("ERROR: %s failed after %fs: %s", c.Name(), duration.Seconds(), err) + label["result"] = "error" + } else { + log.Printf("OK: %s success after %fs.", c.Name(), duration.Seconds()) + label["result"] = "success" + } + scrapeDurations.Add(label, duration.Seconds()) + metricsUpdated.Set(label, float64(updates)) }