2013-05-07 16:40:10 +02:00
package main
import (
2014-02-18 12:35:11 +01:00
"encoding/json"
2013-05-07 16:40:10 +02:00
"flag"
2014-06-04 15:09:33 +02:00
"fmt"
2014-02-18 12:35:11 +01:00
"io/ioutil"
2013-05-07 16:40:10 +02:00
"log"
2014-02-18 12:35:11 +01:00
"net/http"
"os"
"os/signal"
"runtime/pprof"
2014-06-04 13:12:34 +02:00
"strings"
2014-02-18 12:35:11 +01:00
"sync"
"syscall"
"time"
2014-02-07 17:09:39 +01:00
2014-06-04 13:12:34 +02:00
"github.com/golang/glog"
2014-02-18 12:35:11 +01:00
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/node_exporter/collector"
2013-05-07 16:40:10 +02:00
)
2014-06-26 19:20:36 +02:00
// subsystem is the value used for the Subsystem field of the exporter's own
// metrics (scrapeDurations and metricsUpdated) declared in this file.
const subsystem = "exporter"
2013-05-07 16:40:10 +02:00
var (
2014-06-04 13:12:34 +02:00
configFile = flag . String ( "config" , "node_exporter.conf" , "config file." )
memProfile = flag . String ( "memprofile" , "" , "write memory profile to this file" )
listeningAddress = flag . String ( "listen" , ":8080" , "address to listen on" )
2014-07-28 12:37:01 +02:00
enabledCollectors = flag . String ( "enabledCollectors" , "attributes,diskstats,filesystem,loadavg,meminfo,stat,time,netdev" , "comma-seperated list of collectors to use" )
2014-06-04 15:09:33 +02:00
printCollectors = flag . Bool ( "printCollectors" , false , "If true, print available collectors and exit" )
2014-06-04 13:12:34 +02:00
interval = flag . Duration ( "interval" , 60 * time . Second , "refresh interval" )
2014-06-26 19:20:36 +02:00
collectorLabelNames = [ ] string { "collector" , "result" }
scrapeDurations = prometheus . NewSummaryVec (
prometheus . SummaryOpts {
Namespace : collector . Namespace ,
Subsystem : subsystem ,
Name : "scrape_duration_seconds" ,
Help : "node_exporter: Duration of a scrape job." ,
} ,
collectorLabelNames ,
)
metricsUpdated = prometheus . NewGaugeVec (
prometheus . GaugeOpts {
Namespace : collector . Namespace ,
Subsystem : subsystem ,
Name : "metrics_updated" ,
Help : "node_exporter: Number of metrics updated." ,
} ,
collectorLabelNames ,
)
2013-05-07 16:40:10 +02:00
)
func main ( ) {
flag . Parse ( )
2014-06-04 15:09:33 +02:00
if * printCollectors {
fmt . Printf ( "Available collectors:\n" )
for n , _ := range collector . Factories {
fmt . Printf ( " - %s\n" , n )
}
return
}
2014-06-26 19:20:36 +02:00
collectors , err := loadCollectors ( * configFile )
2013-05-07 16:40:10 +02:00
if err != nil {
2014-02-18 12:35:11 +01:00
log . Fatalf ( "Couldn't load config and collectors: %s" , err )
2013-05-07 16:40:10 +02:00
}
2014-02-18 12:35:11 +01:00
2014-06-26 19:20:36 +02:00
prometheus . MustRegister ( scrapeDurations )
prometheus . MustRegister ( metricsUpdated )
2014-02-18 12:35:11 +01:00
2014-06-04 13:12:34 +02:00
glog . Infof ( "Enabled collectors:" )
for n , _ := range collectors {
glog . Infof ( " - %s" , n )
2014-02-07 17:09:39 +01:00
}
2014-02-18 12:35:11 +01:00
sigHup := make ( chan os . Signal )
sigUsr1 := make ( chan os . Signal )
signal . Notify ( sigHup , syscall . SIGHUP )
signal . Notify ( sigUsr1 , syscall . SIGUSR1 )
2014-06-26 19:20:36 +02:00
go serveStatus ( )
2014-02-18 12:35:11 +01:00
2014-06-04 13:12:34 +02:00
glog . Infof ( "Starting initial collection" )
2014-05-23 14:07:34 +02:00
collect ( collectors )
2014-02-18 12:35:11 +01:00
tick := time . Tick ( * interval )
for {
select {
case <- sigHup :
2014-06-26 19:20:36 +02:00
collectors , err = loadCollectors ( * configFile )
2014-02-18 12:35:11 +01:00
if err != nil {
log . Fatalf ( "Couldn't load config and collectors: %s" , err )
}
2014-06-04 13:12:34 +02:00
glog . Infof ( "Reloaded collectors and config" )
2014-02-18 12:35:11 +01:00
tick = time . Tick ( * interval )
case <- tick :
2014-06-04 13:12:34 +02:00
glog . Infof ( "Starting new interval" )
2014-05-23 14:07:34 +02:00
collect ( collectors )
2014-02-18 12:35:11 +01:00
case <- sigUsr1 :
2014-06-04 13:12:34 +02:00
glog . Infof ( "got signal" )
2014-02-18 12:35:11 +01:00
if * memProfile != "" {
2014-06-04 13:12:34 +02:00
glog . Infof ( "Writing memory profile to %s" , * memProfile )
2014-02-18 12:35:11 +01:00
f , err := os . Create ( * memProfile )
if err != nil {
log . Fatal ( err )
}
pprof . WriteHeapProfile ( f )
f . Close ( )
}
}
}
}
2014-06-26 19:20:36 +02:00
func loadCollectors ( file string ) ( map [ string ] collector . Collector , error ) {
2014-06-04 13:12:34 +02:00
collectors := map [ string ] collector . Collector { }
2014-02-18 12:35:11 +01:00
config , err := getConfig ( file )
if err != nil {
log . Fatalf ( "Couldn't read config %s: %s" , file , err )
}
2014-06-04 13:12:34 +02:00
for _ , name := range strings . Split ( * enabledCollectors , "," ) {
fn , ok := collector . Factories [ name ]
if ! ok {
log . Fatalf ( "Collector '%s' not available" , name )
}
2014-06-26 19:20:36 +02:00
c , err := fn ( * config )
2014-02-18 12:35:11 +01:00
if err != nil {
return nil , err
}
2014-06-04 13:12:34 +02:00
collectors [ name ] = c
2014-02-18 12:35:11 +01:00
}
return collectors , nil
}
func getConfig ( file string ) ( * collector . Config , error ) {
config := & collector . Config { }
2014-06-04 13:12:34 +02:00
glog . Infof ( "Reading config %s" , * configFile )
2014-02-18 12:35:11 +01:00
bytes , err := ioutil . ReadFile ( * configFile )
if err != nil {
return nil , err
}
return config , json . Unmarshal ( bytes , & config )
}
2014-06-26 19:20:36 +02:00
func serveStatus ( ) {
http . Handle ( "/metrics" , prometheus . Handler ( ) )
http . ListenAndServe ( * listeningAddress , nil )
2014-02-18 12:35:11 +01:00
}
2014-06-04 13:12:34 +02:00
func collect ( collectors map [ string ] collector . Collector ) {
2014-05-23 14:07:34 +02:00
wg := sync . WaitGroup { }
wg . Add ( len ( collectors ) )
2014-06-04 13:12:34 +02:00
for n , c := range collectors {
go func ( n string , c collector . Collector ) {
Execute ( n , c )
2014-05-23 14:07:34 +02:00
wg . Done ( )
2014-06-04 13:12:34 +02:00
} ( n , c )
2014-05-23 14:07:34 +02:00
}
wg . Wait ( )
}
2014-06-04 13:12:34 +02:00
func Execute ( name string , c collector . Collector ) {
2014-02-18 12:35:11 +01:00
begin := time . Now ( )
updates , err := c . Update ( )
duration := time . Since ( begin )
2014-06-26 19:20:36 +02:00
var result string
2014-02-18 12:35:11 +01:00
if err != nil {
2014-06-04 13:12:34 +02:00
glog . Infof ( "ERROR: %s failed after %fs: %s" , name , duration . Seconds ( ) , err )
2014-06-26 19:20:36 +02:00
result = "error"
2014-02-18 12:35:11 +01:00
} else {
2014-06-04 13:12:34 +02:00
glog . Infof ( "OK: %s success after %fs." , name , duration . Seconds ( ) )
2014-06-26 19:20:36 +02:00
result = "success"
2014-02-18 12:35:11 +01:00
}
2014-06-26 19:20:36 +02:00
scrapeDurations . WithLabelValues ( name , result ) . Observe ( duration . Seconds ( ) )
metricsUpdated . WithLabelValues ( name , result ) . Set ( float64 ( updates ) )
2013-05-07 16:40:10 +02:00
}