2015-09-26 17:36:40 +02:00
// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2014-07-28 12:36:28 +02:00
// +build !nontp
package collector
import (
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/beevik/ntp"
	"github.com/prometheus/client_golang/prometheus"
	"gopkg.in/alecthomas/kingpin.v2"
)
2017-09-19 10:36:14 +02:00
// Constants shared by the NTP collector.
const (
	// ntpSubsystem is the subsystem component handed to
	// prometheus.BuildFQName for every metric of this collector.
	ntpSubsystem = "ntp"

	// hour24 is a fixed 24-hour span. The `time` package exports no `Day`
	// constant because a calendar day is not always 24h long (DST).
	hour24 = time.Hour * 24
)
2014-07-28 12:36:28 +02:00
var (
2017-09-19 10:36:14 +02:00
ntpServer = kingpin . Flag ( "collector.ntp.server" , "NTP server to use for ntp collector" ) . Default ( "127.0.0.1" ) . String ( )
2017-08-12 15:07:24 +02:00
ntpProtocolVersion = kingpin . Flag ( "collector.ntp.protocol-version" , "NTP protocol version" ) . Default ( "4" ) . Int ( )
2017-09-19 10:36:14 +02:00
ntpServerIsLocal = kingpin . Flag ( "collector.ntp.server-is-local" , "Certify that collector.ntp.server address is the same local host as this collector." ) . Default ( "false" ) . Bool ( )
ntpIPTTL = kingpin . Flag ( "collector.ntp.ip-ttl" , "IP TTL to use while sending NTP query" ) . Default ( "1" ) . Int ( )
// 3.46608s ~ 1.5s + PHI * (1 << maxPoll), where 1.5s is MAXDIST from ntp.org, it is 1.0 in RFC5905
// max-distance option is used as-is without phi*(1<<poll)
ntpMaxDistance = kingpin . Flag ( "collector.ntp.max-distance" , "Max accumulated distance to the root" ) . Default ( "3.46608s" ) . Duration ( )
ntpOffsetTolerance = kingpin . Flag ( "collector.ntp.local-offset-tolerance" , "Offset between local clock and local ntpd time to tolerate" ) . Default ( "1ms" ) . Duration ( )
leapMidnight time . Time
2014-07-28 12:36:28 +02:00
)
type ntpCollector struct {
2017-09-19 10:36:14 +02:00
stratum , leap , rtt , offset , reftime , rootDelay , rootDispersion , sanity typedDesc
2014-07-28 12:36:28 +02:00
}
func init ( ) {
2017-09-28 15:06:26 +02:00
registerCollector ( "ntp" , defaultDisabled , NewNtpCollector )
2014-07-28 12:36:28 +02:00
}
2017-09-19 10:36:14 +02:00
// NewNtpCollector returns a new Collector exposing sanity of local NTP server.
// Default definition of "local" is:
// - collector.ntp.server address is a loopback address (or collector.ntp.server-is-mine flag is turned on)
// - the server is reachable with outgoin IP_TTL = 1
2015-05-20 20:04:49 +02:00
func NewNtpCollector ( ) ( Collector , error ) {
2017-09-19 10:36:14 +02:00
ipaddr := net . ParseIP ( * ntpServer )
if ! * ntpServerIsLocal && ( ipaddr == nil || ! ipaddr . IsLoopback ( ) ) {
2017-09-28 15:06:26 +02:00
return nil , fmt . Errorf ( "only IP address of local NTP server is valid for --collector.ntp.server" )
2014-07-28 12:36:28 +02:00
}
2017-09-19 10:36:14 +02:00
2015-11-10 10:07:30 +01:00
if * ntpProtocolVersion < 2 || * ntpProtocolVersion > 4 {
2015-11-13 16:09:11 +01:00
return nil , fmt . Errorf ( "invalid NTP protocol version %d; must be 2, 3, or 4" , * ntpProtocolVersion )
2015-11-10 10:07:30 +01:00
}
2014-07-28 12:36:28 +02:00
2017-09-19 10:36:14 +02:00
if * ntpOffsetTolerance < 0 {
return nil , fmt . Errorf ( "Offset tolerance must be non-negative" )
}
2014-11-25 03:00:17 +01:00
return & ntpCollector {
2017-09-19 10:36:14 +02:00
stratum : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "stratum" ) ,
2017-09-19 10:36:14 +02:00
"NTPD stratum." ,
2016-12-28 15:21:31 +01:00
nil , nil ,
) , prometheus . GaugeValue } ,
2017-09-19 10:36:14 +02:00
leap : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "leap" ) ,
2017-09-19 10:36:14 +02:00
"NTPD leap second indicator, 2 bits." ,
nil , nil ,
) , prometheus . GaugeValue } ,
rtt : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "rtt_seconds" ) ,
2017-09-19 10:36:14 +02:00
"RTT to NTPD." ,
nil , nil ,
) , prometheus . GaugeValue } ,
offset : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "offset_seconds" ) ,
2017-09-19 10:36:14 +02:00
"ClockOffset between NTP and local clock." ,
nil , nil ,
) , prometheus . GaugeValue } ,
reftime : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "reference_timestamp_seconds" ) ,
2017-09-19 10:36:14 +02:00
"NTPD ReferenceTime, UNIX timestamp." ,
nil , nil ,
) , prometheus . GaugeValue } ,
rootDelay : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "root_delay_seconds" ) ,
2017-09-19 10:36:14 +02:00
"NTPD RootDelay." ,
nil , nil ,
) , prometheus . GaugeValue } ,
rootDispersion : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "root_dispersion_seconds" ) ,
2017-09-19 10:36:14 +02:00
"NTPD RootDispersion." ,
nil , nil ,
) , prometheus . GaugeValue } ,
sanity : typedDesc { prometheus . NewDesc (
2017-09-28 15:06:26 +02:00
prometheus . BuildFQName ( namespace , ntpSubsystem , "sanity" ) ,
2017-09-19 10:36:14 +02:00
"NTPD sanity according to RFC5905 heuristics and configured limits." ,
2016-12-28 15:21:31 +01:00
nil , nil ,
) , prometheus . GaugeValue } ,
2014-11-25 03:00:17 +01:00
} , nil
2014-07-28 12:36:28 +02:00
}
2017-02-28 19:47:20 +01:00
func ( c * ntpCollector ) Update ( ch chan <- prometheus . Metric ) error {
2017-09-19 10:36:14 +02:00
resp , err := ntp . QueryWithOptions ( * ntpServer , ntp . QueryOptions {
Version : * ntpProtocolVersion ,
TTL : * ntpIPTTL ,
Timeout : time . Second , // default `ntpdate` timeout
} )
2014-07-28 12:36:28 +02:00
if err != nil {
2017-09-19 10:36:14 +02:00
return fmt . Errorf ( "couldn't get SNTP reply: %s" , err )
}
ch <- c . stratum . mustNewConstMetric ( float64 ( resp . Stratum ) )
ch <- c . leap . mustNewConstMetric ( float64 ( resp . Leap ) )
ch <- c . rtt . mustNewConstMetric ( resp . RTT . Seconds ( ) )
ch <- c . offset . mustNewConstMetric ( resp . ClockOffset . Seconds ( ) )
if resp . ReferenceTime . Unix ( ) > 0 {
// Go Zero is 0001-01-01 00:00:00 UTC
// NTP Zero is 1900-01-01 00:00:00 UTC
// UNIX Zero is 1970-01-01 00:00:00 UTC
// so let's keep ALL ancient `reftime` values as zero
ch <- c . reftime . mustNewConstMetric ( float64 ( resp . ReferenceTime . UnixNano ( ) ) / 1e9 )
} else {
ch <- c . reftime . mustNewConstMetric ( 0 )
}
ch <- c . rootDelay . mustNewConstMetric ( resp . RootDelay . Seconds ( ) )
ch <- c . rootDispersion . mustNewConstMetric ( resp . RootDispersion . Seconds ( ) )
// Here is SNTP packet sanity check that is exposed to move burden of
// configuration from node_exporter user to the developer.
maxerr := * ntpOffsetTolerance
if resp . Leap == ntp . LeapAddSecond || resp . Leap == ntp . LeapDelSecond {
// state of leapMidnight is cached as leap flag is dropped right after midnight
leapMidnight = resp . Time . Truncate ( hour24 ) . Add ( hour24 )
}
if leapMidnight . Add ( - hour24 ) . Before ( resp . Time ) && resp . Time . Before ( leapMidnight . Add ( hour24 ) ) {
// tolerate leap smearing
maxerr += time . Second
}
2017-10-04 08:33:49 +02:00
if resp . Validate ( ) == nil && resp . RootDistance <= * ntpMaxDistance && resp . MinError <= maxerr {
2017-09-19 10:36:14 +02:00
ch <- c . sanity . mustNewConstMetric ( 1 )
} else {
ch <- c . sanity . mustNewConstMetric ( 0 )
2014-07-28 12:36:28 +02:00
}
2016-06-03 12:25:30 +02:00
return nil
2014-07-28 12:36:28 +02:00
}