mirror of
https://github.com/prometheus/node_exporter.git
synced 2024-11-23 12:30:46 +01:00
Add gauges for allocated memory for queued UDP and TCP packages (#1503)
* Two new states will be added to the tcpstat collector called rx_queued_bytes and tx_queued_bytes. For UDP datagrams an additional collector 'udp_queues' can be used to expose the total lengths of the tx_queue and rx_queue. @SuperQ and @discordianfish this changes gives us the option to check for overloaded UDP + TCP processing. The names of the new TCP states and the UDP metric can be discussed. The current reasons are just: I don't want to add another collector for the same exposed file, so I just added the new states to the tcpstat collector. I chose the name 'udp_queue' instead of 'udpstat' as UDP has no state. Signed-off-by: Peter Bueschel <peter.bueschel@logmein.com>
This commit is contained in:
parent
4891b01b6c
commit
da5972b539
@ -41,6 +41,7 @@
|
|||||||
* [FEATURE] Add Btrfs collector #1512
|
* [FEATURE] Add Btrfs collector #1512
|
||||||
* [FEATURE] Add RAPL collector #1523
|
* [FEATURE] Add RAPL collector #1523
|
||||||
* [FEATURE] Add new softnet collector #1576
|
* [FEATURE] Add new softnet collector #1576
|
||||||
|
* [FEATURE] Add new udp_queues collector #1503
|
||||||
* [ENHANCEMENT] Log pid when there is a problem reading the process stats #1341
|
* [ENHANCEMENT] Log pid when there is a problem reading the process stats #1341
|
||||||
* [ENHANCEMENT] Collect InfiniBand port state and physical state #1357
|
* [ENHANCEMENT] Collect InfiniBand port state and physical state #1357
|
||||||
* [ENHANCEMENT] Include additional XFS runtime statistics. #1423
|
* [ENHANCEMENT] Include additional XFS runtime statistics. #1423
|
||||||
|
@ -60,6 +60,7 @@ textfile | Exposes statistics read from local disk. The `--collector.textfile.di
|
|||||||
thermal\_zone | Exposes thermal zone & cooling device statistics from `/sys/class/thermal`. | Linux
|
thermal\_zone | Exposes thermal zone & cooling device statistics from `/sys/class/thermal`. | Linux
|
||||||
time | Exposes the current system time. | _any_
|
time | Exposes the current system time. | _any_
|
||||||
timex | Exposes selected adjtimex(2) system call stats. | Linux
|
timex | Exposes selected adjtimex(2) system call stats. | Linux
|
||||||
|
udp_queues | Exposes UDP total lengths of the rx_queue and tx_queue from `/proc/net/udp` and `/proc/net/udp6`. | Linux
|
||||||
uname | Exposes system information as provided by the uname system call. | Darwin, FreeBSD, Linux, OpenBSD
|
uname | Exposes system information as provided by the uname system call. | Darwin, FreeBSD, Linux, OpenBSD
|
||||||
vmstat | Exposes statistics from `/proc/vmstat`. | Linux
|
vmstat | Exposes statistics from `/proc/vmstat`. | Linux
|
||||||
xfs | Exposes XFS runtime statistics. | Linux (kernel 4.4+)
|
xfs | Exposes XFS runtime statistics. | Linux (kernel 4.4+)
|
||||||
|
@ -2644,6 +2644,7 @@ node_scrape_collector_success{collector="softnet"} 1
|
|||||||
node_scrape_collector_success{collector="stat"} 1
|
node_scrape_collector_success{collector="stat"} 1
|
||||||
node_scrape_collector_success{collector="textfile"} 1
|
node_scrape_collector_success{collector="textfile"} 1
|
||||||
node_scrape_collector_success{collector="thermal_zone"} 1
|
node_scrape_collector_success{collector="thermal_zone"} 1
|
||||||
|
node_scrape_collector_success{collector="udp_queues"} 1
|
||||||
node_scrape_collector_success{collector="vmstat"} 1
|
node_scrape_collector_success{collector="vmstat"} 1
|
||||||
node_scrape_collector_success{collector="wifi"} 1
|
node_scrape_collector_success{collector="wifi"} 1
|
||||||
node_scrape_collector_success{collector="xfs"} 1
|
node_scrape_collector_success{collector="xfs"} 1
|
||||||
@ -2734,6 +2735,10 @@ node_textfile_scrape_error 0
|
|||||||
# HELP node_thermal_zone_temp Zone temperature in Celsius
|
# HELP node_thermal_zone_temp Zone temperature in Celsius
|
||||||
# TYPE node_thermal_zone_temp gauge
|
# TYPE node_thermal_zone_temp gauge
|
||||||
node_thermal_zone_temp{type="cpu-thermal",zone="0"} 12.376
|
node_thermal_zone_temp{type="cpu-thermal",zone="0"} 12.376
|
||||||
|
# HELP node_udp_queues Number of allocated memory in the kernel for UDP datagrams in bytes.
|
||||||
|
# TYPE node_udp_queues gauge
|
||||||
|
node_udp_queues{ip="v4",queue="rx"} 0
|
||||||
|
node_udp_queues{ip="v4",queue="tx"} 21
|
||||||
# HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill.
|
# HELP node_vmstat_oom_kill /proc/vmstat information field oom_kill.
|
||||||
# TYPE node_vmstat_oom_kill untyped
|
# TYPE node_vmstat_oom_kill untyped
|
||||||
node_vmstat_oom_kill 0
|
node_vmstat_oom_kill 0
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
|
sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
|
||||||
0: 00000000:0016 00000000:0000 0A 00000000:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0
|
0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0
|
||||||
1: 0F02000A:0016 0202000A:8B6B 01 00000000:00000000 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46
|
1: 0F02000A:0016 0202000A:8B6B 01 00000015:00000001 02:000AC99B 00000000 0 0 3652 4 ffff88003d3ae040 21 4 31 47 46
|
||||||
|
2
collector/fixtures/proc/net/udp
Normal file
2
collector/fixtures/proc/net/udp
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
sl local_address rem_address st tx_queue rx_queue tr tm->when retrnsmt uid timeout inode
|
||||||
|
0: 00000000:0016 00000000:0000 0A 00000015:00000000 00:00000000 00000000 0 0 2740 1 ffff88003d3af3c0 100 0 0 10 0
|
@ -52,6 +52,10 @@ const (
|
|||||||
tcpListen
|
tcpListen
|
||||||
// TCP_CLOSING
|
// TCP_CLOSING
|
||||||
tcpClosing
|
tcpClosing
|
||||||
|
// TCP_RX_BUFFER
|
||||||
|
tcpRxQueuedBytes
|
||||||
|
// TCP_TX_BUFFER
|
||||||
|
tcpTxQueuedBytes
|
||||||
)
|
)
|
||||||
|
|
||||||
type tcpStatCollector struct {
|
type tcpStatCollector struct {
|
||||||
@ -122,16 +126,34 @@ func parseTCPStats(r io.Reader) (map[tcpConnectionState]float64, error) {
|
|||||||
if len(parts) == 0 {
|
if len(parts) == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if len(parts) < 4 {
|
if len(parts) < 5 {
|
||||||
return nil, fmt.Errorf("invalid TCP stats line: %q", line)
|
return nil, fmt.Errorf("invalid TCP stats line: %q", line)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qu := strings.Split(parts[4], ":")
|
||||||
|
if len(qu) < 2 {
|
||||||
|
return nil, fmt.Errorf("cannot parse tx_queues and rx_queues: %q", line)
|
||||||
|
}
|
||||||
|
|
||||||
|
tx, err := strconv.ParseUint(qu[0], 16, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tcpStats[tcpConnectionState(tcpTxQueuedBytes)] += float64(tx)
|
||||||
|
|
||||||
|
rx, err := strconv.ParseUint(qu[1], 16, 64)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
tcpStats[tcpConnectionState(tcpRxQueuedBytes)] += float64(rx)
|
||||||
|
|
||||||
st, err := strconv.ParseInt(parts[3], 16, 8)
|
st, err := strconv.ParseInt(parts[3], 16, 8)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tcpStats[tcpConnectionState(st)]++
|
tcpStats[tcpConnectionState(st)]++
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return tcpStats, nil
|
return tcpStats, nil
|
||||||
@ -161,6 +183,10 @@ func (st tcpConnectionState) String() string {
|
|||||||
return "listen"
|
return "listen"
|
||||||
case tcpClosing:
|
case tcpClosing:
|
||||||
return "closing"
|
return "closing"
|
||||||
|
case tcpRxQueuedBytes:
|
||||||
|
return "rx_queued_bytes"
|
||||||
|
case tcpTxQueuedBytes:
|
||||||
|
return "tx_queued_bytes"
|
||||||
default:
|
default:
|
||||||
return "unknown"
|
return "unknown"
|
||||||
}
|
}
|
||||||
|
@ -28,8 +28,27 @@ func Test_parseTCPStatsError(t *testing.T) {
|
|||||||
name: "too few fields",
|
name: "too few fields",
|
||||||
in: "sl local_address\n 0: 00000000:0016",
|
in: "sl local_address\n 0: 00000000:0016",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "missing colon in tx-rx field",
|
||||||
|
in: "sl local_address rem_address st tx_queue rx_queue\n" +
|
||||||
|
" 1: 0F02000A:0016 0202000A:8B6B 01 0000000000000001",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tx parsing issue",
|
||||||
|
in: "sl local_address rem_address st tx_queue rx_queue\n" +
|
||||||
|
" 1: 0F02000A:0016 0202000A:8B6B 01 0000000x:00000001",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "rx parsing issue",
|
||||||
|
in: "sl local_address rem_address st tx_queue rx_queue\n" +
|
||||||
|
" 1: 0F02000A:0016 0202000A:8B6B 01 00000000:0000000x",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "state parsing issue",
|
||||||
|
in: "sl local_address rem_address st tx_queue rx_queue\n" +
|
||||||
|
" 1: 0F02000A:0016 0202000A:8B6B 0H 00000000:00000001",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil {
|
if _, err := parseTCPStats(strings.NewReader(tt.in)); err == nil {
|
||||||
@ -40,6 +59,14 @@ func Test_parseTCPStatsError(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestTCPStat(t *testing.T) {
|
func TestTCPStat(t *testing.T) {
|
||||||
|
|
||||||
|
noFile, _ := os.Open("follow the white rabbit")
|
||||||
|
defer noFile.Close()
|
||||||
|
|
||||||
|
if _, err := parseTCPStats(noFile); err == nil {
|
||||||
|
t.Fatal("expected an error, but none occurred")
|
||||||
|
}
|
||||||
|
|
||||||
file, err := os.Open("fixtures/proc/net/tcpstat")
|
file, err := os.Open("fixtures/proc/net/tcpstat")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -58,4 +85,39 @@ func TestTCPStat(t *testing.T) {
|
|||||||
if want, got := 1, int(tcpStats[tcpListen]); want != got {
|
if want, got := 1, int(tcpStats[tcpListen]); want != got {
|
||||||
t.Errorf("want tcpstat number of listen state %d, got %d", want, got)
|
t.Errorf("want tcpstat number of listen state %d, got %d", want, got)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if want, got := 42, int(tcpStats[tcpTxQueuedBytes]); want != got {
|
||||||
|
t.Errorf("want tcpstat number of bytes in tx queue %d, got %d", want, got)
|
||||||
|
}
|
||||||
|
if want, got := 1, int(tcpStats[tcpRxQueuedBytes]); want != got {
|
||||||
|
t.Errorf("want tcpstat number of bytes in rx queue %d, got %d", want, got)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func Test_getTCPStats(t *testing.T) {
|
||||||
|
type args struct {
|
||||||
|
statsFile string
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
args args
|
||||||
|
wantErr bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "file not found",
|
||||||
|
args: args{statsFile: "somewhere over the rainbow"},
|
||||||
|
wantErr: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
_, err := getTCPStats(tt.args.statsFile)
|
||||||
|
if (err != nil) != tt.wantErr {
|
||||||
|
t.Errorf("getTCPStats() error = %v, wantErr %v", err, tt.wantErr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// other cases are covered by TestTCPStat()
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
87
collector/udp_queues_linux.go
Normal file
87
collector/udp_queues_linux.go
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
// Copyright 2015 The Prometheus Authors
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// +build !noudp_queues
|
||||||
|
|
||||||
|
package collector
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/go-kit/kit/log"
|
||||||
|
"github.com/go-kit/kit/log/level"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/procfs"
|
||||||
|
)
|
||||||
|
|
||||||
|
type (
|
||||||
|
udpQueuesCollector struct {
|
||||||
|
fs procfs.FS
|
||||||
|
desc *prometheus.Desc
|
||||||
|
logger log.Logger
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
registerCollector("udp_queues", defaultEnabled, NewUDPqueuesCollector)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewUDPqueuesCollector returns a new Collector exposing network udp queued bytes.
|
||||||
|
func NewUDPqueuesCollector(logger log.Logger) (Collector, error) {
|
||||||
|
fs, err := procfs.NewFS(*procPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to open procfs: %v", err)
|
||||||
|
}
|
||||||
|
return &udpQueuesCollector{
|
||||||
|
fs: fs,
|
||||||
|
desc: prometheus.NewDesc(
|
||||||
|
prometheus.BuildFQName(namespace, "udp", "queues"),
|
||||||
|
"Number of allocated memory in the kernel for UDP datagrams in bytes.",
|
||||||
|
[]string{"queue", "ip"}, nil,
|
||||||
|
),
|
||||||
|
logger: logger,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *udpQueuesCollector) Update(ch chan<- prometheus.Metric) error {
|
||||||
|
|
||||||
|
s4, errIPv4 := c.fs.NetUDPSummary()
|
||||||
|
if errIPv4 == nil {
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.TxQueueLength), "tx", "v4")
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s4.RxQueueLength), "rx", "v4")
|
||||||
|
} else {
|
||||||
|
if os.IsNotExist(errIPv4) {
|
||||||
|
level.Debug(c.logger).Log("msg", "not collecting ipv4 based metrics")
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("couldn't get upd queued bytes: %s", errIPv4)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s6, errIPv6 := c.fs.NetUDP6Summary()
|
||||||
|
if errIPv6 == nil {
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.TxQueueLength), "tx", "v6")
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, float64(s6.RxQueueLength), "rx", "v6")
|
||||||
|
} else {
|
||||||
|
if os.IsNotExist(errIPv6) {
|
||||||
|
level.Debug(c.logger).Log("msg", "not collecting ipv6 based metrics")
|
||||||
|
} else {
|
||||||
|
return fmt.Errorf("couldn't get upd6 queued bytes: %s", errIPv6)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if os.IsNotExist(errIPv4) && os.IsNotExist(errIPv6) {
|
||||||
|
return ErrNoData
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
@ -38,6 +38,7 @@ enabled_collectors=$(cat << COLLECTORS
|
|||||||
thermal_zone
|
thermal_zone
|
||||||
textfile
|
textfile
|
||||||
bonding
|
bonding
|
||||||
|
udp_queues
|
||||||
vmstat
|
vmstat
|
||||||
wifi
|
wifi
|
||||||
xfs
|
xfs
|
||||||
|
Loading…
Reference in New Issue
Block a user