Add mellanox_hca_temp text collector example (#1128)

* deleted_libraries: Upgrade to Python 3

Python 2.7 will not be maintained past 2020. Therefore upgrade
text_collector_examples/deleted_libraries.py to Python 3.

* Add mellanox_hca_temp text collector example

mellanox_hca_temp is a script that reads Mellanox HCA temperature using
the Mellanox mget_temp_ext tool.

Signed-off-by: Benjamin Drung <benjamin.drung@cloud.ionos.com>
This commit is contained in:
Benjamin Drung 2018-11-01 12:23:06 +01:00 committed by Ben Kochie
parent 073e056121
commit 2d5fcdeef4
2 changed files with 64 additions and 5 deletions

View File

@@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
""" """
Script to count the number of deleted libraries that are linked by running Script to count the number of deleted libraries that are linked by running
processes and expose a summary as Prometheus metrics. processes and expose a summary as Prometheus metrics.
@@ -20,7 +20,7 @@ def main():
try: try:
with open(path, 'rb') as file: with open(path, 'rb') as file:
for line in file: for line in file:
part = line.strip().split() part = line.decode().strip().split()
if len(part) == 7: if len(part) == 7:
library = part[5] library = part[5]
@@ -42,9 +42,9 @@ def main():
num_processes_per_library = {} num_processes_per_library = {}
for process, library_count in processes_linking_deleted_libraries.iteritems(): for process, library_count in processes_linking_deleted_libraries.items():
libraries_seen = set() libraries_seen = set()
for library, count in library_count.iteritems(): for library, count in library_count.items():
if library in libraries_seen: if library in libraries_seen:
continue continue
@@ -59,7 +59,7 @@ def main():
print('# HELP {0} {1}'.format(metric_name, description)) print('# HELP {0} {1}'.format(metric_name, description))
print('# TYPE {0} gauge'.format(metric_name)) print('# TYPE {0} gauge'.format(metric_name))
for library, count in num_processes_per_library.iteritems(): for library, count in num_processes_per_library.items():
dir_path, basename = os.path.split(library) dir_path, basename = os.path.split(library)
basename = basename.replace('"', '\\"') basename = basename.replace('"', '\\"')
dir_path = dir_path.replace('"', '\\"') dir_path = dir_path.replace('"', '\\"')

View File

@@ -0,0 +1,59 @@
#!/bin/bash
set -eu
# Script to read Mellanox HCA temperature using the Mellanox mget_temp_ext tool
#
# Writes the gauge node_infiniband_hca_temp_celsius to stdout in the Prometheus
# text exposition format, one sample per directory found below
# /sys/class/infiniband, labelled with hca_device="<directory name>".
# Diagnostics are written to stderr.
#
# Exit status is 1 when the script is not run as root, when mget_temp_ext is
# not installed, or when no device directory was found. A device whose reading
# fails or is not a number is reported on stderr and skipped; it does not
# change the exit status.
#
# Copyright 2018 The Prometheus Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Author: Jan Phillip Greimann <jan.greimann@cloud.ionos.com>

# Script name without its directory part, used as prefix for all diagnostics.
prog="${0##*/}"

# check if root
if [ "$EUID" -ne 0 ]; then
  echo "${prog}: Please run as root!" >&2
  exit 1
fi

# check if programs are installed
if ! command -v mget_temp_ext >/dev/null 2>&1; then
  echo "${prog}: mget_temp_ext is not installed. Aborting." >&2
  exit 1
fi

# The heredoc terminator must stay in column 0.
cat <<EOF
# HELP node_infiniband_hca_temp_celsius Celsius temperature of Mellanox InfiniBand HCA.
# TYPE node_infiniband_hca_temp_celsius gauge
EOF

# A sample value has to be a number. An empty or garbled reading would produce
# a line that is not valid in the exposition format, so it is checked first.
number_re='^[+-]?[0-9]+([.][0-9]+)?$'

# Set to true as soon as one device directory has been seen.
found_device=false

# run for each found Mellanox device
for dev in /sys/class/infiniband/*; do
  # When the glob matches nothing, bash leaves the pattern as a literal
  # string, which is not a directory and is skipped here.
  if test ! -d "$dev"; then
    continue
  fi
  found_device=true
  device="${dev##*/}"

  # get temperature
  if ! temperature="$(mget_temp_ext -d "${device}")"; then
    echo "${prog}: Failed to get temperature from InfiniBand HCA '${device}'!" >&2
    continue
  fi

  # Drop all whitespace (including the trailing newline) from the tool output.
  temperature="${temperature//[[:space:]]/}"

  # Skip readings that are not a plain decimal number instead of emitting them.
  if [[ ! "$temperature" =~ $number_re ]]; then
    echo "${prog}: Unexpected temperature reading '${temperature}' from InfiniBand HCA '${device}'!" >&2
    continue
  fi

  # output
  echo "node_infiniband_hca_temp_celsius{hca_device=\"${device}\"} ${temperature}"
done

# no device directory was seen in the loop above
if [ "$found_device" = false ]; then
  echo "${prog}: No InfiniBand HCA device found!" >&2
  exit 1
fi