From a6ff1cdb947ede90453d76e6860b44e0ad2acc1f Mon Sep 17 00:00:00 2001 From: "Matthias P. Braendli" Date: Tue, 30 Jan 2018 15:30:28 +0100 Subject: Add munin monitoring script --- README.md | 5 +- TODO | 1 + doc/README-RC.md | 7 +- doc/stats_dabmod_munin.py | 315 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 324 insertions(+), 4 deletions(-) create mode 100755 doc/stats_dabmod_munin.py diff --git a/README.md b/README.md index b0b4635..272ba4d 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Features - Can be used to drive the [LimeSDR board](https://myriadrf.org/projects/limesdr/), the [HackRF](https://greatscottgadgets.com/hackrf/) and others. - Timestamping support required for SFN - GPSDO monitoring (both Ettus and [ODR LEA-M8F board](http://www.opendigitalradio.org/lea-m8f-gpsdo)) +- Monitoring integration with munin - A FIR filter for improved spectrum mask - TII insertion - Logging: log to file, to syslog @@ -38,8 +39,8 @@ Features The src/ directory contains the source code of ODR-DabMod. -The doc/ directory contains the ODR-DabMod documentation, and an example -configuration file. +The doc/ directory contains the ODR-DabMod documentation, an example +configuration file and a script for munin integration. The lib/ directory contains source code of libraries needed to build ODR-DabMod. diff --git a/TODO b/TODO index b736a7a..2378a3e 100644 --- a/TODO +++ b/TODO @@ -27,6 +27,7 @@ Clean up and separate GPS and refclk checks. * Ensure muting is set properly at startup. * Ensure synchronous is taken in account. * Verify resync after underflow and muting + * Add GPSDO status to RC and munin. *done* Add antenna selection to config. diff --git a/doc/README-RC.md b/doc/README-RC.md index c0fe4a7..054fdeb 100644 --- a/doc/README-RC.md +++ b/doc/README-RC.md @@ -11,8 +11,11 @@ the software will only listen on the local loopback interface. To get secure remote access, use SSH port forwarding. 
#!/usr/bin/env python2
#
# present statistics from ODR-DabMod's
# RC interface to munin
#
# The values are read through the ZeroMQ remote control interface of
# ODR-DabMod (see doc/README-RC.md).

import json
import os
import re
import sys

import zmq

# Values monitored:
#
# config_all is the complete munin configuration of this plugin. It is
# printed verbatim when the plugin is called with the "config" argument.
# It is NOT passed through str.format(), so munin substitutions have to be
# written with single braces: ${graph_period}.

config_all = ""

# default data type is GAUGE

# One GAUGE multigraph from 0% to 100% with
# ofdm clip_stats clip_ratio
# ofdm clip_stats errorclip_ratio
config_all += """
multigraph ofdm_clip_stats
graph_title OFDM CFR clip stats
graph_order clip_ratio errorclip_ratio
graph_vlabel number of samples/errors clipped during last ${graph_period}
graph_category dabmod
graph_info This graph shows CFR clipping statistics

clip_ratio.info Number of samples clipped
clip_ratio.label Number of samples clipped
clip_ratio.min 0
clip_ratio.max 100
errorclip_ratio.info Number of errors clipped
errorclip_ratio.label Number of errors clipped
errorclip_ratio.min 0
errorclip_ratio.max 100"""

# One GAUGE multigraph
# ofdm clip_stats mer
config_all += """
multigraph ofdm_clip_stats_mer
graph_title OFDM MER after CFR
graph_order mer
graph_vlabel MER in dB after CFR
graph_category dabmod
graph_info This graph shows MER after CFR

mer.info MER dB
mer.label MER dB
mer.min 0
mer.max 100"""

# One GAUGE multigraph in dB for
# ofdm papr before-cfr
# ofdm papr after-cfr
config_all += """
multigraph ofdm_papr
graph_title OFDM PAPR stats
graph_order before_cfr after_cfr
graph_args --base 1000
graph_vlabel Average PAPR before/after CFR during last ${graph_period}
graph_category dabmod
graph_info This graph shows the Peak-to-Average Power Ratio before and after CFR

before_cfr.info PAPR before CFR
before_cfr.label PAPR before CFR
before_cfr.min 0
after_cfr.info PAPR after CFR
after_cfr.label PAPR after CFR
after_cfr.min 0"""

# One GAUGE graph for
# tist offset
config_all += """
multigraph tist_offset
graph_title TIST configured offset
graph_order offset
graph_args --base 1000
graph_vlabel Configured offset
graph_category dabmod
graph_info This graph shows the configured TIST offset

offset.info Configured offset
offset.label Configured offset
offset.min 0
offset.max 300"""

# One COUNTER (min 0, max 249) graph for
# tist timestamp fct
config_all += """
multigraph frame_fct
graph_title TIST FCT
graph_order fct
graph_args --base 1000
graph_vlabel FCT value
graph_category dabmod
graph_info This graph shows the FCT value

fct.info FCT
fct.label FCT
fct.type COUNTER
fct.min 0
fct.max 249"""

# One DDERIVE graph for
# tist timestamp timestamps
# NOTE(review): DDERIVE is an rrdtool data source type; confirm that the
# deployed munin version accepts it as a field type.
config_all += """
multigraph tist_timestamp
graph_title TIST timestamp
graph_order timestamp
graph_args --base 1000
graph_vlabel timestamp value
graph_category dabmod
graph_info This graph shows the timestamp value in seconds

timestamp.info timestamp
timestamp.label timestamp
timestamp.type DDERIVE
timestamp.min 0"""

# One DERIVE (min 0) multigraph for
# sdr underruns
# sdr latepackets
config_all += """
multigraph sdr_stats
graph_title SDR device statistics
graph_order underruns latepackets
graph_args --base 1000
graph_vlabel Number of underruns and late packets
graph_category dabmod
graph_info This graph shows the number of underruns and late packets

underruns.info Number of SoapySDR/UHD underruns
underruns.label Number of SoapySDR/UHD underruns
underruns.type DERIVE
underruns.min 0
latepackets.info Number of SoapySDR/UHD late packets
latepackets.label Number of SoapySDR/UHD late packets
latepackets.type DERIVE
latepackets.min 0"""

# One DERIVE (min 0) graph for
# sdr frames
config_all += """
multigraph sdr_frames
graph_title SDR number of frames transmitted
graph_order frames
graph_args --base 1000
graph_vlabel Number of frames transmitted
graph_category dabmod
graph_info This graph shows the number of frames transmitted

frames.info Number of SoapySDR/UHD frames
frames.label Number of SoapySDR/UHD frames
frames.type DERIVE
frames.min 0"""
# Single ZeroMQ context shared by every socket this plugin opens.
ctx = zmq.Context()


class RCException(Exception):
    """Raised when the remote control does not answer a request in time."""
    pass


# The plugin emits several graphs per run, which requires a munin master
# that announces multigraph support through this environment variable.
if not os.environ.get("MUNIN_CAP_MULTIGRAPH"):
    sys.stderr.write("This needs munin version 1.4 at least\n")
    sys.exit(1)


def do_transaction(message_parts, sock):
    """Do a send + receive transaction on the remote control socket.

    message_parts -- list of byte strings, sent as one multipart message.
    sock -- connected zmq socket, as returned by connect().

    Returns the list of message parts received as answer.
    Raises RCException if no answer arrives within one second.
    Exits the program if message_parts is a single string instead of a
    list, because that is a programming error in the caller.
    """
    # bytes is str on Python 2; type(u"") is the text type on 2 and 3, so a
    # bare string of either kind is rejected instead of being sent
    # character by character.
    if isinstance(message_parts, (bytes, type(u""))):
        sys.stderr.write("do_transaction expects a list!\n")
        sys.exit(1)

    # Every part but the last one carries SNDMORE.
    last_index = len(message_parts) - 1
    for index, part in enumerate(message_parts):
        sock.send(part, flags=0 if index == last_index else zmq.SNDMORE)

    poller = zmq.Poller()
    poller.register(sock, zmq.POLLIN)

    # Wait at most 1000 ms for the answer
    ready = dict(poller.poll(1000))
    if ready.get(sock) == zmq.POLLIN:
        return sock.recv_multipart()

    raise RCException("Could not receive data for command '{}'\n".format(
        message_parts))


def connect():
    """Create a connection to the dabmod RC and check it with a ping.

    Exits the program if the RC does not answer the ping, or answers
    anything else than "ok".

    returns: the socket
    """
    sock = zmq.Socket(ctx, zmq.REQ)
    sock.set(zmq.LINGER, 5)
    sock.connect("tcp://localhost:9400")

    try:
        ping_answer = do_transaction([b"ping"], sock)
    except RCException as e:
        # Report on stderr: munin parses everything written to stdout as
        # plugin output.
        sys.stderr.write("connect failed because: {}\n".format(
            str(e).rstrip()))
        sys.exit(1)

    if ping_answer != [b"ok"]:
        sys.stderr.write("Wrong answer to ping\n")
        sys.exit(1)

    return sock
def get_rc_value(module, name, sock):
    """Read one parameter from the remote control and return it as text.

    module, name -- RC module and parameter to read, e.g. "sdr", "frames".
    sock -- connected socket, as returned by connect().

    Returns the decoded answer, or "" if the RC did not answer in time.
    Exits the program if the answer consists of more than one part.
    """
    try:
        parts = do_transaction([b"get", module.encode(), name.encode()], sock)
    except RCException as e:
        # Report on stderr: munin parses everything written to stdout as
        # plugin output. The empty string makes the caller emit "U".
        sys.stderr.write("get {} {} fail: {}\n".format(
            module, name, str(e).rstrip()))
        return ""

    if len(parts) != 1:
        sys.stderr.write("Received unexpected multipart message {}\n".format(
            parts))
        sys.exit(1)
    return parts[0].decode()


def handle_re(graph_name, re, rc_value, group_number=1):
    """Build one munin value line from an RC answer.

    graph_name -- munin field, e.g. "frames.value".
    re -- compiled pattern searched in rc_value. The parameter name shadows
          the re module inside this function; it is kept because it is
          part of the signature.
    rc_value -- text returned by get_rc_value().
    group_number -- capture group holding the value.

    Returns "<graph_name> <value>\\n", with "U" (munin's marker for
    unknown) as value when the pattern does not match.
    """
    match = re.search(rc_value)
    value = match.group(group_number) if match else "U"
    return "{} {}\n".format(graph_name, value)


# Patterns matching the textual answers of the RC. They are compiled with
# re.X, therefore literal spaces are escaped.
re_double_value = re.compile(r"(\d+\.\d+)", re.X)
re_int_value = re.compile(r"(\d+)", re.X)
re_clip_samples = re.compile(r"(\d+\.\d+)%\ samples\ clipped", re.X)
re_clip_errors = re.compile(r"(\d+\.\d+)%\ errors\ clipped", re.X)
re_clip_mer = re.compile(r"MER\ after\ CFR:\ (\d+\.\d+)", re.X)
re_tist_timestamp = re.compile(r"(\d+\.\d+)\ for\ frame\ FCT\ (\d+)", re.X)


def muninise_papr(papr):
    """Convert one PAPR field to a munin value.

    Returns "U" if the field contains "N/A", otherwise the field as float.
    Raises ValueError if the field is not a number.
    """
    if "N/A" in papr:
        return "U"
    return float(papr.strip())


def format_papr_values(ofdm_papr_stats):
    """Build the before_cfr/after_cfr value lines from the "ofdm papr" answer.

    Format is as follows:
      "PAPR [dB]: " << std::fixed <<
        (papr_before == 0 ? string("N/A") : to_string(papr_before)) <<
        ", " <<
        (papr_after == 0 ? string("N/A") : to_string(papr_after));

    Each of the two values is reported as "U" on its own if it cannot be
    parsed; both are "U" if the answer does not contain exactly two fields.
    """
    graph_names = ("before_cfr.value", "after_cfr.value")

    _, _, both_papr = ofdm_papr_stats.partition(":")
    fields = both_papr.split(",")
    if len(fields) != 2:
        return "".join("{} U\n".format(name) for name in graph_names)

    lines = []
    for graph_name, field in zip(graph_names, fields):
        try:
            value = muninise_papr(field)
        except ValueError:
            value = "U"
        lines.append("{} {}\n".format(graph_name, value))
    return "".join(lines)


def collect_values(sock):
    """Query all monitored RC parameters and return the munin value report.

    The field names used here must be identical to the ones declared in
    config_all, otherwise munin discards the value.
    """
    values = []

    values.append("multigraph ofdm_clip_stats\n")
    ofdm_clip_stats = get_rc_value("ofdm", "clip_stats", sock)
    values.append(handle_re("clip_ratio.value",
                            re_clip_samples, ofdm_clip_stats))
    values.append(handle_re("errorclip_ratio.value",
                            re_clip_errors, ofdm_clip_stats))

    # The MER is part of the same clip_stats answer
    values.append("multigraph ofdm_clip_stats_mer\n")
    values.append(handle_re("mer.value", re_clip_mer, ofdm_clip_stats))

    values.append("multigraph ofdm_papr\n")
    values.append(format_papr_values(get_rc_value("ofdm", "papr", sock)))

    values.append("multigraph tist_offset\n")
    tist_offset = get_rc_value("tist", "offset", sock)
    values.append(handle_re("offset.value", re_double_value, tist_offset))

    # One answer carries both the timestamp (group 1) and the FCT (group 2)
    values.append("multigraph frame_fct\n")
    tist_timestamp = get_rc_value("tist", "timestamp", sock)
    values.append(handle_re("fct.value",
                            re_tist_timestamp, tist_timestamp, 2))

    values.append("multigraph tist_timestamp\n")
    values.append(handle_re("timestamp.value",
                            re_tist_timestamp, tist_timestamp, 1))

    values.append("multigraph sdr_stats\n")
    sdr_underruns = get_rc_value("sdr", "underruns", sock)
    values.append(handle_re("underruns.value", re_int_value, sdr_underruns))
    sdr_latepackets = get_rc_value("sdr", "latepackets", sock)
    values.append(handle_re("latepackets.value",
                            re_int_value, sdr_latepackets))

    values.append("multigraph sdr_frames\n")
    sdr_frames = get_rc_value("sdr", "frames", sock)
    values.append(handle_re("frames.value", re_int_value, sdr_frames))

    return "".join(values)


def main(argv):
    """Entry point of the plugin, returns the process exit code.

    Without argument the current values are fetched and printed, with the
    single argument "config" the munin configuration is printed.
    """
    if len(argv) == 1:
        sock = connect()
        print(collect_values(sock))
        return 0

    if len(argv) == 2 and argv[1] == "config":
        # No need to connect
        print(config_all)
        return 0

    sys.stderr.write("Invalid command line arguments\n")
    return 1


if __name__ == "__main__":
    sys.exit(main(sys.argv))