#!/usr/bin/env python3
#
# A munin plugin for ODR-ZMQ2EDI
#
# Reads the logfile, and the previously rotated logfile (suffixed by .1) and
# analyses the output. Generates a graph with the number of late packets, and
# a graph with min/max wait time.
#
# Copy this to /etc/munin/plugins/stats_zmq2edi_munin
# and make it executable (chmod +x)
#
# Then make sure that zmq2edi log output gets written to LOGFILE below,
# and set up a logrotate script to rotate the log. The rotated log
# filename must be appended with .1

import collections
import math
import os
import re
import sys
import time

# Every six seconds a line is output. We are polled in 5 min = 300s intervals
NUM_LINES = int(300 / 6)
LOGFILE = "/var/log/supervisor/zmq2edi.log"

# Each statistics line covers 250 EDI packets (= 6 seconds).
PACKETS_PER_LINE = 250

# The log must have been written to within the previous 2 * 6s, otherwise we
# assume the tool isn't running.
MAX_LOG_AGE_SECONDS = 12

# The min wait time can take extremely low values; it is clamped to -6 seconds
# to keep the graph readable. Must agree with "low.min" in munin_config.
MIN_WAIT_CLAMP_MS = -6000

# %-formatting is used on purpose: the template contains literal
# "${graph_period}" placeholders that munin itself expands.
munin_config = """
multigraph wait_time_zmq2edi
graph_title zmq2edi wait_time
graph_order high low
graph_args --base 1000
graph_vlabel max/min wait times during last ${graph_period}
graph_category zmq2edi
graph_info This graph shows the min and max wait times
high.info Max wait time
high.label Max wait time ms
high.min 0
high.warning 1:
low.info Min wait time
low.label Min wait time ms
low.min -6000
low.warning 1:
multigraph late_packets_zmq2edi
graph_title EDI packets delivered too late
graph_order late
graph_args --base 1000
graph_vlabel late packets during last ${graph_period}
graph_category zmq2edi
graph_info This graph shows the number late EDI packets (250 packets = 6 seconds)
late.info Number of late packets
late.label Number of late packets
late.min 0
late.max %s
late.warning 0:0
""" % (NUM_LINES * PACKETS_PER_LINE,)

# example lines:
# Buffering time statistics [milliseconds]: min: 907.799 max: 981.409 mean: 944.335 stdev: 26.827 late: 0 of 250 (0%)
# Values might also be in scientific form, e.g. -1.80938e+07
RE_LOGLINE = re.compile(
    r"""Buffering time statistics.* min: (.+) max: (.+) mean: (.+) stdev: (.+) late: (.+) of 250""",
    flags=re.ASCII)

# The following lines are output at startup and during a reset respectively.
# Each occurrence is counted as one full interval of late packets.
STARTUP_PATTERN = "starting up"
BACKOFF_PATTERN = "Backoff"


def _read_last_lines(logfile, num_lines):
    """Return the last num_lines lines of logfile + ".1" followed by logfile."""
    # A bounded deque keeps only the newest num_lines entries.
    tail = collections.deque(maxlen=num_lines)

    # Read the previously rotated logfile too to make sure we have enough data
    for fname in (logfile + ".1", logfile):
        try:
            # errors="replace": a stray undecodable byte in the log must not
            # make the plugin crash.
            with open(fname, "r", errors="replace") as fd:
                tail.extend(fd)
        except FileNotFoundError:
            # The rotated file may not exist yet, and the current one may
            # vanish if a rotation happens while we are running.
            continue
    return tail


def parse_logs(logfile=None, num_lines=None):
    """
    Analyse the most recent statistics lines of the zmq2edi log.

    logfile:   path of the current log; the rotated log is expected at
               logfile + ".1". Defaults to LOGFILE.
    num_lines: how many of the most recent log lines to analyse.
               Defaults to NUM_LINES.

    Returns a tuple (num_late, t_min, t_max):
      num_late  sum of late packets over the analysed lines, where every
                startup or backoff line counts as PACKETS_PER_LINE late packets
      t_min     smallest "min" wait time in ms, rounded and clamped to
                MIN_WAIT_CLAMP_MS, or None if no statistics line was found
      t_max     largest "max" wait time in ms, rounded, or None if no
                statistics line was found

    All three values are None if the log file cannot be stat'ed or was not
    modified within the last MAX_LOG_AGE_SECONDS seconds.
    """
    if logfile is None:
        logfile = LOGFILE
    if num_lines is None:
        num_lines = NUM_LINES

    # A missing or stale log means the tool isn't running.
    try:
        age = time.time() - os.stat(logfile).st_mtime
    except OSError:
        return None, None, None
    if age > MAX_LOG_AGE_SECONDS:
        return None, None, None

    # Calculate min, max over the whole period, and sum the number of late
    num_late = 0
    t_min_period = None
    t_max_period = None

    for line in _read_last_lines(logfile, num_lines):
        if STARTUP_PATTERN in line or BACKOFF_PATTERN in line:
            num_late += PACKETS_PER_LINE
            continue

        match = RE_LOGLINE.search(line)
        if match is None:
            continue

        try:
            t_min = float(match.group(1))
            t_max = float(match.group(2))
            late = int(match.group(5))
        except ValueError:
            # Malformed numbers: ignore the line instead of aborting the run.
            continue

        num_late += late

        # nan/inf cannot be rounded and would poison the comparisons below.
        if math.isfinite(t_min) and (t_min_period is None or t_min < t_min_period):
            t_min_period = t_min
        if math.isfinite(t_max) and (t_max_period is None or t_max > t_max_period):
            t_max_period = t_max

    if t_min_period is not None and t_min_period < MIN_WAIT_CLAMP_MS:
        t_min_period = MIN_WAIT_CLAMP_MS

    return (
        num_late,
        round(t_min_period) if t_min_period is not None else None,
        round(t_max_period) if t_max_period is not None else None,
    )


def muninify(value):
    """
    Return value unchanged, or 'U' if it is None.

    According to http://guide.munin-monitoring.org/en/latest/develop/plugins/plugin-concise.html#plugin-concise
    "If the plugin - for any reason - has no value to report, then it may send the value U for undefined."
    """
    return 'U' if value is None else value


def format_values(num_late, t_min, t_max):
    """Return the munin "fetch" response text for the given measurements."""
    munin_values = "multigraph wait_time_zmq2edi\n"
    munin_values += "high.value {}\n".format(muninify(t_max))
    munin_values += "low.value {}\n".format(muninify(t_min))
    munin_values += "multigraph late_packets_zmq2edi\n"
    munin_values += "late.value {}\n".format(muninify(num_late))
    return munin_values


def main(argv=None):
    """
    Entry point of the plugin.

    argv defaults to sys.argv. Without arguments the current values are
    printed; with the single argument "config" the graph configuration is
    printed; anything else exits with status 1.
    """
    if argv is None:
        argv = sys.argv

    # No arguments means that munin wants values
    if len(argv) == 1:
        num_late, t_min, t_max = parse_logs()
        print(format_values(num_late, t_min, t_max))
    elif len(argv) == 2 and argv[1] == "config":
        print(munin_config)
    else:
        sys.exit(1)


if __name__ == "__main__":
    main()