1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
#!/usr/bin/env python3
#
# A munin plugin for ODR-ZMQ2EDI
#
# Reads the logfile, and the previously rotated logfile (suffixed by .1) and
# analyses the output. Generates a graph with percentage of frames late, and a
# graph with min/max wait time.
#
# Copy this to /etc/munin/plugins/stats_zmq2edi_munin
# and make it executable (chmod +x)
#
# Then make sure that zmq2edi log output gets written to LOGFILE below,
# and setup up a logrotate script to rotate the log. The rotated log
# filename must be appended with .1
# Every six seconds a line is output. We are polled in 5 min = 300s intervals
NUM_LINES = int(300 / 6)
LOGFILE = "/var/log/supervisor/zmq2edi.log"
import time
import sys
import os
import re
munin_config = """
multigraph wait_time_zmq2edi
graph_title zmq2edi wait_time
graph_order high low
graph_args --base 1000
graph_vlabel max/min wait times during last ${graph_period}
graph_category zmq2edi
graph_info This graph shows the min and max wait times
high.info Max wait time
high.label Max wait time ms
high.min 0
high.warning 1:
low.info Min wait time
low.label Min wait time ms
low.min -6000
low.warning 1:
multigraph late_packets_zmq2edi
graph_title EDI packets delivered too late
graph_order late
graph_args --base 1000
graph_vlabel late packets during last ${graph_period}
graph_category zmq2edi
graph_info This graph shows the number late EDI packets (250 packets = 6 seconds)
late.info Number of late packets
late.label Number of late packets
late.min 0
late.max %s
late.warning 0:0
""" % (NUM_LINES * 250,)
def parse_logs():
# example lines:
# Buffering time statistics [milliseconds]: min: 907.799 max: 981.409 mean: 944.335 stdev: 26.827 late: 0 of 250 (0%)
# Values might also be in scientific form, e.g. -1.80938e+07
re_logline = re.compile(r"""Buffering time statistics.* min: (.+) max: (.+) mean: (.+) stdev: (.+) late: (.+) of 250""", flags=re.ASCII)
# The following lines are output at startup and during a reset respectively:
startup_pattern = "starting up"
backoff_pattern = "Backoff"
lines = []
# Check that the file exists and was last written to in the previous 2* 6s,
# otherwise assume the tool isn't running
if not os.path.exists(LOGFILE) or (time.time() - os.stat(LOGFILE).st_mtime) > 12:
num_late = None
t_min_period = None
t_max_period = None
else:
# Keep only the last NUM_LINES
# Read the previously rotated logfile too to make sure we have enough data
for fname in [LOGFILE+ ".1", LOGFILE]:
if os.path.exists(fname):
with open(fname, "r") as fd:
for line in fd:
lines.append(line)
if len(lines) > NUM_LINES:
del lines[0]
# Calculate min, max over the whole period, and sum the number of late
num_late = 0
t_min_period = None
t_max_period = None
num_statistics = 0
for line in lines:
if startup_pattern in line:
num_late += 250
elif backoff_pattern in line:
num_late += 250
else:
match = re_logline.search(line)
if match:
num_statistics += 1
t_min = float(match.group(1))
t_max = float(match.group(2))
t_mean = float(match.group(3))
stdev = float(match.group(4))
late = int(match.group(5))
if t_min_period is None or t_min < t_min_period:
t_min_period = t_min
if t_max_period is None or t_max > t_max_period:
t_max_period = t_max
if num_late is None:
num_late = 0
num_late += late
# The min can take extremely low values, we clamp it here to -6 seconds
# to keep the graph readable
if t_min_period is not None and t_min_period < -6000:
t_min_period = -6000
return num_late, round(t_min_period) if t_min_period is not None else None, round(t_max_period) if t_max_period is not None else None
def muninify(value):
""" According to http://guide.munin-monitoring.org/en/latest/develop/plugins/plugin-concise.html#plugin-concise
"If the plugin - for any reason - has no value to report, then it may send the value U for undefined."
"""
return 'U' if value is None else value
# No arguments means that munin wants values
if len(sys.argv) == 1:
num_late, t_min, t_max = parse_logs()
munin_values = "multigraph wait_time_zmq2edi\n"
munin_values += "high.value {}\n".format(muninify(t_max))
munin_values += "low.value {}\n".format(muninify(t_min))
munin_values += "multigraph late_packets_zmq2edi\n"
munin_values += "late.value {}\n".format(muninify(num_late))
print(munin_values)
elif len(sys.argv) == 2 and sys.argv[1] == "config":
print(munin_config)
else:
sys.exit(1)
|