aboutsummaryrefslogtreecommitdiffstats
path: root/mpm/python/usrp_mpm/sys_utils/watchdog.py
blob: 3c0a48f4a8f7770a9af356f05677cc4436cfdd46 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#
# Copyright 2018 Ettus Research, National Instruments Company
#
# SPDX-License-Identifier: GPL-3.0
#
"""
systemd watchdog control module
"""

import os
import time
import threading
from systemd import daemon

# Fallback watchdog timeout, used by _watchdog_task() when WATCHDOG_USEC is
# not set in the environment.
MPM_WATCHDOG_DEFAULT_TIMEOUT = 30
# Number of pings sent per watchdog timeout period: the ping interval is the
# watchdog timeout divided by this value.
MPM_WATCHDOG_TIMEOUT_FRAC = 3.0

def has_watchdog():
    """Tell whether a watchdog is configured for this process.

    The check looks at the WATCHDOG_USEC environment variable: the result is
    True when it is set to a non-empty value, and False when it is unset or
    empty.
    """
    watchdog_usec = os.environ.get('WATCHDOG_USEC')
    return bool(watchdog_usec)

def transfer_control(pid):
    """
    Hand over watchdog notification duty to another process.

    Sends a "MAINPID=<pid>" message through daemon.notify(). The pid argument
    is passed through int() first, so anything int() accepts is allowed and
    anything it rejects raises the corresponding exception.
    """
    new_main_pid = int(pid)
    message = "MAINPID={:d}".format(new_main_pid)
    daemon.notify(message)

def _watchdog_task(shared_state, log):
    """
    Continuously ping the watchdog to tell it that we're still alive.

    The watchdog timeout is read from the WATCHDOG_USEC environment variable,
    which is given in microseconds. If that variable is unset or empty,
    MPM_WATCHDOG_DEFAULT_TIMEOUT is used instead. The ping interval is the
    timeout divided by MPM_WATCHDOG_TIMEOUT_FRAC.

    This will keep running until the parent thread dies, or
    shared_state.system_ready gets set to False by someone.

    Arguments:
    shared_state -- object whose system_ready.value is polled once per
                    interval; a false value ends the loop
    log -- logger object providing debug(), trace() and error()
    """
    watchdog_usec = os.environ.get('WATCHDOG_USEC')
    if watchdog_usec:
        # Value from the environment is in microseconds
        watchdog_timeout = float(watchdog_usec) / 1e6
    else:
        # Only the environment value needs the usec -> sec conversion. Scaling
        # the default as well would yield a microsecond-range interval and
        # turn the loop below into a busy loop.
        # NOTE(review): the default is taken to be in seconds -- confirm.
        watchdog_timeout = float(MPM_WATCHDOG_DEFAULT_TIMEOUT)
    watchdog_interval = watchdog_timeout / MPM_WATCHDOG_TIMEOUT_FRAC
    daemon.notify("READY=1")
    log.debug("Watchdog primed, going into watchdog loop (Interval: %s s)",
              watchdog_interval)
    while shared_state.system_ready.value:
        # Sleep first, then ping, that avoids the case where transfer_control()
        # is not yet complete before we call this for the first time, which
        # would lead to error messages popping up in the systemd journal.
        time.sleep(watchdog_interval)
        log.trace("Pinging watchdog....")
        daemon.notify("WATCHDOG=1")
    log.error("Terminating watchdog thread!")

def spawn_watchdog_task(shared_state, log):
    """Start the watchdog ping loop in its own thread and return that thread.

    The thread runs _watchdog_task(shared_state, log), is named
    "MPMWatchdogTask", and is marked as a daemon thread because we don't want
    the watchdog task to outlive the main thread. It is already started when
    this function returns.
    """
    watchdog_thread = threading.Thread(
        name="MPMWatchdogTask",
        target=_watchdog_task,
        args=(shared_state, log),
        daemon=True,
    )
    watchdog_thread.start()
    return watchdog_thread