From 97852aba2393a20fd5e0c25313d26be4ce316a25 Mon Sep 17 00:00:00 2001 From: Samuel O'Brien Date: Thu, 23 Jul 2020 16:30:32 -0500 Subject: mpm: Fix gevent errors on SIGTERM Sometimes when running usrp_hwd.py in a terminal and then canceling it with Ctrl+C, it prints a really large stacktrace into the terminal resulting from an uncaught gevent BlockingSwitchOutError. It seems like there was an attempt to catch this in usrp_hwd.py:kill_time(). This try-except was surrounding a call to Process.join() which, to the best of my knowledge, can't ever throw this exception. Based on my troubleshooting, this error comes from the SIGTERM signal handler of the RPC process. The handler (defined in rpc_server.py:_rpc_server_process), is just a direct call to RPCServer.stop(). When the server's backend is a thread pool, this call may block when joining the thread pool, causing gevent to complain about execution attempting to block in a signal handler. This commit resolves this issue by simply triggering an event in the signal handler which prompts a different thread to clean up the server and end the process. 
Signed-off-by: Samuel O'Brien --- mpm/python/usrp_mpm/rpc_server.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'mpm/python/usrp_mpm') diff --git a/mpm/python/usrp_mpm/rpc_server.py b/mpm/python/usrp_mpm/rpc_server.py index 8b87c52d8..65428f116 100644 --- a/mpm/python/usrp_mpm/rpc_server.py +++ b/mpm/python/usrp_mpm/rpc_server.py @@ -13,6 +13,8 @@ import copy from random import choice from string import ascii_letters, digits from multiprocessing import Process +import threading +import sys from gevent.server import StreamServer from gevent.pool import Pool from gevent import signal @@ -555,8 +557,17 @@ def _rpc_server_process(shared_state, port, default_args): handle=MPMServer(shared_state, default_args), spawn=connections) # catch signals and stop the stream server - signal(signal.SIGTERM, lambda *args: server.stop()) - signal(signal.SIGINT, lambda *args: server.stop()) + # Previously, the signal callbacks simply called server.stop() + # gevent doesn't like this because server.stop() may block waiting + # for greenlets to stop, and signal callbacks are not supposed to block + stop_event = threading.Event() + def stop_worker(): + stop_event.wait() + server.stop() + sys.exit(0) + threading.Thread(target=stop_worker, daemon=True).start() + signal(signal.SIGTERM, lambda *args: stop_event.set()) + signal(signal.SIGINT, lambda *args: stop_event.set()) server.serve_forever() -- cgit v1.2.3