correctly handle term signals

- adhoc now terminates gracefully
- avoid race condition on terminations by ignoring errors if
  worker might have been reaped between checking if active and termination call
- ansible-playbook now properly exits on sigint/term
- adhoc and playbook now give exceptions that we should not normally capture
  and rely on top level finally to reap children
- handle systemexit breaks in workers
- added debug to see at which frame we exit
partial fix for #14346
pull/14347/head
Brian Coca 8 years ago
parent 45755bc0e5
commit 5a1887cc76

@ -21,6 +21,7 @@ __metaclass__ = type
########################################################
import os
import signal
from ansible import constants as C
from ansible.cli import CLI
@ -88,6 +89,10 @@ class AdHocCLI(CLI):
tasks = [ dict(action=dict(module=self.options.module_name, args=parse_kv(self.options.module_args)), async=async, poll=poll) ]
)
def _terminate(self, signum=None, framenum=None):
if signum is not None:
raise SystemExit("Interrupt detected, shutting down gracefully")
def run(self):
''' use Runner lib to do SSH things '''
@ -170,6 +175,9 @@ class AdHocCLI(CLI):
# now create a task queue manager to execute the play
self._tqm = None
try:
# Manage user interruptions
signal.signal(signal.SIGTERM, self._terminate)
self._tqm = TaskQueueManager(
inventory=inventory,
variable_manager=variable_manager,
@ -180,6 +188,7 @@ class AdHocCLI(CLI):
run_additional_callbacks=C.DEFAULT_LOAD_CALLBACK_PLUGINS,
run_tree=run_tree,
)
result = self._tqm.run(play)
finally:
if self._tqm:

@ -69,7 +69,7 @@ class PlaybookExecutor:
may limit the runs to serialized groups, etc.
'''
signal.signal(signal.SIGINT, self._cleanup)
signal.signal(signal.SIGTERM, self._terminate)
result = 0
entrylist = []
@ -199,7 +199,7 @@ class PlaybookExecutor:
finally:
if self._tqm is not None:
self._cleanup()
self._tqm.cleanup()
if self._options.syntax:
display.display("No issues encountered")
@ -207,8 +207,9 @@ class PlaybookExecutor:
return result
def _cleanup(self, signum=None, framenum=None):
return self._tqm.cleanup()
def _terminate(self, signum=None, framenum=None):
display.debug(framenum)
raise SystemExit("Terminating run due to external signal")
def _get_serialized_batches(self, play):
'''

@ -163,7 +163,7 @@ class ResultProcess(multiprocessing.Process):
except queue.Empty:
pass
except (KeyboardInterrupt, IOError, EOFError):
except (KeyboardInterrupt, SystemExit, IOError, EOFError):
break
except:
# TODO: we should probably send a proper callback here instead of

@ -132,7 +132,7 @@ class WorkerProcess(multiprocessing.Process):
self._rslt_q.put(task_result, block=False)
except Exception as e:
if not isinstance(e, (IOError, EOFError, KeyboardInterrupt)) or isinstance(e, TemplateNotFound):
if not isinstance(e, (IOError, EOFError, KeyboardInterrupt, SystemExit)) or isinstance(e, TemplateNotFound):
try:
self._host.vars = dict()
self._host.groups = []
@ -140,7 +140,7 @@ class WorkerProcess(multiprocessing.Process):
self._rslt_q.put(task_result, block=False)
except:
debug(u"WORKER EXCEPTION: %s" % to_unicode(e))
debug(u"WORKER EXCEPTION: %s" % to_unicode(traceback.format_exc()))
debug(u"WORKER TRACEBACK: %s" % to_unicode(traceback.format_exc()))
debug("WORKER PROCESS EXITING")

@ -253,7 +253,10 @@ class TaskQueueManager:
rslt_q.close()
main_q.close()
if worker_prc and worker_prc.is_alive():
worker_prc.terminate()
try:
worker_prc.terminate()
except AttributeError:
pass
def clear_failed_hosts(self):
self._failed_hosts = dict()

Loading…
Cancel
Save