issue #202: ansible: forget all dependent contexts on Stream disconnect

This is a partial fix; there are still at least two cases that need to be
covered:

- In-progress connections must have a CallError or similar sent to any
  waiters (see the sketch below).
- Once connection delegation exists, it is possible for other worker
  processes to be active (at any step of the process), trying to
  communicate with a context that we know can no longer be communicated
  with. The solution to that isn't clear yet.

Additionally ensure root has /bin/bash shell in both Docker images.
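
A minimal sketch of how the first remaining case might eventually be handled,
assuming _waiters_by_key maps connection keys to lists of mitogen.core.Message
instances whose reply is still outstanding; the helper name and error text are
hypothetical and not part of this commit:

import mitogen.core

def fail_waiters(waiters_by_key, key):
    # Hypothetical helper: deliver a CallError to every message still
    # waiting on the connection identified by `key`, so blocked workers
    # unblock instead of hanging once the stream has disappeared.
    error = mitogen.core.CallError('Stream disconnected before the '
                                   'connection attempt completed.')
    for msg in waiters_by_key.pop(key, []):
        msg.reply(error)

Inside ContextService this would need to run under self._lock, alongside the
existing cleanup in _on_stream_disconnect() below.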
pull/203/head
David Wilson 6 years ago
parent c5fe817db2
commit dc4433ace6

@@ -199,6 +199,29 @@ class ContextService(mitogen.service.Service):
            self._shutdown(context)
        self._lru_by_via = {}

    def _on_stream_disconnect(self, stream):
        """
        Respond to Stream disconnection by deleting any record of contexts
        reached via that stream. This method runs in the Broker thread and
        must not block.
        """
        # TODO: there is a race between creation of a context and
        # disconnection of its related stream. An error reply should be sent
        # to any message in _waiters_by_key below.
        self._lock.acquire()
        try:
            for context, key in list(self._key_by_context.items()):
                if context.context_id in stream.routes:
                    LOG.info('Dropping %r due to disconnect of %r',
                             context, stream)
                    self._response_by_key.pop(key, None)
                    self._waiters_by_key.pop(key, None)
                    self._refs_by_context.pop(context, None)
                    self._lru_by_via.pop(context, None)
        finally:
            self._lock.release()

    def _connect(self, key, method_name, **kwargs):
        """
        Actual connect implementation. Arranges for the Mitogen connection to
@@ -240,14 +263,24 @@ class ContextService(mitogen.service.Service):
        if kwargs.get('via'):
            self._update_lru(context, method_name=method_name, **kwargs)
        else:
            # For directly connected contexts, listen to the associated
            # Stream's disconnect event and use it to invalidate dependent
            # Contexts.
            stream = self.router.stream_by_id(context.context_id)
            mitogen.core.listen(stream, 'disconnect',
                                lambda: self._on_stream_disconnect(stream))

        home_dir = context.call(os.path.expanduser, '~')

        # We don't need to wait for the result of this. Ideally we'd check
        # its return value somewhere, but logs will catch a failure anyway.
        context.call_async(ansible_mitogen.target.start_fork_parent)

        if os.environ.get('MITOGEN_DUMP_THREAD_STACKS'):
            from mitogen import debug
            context.call(debug.dump_to_logger)

        self._key_by_context[context] = key
        self._refs_by_context[context] = 0

        return {

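For reference, the disconnect notification used above can also be observed
directly against the public API. The following is a rough standalone sketch,
not part of this change, that substitutes a local context for a real SSH
target; it relies only on mitogen.core.listen() and Router.stream_by_id(),
exactly as in the hunk above:

import mitogen.core
import mitogen.master

broker = mitogen.master.Broker()
router = mitogen.master.Router(broker)
try:
    # Any directly connected context has an associated Stream on the router.
    context = router.local()
    stream = router.stream_by_id(context.context_id)

    # The callback fires on the Broker thread when the stream goes away,
    # which is why _on_stream_disconnect() above must not block.
    mitogen.core.listen(stream, 'disconnect',
                        lambda: print('lost connection to %r' % (context,)))
finally:
    # Tearing the broker down disconnects the stream, firing the signal.
    broker.shutdown()
    broker.join()
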
@@ -1 +1,2 @@
- import_playbook: lru_one_target.yml
- import_playbook: reconnection.yml

@@ -0,0 +1,30 @@
# Test ContextService ability to handle disconnections, including handling
# cleanup of dependent (via=) contexts.

- name: integration/context_service/reconnection.yml
  hosts: all
  any_errors_fatal: true
  tasks:
    - become: true
      custom_python_detect_environment:
      register: old_become_env

    - become: true
      # The sleep must be >1 for vanilla Ansible.
      shell: |
        bash -c "( sleep 3; pkill -f sshd:; ) & disown"

    - connection: local
      shell: sleep 3

    - wait_for_connection:

    - become: true
      custom_python_detect_environment:
      register: new_become_env

    # Verify the PIDs really changed (i.e. disconnection happened).
    - assert:
        that:
          - old_become_env.pid != new_become_env.pid

@@ -35,6 +35,7 @@ RUN yum clean all && \
DOCKERFILE = r"""
COPY data/001-mitogen.sudo /etc/sudoers.d/001-mitogen
RUN \
    chsh -s /bin/bash && \
    mkdir -p /var/run/sshd && \
    echo i-am-mitogen-test-docker-image > /etc/sentinel && \
    groupadd mitogen__sudo_nopw && \
