From acde13f9d63a329a6929ad07487ba5791df2213c Mon Sep 17 00:00:00 2001 From: Steven Robertson Date: Thu, 30 Apr 2020 17:53:06 -0700 Subject: [PATCH] handles a 'wait_for_connection' call right after a task caused a shutdown --- ansible_mitogen/mixins.py | 7 ++ ansible_mitogen/services.py | 6 ++ mitogen/service.py | 10 ++- tests/ansible/regression/all.yml | 1 + .../issue_655__wait_for_connection_error.yml | 85 +++++++++++++++++++ 5 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 tests/ansible/regression/issue_655__wait_for_connection_error.yml diff --git a/ansible_mitogen/mixins.py b/ansible_mitogen/mixins.py index 50ebfabe..7672618d 100644 --- a/ansible_mitogen/mixins.py +++ b/ansible_mitogen/mixins.py @@ -371,6 +371,13 @@ class ActionModuleMixin(ansible.plugins.action.ActionBase): self._compute_environment_string(env) self._set_temp_file_args(module_args, wrap_async) + # there's a case where if a task shuts down the node and then immediately calls + # wait_for_connection, the `ping` test from Ansible won't pass because we lost connection + # clearing out context forces a reconnect + # see https://github.com/dw/mitogen/issues/655 and Ansible's `wait_for_connection` module for more info + if module_name == 'ping' and type(self).__name__ == 'wait_for_connection': + self._connection.context = None + self._connection._connect() result = ansible_mitogen.planner.invoke( ansible_mitogen.planner.Invocation( diff --git a/ansible_mitogen/services.py b/ansible_mitogen/services.py index 52171903..2eb3b2e4 100644 --- a/ansible_mitogen/services.py +++ b/ansible_mitogen/services.py @@ -170,6 +170,12 @@ class ContextService(mitogen.service.Service): """ LOG.debug('%r.reset(%r)', self, stack) + # this could happen if we have a `shutdown -r` shell command + # and then a `wait_for_connection` right afterwards + # in this case, we have no stack to disconnect from + if not stack: + return False + l = mitogen.core.Latch() context = None with self._lock: diff --git a/mitogen/service.py b/mitogen/service.py index 6bd64eb0..3b244414 100644 --- a/mitogen/service.py +++ b/mitogen/service.py @@ -74,7 +74,7 @@ else: @mitogen.core.takes_router -def get_or_create_pool(size=None, router=None): +def get_or_create_pool(size=None, router=None, context=None): global _pool global _pool_pid @@ -84,6 +84,12 @@ def get_or_create_pool(size=None, router=None): _pool_lock.acquire() try: if _pool_pid != my_pid: + if router is None: + # fallback to trying to get router from context if that exists + if context is not None: + router = context.router + else: + raise ValueError("Unable to create Pool! Missing router.") _pool = Pool( router, services=[], @@ -119,7 +125,7 @@ def call(service_name, method_name, call_context=None, **kwargs): if call_context: return call_context.call_service(service_name, method_name, **kwargs) else: - pool = get_or_create_pool() + pool = get_or_create_pool(context=kwargs.get('context')) invoker = pool.get_invoker(service_name, msg=None) return getattr(invoker.service, method_name)(**kwargs) diff --git a/tests/ansible/regression/all.yml b/tests/ansible/regression/all.yml index 81780bb3..0d5e43cd 100644 --- a/tests/ansible/regression/all.yml +++ b/tests/ansible/regression/all.yml @@ -12,3 +12,4 @@ - include: issue_590__sys_modules_crap.yml - include: issue_591__setuptools_cwd_crash.yml - include: issue_615__streaming_transfer.yml +- include: issue_655__wait_for_connection_error.yml diff --git a/tests/ansible/regression/issue_655__wait_for_connection_error.yml b/tests/ansible/regression/issue_655__wait_for_connection_error.yml new file mode 100644 index 00000000..aa9472ec --- /dev/null +++ b/tests/ansible/regression/issue_655__wait_for_connection_error.yml @@ -0,0 +1,85 @@ +# https://github.com/dw/mitogen/issues/655 +# Spins up a Centos8 container and runs the wait_for_connection test inside of it +# Doing it this way because the shutdown command causes issues in our tests +# since things are ran on localhost; Azure DevOps loses connection and fails +# TODO: do we want to install docker a different way to be able to do this for other tests too +--- +# this should only run on our Mac hosts +- hosts: target + any_errors_fatal: True + gather_facts: yes + become: no + tasks: + - name: set up test container and run tests inside it + block: + - name: install deps + block: + - name: install docker + shell: | + # NOTE: for tracking purposes: https://github.com/docker/for-mac/issues/2359 + # using docker for mac CI workaround: https://github.com/drud/ddev/pull/1748/files#diff-19288f650af2dabdf1dcc5b354d1f245 + DOCKER_URL=https://download.docker.com/mac/stable/31259/Docker.dmg && + curl -O -sSL $DOCKER_URL && + open -W Docker.dmg && cp -r /Volumes/Docker/Docker.app /Applications + sudo /Applications/Docker.app/Contents/MacOS/Docker --quit-after-install --unattended && + ln -s /Applications/Docker.app/Contents/Resources/bin/docker /usr/local/bin/docker && + nohup /Applications/Docker.app/Contents/MacOS/Docker --unattended & + # wait 2 min for docker to come up + counter=0 && + while ! /usr/local/bin/docker ps 2>/dev/null ; do + if [ $counter -lt 24 ]; then + let counter=counter+1 + else + exit 1 + fi + sleep 5 + done + + # python bindings (docker_container) aren't working on this host, so gonna shell out + - name: create docker container + shell: /usr/local/bin/docker run --name testMitogen -d --rm centos:8 bash -c "sleep infinity & wait" + + - name: add container to inventory + add_host: + name: testMitogen + ansible_connection: docker + ansible_user: root + changed_when: false + environment: + PATH: /usr/local/bin/:{{ ansible_env.PATH }} + + - name: run tests + block: + # to repro the issue, will create /var/run/reboot-required + - name: create test file + file: + path: /var/run/reboot-required + state: touch + + - name: Check if reboot is required + stat: + path: /var/run/reboot-required + register: reboot_required + + - name: Reboot server + shell: sleep 2 && shutdown -r now "Ansible updates triggered" + async: 1 + poll: 0 + when: reboot_required.stat.exists == True + + - name: Wait 300 seconds for server to become available + wait_for_connection: + delay: 30 + timeout: 300 + when: reboot_required.stat.exists == True + + - name: cleanup test file + file: + path: /var/run/reboot-required + state: absent + delegate_to: testMitogen + environment: + PATH: /usr/local/bin/:{{ ansible_env.PATH }} + + - name: remove test container + shell: /usr/local/bin/docker stop testMitogen