Handle a 'wait_for_connection' call made right after a task caused a shutdown

pull/710/head
Steven Robertson 5 years ago
parent cee088fa80
commit acde13f9d6

@ -371,6 +371,13 @@ class ActionModuleMixin(ansible.plugins.action.ActionBase):
self._compute_environment_string(env) self._compute_environment_string(env)
self._set_temp_file_args(module_args, wrap_async) self._set_temp_file_args(module_args, wrap_async)
# There's a case where, if a task shuts down the node and then immediately calls
# wait_for_connection, the `ping` test from Ansible won't pass because we lost the connection.
# Clearing out the context forces a reconnect.
# See https://github.com/dw/mitogen/issues/655 and Ansible's `wait_for_connection` module for more info.
if module_name == 'ping' and type(self).__name__ == 'wait_for_connection':
self._connection.context = None
self._connection._connect() self._connection._connect()
result = ansible_mitogen.planner.invoke( result = ansible_mitogen.planner.invoke(
ansible_mitogen.planner.Invocation( ansible_mitogen.planner.Invocation(

@ -170,6 +170,12 @@ class ContextService(mitogen.service.Service):
""" """
LOG.debug('%r.reset(%r)', self, stack) LOG.debug('%r.reset(%r)', self, stack)
# This can happen when a `shutdown -r` shell command is followed
# immediately by a `wait_for_connection` task;
# in that case there is no stack to disconnect from.
if not stack:
return False
l = mitogen.core.Latch() l = mitogen.core.Latch()
context = None context = None
with self._lock: with self._lock:

@ -74,7 +74,7 @@ else:
@mitogen.core.takes_router @mitogen.core.takes_router
def get_or_create_pool(size=None, router=None): def get_or_create_pool(size=None, router=None, context=None):
global _pool global _pool
global _pool_pid global _pool_pid
@ -84,6 +84,12 @@ def get_or_create_pool(size=None, router=None):
_pool_lock.acquire() _pool_lock.acquire()
try: try:
if _pool_pid != my_pid: if _pool_pid != my_pid:
if router is None:
# fall back to getting the router from the context, if one exists
if context is not None:
router = context.router
else:
raise ValueError("Unable to create Pool! Missing router.")
_pool = Pool( _pool = Pool(
router, router,
services=[], services=[],
@ -119,7 +125,7 @@ def call(service_name, method_name, call_context=None, **kwargs):
if call_context: if call_context:
return call_context.call_service(service_name, method_name, **kwargs) return call_context.call_service(service_name, method_name, **kwargs)
else: else:
pool = get_or_create_pool() pool = get_or_create_pool(context=kwargs.get('context'))
invoker = pool.get_invoker(service_name, msg=None) invoker = pool.get_invoker(service_name, msg=None)
return getattr(invoker.service, method_name)(**kwargs) return getattr(invoker.service, method_name)(**kwargs)

@ -12,3 +12,4 @@
- include: issue_590__sys_modules_crap.yml - include: issue_590__sys_modules_crap.yml
- include: issue_591__setuptools_cwd_crash.yml - include: issue_591__setuptools_cwd_crash.yml
- include: issue_615__streaming_transfer.yml - include: issue_615__streaming_transfer.yml
- include: issue_655__wait_for_connection_error.yml

@ -0,0 +1,85 @@
# https://github.com/dw/mitogen/issues/655
# Spins up a CentOS 8 container and runs the wait_for_connection test inside of it.
# Done this way because the shutdown command causes issues in our tests:
# since things are run on localhost, Azure DevOps loses its connection and fails.
# TODO: do we want to install docker a different way so we can do this for other tests too?
---
# this should only run on our Mac hosts
- hosts: target
any_errors_fatal: True
gather_facts: yes
become: no
tasks:
- name: set up test container and run tests inside it
block:
- name: install deps
block:
- name: install docker
shell: |
# NOTE: for tracking purposes: https://github.com/docker/for-mac/issues/2359
# using docker for mac CI workaround: https://github.com/drud/ddev/pull/1748/files#diff-19288f650af2dabdf1dcc5b354d1f245
DOCKER_URL=https://download.docker.com/mac/stable/31259/Docker.dmg &&
curl -O -sSL $DOCKER_URL &&
open -W Docker.dmg && cp -r /Volumes/Docker/Docker.app /Applications
sudo /Applications/Docker.app/Contents/MacOS/Docker --quit-after-install --unattended &&
ln -s /Applications/Docker.app/Contents/Resources/bin/docker /usr/local/bin/docker &&
nohup /Applications/Docker.app/Contents/MacOS/Docker --unattended &
# wait 2 min for docker to come up
counter=0 &&
while ! /usr/local/bin/docker ps 2>/dev/null ; do
if [ $counter -lt 24 ]; then
let counter=counter+1
else
exit 1
fi
sleep 5
done
# the Python bindings (docker_container) aren't working on this host, so shell out instead
- name: create docker container
shell: /usr/local/bin/docker run --name testMitogen -d --rm centos:8 bash -c "sleep infinity & wait"
- name: add container to inventory
add_host:
name: testMitogen
ansible_connection: docker
ansible_user: root
changed_when: false
environment:
PATH: /usr/local/bin/:{{ ansible_env.PATH }}
- name: run tests
block:
# to reproduce the issue, create /var/run/reboot-required
- name: create test file
file:
path: /var/run/reboot-required
state: touch
- name: Check if reboot is required
stat:
path: /var/run/reboot-required
register: reboot_required
- name: Reboot server
shell: sleep 2 && shutdown -r now "Ansible updates triggered"
async: 1
poll: 0
when: reboot_required.stat.exists == True
- name: Wait 300 seconds for server to become available
wait_for_connection:
delay: 30
timeout: 300
when: reboot_required.stat.exists == True
- name: cleanup test file
file:
path: /var/run/reboot-required
state: absent
delegate_to: testMitogen
environment:
PATH: /usr/local/bin/:{{ ansible_env.PATH }}
- name: remove test container
shell: /usr/local/bin/docker stop testMitogen
Loading…
Cancel
Save