From 9b799868593f80fd633ff69803a5cb3c68495675 Mon Sep 17 00:00:00 2001 From: David Wilson Date: Sun, 18 Aug 2019 18:27:47 +0100 Subject: [PATCH 1/7] docs: use new manpage alias in one more place --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index fe15ca27..8b1ae2a5 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -80,7 +80,7 @@ Mitogen for Ansible ~~~~~~~~~~~~~~~~~~~ * :gh:issue:`363`: fix an obscure race matching *Permission denied* errors from - some versions of ``su`` running on heavily loaded machines. + some versions of :linux:man1:`su` running on heavily loaded machines. * :gh:issue:`410`: Uses of :linux:man7:`unix` sockets are replaced with traditional :linux:man7:`pipe` pairs when SELinux is detected, to work around From ed33236c7180845c17110aaed210272feca44865 Mon Sep 17 00:00:00 2001 From: David Wilson Date: Sun, 18 Aug 2019 19:09:11 +0100 Subject: [PATCH 2/7] docs: , -> : --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 8b1ae2a5..9e80963d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -41,7 +41,7 @@ Enhancements `_ are not yet handled. -* :gh:issue:`419`, :gh:issue:`470`, file descriptor usage is approximately +* :gh:issue:`419`, :gh:issue:`470`: file descriptor usage is approximately halved, as it is no longer necessary to separately manage read and write sides to work around a design problem. From b6d1df749cb1ec06b83d61ea44a9d5cc7171d96c Mon Sep 17 00:00:00 2001 From: David Wilson Date: Tue, 20 Aug 2019 13:59:01 +0100 Subject: [PATCH 3/7] issue #633: take inventory_hostname from task_vars It used to be set by on_action_run() from task_vars, but this doesn't work for meta: reset_connection. That meant MITOGEN_CPU_COUNT>1 would pick the wrong mux to reset the connection on. --- ansible_mitogen/connection.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ansible_mitogen/connection.py b/ansible_mitogen/connection.py index 2dd3bfa9..5e0e28b8 100644 --- a/ansible_mitogen/connection.py +++ b/ansible_mitogen/connection.py @@ -486,9 +486,6 @@ class Connection(ansible.plugins.connection.ConnectionBase): # the case of the synchronize module. # - #: Set to the host name as it appears in inventory by on_action_run(). - inventory_hostname = None - #: Set to task_vars by on_action_run(). _task_vars = None @@ -527,7 +524,6 @@ class Connection(ansible.plugins.connection.ConnectionBase): :param str loader_basedir: Loader base directory; see :attr:`loader_basedir`. """ - self.inventory_hostname = task_vars['inventory_hostname'] self._task_vars = task_vars self.host_vars = task_vars['hostvars'] self.delegate_to_hostname = delegate_to_hostname @@ -712,7 +708,7 @@ class Connection(ansible.plugins.connection.ConnectionBase): connection=self, play_context=self._play_context, transport=self.transport, - inventory_name=self.inventory_hostname, + inventory_name=self.get_task_var('inventory_hostname'), ) stack = self._stack_from_spec(spec) return spec.inventory_name(), stack From fc09b81949912da48ec1e491b48dffab04187023 Mon Sep 17 00:00:00 2001 From: David Wilson Date: Tue, 20 Aug 2019 14:02:59 +0100 Subject: [PATCH 4/7] issue #633: handle meta: reset_connection when become is active - don't create a new connection during reset if no existing connection exists - strip off last hop in connection stack if PlayContext.become is True. - log a debug message if reset cannot find an existing connection --- ansible_mitogen/connection.py | 50 ++++++++----------- ansible_mitogen/services.py | 35 ++++++++++--- tests/ansible/integration/connection/all.yml | 1 + .../integration/connection/reset_become.yml | 42 ++++++++++++++++ 4 files changed, 93 insertions(+), 35 deletions(-) create mode 100644 tests/ansible/integration/connection/reset_become.yml diff --git a/ansible_mitogen/connection.py b/ansible_mitogen/connection.py index 5e0e28b8..21599b8d 100644 --- a/ansible_mitogen/connection.py +++ b/ansible_mitogen/connection.py @@ -528,7 +528,7 @@ class Connection(ansible.plugins.connection.ConnectionBase): self.host_vars = task_vars['hostvars'] self.delegate_to_hostname = delegate_to_hostname self.loader_basedir = loader_basedir - self._mitogen_reset(mode='put') + self._put_connection() def _get_task_vars(self): """ @@ -777,15 +777,11 @@ class Connection(ansible.plugins.connection.ConnectionBase): self.binding = worker_model.get_binding(inventory_name) self._connect_stack(stack) - def _mitogen_reset(self, mode): + def _put_connection(self): """ Forget everything we know about the connected context. This function cannot be called _reset() since that name is used as a public API by Ansible 2.4 wait_for_connection plug-in. - - :param str mode: - Name of ContextService method to use to discard the context, either - 'put' or 'reset'. """ if not self.context: return @@ -794,7 +790,7 @@ class Connection(ansible.plugins.connection.ConnectionBase): mitogen.service.call( call_context=self.binding.get_service_context(), service_name='ansible_mitogen.services.ContextService', - method_name=mode, + method_name='put', context=self.context ) @@ -809,24 +805,11 @@ class Connection(ansible.plugins.connection.ConnectionBase): gracefully shut down, and wait for shutdown to complete. Safe to call multiple times. """ - self._mitogen_reset(mode='put') + self._put_connection() if self.binding: self.binding.close() self.binding = None - def _reset_find_task_vars(self): - """ - Monsterous hack: since "meta: reset_connection" does not run from an - action, we cannot capture task variables via :meth:`on_action_run`. - Instead walk the parent frames searching for the `all_vars` local from - StrategyBase._execute_meta(). If this fails, just leave task_vars - unset, likely causing a subtly wrong configuration to be selected. - """ - frame = sys._getframe() - while frame and not self._task_vars: - self._task_vars = frame.f_locals.get('all_vars') - frame = frame.f_back - reset_compat_msg = ( 'Mitogen only supports "reset_connection" on Ansible 2.5.6 or later' ) @@ -838,9 +821,6 @@ class Connection(ansible.plugins.connection.ConnectionBase): the 'disconnected' state, and informs ContextService the connection is bad somehow, and should be shut down and discarded. """ - if self._task_vars is None: - self._reset_find_task_vars() - if self._play_context.remote_addr is None: # <2.5.6 incorrectly populate PlayContext for reset_connection # https://github.com/ansible/ansible/issues/27520 @@ -848,10 +828,24 @@ class Connection(ansible.plugins.connection.ConnectionBase): self.reset_compat_msg ) - self._connect() - self._mitogen_reset(mode='reset') - self.binding.close() - self.binding = None + # Clear out state in case we were ever connected. + self.close() + + inventory_name, stack = self._build_stack() + if self._play_context.become: + stack = stack[:-1] + + worker_model = ansible_mitogen.process.get_worker_model() + binding = worker_model.get_binding(inventory_name) + try: + mitogen.service.call( + call_context=binding.get_service_context(), + service_name='ansible_mitogen.services.ContextService', + method_name='reset', + stack=mitogen.utils.cast(list(stack)), + ) + finally: + binding.close() # Compatibility with Ansible 2.4 wait_for_connection plug-in. _reset = reset diff --git a/ansible_mitogen/services.py b/ansible_mitogen/services.py index e6c41e5b..8cf4c092 100644 --- a/ansible_mitogen/services.py +++ b/ansible_mitogen/services.py @@ -156,20 +156,41 @@ class ContextService(mitogen.service.Service): @mitogen.service.expose(mitogen.service.AllowParents()) @mitogen.service.arg_spec({ - 'context': mitogen.core.Context + 'stack': list, }) - def reset(self, context): + def reset(self, stack): """ Return a reference, forcing close and discard of the underlying connection. Used for 'meta: reset_connection' or when some other error is detected. + + :returns: + :data:`True` if a connection was found to discard, otherwise + :data:`False`. """ - LOG.debug('%r.reset(%r)', self, context) - self._lock.acquire() - try: + LOG.debug('%r.reset(%r)', self, stack) + + l = mitogen.core.Latch() + context = None + with self._lock: + for spec in stack: + key = key_from_dict(via=context, **spec) + response = self._response_by_key.get(key) + if response is None: + LOG.debug('%r: could not find connection to shut down', + self) + return False + + context = response['context'] + + mitogen.core.listen(context, 'disconnect', l.put) self._shutdown_unlocked(context) - finally: - self._lock.release() + + # The timeout below is to turn a hang into a crash in case there is any + # possible race between 'disconnect' signal subscription, and the child + # abruptly disconnecting. + l.get(timeout=30.0) + return True @mitogen.service.expose(mitogen.service.AllowParents()) @mitogen.service.arg_spec({ diff --git a/tests/ansible/integration/connection/all.yml b/tests/ansible/integration/connection/all.yml index 4211f1b3..348857f5 100644 --- a/tests/ansible/integration/connection/all.yml +++ b/tests/ansible/integration/connection/all.yml @@ -8,3 +8,4 @@ - include: put_large_file.yml - include: put_small_file.yml - include: reset.yml +- include: reset_become.yml diff --git a/tests/ansible/integration/connection/reset_become.yml b/tests/ansible/integration/connection/reset_become.yml new file mode 100644 index 00000000..f2bf6bb1 --- /dev/null +++ b/tests/ansible/integration/connection/reset_become.yml @@ -0,0 +1,42 @@ +# issue #633: Connection.reset() should ignore "become", and apply to the login +# account. + +- hosts: test-targets + become: true + gather_facts: false + tasks: + - name: save pid of the become acct + custom_python_detect_environment: + register: become_acct + + - name: save pid of the login acct + become: false + custom_python_detect_environment: + register: login_acct + + - name: ensure login != become + assert: + that: + - become_acct.pid != login_acct.pid + + - name: reset the connection + meta: reset_connection + + - name: save new pid of the become acct + custom_python_detect_environment: + register: new_become_acct + + - name: ensure become_acct != new_become_acct + assert: + that: + - become_acct.pid != new_become_acct.pid + + - name: save new pid of login acct + become: false + custom_python_detect_environment: + register: new_login_acct + + - name: ensure login_acct != new_login_acct + assert: + that: + - login_acct.pid != new_login_acct.pid From efd82dd35a7de14c3ee50124a66cbf38517d315f Mon Sep 17 00:00:00 2001 From: David Wilson Date: Tue, 20 Aug 2019 14:47:33 +0100 Subject: [PATCH 5/7] issue #633: various task_vars fixes - take host_vars from task_vars too - make missing task_vars a hard error - update tests to provide stub task_vars --- ansible_mitogen/connection.py | 55 +++++++++++++++++--------- ansible_mitogen/services.py | 6 +-- tests/ansible/tests/connection_test.py | 8 +++- 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/ansible_mitogen/connection.py b/ansible_mitogen/connection.py index 21599b8d..1686cbde 100644 --- a/ansible_mitogen/connection.py +++ b/ansible_mitogen/connection.py @@ -56,6 +56,12 @@ import ansible_mitogen.transport_config LOG = logging.getLogger(__name__) +task_vars_msg = ( + 'could not recover task_vars. This means some connection ' + 'settings may erroneously be reset to their defaults. ' + 'Please report a bug if you encounter this message.' +) + def get_remote_name(spec): """ @@ -489,9 +495,6 @@ class Connection(ansible.plugins.connection.ConnectionBase): #: Set to task_vars by on_action_run(). _task_vars = None - #: Set to 'hostvars' by on_action_run() - host_vars = None - #: Set by on_action_run() delegate_to_hostname = None @@ -525,7 +528,6 @@ class Connection(ansible.plugins.connection.ConnectionBase): Loader base directory; see :attr:`loader_basedir`. """ self._task_vars = task_vars - self.host_vars = task_vars['hostvars'] self.delegate_to_hostname = delegate_to_hostname self.loader_basedir = loader_basedir self._put_connection() @@ -548,8 +550,10 @@ class Connection(ansible.plugins.connection.ConnectionBase): for new connections to be constructed in addition to the preconstructed connection passed into any running action. """ - f = sys._getframe() + if self._task_vars is not None: + return self._task_vars + f = sys._getframe() while f: if f.f_code.co_name == 'run': f_locals = f.f_locals @@ -567,9 +571,23 @@ class Connection(ansible.plugins.connection.ConnectionBase): f = f.f_back - LOG.warning('could not recover task_vars. This means some connection ' - 'settings may erroneously be reset to their defaults. ' - 'Please report a bug if you encounter this message.') + raise ansible.errors.AnsibleConnectionFailure(task_vars_msg) + + def get_host_vars(self, inventory_hostname): + """ + Fetch the HostVars for a host. + + :returns: + Variables dictionary or :data:`None`. + :raises ansible.errors.AnsibleConnectionFailure: + Task vars unavailable. + """ + task_vars = self._get_task_vars() + hostvars = task_vars.get('hostvars') + if hostvars: + return hostvars.get(inventory_hostname) + + raise ansible.errors.AnsibleConnectionFailure(task_vars_msg) def get_task_var(self, key, default=None): """ @@ -582,17 +600,16 @@ class Connection(ansible.plugins.connection.ConnectionBase): does not make sense to extract connection-related configuration for the delegated-to machine from them. """ - task_vars = self._task_vars or self._get_task_vars() - if task_vars is not None: - if self.delegate_to_hostname is None: + task_vars = self._get_task_vars() + if self.delegate_to_hostname is None: + if key in task_vars: + return task_vars[key] + else: + delegated_vars = task_vars['ansible_delegated_vars'] + if self.delegate_to_hostname in delegated_vars: + task_vars = delegated_vars[self.delegate_to_hostname] if key in task_vars: return task_vars[key] - else: - delegated_vars = task_vars['ansible_delegated_vars'] - if self.delegate_to_hostname in delegated_vars: - task_vars = delegated_vars[self.delegate_to_hostname] - if key in task_vars: - return task_vars[key] return default @@ -624,7 +641,8 @@ class Connection(ansible.plugins.connection.ConnectionBase): # must use __contains__ to avoid a TypeError for a missing host on # Ansible 2.3. - if self.host_vars is None or inventory_name not in self.host_vars: + via_vars = self.get_host_vars(inventory_name) + if via_vars is None: raise ansible.errors.AnsibleConnectionFailure( self.unknown_via_msg % ( via_spec, @@ -632,7 +650,6 @@ class Connection(ansible.plugins.connection.ConnectionBase): ) ) - via_vars = self.host_vars[inventory_name] return ansible_mitogen.transport_config.MitogenViaSpec( inventory_name=inventory_name, play_context=self._play_context, diff --git a/ansible_mitogen/services.py b/ansible_mitogen/services.py index 8cf4c092..0c06d806 100644 --- a/ansible_mitogen/services.py +++ b/ansible_mitogen/services.py @@ -173,12 +173,12 @@ class ContextService(mitogen.service.Service): l = mitogen.core.Latch() context = None with self._lock: - for spec in stack: + for i, spec in enumerate(stack): key = key_from_dict(via=context, **spec) response = self._response_by_key.get(key) if response is None: - LOG.debug('%r: could not find connection to shut down', - self) + LOG.debug('%r: could not find connection to shut down; ' + 'failed at hop %d', self, i) return False context = response['context'] diff --git a/tests/ansible/tests/connection_test.py b/tests/ansible/tests/connection_test.py index 54ea3d99..71e1d042 100644 --- a/tests/ansible/tests/connection_test.py +++ b/tests/ansible/tests/connection_test.py @@ -46,7 +46,13 @@ class ConnectionMixin(MuxProcessMixin): def make_connection(self): play_context = ansible.playbook.play_context.PlayContext() - return self.klass(play_context, new_stdin=False) + conn = self.klass(play_context, new_stdin=False) + conn.on_action_run( + task_vars={}, + delegate_to_hostname=None, + loader_basedir=None, + ) + return conn def wait_for_completion(self): # put_data() is asynchronous, must wait for operation to happen. Do From e3c514d906667738bf7bc365ee03fa89e9bee399 Mon Sep 17 00:00:00 2001 From: David Wilson Date: Tue, 20 Aug 2019 15:52:00 +0100 Subject: [PATCH 6/7] issue #633: update Changelog. --- docs/changelog.rst | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9e80963d..594fdaed 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -21,7 +21,16 @@ v0.2.9 (unreleased) To avail of fixes in an unreleased version, please download a ZIP file `directly from GitHub `_. -*(no changes)* +* :gh:issue:`633`: :ans:mod:`meta: reset_connection ` could fail to reset + a connection when ``become: true`` was set on the playbook. + + +Thanks! +~~~~~~~ + +Mitogen would not be possible without the support of users. A huge thanks for +bug reports, testing, features and fixes in this release contributed by +`Can Ozokur httpe://github.com/canozokur/>`_, v0.2.8 (2019-08-18) @@ -266,7 +275,7 @@ Thanks! Mitogen would not be possible without the support of users. A huge thanks for bug reports, testing, features and fixes in this release contributed by -`Andreas Hubert `_. +`Andreas Hubert `_, `Anton Markelov `_, `Dan `_, `Dave Cottlehuber `_, From 3023ab3b7b7fc74ad87bee08829c97f6562f1a86 Mon Sep 17 00:00:00 2001 From: David Wilson Date: Tue, 20 Aug 2019 16:11:10 +0100 Subject: [PATCH 7/7] issue #633: skip test on older Ansibles. --- tests/ansible/integration/connection/reset_become.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/ansible/integration/connection/reset_become.yml b/tests/ansible/integration/connection/reset_become.yml index f2bf6bb1..5a411e82 100644 --- a/tests/ansible/integration/connection/reset_become.yml +++ b/tests/ansible/integration/connection/reset_become.yml @@ -5,6 +5,12 @@ become: true gather_facts: false tasks: + - debug: msg="reset_become.yml skipped on Ansible<2.5.6" + when: ansible_version.full < '2.5.6' + + - meta: end_play + when: ansible_version.full < '2.5.6' + - name: save pid of the become acct custom_python_detect_environment: register: become_acct