From d032c591c2f861a66430ac0a2741be1b3b118198 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Tue, 10 Sep 2024 16:12:47 +0100 Subject: [PATCH] tests: Retry container process check during teardown I'm about 75% sure the check is an unavoidable race condition, see https://github.com/mitogen-hq/mitogen/issues/694#issuecomment-2338001694. If it occurs again, then reopen the issue. Fixes #694 --- docs/changelog.rst | 2 ++ tests/testlib.py | 34 ++++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 8cc24424..14f86e77 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -29,6 +29,8 @@ Unreleased * :gh:issue:`905` Initial support for templated ``ansible_ssh_args``, ``ansible_ssh_common_args``, and ``ansible_ssh_extra_args`` variables. NB: play or task scoped variables will probably still fail. +* :gh:issue:`694` CI: Fixed a race condition and some resource leaks causing + some of the intermittent failures when running the test suite. v0.3.9 (2024-08-13) diff --git a/tests/testlib.py b/tests/testlib.py index 8c40e7ff..a52292ce 100644 --- a/tests/testlib.py +++ b/tests/testlib.py @@ -146,6 +146,17 @@ def data_path(suffix): return path +def retry(fn, on, max_attempts, delay): + for i in range(max_attempts): + try: + return fn() + except on: + if i >= max_attempts - 1: + raise + else: + time.sleep(delay) + + def threading__thread_is_alive(thread): """Return whether the thread is alive (Python version compatibility shim). @@ -562,18 +573,24 @@ class DockerizedSshDaemon(object): wait_for_port(self.get_host(), self.port, pattern='OpenSSH') def check_processes(self): - args = ['docker', 'exec', self.container_name, 'ps', '-o', 'comm='] + # Get Accounting name (ucomm) & command line (args) of each process + # in the container. No truncation (-ww). No column headers (foo=). 
+ ps_output = subprocess.check_output([ + 'docker', 'exec', self.container_name, + 'ps', '-w', '-w', '-o', 'ucomm=', '-o', 'args=', + ]) + ps_lines = ps_output.decode().splitlines() + processes = [tuple(line.split(None, 1)) for line in ps_lines] counts = {} - for comm in subprocess.check_output(args).decode().splitlines(): - comm = comm.strip() - counts[comm] = counts.get(comm, 0) + 1 + for ucomm, _ in processes: + counts[ucomm] = counts.get(ucomm, 0) + 1 if counts != {'ps': 1, 'sshd': 1}: assert 0, ( 'Docker container %r contained extra running processes ' 'after test completed: %r' % ( self.container_name, - counts + processes, ) ) @@ -630,7 +647,12 @@ class DockerMixin(RouterMixin): @classmethod def tearDownClass(cls): - cls.dockerized_ssh.check_processes() + retry( + cls.dockerized_ssh.check_processes, + on=AssertionError, + max_attempts=5, + delay=0.1, + ) cls.dockerized_ssh.close() super(DockerMixin, cls).tearDownClass()