From 591152bef0beb0b4df483ac553aa782ebb669a51 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Mon, 4 Mar 2024 14:15:47 +0000 Subject: [PATCH] tests: Avoid intermittent 2 hour timeout in new style Ansible module tests This has been lurking for years, rearing its head at unpredictable times. This change doesn't fix it, but it should make it a lot less mysterious. --- ...m_python_new_style_missing_interpreter.yml | 5 ++ .../runner/custom_python_new_style_module.yml | 3 +- ...om_python_new_style_missing_interpreter.py | 49 +++++++++++++++++++ .../modules/custom_python_new_style_module.py | 49 +++++++++++++++++++ 4 files changed, 105 insertions(+), 1 deletion(-) diff --git a/tests/ansible/integration/runner/custom_python_new_style_missing_interpreter.yml b/tests/ansible/integration/runner/custom_python_new_style_missing_interpreter.yml index 0c620dac..0d7cf1b6 100644 --- a/tests/ansible/integration/runner/custom_python_new_style_missing_interpreter.yml +++ b/tests/ansible/integration/runner/custom_python_new_style_missing_interpreter.yml @@ -2,6 +2,11 @@ - name: integration/runner/custom_python_new_style_module.yml hosts: test-targets tasks: + # FIXME Without Mitogen Ansible often reads stdin before the module. + # Either don't read directly from stdin, or figure out the cause. 
+ - meta: end_play + when: not is_mitogen + - custom_python_new_style_missing_interpreter: foo: true with_sequence: start=0 end={{end|default(1)}} diff --git a/tests/ansible/integration/runner/custom_python_new_style_module.yml b/tests/ansible/integration/runner/custom_python_new_style_module.yml index e2384f81..8435b158 100644 --- a/tests/ansible/integration/runner/custom_python_new_style_module.yml +++ b/tests/ansible/integration/runner/custom_python_new_style_module.yml @@ -1,7 +1,8 @@ - name: integration/runner/custom_python_new_style_module.yml hosts: test-targets tasks: - # without Mitogen Ansible 2.10 hangs on this play + # FIXME Without Mitogen Ansible often reads stdin before the module. + # Either don't read directly from stdin, or figure out the cause. - meta: end_play when: not is_mitogen diff --git a/tests/ansible/lib/modules/custom_python_new_style_missing_interpreter.py b/tests/ansible/lib/modules/custom_python_new_style_missing_interpreter.py index 2e0ef0da..728685f4 100644 --- a/tests/ansible/lib/modules/custom_python_new_style_missing_interpreter.py +++ b/tests/ansible/lib/modules/custom_python_new_style_missing_interpreter.py @@ -1,6 +1,20 @@ # I am an Ansible new-style Python module, but I lack an interpreter. +# See also custom_python_new_style_module, we should be updated in tandem. +import io +import json +import select +import signal import sys +import warnings + +# Ansible 2.7 changed how new style modules are invoked. It seems that module +# parameters are *sometimes* read before the module runs. Modules that try +# to read directly from stdin, such as this, are unable to. However it doesn't +# always fail, influences seem to include Ansible & Python version. As noted +# in ansible.module_utils.basic._load_params() we should probably use that. 
+# I think (medium confidence) I narrowed the inflection (with git bisect) to +# https://github.com/ansible/ansible/commit/52449cc01a71778ef94ea0237eed0284f5d75582 # As of Ansible 2.10, Ansible changed new-style detection: # https://github.com/ansible/ansible/pull/61196/files#diff-5675e463b6ce1fbe274e5e7453f83cd71e61091ea211513c93e7c0b4d527d637L828-R980 # NOTE: this import works for Mitogen, and the import below matches new-style Ansible 2.10 @@ -8,11 +22,46 @@ import sys # from ansible.module_utils. # import ansible.module_utils. +# These timeouts should prevent hard-to-attribute, 2+ hour CI job timeouts. +# Previously this module has waited on stdin forever (timeoutInMinutes=120). +SELECT_TIMEOUT = 5.0 # seconds +SIGNAL_TIMEOUT = 10 # seconds + + +def fail_json(msg, **kwargs): + kwargs.update(failed=True, msg=msg) + print(json.dumps(kwargs, indent=2, sort_keys=True)) + sys.exit(1) + + +def sigalrm_handler(signum, frame): + fail_json("Still executing after SIGNAL_TIMEOUT=%ds" % (SIGNAL_TIMEOUT,)) + def usage(): sys.stderr.write('Usage: %s \n' % (sys.argv[0],)) sys.exit(1) + +# Wait SIGNAL_TIMEOUT seconds, exit with failure if still running. +signal.signal(signal.SIGALRM, sigalrm_handler) +signal.alarm(SIGNAL_TIMEOUT) + +# Wait SELECT_TIMEOUT seconds, exit with failure if no data appears on stdin. +# TODO Combine select() & read() in a loop, to handle slow trickle of data. +# Consider buffering, line buffering, `f.read()` vs `f.read1()`. +# TODO Document that sys.stdin may be a StringIO under Ansible + Mitogen. +try: + inputs_ready, _, _ = select.select([sys.stdin], [], [], SELECT_TIMEOUT) +except (AttributeError, TypeError, io.UnsupportedOperation) as exc: + # sys.stdin.fileno() doesn't exist or can't return a real file descriptor. + warnings.warn("Could not wait on sys.stdin=%r: %r" % (sys.stdin, exc)) +else: + if not inputs_ready: + fail_json("Gave up waiting on sys.stdin after SELECT_TIMEOUT=%ds" + % (SELECT_TIMEOUT,)) + +# Read all data on stdin. 
May block forever, if EOF is not reached. input_json = sys.stdin.read() print("{") diff --git a/tests/ansible/lib/modules/custom_python_new_style_module.py b/tests/ansible/lib/modules/custom_python_new_style_module.py index f9c176c1..c84d241a 100755 --- a/tests/ansible/lib/modules/custom_python_new_style_module.py +++ b/tests/ansible/lib/modules/custom_python_new_style_module.py @@ -1,16 +1,65 @@ #!/usr/bin/python # I am an Ansible new-style Python module. I should receive an encoding string. +# See also custom_python_new_style_missing_interpreter, we should be updated in tandem. +import io +import json +import select +import signal import sys +import warnings + +# Ansible 2.7 changed how new style modules are invoked. It seems that module +# parameters are *sometimes* read before the module runs. Modules that try +# to read directly from stdin, such as this, are unable to. However it doesn't +# always fail, influences seem to include Ansible & Python version. As noted +# in ansible.module_utils.basic._load_params() we should probably use that. +# I think (medium confidence) I narrowed the inflection (with git bisect) to +# https://github.com/ansible/ansible/commit/52449cc01a71778ef94ea0237eed0284f5d75582 # This is the magic marker Ansible looks for: # from ansible.module_utils. +# These timeouts should prevent hard-to-attribute, 2+ hour CI job timeouts. +# Previously this module has waited on stdin forever (timeoutInMinutes=120). +SELECT_TIMEOUT = 5.0 # seconds +SIGNAL_TIMEOUT = 10 # seconds + + +def fail_json(msg, **kwargs): + kwargs.update(failed=True, msg=msg) + print(json.dumps(kwargs, indent=2, sort_keys=True)) + sys.exit(1) + + +def sigalrm_handler(signum, frame): + fail_json("Still executing after SIGNAL_TIMEOUT=%ds" % (SIGNAL_TIMEOUT,)) + def usage(): sys.stderr.write('Usage: %s \n' % (sys.argv[0],)) sys.exit(1) + +# Wait SIGNAL_TIMEOUT seconds, exit with failure if still running. 
+signal.signal(signal.SIGALRM, sigalrm_handler) +signal.alarm(SIGNAL_TIMEOUT) + +# Wait SELECT_TIMEOUT seconds, exit with failure if no data appears on stdin. +# TODO Combine select() & read() in a loop, to handle slow trickle of data. +# Consider buffering, line buffering, `f.read()` vs `f.read1()`. +# TODO Document that sys.stdin may be a StringIO under Ansible + Mitogen. +try: + inputs_ready, _, _ = select.select([sys.stdin], [], [], SELECT_TIMEOUT) +except (AttributeError, TypeError, io.UnsupportedOperation) as exc: + # sys.stdin.fileno() doesn't exist or can't return a real file descriptor. + warnings.warn("Could not wait on sys.stdin=%r: %r" % (sys.stdin, exc)) +else: + if not inputs_ready: + fail_json("Gave up waiting on sys.stdin after SELECT_TIMEOUT=%ds" + % (SELECT_TIMEOUT,)) + +# Read all data on stdin. May block forever, if EOF is not reached. input_json = sys.stdin.read() print("{")