From 553f51e7284d03582fe8d476a680422ff9e962fe Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Wed, 9 Aug 2023 13:09:43 -0500 Subject: [PATCH] Revert logic to use Popen.communicate (#80874) * Back out use of communicate, add better comments, add bufsize, and align with subprocess._communicate * tests * re-order logic slightly * more comments * loopty loop * yet another comment * Revert "yet another comment" This reverts commit 96cd8ada5fa0441b92f2298bdaa6cb40594847d2. * Revert "loopty loop" This reverts commit 96ea066f6a7d18902c04a14f18dd79b38e56f5e7. * ci_complete * Copy in comment too * Wording updates Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com> * Back out bufsize --------- Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com> --- lib/ansible/module_utils/basic.py | 123 ++++++++++-------- .../targets/command_shell/scripts/yoink.sh | 2 + .../targets/command_shell/tasks/main.yml | 5 + .../module_utils/basic/test_run_command.py | 20 +-- 4 files changed, 85 insertions(+), 65 deletions(-) create mode 100755 test/integration/targets/command_shell/scripts/yoink.sh diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py index 190e84099c1..a6d68284806 100644 --- a/lib/ansible/module_utils/basic.py +++ b/lib/ansible/module_utils/basic.py @@ -1844,6 +1844,14 @@ class AnsibleModule(object): ''' Execute a command, returns rc, stdout, and stderr. + The mechanism of this method for reading stdout and stderr differs from + that of CPython subprocess.Popen.communicate, in that this method will + stop reading once the spawned command has exited and stdout and stderr + have been consumed, as opposed to waiting until stdout/stderr are + closed. This can be an important distinction, when taken into account + that a forked or backgrounded process may hold stdout or stderr open + for longer than the spawned command. + :arg args: is the command to run * If args is a list, the command will be run with shell=False. * If args is a string and use_unsafe_shell=False it will split args to a list and run with shell=False @@ -2023,17 +2031,17 @@ class AnsibleModule(object): if before_communicate_callback: before_communicate_callback(cmd) - # the communication logic here is essentially taken from that - # of the _communicate() function in ssh.py - stdout = b'' stderr = b'' - try: - selector = selectors.DefaultSelector() - except (IOError, OSError): - # Failed to detect default selector for the given platform - # Select PollSelector which is supported by major platforms + + # Mirror the CPython subprocess logic and preference for the selector to use. + # poll/select have the advantage of not requiring any extra file + # descriptor, contrarily to epoll/kqueue (also, they require a single + # syscall). + if hasattr(selectors, 'PollSelector'): selector = selectors.PollSelector() + else: + selector = selectors.SelectSelector() if data: if not binary_data: @@ -2041,53 +2049,58 @@ class AnsibleModule(object): if isinstance(data, text_type): data = to_bytes(data) - if not prompt_re: - stdout, stderr = cmd.communicate(input=data) - else: - # We only need this to look for a prompt, to abort instead of hanging - selector.register(cmd.stdout, selectors.EVENT_READ) - selector.register(cmd.stderr, selectors.EVENT_READ) - if os.name == 'posix': - fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK) - fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK) - - if data: - cmd.stdin.write(data) - cmd.stdin.close() - - while True: - events = selector.select(1) - for key, event in events: - b_chunk = key.fileobj.read() - if b_chunk == b(''): - selector.unregister(key.fileobj) - if key.fileobj == cmd.stdout: - stdout += b_chunk - elif key.fileobj == cmd.stderr: - stderr += b_chunk - # if we're checking for prompts, do it now - if prompt_re: - if prompt_re.search(stdout) and not data: - if encoding: - stdout = to_native(stdout, encoding=encoding, errors=errors) - return (257, stdout, "A prompt was encountered while running a command, but no input data was specified") - # only break out if no pipes are left to read or - # the pipes are completely read and - # the process is terminated - if (not events or not selector.get_map()) and cmd.poll() is not None: - break - # No pipes are left to read but process is not yet terminated - # Only then it is safe to wait for the process to be finished - # NOTE: Actually cmd.poll() is always None here if no selectors are left - elif not selector.get_map() and cmd.poll() is None: - cmd.wait() - # The process is terminated. Since no pipes to read from are - # left, there is no need to call select() again. - break - - cmd.stdout.close() - cmd.stderr.close() - selector.close() + selector.register(cmd.stdout, selectors.EVENT_READ) + selector.register(cmd.stderr, selectors.EVENT_READ) + + if os.name == 'posix': + fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK) + fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK) + + if data: + cmd.stdin.write(data) + cmd.stdin.close() + + while True: + # A timeout of 1 is both a little short and a little long. + # With None we could deadlock, with a lower value we would + # waste cycles. As it is, this is a mild inconvenience if + # we need to exit, and likely doesn't waste too many cycles + events = selector.select(1) + stdout_changed = False + for key, event in events: + b_chunk = key.fileobj.read(32768) + if not b_chunk: + selector.unregister(key.fileobj) + elif key.fileobj == cmd.stdout: + stdout += b_chunk + stdout_changed = True + elif key.fileobj == cmd.stderr: + stderr += b_chunk + + # if we're checking for prompts, do it now, but only if stdout + # actually changed since the last loop + if prompt_re and stdout_changed and prompt_re.search(stdout) and not data: + if encoding: + stdout = to_native(stdout, encoding=encoding, errors=errors) + return (257, stdout, "A prompt was encountered while running a command, but no input data was specified") + + # break out if no pipes are left to read or the pipes are completely read + # and the process is terminated + if (not events or not selector.get_map()) and cmd.poll() is not None: + break + + # No pipes are left to read but process is not yet terminated + # Only then it is safe to wait for the process to be finished + # NOTE: Actually cmd.poll() is always None here if no selectors are left + elif not selector.get_map() and cmd.poll() is None: + cmd.wait() + # The process is terminated. Since no pipes to read from are + # left, there is no need to call select() again. + break + + cmd.stdout.close() + cmd.stderr.close() + selector.close() rc = cmd.returncode except (OSError, IOError) as e: diff --git a/test/integration/targets/command_shell/scripts/yoink.sh b/test/integration/targets/command_shell/scripts/yoink.sh new file mode 100755 index 00000000000..ca955da03e4 --- /dev/null +++ b/test/integration/targets/command_shell/scripts/yoink.sh @@ -0,0 +1,2 @@ +#!/usr/bin/env bash +sleep 10 diff --git a/test/integration/targets/command_shell/tasks/main.yml b/test/integration/targets/command_shell/tasks/main.yml index 5d3d9a82f41..d2cde9533e1 100644 --- a/test/integration/targets/command_shell/tasks/main.yml +++ b/test/integration/targets/command_shell/tasks/main.yml @@ -583,3 +583,8 @@ that: - shell_expand_failure is failed - "shell_expand_failure.msg == 'Unsupported parameters for (shell) module: expand_argument_vars'" + +- name: Run command that backgrounds, to ensure no hang + shell: '{{ role_path }}/scripts/yoink.sh &' + delegate_to: localhost + timeout: 5 diff --git a/test/units/module_utils/basic/test_run_command.py b/test/units/module_utils/basic/test_run_command.py index 2825870ab2e..259ac6c4516 100644 --- a/test/units/module_utils/basic/test_run_command.py +++ b/test/units/module_utils/basic/test_run_command.py @@ -109,7 +109,7 @@ def mock_subprocess(mocker): super(MockSelector, self).close() self._file_objs = [] - selectors.DefaultSelector = MockSelector + selectors.PollSelector = MockSelector subprocess = mocker.patch('ansible.module_utils.basic.subprocess') subprocess._output = {mocker.sentinel.stdout: SpecialBytesIO(b'', fh=mocker.sentinel.stdout), @@ -147,7 +147,7 @@ class TestRunCommandArgs: for (arg, cmd_lst, cmd_str), sh in product(ARGS_DATA, (True, False))), indirect=['stdin']) def test_args(self, cmd, expected, shell, rc_am): - rc_am.run_command(cmd, use_unsafe_shell=shell, prompt_regex='i_dont_exist') + rc_am.run_command(cmd, use_unsafe_shell=shell) assert rc_am._subprocess.Popen.called args, kwargs = rc_am._subprocess.Popen.call_args assert args == (expected, ) @@ -163,17 +163,17 @@ class TestRunCommandArgs: class TestRunCommandCwd: @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) def test_cwd(self, mocker, rc_am): - rc_am.run_command('/bin/ls', cwd='/new', prompt_regex='i_dont_exist') + rc_am.run_command('/bin/ls', cwd='/new') assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/new' @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) def test_cwd_relative_path(self, mocker, rc_am): - rc_am.run_command('/bin/ls', cwd='sub-dir', prompt_regex='i_dont_exist') + rc_am.run_command('/bin/ls', cwd='sub-dir') assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/home/foo/sub-dir' @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) def test_cwd_not_a_dir(self, mocker, rc_am): - rc_am.run_command('/bin/ls', cwd='/not-a-dir', prompt_regex='i_dont_exist') + rc_am.run_command('/bin/ls', cwd='/not-a-dir') assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/not-a-dir' @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) @@ -212,14 +212,14 @@ class TestRunCommandRc: @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) def test_check_rc_false(self, rc_am): rc_am._subprocess.Popen.return_value.returncode = 1 - (rc, stdout, stderr) = rc_am.run_command('/bin/false', check_rc=False, prompt_regex='i_dont_exist') + (rc, stdout, stderr) = rc_am.run_command('/bin/false', check_rc=False) assert rc == 1 @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) def test_check_rc_true(self, rc_am): rc_am._subprocess.Popen.return_value.returncode = 1 with pytest.raises(SystemExit): - rc_am.run_command('/bin/false', check_rc=True, prompt_regex='i_dont_exist') + rc_am.run_command('/bin/false', check_rc=True) assert rc_am.fail_json.called args, kwargs = rc_am.fail_json.call_args assert kwargs['rc'] == 1 @@ -228,7 +228,7 @@ class TestRunCommandRc: class TestRunCommandOutput: @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) def test_text_stdin(self, rc_am): - (rc, stdout, stderr) = rc_am.run_command('/bin/foo', data='hello world', prompt_regex='i_dont_exist') + (rc, stdout, stderr) = rc_am.run_command('/bin/foo', data='hello world') assert rc_am._subprocess.Popen.return_value.stdin.getvalue() == b'hello world\n' @pytest.mark.parametrize('stdin', [{}], indirect=['stdin']) @@ -237,7 +237,7 @@ class TestRunCommandOutput: SpecialBytesIO(b'hello', fh=mocker.sentinel.stdout), mocker.sentinel.stderr: SpecialBytesIO(b'', fh=mocker.sentinel.stderr)} - (rc, stdout, stderr) = rc_am.run_command('/bin/cat hello.txt', prompt_regex='i_dont_exist') + (rc, stdout, stderr) = rc_am.run_command('/bin/cat hello.txt') assert rc == 0 # module_utils function. On py3 it returns text and py2 it returns # bytes because it's returning native strings @@ -251,7 +251,7 @@ class TestRunCommandOutput: mocker.sentinel.stderr: SpecialBytesIO(u'لرئيسية'.encode('utf-8'), fh=mocker.sentinel.stderr)} - (rc, stdout, stderr) = rc_am.run_command('/bin/something_ugly', prompt_regex='i_dont_exist') + (rc, stdout, stderr) = rc_am.run_command('/bin/something_ugly') assert rc == 0 # module_utils function. On py3 it returns text and py2 it returns # bytes because it's returning native strings