From 553f51e7284d03582fe8d476a680422ff9e962fe Mon Sep 17 00:00:00 2001
From: Matt Martz <matt@sivel.net>
Date: Wed, 9 Aug 2023 13:09:43 -0500
Subject: [PATCH] Revert logic to use Popen.communicate (#80874)

* Back out use of communicate, add better comments, add bufsize, and align with subprocess._communicate

* tests

* re-order logic slightly

* more comments

* loopty loop

* yet another comment

* Revert "yet another comment"

This reverts commit 96cd8ada5fa0441b92f2298bdaa6cb40594847d2.

* Revert "loopty loop"

This reverts commit 96ea066f6a7d18902c04a14f18dd79b38e56f5e7.

* ci_complete

* Copy in comment too

* Wording updates

Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com>

* Back out bufsize

---------

Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com>
---
 lib/ansible/module_utils/basic.py             | 123 ++++++++++--------
 .../targets/command_shell/scripts/yoink.sh    |   2 +
 .../targets/command_shell/tasks/main.yml      |   5 +
 .../module_utils/basic/test_run_command.py    |  20 +--
 4 files changed, 85 insertions(+), 65 deletions(-)
 create mode 100755 test/integration/targets/command_shell/scripts/yoink.sh

diff --git a/lib/ansible/module_utils/basic.py b/lib/ansible/module_utils/basic.py
index 190e84099c1..a6d68284806 100644
--- a/lib/ansible/module_utils/basic.py
+++ b/lib/ansible/module_utils/basic.py
@@ -1844,6 +1844,14 @@ class AnsibleModule(object):
         '''
         Execute a command, returns rc, stdout, and stderr.
 
+        The mechanism of this method for reading stdout and stderr differs from
+        that of CPython subprocess.Popen.communicate, in that this method will
+        stop reading once the spawned command has exited and stdout and stderr
+        have been consumed, as opposed to waiting until stdout/stderr are
+        closed. This can be an important distinction, when taken into account
+        that a forked or backgrounded process may hold stdout or stderr open
+        for longer than the spawned command.
+
         :arg args: is the command to run
             * If args is a list, the command will be run with shell=False.
             * If args is a string and use_unsafe_shell=False it will split args to a list and run with shell=False
@@ -2023,17 +2031,17 @@ class AnsibleModule(object):
             if before_communicate_callback:
                 before_communicate_callback(cmd)
 
-            # the communication logic here is essentially taken from that
-            # of the _communicate() function in ssh.py
-
             stdout = b''
             stderr = b''
-            try:
-                selector = selectors.DefaultSelector()
-            except (IOError, OSError):
-                # Failed to detect default selector for the given platform
-                # Select PollSelector which is supported by major platforms
+
+            # Mirror the CPython subprocess logic and preference for the selector to use.
+            # poll/select have the advantage of not requiring any extra file
+            # descriptor, contrarily to epoll/kqueue (also, they require a single
+            # syscall).
+            if hasattr(selectors, 'PollSelector'):
                 selector = selectors.PollSelector()
+            else:
+                selector = selectors.SelectSelector()
 
             if data:
                 if not binary_data:
@@ -2041,53 +2049,58 @@ class AnsibleModule(object):
                 if isinstance(data, text_type):
                     data = to_bytes(data)
 
-            if not prompt_re:
-                stdout, stderr = cmd.communicate(input=data)
-            else:
-                # We only need this to look for a prompt, to abort instead of hanging
-                selector.register(cmd.stdout, selectors.EVENT_READ)
-                selector.register(cmd.stderr, selectors.EVENT_READ)
-                if os.name == 'posix':
-                    fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
-                    fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
-
-                if data:
-                    cmd.stdin.write(data)
-                    cmd.stdin.close()
-
-                while True:
-                    events = selector.select(1)
-                    for key, event in events:
-                        b_chunk = key.fileobj.read()
-                        if b_chunk == b(''):
-                            selector.unregister(key.fileobj)
-                        if key.fileobj == cmd.stdout:
-                            stdout += b_chunk
-                        elif key.fileobj == cmd.stderr:
-                            stderr += b_chunk
-                    # if we're checking for prompts, do it now
-                    if prompt_re:
-                        if prompt_re.search(stdout) and not data:
-                            if encoding:
-                                stdout = to_native(stdout, encoding=encoding, errors=errors)
-                            return (257, stdout, "A prompt was encountered while running a command, but no input data was specified")
-                    # only break out if no pipes are left to read or
-                    # the pipes are completely read and
-                    # the process is terminated
-                    if (not events or not selector.get_map()) and cmd.poll() is not None:
-                        break
-                    # No pipes are left to read but process is not yet terminated
-                    # Only then it is safe to wait for the process to be finished
-                    # NOTE: Actually cmd.poll() is always None here if no selectors are left
-                    elif not selector.get_map() and cmd.poll() is None:
-                        cmd.wait()
-                        # The process is terminated. Since no pipes to read from are
-                        # left, there is no need to call select() again.
-                        break
-
-                cmd.stdout.close()
-                cmd.stderr.close()
-                selector.close()
+            selector.register(cmd.stdout, selectors.EVENT_READ)
+            selector.register(cmd.stderr, selectors.EVENT_READ)
+
+            if os.name == 'posix':
+                fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
+                fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
+
+            if data:
+                cmd.stdin.write(data)
+                cmd.stdin.close()
+
+            while True:
+                # A timeout of 1 is both a little short and a little long.
+                # With None we could deadlock, with a lower value we would
+                # waste cycles. As it is, this is a mild inconvenience if
+                # we need to exit, and likely doesn't waste too many cycles
+                events = selector.select(1)
+                stdout_changed = False
+                for key, event in events:
+                    b_chunk = key.fileobj.read(32768)
+                    if not b_chunk:
+                        selector.unregister(key.fileobj)
+                    elif key.fileobj == cmd.stdout:
+                        stdout += b_chunk
+                        stdout_changed = True
+                    elif key.fileobj == cmd.stderr:
+                        stderr += b_chunk
+
+                # if we're checking for prompts, do it now, but only if stdout
+                # actually changed since the last loop
+                if prompt_re and stdout_changed and prompt_re.search(stdout) and not data:
+                    if encoding:
+                        stdout = to_native(stdout, encoding=encoding, errors=errors)
+                    return (257, stdout, "A prompt was encountered while running a command, but no input data was specified")
+
+                # break out if no pipes are left to read or the pipes are completely read
+                # and the process is terminated
+                if (not events or not selector.get_map()) and cmd.poll() is not None:
+                    break
+
+                # No pipes are left to read but process is not yet terminated
+                # Only then it is safe to wait for the process to be finished
+                # NOTE: Actually cmd.poll() is always None here if no selectors are left
+                elif not selector.get_map() and cmd.poll() is None:
+                    cmd.wait()
+                    # The process is terminated. Since no pipes to read from are
+                    # left, there is no need to call select() again.
+                    break
+
+            cmd.stdout.close()
+            cmd.stderr.close()
+            selector.close()
 
             rc = cmd.returncode
         except (OSError, IOError) as e:
diff --git a/test/integration/targets/command_shell/scripts/yoink.sh b/test/integration/targets/command_shell/scripts/yoink.sh
new file mode 100755
index 00000000000..ca955da03e4
--- /dev/null
+++ b/test/integration/targets/command_shell/scripts/yoink.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+sleep 10
diff --git a/test/integration/targets/command_shell/tasks/main.yml b/test/integration/targets/command_shell/tasks/main.yml
index 5d3d9a82f41..d2cde9533e1 100644
--- a/test/integration/targets/command_shell/tasks/main.yml
+++ b/test/integration/targets/command_shell/tasks/main.yml
@@ -583,3 +583,8 @@
     that:
       - shell_expand_failure is failed
       - "shell_expand_failure.msg == 'Unsupported parameters for (shell) module: expand_argument_vars'"
+
+- name: Run command that backgrounds, to ensure no hang
+  shell: '{{ role_path }}/scripts/yoink.sh &'
+  delegate_to: localhost
+  timeout: 5
diff --git a/test/units/module_utils/basic/test_run_command.py b/test/units/module_utils/basic/test_run_command.py
index 2825870ab2e..259ac6c4516 100644
--- a/test/units/module_utils/basic/test_run_command.py
+++ b/test/units/module_utils/basic/test_run_command.py
@@ -109,7 +109,7 @@ def mock_subprocess(mocker):
             super(MockSelector, self).close()
             self._file_objs = []
 
-    selectors.DefaultSelector = MockSelector
+    selectors.PollSelector = MockSelector
 
     subprocess = mocker.patch('ansible.module_utils.basic.subprocess')
     subprocess._output = {mocker.sentinel.stdout: SpecialBytesIO(b'', fh=mocker.sentinel.stdout),
@@ -147,7 +147,7 @@ class TestRunCommandArgs:
                               for (arg, cmd_lst, cmd_str), sh in product(ARGS_DATA, (True, False))),
                              indirect=['stdin'])
     def test_args(self, cmd, expected, shell, rc_am):
-        rc_am.run_command(cmd, use_unsafe_shell=shell, prompt_regex='i_dont_exist')
+        rc_am.run_command(cmd, use_unsafe_shell=shell)
         assert rc_am._subprocess.Popen.called
         args, kwargs = rc_am._subprocess.Popen.call_args
         assert args == (expected, )
@@ -163,17 +163,17 @@ class TestRunCommandArgs:
 class TestRunCommandCwd:
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
     def test_cwd(self, mocker, rc_am):
-        rc_am.run_command('/bin/ls', cwd='/new', prompt_regex='i_dont_exist')
+        rc_am.run_command('/bin/ls', cwd='/new')
         assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/new'
 
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
     def test_cwd_relative_path(self, mocker, rc_am):
-        rc_am.run_command('/bin/ls', cwd='sub-dir', prompt_regex='i_dont_exist')
+        rc_am.run_command('/bin/ls', cwd='sub-dir')
         assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/home/foo/sub-dir'
 
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
     def test_cwd_not_a_dir(self, mocker, rc_am):
-        rc_am.run_command('/bin/ls', cwd='/not-a-dir', prompt_regex='i_dont_exist')
+        rc_am.run_command('/bin/ls', cwd='/not-a-dir')
         assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/not-a-dir'
 
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
@@ -212,14 +212,14 @@ class TestRunCommandRc:
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
     def test_check_rc_false(self, rc_am):
         rc_am._subprocess.Popen.return_value.returncode = 1
-        (rc, stdout, stderr) = rc_am.run_command('/bin/false', check_rc=False, prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/false', check_rc=False)
         assert rc == 1
 
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
     def test_check_rc_true(self, rc_am):
         rc_am._subprocess.Popen.return_value.returncode = 1
         with pytest.raises(SystemExit):
-            rc_am.run_command('/bin/false', check_rc=True, prompt_regex='i_dont_exist')
+            rc_am.run_command('/bin/false', check_rc=True)
         assert rc_am.fail_json.called
         args, kwargs = rc_am.fail_json.call_args
         assert kwargs['rc'] == 1
@@ -228,7 +228,7 @@ class TestRunCommandRc:
 class TestRunCommandOutput:
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
     def test_text_stdin(self, rc_am):
-        (rc, stdout, stderr) = rc_am.run_command('/bin/foo', data='hello world', prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/foo', data='hello world')
         assert rc_am._subprocess.Popen.return_value.stdin.getvalue() == b'hello world\n'
 
     @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
@@ -237,7 +237,7 @@ class TestRunCommandOutput:
                                      SpecialBytesIO(b'hello', fh=mocker.sentinel.stdout),
                                      mocker.sentinel.stderr:
                                      SpecialBytesIO(b'', fh=mocker.sentinel.stderr)}
-        (rc, stdout, stderr) = rc_am.run_command('/bin/cat hello.txt', prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/cat hello.txt')
         assert rc == 0
         # module_utils function.  On py3 it returns text and py2 it returns
         # bytes because it's returning native strings
@@ -251,7 +251,7 @@ class TestRunCommandOutput:
                                      mocker.sentinel.stderr:
                                      SpecialBytesIO(u'لرئيسية'.encode('utf-8'),
                                                     fh=mocker.sentinel.stderr)}
-        (rc, stdout, stderr) = rc_am.run_command('/bin/something_ugly', prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/something_ugly')
         assert rc == 0
         # module_utils function.  On py3 it returns text and py2 it returns
         # bytes because it's returning native strings