Revert logic to use Popen.communicate (#80874)

* Back out use of communicate, add better comments, add bufsize, and align with subprocess._communicate * tests * re-order logic slightly * more comments * loopty loop * yet another comment * Revert "yet another comment" This reverts commit 96cd8ada5fa0441b92f2298bdaa6cb40594847d2. * Revert "loopty loop" This reverts commit 96ea066f6a7d18902c04a14f18dd79b38e56f5e7. * ci_complete * Copy in comment too * Wording updates Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com> * Back out bufsize --------- Co-authored-by: Matt Davis <6775756+nitzmahone@users.noreply.github.com>
2 years ago · 553f51e728
parent 85d3305889
commit 553f51e728
4 changed files with 85 additions and 65 deletions
--- a/lib/ansible/module_utils/basic.py
+++ b/lib/ansible/module_utils/basic.py
@ -1844,6 +1844,14 @@ class AnsibleModule(object):
        '''
        Execute a command, returns rc, stdout, and stderr.
        The mechanism of this method for reading stdout and stderr differs from
        that of CPython subprocess.Popen.communicate, in that this method will
        stop reading once the spawned command has exited and stdout and stderr
        have been consumed, as opposed to waiting until stdout/stderr are
        closed. This can be an important distinction, when taken into account
        that a forked or backgrounded process may hold stdout or stderr open
        for longer than the spawned command.
        :arg args: is the command to run
            * If args is a list, the command will be run with shell=False.
            * If args is a string and use_unsafe_shell=False it will split args to a list and run with shell=False
@ -2023,17 +2031,17 @@ class AnsibleModule(object):
            if before_communicate_callback:
                before_communicate_callback(cmd)
            # the communication logic here is essentially taken from that
            # of the _communicate() function in ssh.py
            stdout = b''
            stderr = b''
-            try:
+
-                selector = selectors.DefaultSelector()
+            # Mirror the CPython subprocess logic and preference for the selector to use.
-            except (IOError, OSError):
+            # poll/select have the advantage of not requiring any extra file
-                # Failed to detect default selector for the given platform
+            # descriptor, contrarily to epoll/kqueue (also, they require a single
-                # Select PollSelector which is supported by major platforms
+            # syscall).
            if hasattr(selectors, 'PollSelector'):
                selector = selectors.PollSelector()
            else:
                selector = selectors.SelectSelector()
            if data:
                if not binary_data:
@ -2041,53 +2049,58 @@ class AnsibleModule(object):
                if isinstance(data, text_type):
                    data = to_bytes(data)
-            if not prompt_re:
+            selector.register(cmd.stdout, selectors.EVENT_READ)
-                stdout, stderr = cmd.communicate(input=data)
+            selector.register(cmd.stderr, selectors.EVENT_READ)
-            else:
+
-                # We only need this to look for a prompt, to abort instead of hanging
+            if os.name == 'posix':
-                selector.register(cmd.stdout, selectors.EVENT_READ)
+                fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
-                selector.register(cmd.stderr, selectors.EVENT_READ)
+                fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
-                if os.name == 'posix':
+
-                    fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stdout.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
+            if data:
-                    fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_SETFL, fcntl.fcntl(cmd.stderr.fileno(), fcntl.F_GETFL) | os.O_NONBLOCK)
+                cmd.stdin.write(data)
-
+                cmd.stdin.close()
-                if data:
+
-                    cmd.stdin.write(data)
+            while True:
-                    cmd.stdin.close()
+                # A timeout of 1 is both a little short and a little long.
-
+                # With None we could deadlock, with a lower value we would
-                while True:
+                # waste cycles. As it is, this is a mild inconvenience if
-                    events = selector.select(1)
+                # we need to exit, and likely doesn't waste too many cycles
-                    for key, event in events:
+                events = selector.select(1)
-                        b_chunk = key.fileobj.read()
+                stdout_changed = False
-                        if b_chunk == b(''):
+                for key, event in events:
-                            selector.unregister(key.fileobj)
+                    b_chunk = key.fileobj.read(32768)
-                        if key.fileobj == cmd.stdout:
+                    if not b_chunk:
-                            stdout += b_chunk
+                        selector.unregister(key.fileobj)
-                        elif key.fileobj == cmd.stderr:
+                    elif key.fileobj == cmd.stdout:
-                            stderr += b_chunk
+                        stdout += b_chunk
-                    # if we're checking for prompts, do it now
+                        stdout_changed = True
-                    if prompt_re:
+                    elif key.fileobj == cmd.stderr:
-                        if prompt_re.search(stdout) and not data:
+                        stderr += b_chunk
-                            if encoding:
+
-                                stdout = to_native(stdout, encoding=encoding, errors=errors)
+                # if we're checking for prompts, do it now, but only if stdout
-                            return (257, stdout, "A prompt was encountered while running a command, but no input data was specified")
+                # actually changed since the last loop
-                    # only break out if no pipes are left to read or
+                if prompt_re and stdout_changed and prompt_re.search(stdout) and not data:
-                    # the pipes are completely read and
+                    if encoding:
-                    # the process is terminated
+                        stdout = to_native(stdout, encoding=encoding, errors=errors)
-                    if (not events or not selector.get_map()) and cmd.poll() is not None:
+                    return (257, stdout, "A prompt was encountered while running a command, but no input data was specified")
-                        break
+
-                    # No pipes are left to read but process is not yet terminated
+                # break out if no pipes are left to read or the pipes are completely read
-                    # Only then it is safe to wait for the process to be finished
+                # and the process is terminated
-                    # NOTE: Actually cmd.poll() is always None here if no selectors are left
+                if (not events or not selector.get_map()) and cmd.poll() is not None:
-                    elif not selector.get_map() and cmd.poll() is None:
+                    break
-                        cmd.wait()
+
-                        # The process is terminated. Since no pipes to read from are
+                # No pipes are left to read but process is not yet terminated
-                        # left, there is no need to call select() again.
+                # Only then it is safe to wait for the process to be finished
-                        break
+                # NOTE: Actually cmd.poll() is always None here if no selectors are left
-
+                elif not selector.get_map() and cmd.poll() is None:
-                cmd.stdout.close()
+                    cmd.wait()
-                cmd.stderr.close()
+                    # The process is terminated. Since no pipes to read from are
-                selector.close()
+                    # left, there is no need to call select() again.
                    break
            cmd.stdout.close()
            cmd.stderr.close()
            selector.close()
            rc = cmd.returncode
        except (OSError, IOError) as e:
--- a/test/integration/targets/command_shell/scripts/yoink.sh
+++ b/test/integration/targets/command_shell/scripts/yoink.sh
@ -0,0 +1,2 @@
 #!/usr/bin/env bash
 sleep 10
--- a/test/integration/targets/command_shell/tasks/main.yml
+++ b/test/integration/targets/command_shell/tasks/main.yml
@ -583,3 +583,8 @@
    that:
      - shell_expand_failure is failed
      - "shell_expand_failure.msg == 'Unsupported parameters for (shell) module: expand_argument_vars'"
 - name: Run command that backgrounds, to ensure no hang
  shell: '{{ role_path }}/scripts/yoink.sh &'
  delegate_to: localhost
  timeout: 5
--- a/test/units/module_utils/basic/test_run_command.py
+++ b/test/units/module_utils/basic/test_run_command.py
@ -109,7 +109,7 @@ def mock_subprocess(mocker):
            super(MockSelector, self).close()
            self._file_objs = []
-    selectors.DefaultSelector = MockSelector
+    selectors.PollSelector = MockSelector
    subprocess = mocker.patch('ansible.module_utils.basic.subprocess')
    subprocess._output = {mocker.sentinel.stdout: SpecialBytesIO(b'', fh=mocker.sentinel.stdout),
@ -147,7 +147,7 @@ class TestRunCommandArgs:
                              for (arg, cmd_lst, cmd_str), sh in product(ARGS_DATA, (True, False))),
                             indirect=['stdin'])
    def test_args(self, cmd, expected, shell, rc_am):
-        rc_am.run_command(cmd, use_unsafe_shell=shell, prompt_regex='i_dont_exist')
+        rc_am.run_command(cmd, use_unsafe_shell=shell)
        assert rc_am._subprocess.Popen.called
        args, kwargs = rc_am._subprocess.Popen.call_args
        assert args == (expected, )
@ -163,17 +163,17 @@ class TestRunCommandArgs:
 class TestRunCommandCwd:
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
    def test_cwd(self, mocker, rc_am):
-        rc_am.run_command('/bin/ls', cwd='/new', prompt_regex='i_dont_exist')
+        rc_am.run_command('/bin/ls', cwd='/new')
        assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/new'
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
    def test_cwd_relative_path(self, mocker, rc_am):
-        rc_am.run_command('/bin/ls', cwd='sub-dir', prompt_regex='i_dont_exist')
+        rc_am.run_command('/bin/ls', cwd='sub-dir')
        assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/home/foo/sub-dir'
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
    def test_cwd_not_a_dir(self, mocker, rc_am):
-        rc_am.run_command('/bin/ls', cwd='/not-a-dir', prompt_regex='i_dont_exist')
+        rc_am.run_command('/bin/ls', cwd='/not-a-dir')
        assert rc_am._subprocess.Popen.mock_calls[0][2]['cwd'] == b'/not-a-dir'
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
@ -212,14 +212,14 @@ class TestRunCommandRc:
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
    def test_check_rc_false(self, rc_am):
        rc_am._subprocess.Popen.return_value.returncode = 1
-        (rc, stdout, stderr) = rc_am.run_command('/bin/false', check_rc=False, prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/false', check_rc=False)
        assert rc == 1
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
    def test_check_rc_true(self, rc_am):
        rc_am._subprocess.Popen.return_value.returncode = 1
        with pytest.raises(SystemExit):
-            rc_am.run_command('/bin/false', check_rc=True, prompt_regex='i_dont_exist')
+            rc_am.run_command('/bin/false', check_rc=True)
        assert rc_am.fail_json.called
        args, kwargs = rc_am.fail_json.call_args
        assert kwargs['rc'] == 1
@ -228,7 +228,7 @@ class TestRunCommandRc:
 class TestRunCommandOutput:
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
    def test_text_stdin(self, rc_am):
-        (rc, stdout, stderr) = rc_am.run_command('/bin/foo', data='hello world', prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/foo', data='hello world')
        assert rc_am._subprocess.Popen.return_value.stdin.getvalue() == b'hello world\n'
    @pytest.mark.parametrize('stdin', [{}], indirect=['stdin'])
@ -237,7 +237,7 @@ class TestRunCommandOutput:
                                     SpecialBytesIO(b'hello', fh=mocker.sentinel.stdout),
                                     mocker.sentinel.stderr:
                                     SpecialBytesIO(b'', fh=mocker.sentinel.stderr)}
-        (rc, stdout, stderr) = rc_am.run_command('/bin/cat hello.txt', prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/cat hello.txt')
        assert rc == 0
        # module_utils function.  On py3 it returns text and py2 it returns
        # bytes because it's returning native strings
@ -251,7 +251,7 @@ class TestRunCommandOutput:
                                     mocker.sentinel.stderr:
                                     SpecialBytesIO(u'لرئيسية'.encode('utf-8'),
                                                    fh=mocker.sentinel.stderr)}
-        (rc, stdout, stderr) = rc_am.run_command('/bin/something_ugly', prompt_regex='i_dont_exist')
+        (rc, stdout, stderr) = rc_am.run_command('/bin/something_ugly')
        assert rc == 0
        # module_utils function.  On py3 it returns text and py2 it returns
        # bytes because it's returning native strings