From 40a2fbe50abd1b00870fd0740cfbc1818ea18688 Mon Sep 17 00:00:00 2001 From: Jordan Borean Date: Thu, 29 Aug 2024 07:27:52 +1000 Subject: [PATCH] ssh and psrp - Support more complex characters in fetch_file (#83753) (#83816) (#83848) * ssh and psrp - Support more complex chars in fetch_file Fixes the psrp and ssh (with piped) fetch function to work with paths that contains glob like characters in the path. For Windows this was needed when using paths that contain `[]` in the path. For ssh this was a problem with FreeBSD when using the piped transfer method with similar characters. Also tidies up the psrp logic to not inject the paths and buffer size in the script but pass it as an object through an argument/parameter. * Fix sanity check (cherry picked from commit 520fa688ba232ed165fc2dbd3b2be2ebde365ba1) --- changelogs/fragments/powershell-clixml.yml | 3 ++ lib/ansible/plugins/shell/powershell.py | 51 ++++++++++++++++--- .../targets/connection_winrm/tests.yml | 9 ++++ test/units/plugins/shell/test_powershell.py | 37 ++++++++++++-- 4 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 changelogs/fragments/powershell-clixml.yml diff --git a/changelogs/fragments/powershell-clixml.yml b/changelogs/fragments/powershell-clixml.yml new file mode 100644 index 00000000000..3da3222d754 --- /dev/null +++ b/changelogs/fragments/powershell-clixml.yml @@ -0,0 +1,3 @@ +bugfixes: + - powershell - Improve CLIXML decoding to decode all control characters and + unicode characters that are encoded as surrogate pairs. diff --git a/lib/ansible/plugins/shell/powershell.py b/lib/ansible/plugins/shell/powershell.py index f2e78cbe2e8..08c94e06d44 100644 --- a/lib/ansible/plugins/shell/powershell.py +++ b/lib/ansible/plugins/shell/powershell.py @@ -26,35 +26,70 @@ import ntpath from ansible.module_utils.common.text.converters import to_bytes, to_text from ansible.plugins.shell import ShellBase +# This is weird, we are matching on byte sequences that match the utf-16-be +# matches for '_x(a-fA-F0-9){4}_'. The \x00 and {8} will match the hex sequence +# when it is encoded as utf-16-be. +_STRING_DESERIAL_FIND = re.compile(rb"\x00_\x00x([\x00(a-fA-F0-9)]{8})\x00_") _common_args = ['PowerShell', '-NoProfile', '-NonInteractive', '-ExecutionPolicy', 'Unrestricted'] -def _parse_clixml(data, stream="Error"): +def _parse_clixml(data: bytes, stream: str = "Error") -> bytes: """ Takes a byte string like '#< CLIXML\r\n bytes: + match_hex = matchobj.group(1) + hex_string = match_hex.decode("utf-16-be") + return base64.b16decode(hex_string.upper()) # There are some scenarios where the stderr contains a nested CLIXML element like # '<# CLIXML\r\n<# CLIXML\r\n......'. # Parse each individual element and add the error strings to our stderr list. # https://github.com/ansible/ansible/issues/69550 while data: - end_idx = data.find(b"") + 7 - current_element = data[data.find(b"") + if start_idx == -1 or end_idx == -1: + break + + end_idx += 7 + current_element = data[start_idx:end_idx] data = data[end_idx:] clixml = ET.fromstring(current_element) namespace_match = re.match(r'{(.*)}', clixml.tag) - namespace = "{%s}" % namespace_match.group(1) if namespace_match else "" + namespace = f"{{{namespace_match.group(1)}}}" if namespace_match else "" + + entries = clixml.findall("./%sS" % namespace) + if not entries: + continue + + # If this is a new CLIXML element, add a newline to separate the messages. + if lines: + lines.append("\r\n") + + for string_entry in entries: + actual_stream = string_entry.attrib.get('S', None) + if actual_stream != stream: + continue + + b_line = (string_entry.text or "").encode("utf-16-be") + b_escaped = re.sub(_STRING_DESERIAL_FIND, rplcr, b_line) - strings = clixml.findall("./%sS" % namespace) - lines.extend([e.text.replace('_x000D__x000A_', '') for e in strings if e.attrib.get('S') == stream]) + lines.append(b_escaped.decode("utf-16-be", errors="surrogatepass")) - return to_bytes('\r\n'.join(lines)) + return to_bytes(''.join(lines), errors="surrogatepass") class ShellModule(ShellBase): diff --git a/test/integration/targets/connection_winrm/tests.yml b/test/integration/targets/connection_winrm/tests.yml index 53526aae639..8384ef41845 100644 --- a/test/integration/targets/connection_winrm/tests.yml +++ b/test/integration/targets/connection_winrm/tests.yml @@ -55,3 +55,12 @@ ansible_port: 5986 ansible_winrm_scheme: https ansible_winrm_server_cert_validation: ignore + + - name: emit raw CLIXML on stderr with special chars + raw: $host.UI.WriteErrorLine("Test 🎵 _x005F_ _x005Z_.") + register: stderr_clixml + + - name: assert emit raw CLIXML on stderr with special chars + assert: + that: + - stderr_clixml.stderr_lines == ['Test 🎵 _x005F_ _x005Z_.'] diff --git a/test/units/plugins/shell/test_powershell.py b/test/units/plugins/shell/test_powershell.py index c94baabbc7c..8d5ebb2ebb1 100644 --- a/test/units/plugins/shell/test_powershell.py +++ b/test/units/plugins/shell/test_powershell.py @@ -1,6 +1,8 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type +import pytest + from ansible.plugins.shell.powershell import _parse_clixml, ShellModule @@ -28,7 +30,8 @@ def test_parse_clixml_single_stream(): b'At line:1 char:1_x000D__x000A_' \ b'+ fake cmdlet_x000D__x000A_+ ~~~~_x000D__x000A_' \ b' + CategoryInfo : ObjectNotFound: (fake:String) [], CommandNotFoundException_x000D__x000A_' \ - b' + FullyQualifiedErrorId : CommandNotFoundException_x000D__x000A_ _x000D__x000A_' \ + b' + FullyQualifiedErrorId : CommandNotFoundException_x000D__x000A_' \ + b' _x000D__x000A_' \ b'' expected = b"fake : The term 'fake' is not recognized as the name of a cmdlet. Check \r\n" \ b"the spelling of the name, or if a path was included.\r\n" \ @@ -36,7 +39,8 @@ def test_parse_clixml_single_stream(): b"+ fake cmdlet\r\n" \ b"+ ~~~~\r\n" \ b" + CategoryInfo : ObjectNotFound: (fake:String) [], CommandNotFoundException\r\n" \ - b" + FullyQualifiedErrorId : CommandNotFoundException\r\n " + b" + FullyQualifiedErrorId : CommandNotFoundException\r\n" \ + b" \r\n" actual = _parse_clixml(single_stream) assert actual == expected @@ -50,8 +54,9 @@ def test_parse_clixml_multiple_streams(): b' + CategoryInfo : ObjectNotFound: (fake:String) [], CommandNotFoundException_x000D__x000A_' \ b' + FullyQualifiedErrorId : CommandNotFoundException_x000D__x000A_ _x000D__x000A_' \ b'hi info' \ + b'other' \ b'' - expected = b"hi info" + expected = b"hi infoother" actual = _parse_clixml(multiple_stream, stream="Info") assert actual == expected @@ -75,6 +80,32 @@ def test_parse_clixml_multiple_elements(): assert actual == expected +@pytest.mark.parametrize('clixml, expected', [ + ('', ''), + ('just newline _x000A_', 'just newline \n'), + ('surrogate pair _xD83C__xDFB5_', 'surrogate pair 🎵'), + ('null char _x0000_', 'null char \0'), + ('normal char _x0061_', 'normal char a'), + ('escaped literal _x005F_x005F_', 'escaped literal _x005F_'), + ('underscope before escape _x005F__x000A_', 'underscope before escape _\n'), + ('surrogate high _xD83C_', 'surrogate high \uD83C'), + ('surrogate low _xDFB5_', 'surrogate low \uDFB5'), + ('lower case hex _x005f_', 'lower case hex _'), + ('invalid hex _x005G_', 'invalid hex _x005G_'), +]) +def test_parse_clixml_with_comlex_escaped_chars(clixml, expected): + clixml_data = ( + '<# CLIXML\r\n' + '' + f'{clixml}' + '' + ).encode() + b_expected = expected.encode(errors="surrogatepass") + + actual = _parse_clixml(clixml_data) + assert actual == b_expected + + def test_join_path_unc(): pwsh = ShellModule() unc_path_parts = ['\\\\host\\share\\dir1\\\\dir2\\', '\\dir3/dir4', 'dir5', 'dir6\\']