From f914f4ad5a860d3aeea346557d60a691b5607056 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Wed, 16 Jul 2025 17:23:07 +0200 Subject: [PATCH 1/9] Fix UTF-8 BOM handling in content encoding for better compatibility --- lib/ansible/modules/uri.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/ansible/modules/uri.py b/lib/ansible/modules/uri.py index e19450b358d..5c1ce2462bc 100644 --- a/lib/ansible/modules/uri.py +++ b/lib/ansible/modules/uri.py @@ -756,7 +756,14 @@ def main(): # Default content_encoding to try if isinstance(content, binary_type): - u_content = to_text(content, encoding=content_encoding) + # Check for UTF-8 BOM (EF BB BF) + if content.startswith(b'\xef\xbb\xbf'): + # Use utf-8-sig encoding which handles the BOM correctly + u_content = to_text(content, encoding='utf-8-sig') + else: + # Use the detected encoding if no BOM + u_content = to_text(content, encoding=content_encoding) + if maybe_json: try: js = json.loads(u_content) From 8e3fd03cd23625b3050e95d39ad6590943210c0d Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Wed, 16 Jul 2025 17:33:23 +0200 Subject: [PATCH 2/9] Fix formatting in uri.py for improved readability --- lib/ansible/modules/uri.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ansible/modules/uri.py b/lib/ansible/modules/uri.py index 5c1ce2462bc..05650677225 100644 --- a/lib/ansible/modules/uri.py +++ b/lib/ansible/modules/uri.py @@ -763,7 +763,7 @@ def main(): else: # Use the detected encoding if no BOM u_content = to_text(content, encoding=content_encoding) - + if maybe_json: try: js = json.loads(u_content) From 1cdd3e1743d2e5a8e905b2eba7e695c37071ccf0 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Thu, 17 Jul 2025 10:11:58 +0200 Subject: [PATCH 3/9] Add changelog fragment for uri module --- changelogs/fragments/85492-fix-utf8-bom.yml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 changelogs/fragments/85492-fix-utf8-bom.yml diff --git 
a/changelogs/fragments/85492-fix-utf8-bom.yml b/changelogs/fragments/85492-fix-utf8-bom.yml new file mode 100644 index 00000000000..ef75ff491c0 --- /dev/null +++ b/changelogs/fragments/85492-fix-utf8-bom.yml @@ -0,0 +1,2 @@ +bugfixes: + - uri - add detection of UTF-8 BOM in responses and proper handling using utf-8-sig encoding. \ No newline at end of file From e2768957084dcbe8b01d6941c4b895b58fecf401 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Thu, 17 Jul 2025 13:13:26 +0200 Subject: [PATCH 4/9] Add test for UTF-8 BOM handling in uri module response content --- test/units/modules/test_uri.py | 69 +++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/test/units/modules/test_uri.py b/test/units/modules/test_uri.py index 7c7459cd4a5..51889c88aff 100644 --- a/test/units/modules/test_uri.py +++ b/test/units/modules/test_uri.py @@ -8,13 +8,13 @@ from __future__ import annotations from unittest.mock import MagicMock, patch import pytest +import json from ansible.module_utils.testing import patch_module_args from ansible.modules import uri class TestUri: - def test_main_no_args(self): """Module must fail if called with no args.""" with pytest.raises(SystemExit), \ @@ -44,3 +44,70 @@ class TestUri: uri.main() fetch_url.assert_called_once() assert fetch_url.call_args[1].get("force") + + def test_utf8_bom_handling(self, capsys): + """Test that UTF-8 BOM is properly handled in response content. + + The uri module should strip the UTF-8 BOM (Byte Order Mark) from + response content before parsing JSON to prevent parsing errors. 
+ """ + # UTF-8 BOM bytes (EF BB BF) followed by valid JSON + bom_content = b'\xef\xbb\xbf{"name": "dollarsign"}' + expected_json = {"name": "dollarsign"} + + # Mock the HTTP response with BOM content + resp = MagicMock() + # Set up headers mock with proper content type and charset + headers_mock = MagicMock() + headers_mock.get_content_type.return_value = "application/json" + + # Create a more specific mock for the charset parameter + def get_param_mock(param=None): + """Return charset parameter when requested, mimicking HTTP header behavior. + + The uri module uses this method to extract charset information from headers. + """ + if param == "charset": + return "utf-8" + return None + + headers_mock.get_param = get_param_mock + resp.headers = headers_mock + + resp.read.return_value = bom_content + # The fp and closed attributes are required to properly simulate an HTTP response object + # as the uri module checks for these properties during processing + resp.fp = MagicMock() + resp.closed = False + + # Mock successful HTTP response info + info = {"url": "http://example.com/", "status": 200} + + module_args = {"url": "http://example.com/", "return_content": True} + + with ( + patch.object(uri, "fetch_url", return_value=(resp, info)) as mock_fetch_url, + patch_module_args(module_args), + ): + # Module should exit normally after processing + with pytest.raises(SystemExit): + uri.main() + + mock_fetch_url.assert_called_once() + + # Capture and verify the module output + captured = capsys.readouterr() + + # Parse the JSON output from the module + try: + output = json.loads(captured.out) + except json.JSONDecodeError as e: + pytest.fail(f"Module output is not valid JSON: {e}") + + # These assertions verify two critical aspects of BOM handling: + # 1. The JSON was successfully parsed (BOM was properly stripped) + # 2. 
The content matches what we expect after BOM removal + assert "json" in output, "Module output should contain 'json' key" + assert output["json"] == expected_json, ( + f"Expected {expected_json}, but got {output['json']}" + ) From 24de8b7426f42c89e06f1ddc8f58b5ff227d97d6 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Thu, 17 Jul 2025 13:44:21 +0200 Subject: [PATCH 5/9] Refactor test case for python 3.8 compatibility --- test/units/modules/test_uri.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/units/modules/test_uri.py b/test/units/modules/test_uri.py index 51889c88aff..f2b35c0c9f6 100644 --- a/test/units/modules/test_uri.py +++ b/test/units/modules/test_uri.py @@ -85,10 +85,9 @@ class TestUri: module_args = {"url": "http://example.com/", "return_content": True} - with ( - patch.object(uri, "fetch_url", return_value=(resp, info)) as mock_fetch_url, - patch_module_args(module_args), - ): + with patch.object(uri, "fetch_url", return_value=(resp, info)) as mock_fetch_url, \ + patch_module_args(module_args): + # Module should exit normally after processing with pytest.raises(SystemExit): uri.main() From 2cabe3879bfa34e5277d40e1c2581c9da6b62520 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Thu, 17 Jul 2025 16:01:11 +0200 Subject: [PATCH 6/9] Enhance UTF BOM handling in content encoding for improved compatibility --- lib/ansible/modules/uri.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/lib/ansible/modules/uri.py b/lib/ansible/modules/uri.py index 05650677225..baa89f34256 100644 --- a/lib/ansible/modules/uri.py +++ b/lib/ansible/modules/uri.py @@ -432,6 +432,7 @@ url: sample: https://www.ansible.com/ """ +import codecs import http import json import os @@ -756,13 +757,15 @@ def main(): # Default content_encoding to try if isinstance(content, binary_type): - # Check for UTF-8 BOM (EF BB BF) - if content.startswith(b'\xef\xbb\xbf'): - # Use utf-8-sig encoding which handles the BOM correctly - 
u_content = to_text(content, encoding='utf-8-sig') - else: - # Use the detected encoding if no BOM - u_content = to_text(content, encoding=content_encoding) + # Check for UTF BOM (Byte Order Mark) + if content.startswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + content_encoding = 'utf-32' + elif content.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + content_encoding = 'utf-16' + elif content.startswith(codecs.BOM_UTF8): + content_encoding = 'utf-8-sig' + + u_content = to_text(content, encoding=content_encoding) if maybe_json: try: From 8a6350d9e7c5c365158a24c94edf766da99b18a2 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Fri, 18 Jul 2025 10:12:02 +0200 Subject: [PATCH 7/9] Add handler for returning JSON with UTF-8 BOM prefix --- test/integration/targets/uri/files/testserver.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/integration/targets/uri/files/testserver.py b/test/integration/targets/uri/files/testserver.py index 1792829091b..30fbe065709 100644 --- a/test/integration/targets/uri/files/testserver.py +++ b/test/integration/targets/uri/files/testserver.py @@ -18,6 +18,14 @@ if __name__ == '__main__': b'a\r\n' # size of the chunk (0xa = 10) b'123456' ) + elif self.path == '/bom_json': + # Return JSON with UTF-8 BOM prefix + self.send_response(200) + self.send_header("Content-type", content_type_json) + self.end_headers() + # \xef\xbb\xbf is the UTF-8 BOM + response = b'\xef\xbb\xbf{"name": "dollarsign", "symbol": "$"}' + self.wfile.write(response) elif self.path.endswith('json'): try: with open(self.path[1:]) as f: From 52f62c77ea4b35fb05f413a08da15c15e5e5a993 Mon Sep 17 00:00:00 2001 From: DollarSign23 Date: Fri, 18 Jul 2025 10:14:29 +0200 Subject: [PATCH 8/9] Add tests for handling UTF-8 BOM in JSON responses --- test/integration/targets/uri/tasks/main.yml | 3 +++ .../targets/uri/tasks/test_bom_json.yml | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 
test/integration/targets/uri/tasks/test_bom_json.yml diff --git a/test/integration/targets/uri/tasks/main.yml b/test/integration/targets/uri/tasks/main.yml index fdf14b80431..0c9c75bd10b 100644 --- a/test/integration/targets/uri/tasks/main.yml +++ b/test/integration/targets/uri/tasks/main.yml @@ -755,3 +755,6 @@ assert: that: - uri_check.msg == "This action (uri) does not support check mode." + +- name: Include BOM JSON tests + include_tasks: test_bom_json.yml diff --git a/test/integration/targets/uri/tasks/test_bom_json.yml b/test/integration/targets/uri/tasks/test_bom_json.yml new file mode 100644 index 00000000000..6df0293efae --- /dev/null +++ b/test/integration/targets/uri/tasks/test_bom_json.yml @@ -0,0 +1,18 @@ +--- +- name: Test UTF-8 BOM in JSON response + block: + - name: Get JSON with UTF-8 BOM prefix + uri: + url: http://localhost:{{ http_port }}/bom_json + return_content: true + register: bom_json_result + + - name: Verify JSON is correctly parsed despite BOM prefix + assert: + that: + - bom_json_result is success + - bom_json_result.json is defined + - bom_json_result.json.name == "dollarsign" + - bom_json_result.json.symbol == "$" + fail_msg: "Failed to properly parse JSON with UTF-8 BOM. 
Result: {{ bom_json_result }}" + success_msg: "Successfully parsed JSON with UTF-8 BOM prefix" \ No newline at end of file From 379b4b582bdbbdfededb6633a29314fec881c409 Mon Sep 17 00:00:00 2001 From: DollarSign23 <142776995+DollarSign23@users.noreply.github.com> Date: Sat, 13 Sep 2025 13:10:51 +0200 Subject: [PATCH 9/9] Enhance resilience of BOM JSON task --- test/integration/targets/uri/tasks/test_bom_json.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/integration/targets/uri/tasks/test_bom_json.yml b/test/integration/targets/uri/tasks/test_bom_json.yml index 6df0293efae..d59c30bd00b 100644 --- a/test/integration/targets/uri/tasks/test_bom_json.yml +++ b/test/integration/targets/uri/tasks/test_bom_json.yml @@ -6,6 +6,10 @@ url: http://localhost:{{ http_port }}/bom_json return_content: true register: bom_json_result + # retry up to 3 times, 1 second apart, until the response status is 200 + until: bom_json_result.status == 200 + retries: 3 + delay: 1 - name: Verify JSON is correctly parsed despite BOM prefix assert: @@ -15,4 +19,4 @@ - bom_json_result.json.name == "dollarsign" - bom_json_result.json.symbol == "$" fail_msg: "Failed to properly parse JSON with UTF-8 BOM. Result: {{ bom_json_result }}" - success_msg: "Successfully parsed JSON with UTF-8 BOM prefix" \ No newline at end of file + success_msg: "Successfully parsed JSON with UTF-8 BOM prefix"