diff --git a/changelogs/fragments/85492-fix-utf8-bom.yml b/changelogs/fragments/85492-fix-utf8-bom.yml new file mode 100644 index 00000000000..ef75ff491c0 --- /dev/null +++ b/changelogs/fragments/85492-fix-utf8-bom.yml @@ -0,0 +1,2 @@ +bugfixes: + - uri - add detection of UTF-8 BOM in responses and proper handling using utf-8-sig encoding. \ No newline at end of file diff --git a/lib/ansible/modules/uri.py b/lib/ansible/modules/uri.py index ceb6bcae764..181d3964b97 100644 --- a/lib/ansible/modules/uri.py +++ b/lib/ansible/modules/uri.py @@ -432,6 +432,7 @@ url: sample: https://www.ansible.com/ """ +import codecs import http import json import os @@ -755,7 +756,16 @@ def main(): # Default content_encoding to try if isinstance(content, bytes): + # Check for UTF BOM (Byte Order Mark) + if content.startswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + content_encoding = 'utf-32' + elif content.startswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + content_encoding = 'utf-16' + elif content.startswith(codecs.BOM_UTF8): + content_encoding = 'utf-8-sig' + u_content = to_text(content, encoding=content_encoding) + if maybe_json: try: js = json.loads(u_content) diff --git a/test/integration/targets/uri/files/testserver.py b/test/integration/targets/uri/files/testserver.py index 1792829091b..30fbe065709 100644 --- a/test/integration/targets/uri/files/testserver.py +++ b/test/integration/targets/uri/files/testserver.py @@ -18,6 +18,14 @@ if __name__ == '__main__': b'a\r\n' # size of the chunk (0xa = 10) b'123456' ) + elif self.path == '/bom_json': + # Return JSON with UTF-8 BOM prefix + self.send_response(200) + self.send_header("Content-type", content_type_json) + self.end_headers() + # \xef\xbb\xbf is the UTF-8 BOM + response = b'\xef\xbb\xbf{"name": "dollarsign", "symbol": "$"}' + self.wfile.write(response) elif self.path.endswith('json'): try: with open(self.path[1:]) as f: diff --git a/test/integration/targets/uri/tasks/main.yml 
b/test/integration/targets/uri/tasks/main.yml index 62748d7591f..fbdb1c42b2c 100644 --- a/test/integration/targets/uri/tasks/main.yml +++ b/test/integration/targets/uri/tasks/main.yml @@ -774,3 +774,6 @@ assert: that: - uri_check.msg == "This action (uri) does not support check mode." + +- name: Include BOM JSON tests + include_tasks: test_bom_json.yml diff --git a/test/integration/targets/uri/tasks/test_bom_json.yml b/test/integration/targets/uri/tasks/test_bom_json.yml new file mode 100644 index 00000000000..d59c30bd00b --- /dev/null +++ b/test/integration/targets/uri/tasks/test_bom_json.yml @@ -0,0 +1,22 @@ +--- +- name: Test UTF-8 BOM in JSON response + block: + - name: Get JSON with UTF-8 BOM prefix + uri: + url: http://localhost:{{ http_port }}/bom_json + return_content: true + register: bom_json_result + # make it more resilient + until: bom_json_result.status == 200 + retries: 3 + delay: 1 + + - name: Verify JSON is correctly parsed despite BOM prefix + assert: + that: + - bom_json_result is success + - bom_json_result.json is defined + - bom_json_result.json.name == "dollarsign" + - bom_json_result.json.symbol == "$" + fail_msg: "Failed to properly parse JSON with UTF-8 BOM. 
def test_utf8_bom_handling(self, capsys):
    """Check that a JSON body prefixed with a UTF-8 BOM is still parsed.

    ``fetch_url`` is patched to hand back a mocked response whose payload
    begins with the bytes EF BB BF. The text the module prints to stdout
    is decoded as JSON, and its ``json`` entry is compared with the
    document that follows the BOM in the payload.
    """
    expected_json = {"name": "dollarsign"}
    # EF BB BF is the UTF-8 byte order mark; the JSON document follows it.
    payload = b'\xef\xbb\xbf{"name": "dollarsign"}'

    # Header stub: reports a JSON content type and answers "utf-8" for the
    # charset parameter; any other parameter lookup yields None.
    header_stub = MagicMock()
    header_stub.get_content_type.return_value = "application/json"
    header_stub.get_param = lambda param=None: "utf-8" if param == "charset" else None

    response = MagicMock()
    response.headers = header_stub
    response.read.return_value = payload
    # NOTE(review): fp/closed are set so the mock resembles a real HTTP
    # response; presumably the module inspects them -- confirm in uri.py.
    response.fp = MagicMock()
    response.closed = False

    target_url = "http://example.com/"
    info = {"url": target_url, "status": 200}
    module_args = {"url": target_url, "return_content": True}

    # The module terminates through SystemExit, so that is the expected
    # way out of main().
    with patch.object(uri, "fetch_url", return_value=(response, info)) as fetch_stub, \
            patch_module_args(module_args), \
            pytest.raises(SystemExit):
        uri.main()

    fetch_stub.assert_called_once()

    # Whatever the module printed must itself be a JSON document.
    stdout_text = capsys.readouterr().out
    try:
        output = json.loads(stdout_text)
    except json.JSONDecodeError as e:
        pytest.fail(f"Module output is not valid JSON: {e}")

    # A parsed "json" entry equal to the expected document shows that the
    # BOM did not get in the way of decoding the body.
    assert "json" in output, "Module output should contain 'json' key"
    assert output["json"] == expected_json, (
        f"Expected {expected_json}, but got {output['json']}"
    )