From 5bfb79070aca52a7321cc9c8115bc80c1767ebb8 Mon Sep 17 00:00:00 2001 From: s-hertel <19572925+s-hertel@users.noreply.github.com> Date: Fri, 17 Oct 2025 12:24:26 -0400 Subject: [PATCH 1/4] Fix detecting (and by extension retrying) IncompleteRead when downloading collections --- ...e-galaxy-retry-IncompleteRead-download.yml | 5 ++ .../collection/concrete_artifact_manager.py | 27 +++++++-- test/units/cli/test_galaxy.py | 15 ++++- test/units/galaxy/test_collection.py | 56 +++++++++++++++++-- 4 files changed, 91 insertions(+), 12 deletions(-) create mode 100644 changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml diff --git a/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml b/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml new file mode 100644 index 00000000000..f9557757928 --- /dev/null +++ b/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml @@ -0,0 +1,5 @@ +bugfixes: +- >- + ansible-galaxy - Fix retrying IncompleteRead when downloading collections, + instead of potentially failing for a mismatched artifact hash on the first attempt. + (https://github.com/ansible/ansible/pull/86025) diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py index 1659bc46b49..e7f44d132a5 100644 --- a/lib/ansible/galaxy/collection/concrete_artifact_manager.py +++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py @@ -14,6 +14,7 @@ import yaml from contextlib import contextmanager from hashlib import sha256 +from http.client import IncompleteRead from urllib.error import URLError from urllib.parse import urldefrag from shutil import rmtree @@ -494,7 +495,10 @@ def _download_file(url, b_path, expected_hash, validate_certs, token=None, timeo ) with open(b_file_path, 'wb') as download_file: # type: t.BinaryIO - actual_hash = _consume_file(resp, write_to=download_file) + try: + actual_hash = _consume_file(resp, write_to=download_file, length=resp.length) + except IncompleteRead as orig_exc: + raise AnsibleError(f"Downloading {url} failed", orig_exc=orig_exc) from orig_exc if expected_hash: display.vvvv( @@ -508,17 +512,28 @@ def _download_file(url, b_path, expected_hash, validate_certs, token=None, timeo return b_file_path -def _consume_file(read_from, write_to=None): - # type: (t.BinaryIO, t.BinaryIO) -> str +def _consume_file(read_from: t.BinaryIO, write_to: t.BinaryIO = None, *, length: None | str | int = None) -> str: bufsize = 65536 sha256_digest = sha256() - data = read_from.read(bufsize) - while data: + actual_length = 0 + remaining = None + while True: + if isinstance(length, int) and (remaining := length - actual_length) <= 0: + break + + if not (data := read_from.read(min(bufsize, remaining or bufsize))): + break + if write_to is not None: write_to.write(data) write_to.flush() + sha256_digest.update(data) - data = read_from.read(bufsize) + actual_length += len(data) + + if isinstance(length, int) and length > actual_length: + read_len = b' ' * actual_length if actual_length else b'' + raise IncompleteRead(read_len, length - actual_length) return sha256_digest.hexdigest() diff --git a/test/units/cli/test_galaxy.py b/test/units/cli/test_galaxy.py index 9b486b2fa2e..3c42c8cb06f 100644 --- a/test/units/cli/test_galaxy.py +++ b/test/units/cli/test_galaxy.py @@ -918,10 +918,21 @@ def test_collection_install_in_collection_dir(collection_install, monkeypatch): assert mock_install.call_args[0][6] is False # force_deps -def test_collection_install_with_url(monkeypatch, collection_install): +def test_collection_install_with_url( + monkeypatch: pytest.MonkeyPatch, + collection_install: tuple[MagicMock, MagicMock, str] +) -> None: mock_install, dummy, output_dir = collection_install - mock_open = MagicMock(return_value=BytesIO()) + class MockHTTPResponse: + def __init__(self) -> None: + self._stream = BytesIO() + self.length = 'UNKNOWN' + + def read(self, size: int = -1) -> bytes: + return self._stream.read(size) + + mock_open = MagicMock(return_value=MockHTTPResponse()) monkeypatch.setattr(collection.concrete_artifact_manager, 'open_url', mock_open) mock_metadata = MagicMock(return_value={'namespace': 'foo', 'name': 'bar', 'version': 'v1.0.0'}) diff --git a/test/units/galaxy/test_collection.py b/test/units/galaxy/test_collection.py index 8da860da284..30b176b803b 100644 --- a/test/units/galaxy/test_collection.py +++ b/test/units/galaxy/test_collection.py @@ -890,7 +890,26 @@ def test_publish_with_wait(galaxy_server, collection_artifact, monkeypatch): % galaxy_server.api_server -def test_download_file(tmp_path_factory, monkeypatch): +class MockHTTPResponses: + def __init__(self, responses: list[tuple[bytes, int | None]]) -> None: + self._body, self.length = responses[0] + self._responses = responses[1:] + self._stream = BytesIO(self._body) + + def read(self, size: int = -1) -> bytes: + data = self._stream.read(size) + + if not data: + # reset the stream to simulate retries + if self._responses: + self._body, self.length = self._responses[0] + del self._responses[0] + self._stream = BytesIO(self._body) + + return data + + +def test_download_file(tmp_path_factory: pytest.TempPathFactory, monkeypatch: pytest.MonkeyPatch) -> None: temp_dir = to_bytes(tmp_path_factory.mktemp('test-ÅÑŚÌβŁÈ Collections')) data = b"\x00\x01\x02\x03" @@ -898,7 +917,7 @@ def test_download_file(tmp_path_factory, monkeypatch): sha256_hash.update(data) mock_open = MagicMock() - mock_open.return_value = BytesIO(data) + mock_open.return_value = MockHTTPResponses([(data, len(data))]) monkeypatch.setattr(collection.concrete_artifact_manager, 'open_url', mock_open) expected = temp_dir @@ -913,13 +932,13 @@ def test_download_file(tmp_path_factory, monkeypatch): assert mock_open.mock_calls[0][1][0] == 'http://google.com/file' -def test_download_file_hash_mismatch(tmp_path_factory, monkeypatch): +def test_download_file_hash_mismatch(tmp_path_factory: pytest.TempPathFactory, monkeypatch: pytest.MonkeyPatch) -> None: temp_dir = to_bytes(tmp_path_factory.mktemp('test-ÅÑŚÌβŁÈ Collections')) data = b"\x00\x01\x02\x03" mock_open = MagicMock() - mock_open.return_value = BytesIO(data) + mock_open.return_value = MockHTTPResponses([(data, len(data))]) monkeypatch.setattr(collection.concrete_artifact_manager, 'open_url', mock_open) expected = "Mismatch artifact hash with downloaded file" @@ -927,6 +946,35 @@ def test_download_file_hash_mismatch(tmp_path_factory, monkeypatch): collection._download_file('http://google.com/file', temp_dir, 'bad', True) +def test_download_file_incomplete_read( + tmp_path_factory: pytest.TempPathFactory, + monkeypatch: pytest.MonkeyPatch +) -> None: + temp_dir = to_bytes(tmp_path_factory.mktemp('test-ÅÑŚÌβŁÈ Collections')) + + incomplete_data = b"\x00\x01\x02\x03" + data = incomplete_data + b"\x04" + sha256_hash = sha256(data) + + # Test retries succeed + responses = [(incomplete_data, len(data)), (data, len(data))] + mock_open = MagicMock(return_value=MockHTTPResponses(responses)) + monkeypatch.setattr(collection.concrete_artifact_manager, 'open_url', mock_open) + + collection._download_file('http://google.com/file', temp_dir, sha256_hash.hexdigest(), True) + + # Test error is correct + mock_open.return_value = MockHTTPResponses([(data, len(data) + 1)]) + monkeypatch.setattr(collection.concrete_artifact_manager, 'open_url', mock_open) + + expected_error = re.escape( + "Downloading http://google.com/file failed: " + "IncompleteRead(5 bytes read, 1 more expected)" + ) + with pytest.raises(AnsibleError, match=expected_error): + collection._download_file('http://google.com/file', temp_dir, sha256_hash.hexdigest(), True) + + def test_extract_tar_file_invalid_hash(tmp_tarfile): temp_dir, tfile, filename, dummy = tmp_tarfile From 836eed77b5e10ea1a8f622b88e385b33e81d08ac Mon Sep 17 00:00:00 2001 From: s-hertel <19572925+s-hertel@users.noreply.github.com> Date: Tue, 21 Oct 2025 16:15:45 -0400 Subject: [PATCH 2/4] Fix exception handling --- lib/ansible/galaxy/collection/concrete_artifact_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py index e7f44d132a5..58d21b71abb 100644 --- a/lib/ansible/galaxy/collection/concrete_artifact_manager.py +++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py @@ -498,7 +498,7 @@ def _download_file(url, b_path, expected_hash, validate_certs, token=None, timeo try: actual_hash = _consume_file(resp, write_to=download_file, length=resp.length) except IncompleteRead as orig_exc: - raise AnsibleError(f"Downloading {url} failed", orig_exc=orig_exc) from orig_exc + raise AnsibleError(f"Downloading {url} failed") from orig_exc if expected_hash: display.vvvv( From 5445f9ecf0a7c617984739ff567fcb6b6e2528e8 Mon Sep 17 00:00:00 2001 From: s-hertel <19572925+s-hertel@users.noreply.github.com> Date: Wed, 22 Oct 2025 11:24:19 -0400 Subject: [PATCH 3/4] revise changelog --- .../ansible-galaxy-retry-IncompleteRead-download.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml b/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml index f9557757928..b92b342252c 100644 --- a/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml +++ b/changelogs/fragments/ansible-galaxy-retry-IncompleteRead-download.yml @@ -1,5 +1,6 @@ bugfixes: - >- - ansible-galaxy - Fix retrying IncompleteRead when downloading collections, - instead of potentially failing for a mismatched artifact hash on the first attempt. + ansible-galaxy - Fix attempting to download the collection again if the + response from the server is shorter than expected, instead of failing due to + the mismatched artifact hash on the first attempt. (https://github.com/ansible/ansible/pull/86025) From 02e36211ff6d9e5296f7ed066af85cf5b0a948b0 Mon Sep 17 00:00:00 2001 From: s-hertel <19572925+s-hertel@users.noreply.github.com> Date: Wed, 5 Nov 2025 13:17:23 -0500 Subject: [PATCH 4/4] Remove unnecessary parameter, and correct type hint --- lib/ansible/galaxy/collection/concrete_artifact_manager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py index 58d21b71abb..b69990b4e4a 100644 --- a/lib/ansible/galaxy/collection/concrete_artifact_manager.py +++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py @@ -14,7 +14,7 @@ import yaml from contextlib import contextmanager from hashlib import sha256 -from http.client import IncompleteRead +from http.client import IncompleteRead, HTTPResponse from urllib.error import URLError from urllib.parse import urldefrag from shutil import rmtree @@ -496,7 +496,7 @@ def _download_file(url, b_path, expected_hash, validate_certs, token=None, timeo with open(b_file_path, 'wb') as download_file: # type: t.BinaryIO try: - actual_hash = _consume_file(resp, write_to=download_file, length=resp.length) + actual_hash = _consume_file(resp, write_to=download_file) except IncompleteRead as orig_exc: raise AnsibleError(f"Downloading {url} failed") from orig_exc @@ -512,9 +512,10 @@ def _download_file(url, b_path, expected_hash, validate_certs, token=None, timeo return b_file_path -def _consume_file(read_from: t.BinaryIO, write_to: t.BinaryIO = None, *, length: None | str | int = None) -> str: +def _consume_file(read_from: t.BinaryIO | HTTPResponse, write_to: t.BinaryIO | None = None) -> str: bufsize = 65536 sha256_digest = sha256() + length = getattr(read_from, 'length', None) actual_length = 0 remaining = None while True: