From b3a5ad29e7ab024202af8b53d146c251834a7cbb Mon Sep 17 00:00:00 2001 From: s-hertel <19572925+s-hertel@users.noreply.github.com> Date: Wed, 12 Nov 2025 16:24:26 -0500 Subject: [PATCH 1/2] Improve the ansible-galaxy server cache Initialize the in-memory cache after the API call succeeds Fix updating the global cache for individual API servers Bump cache version to 2 to reset existing cache files Add integration tests Update unit tests to match the cache version --- .../fragments/85918-fix-galaxy-cache.yml | 10 +++++ lib/ansible/galaxy/api.py | 40 ++++++++++++------- .../tasks/install_offline.yml | 8 ++-- .../ansible-galaxy-collection/tasks/main.yml | 18 +++++++++ .../ansible-galaxy-collection/vars/main.yml | 2 +- test/units/galaxy/test_api.py | 30 +++++++------- 6 files changed, 75 insertions(+), 33 deletions(-) create mode 100644 changelogs/fragments/85918-fix-galaxy-cache.yml diff --git a/changelogs/fragments/85918-fix-galaxy-cache.yml b/changelogs/fragments/85918-fix-galaxy-cache.yml new file mode 100644 index 00000000000..bcd37d98f34 --- /dev/null +++ b/changelogs/fragments/85918-fix-galaxy-cache.yml @@ -0,0 +1,10 @@ +bugfixes: + - >- + Fix initializing the in-memory ansible-galaxy server cache with required fields. + (https://github.com/ansible/ansible/issues/85918) + - >- + Fix ansible-galaxy server cache key uniqueness for servers that use the same hostname and port. + The cache version has been updated to 2 to reset files using the hostname and port as keys. + - >- + Fix updating the persistent ansible-galaxy server cache. + Now servers update their portion of the cache without overwriting cache updates of other servers. diff --git a/lib/ansible/galaxy/api.py b/lib/ansible/galaxy/api.py index e145687b70f..85c4331e215 100644 --- a/lib/ansible/galaxy/api.py +++ b/lib/ansible/galaxy/api.py @@ -141,13 +141,13 @@ def get_cache_id(url): pass # While the URL is probably invalid, let the caller figure that out when using it # Cannot use netloc because it could contain credentials if the server specified had them in there. - return '%s:%s' % (url_info.hostname, port or '') + return f"{url_info.hostname}:{port or ''}{url_info.path}" @cache_lock def _load_cache(b_cache_path): """ Loads the cache file requested if possible. The file must not be world writable. """ - cache_version = 1 + cache_version = 2 if not os.path.isfile(b_cache_path): display.vvvv("Creating Galaxy API response cache file at '%s'" % to_text(b_cache_path)) @@ -332,7 +332,7 @@ class GalaxyAPI: def _call_galaxy(self, url, args=None, headers=None, method=None, auth_required=False, error_context_msg=None, cache=False, cache_key=None): url_info = urlparse(url) - cache_id = get_cache_id(url) + cache_id = get_cache_id(self.api_server) if not cache_key: cache_key = url_info.path query = parse_qs(url_info.query) @@ -379,10 +379,6 @@ class GalaxyAPI: # The cache entry had expired or does not exist, start a new blank entry to be filled later. expires = datetime.datetime.now(datetime.timezone.utc) expires += datetime.timedelta(days=1) - server_cache[cache_key] = { - 'expires': expires.strftime(iso_datetime_format), - 'paginated': False, - } headers = headers or {} self._add_auth_token(headers, url, required=auth_required) @@ -404,7 +400,12 @@ class GalaxyAPI: % (resp.url, to_native(resp_data))) if cache and self._cache: - path_cache = self._cache[cache_id][cache_key] + if not valid: + server_cache[cache_key] = { + 'expires': expires.strftime(iso_datetime_format), + 'paginated': False, + 'results': [], + } # v3 can return data or results for paginated results. Scan the result so we can determine what to cache. paginated_key = None @@ -414,13 +415,12 @@ class GalaxyAPI: break if paginated_key: - path_cache['paginated'] = True - results = path_cache.setdefault('results', []) + server_cache[cache_key]['paginated'] = True for result in data[paginated_key]: - results.append(result) + server_cache[cache_key]['results'].append(result) else: - path_cache['results'] = data + server_cache[cache_key]['results'] = data return data @@ -438,8 +438,20 @@ class GalaxyAPI: @cache_lock def _set_cache(self): + try: + with open(self._b_cache_path, mode='r') as fd: + global_cache = json.load(fd) + except FileNotFoundError: + global_cache = {} + + cache_id = get_cache_id(self.api_server) + if self._cache and (data := self._cache.get(cache_id)): + global_cache[cache_id] = data + else: + global_cache[cache_id] = {} + with open(self._b_cache_path, mode='wb') as fd: - fd.write(to_bytes(json.dumps(self._cache), errors='surrogate_or_strict')) + fd.write(to_bytes(json.dumps(global_cache), errors='surrogate_or_strict')) @g_connect(['v1']) def authenticate(self, github_token): @@ -812,7 +824,7 @@ class GalaxyAPI: # We should only rely on the cache if the collection has not changed. This may slow things down but it ensures # we are not waiting a day before finding any new collections that have been published. if self._cache: - server_cache = self._cache.setdefault(get_cache_id(versions_url), {}) + server_cache = self._cache.setdefault(get_cache_id(self.api_server), {}) modified_cache = server_cache.setdefault('modified', {}) try: diff --git a/test/integration/targets/ansible-galaxy-collection/tasks/install_offline.yml b/test/integration/targets/ansible-galaxy-collection/tasks/install_offline.yml index 9c889635432..11bdb5396a0 100644 --- a/test/integration/targets/ansible-galaxy-collection/tasks/install_offline.yml +++ b/test/integration/targets/ansible-galaxy-collection/tasks/install_offline.yml @@ -95,10 +95,12 @@ - galaxy_err in missing_dep.stderr - missing_err in missing_dep_offline.stderr vars: - galaxy_err: "Unknown error when attempting to call Galaxy at '{{ offline_server }}'" + galaxy_err: >- + Error when finding available api versions from offline ({{ offline_server }}) + (HTTP Code: 404, Message: Not Found) missing_err: |- - Failed to resolve the requested dependencies map. Could not satisfy the following requirements: - * ns.coll2:>=1.0.0 (dependency of ns.coll1:1.0.0) + Failed to resolve the requested dependencies map. Could not satisfy the following requirements: + * ns.coll2:>=1.0.0 (dependency of ns.coll1:1.0.0) - name: install the dependency from the tarfile command: ansible-galaxy collection install {{ build_dir }}/ns-coll2-1.0.0.tar.gz -p {{ install_dir }} diff --git a/test/integration/targets/ansible-galaxy-collection/tasks/main.yml b/test/integration/targets/ansible-galaxy-collection/tasks/main.yml index 7bcc38286d8..621ca00265e 100644 --- a/test/integration/targets/ansible-galaxy-collection/tasks/main.yml +++ b/test/integration/targets/ansible-galaxy-collection/tasks/main.yml @@ -183,11 +183,29 @@ - >- "'child_dep.child_dep2:1.2.2' obtained from server galaxy_ng" in install_cross_dep.stdout + - install_cross_dep.stderr is not search("Skipping Galaxy server") - (install_cross_dep_actual.results[0].content | b64decode | from_json).collection_info.version == '1.0.0' - (install_cross_dep_actual.results[1].content | b64decode | from_json).collection_info.version == '1.0.0' - (install_cross_dep_actual.results[2].content | b64decode | from_json).collection_info.version == '0.9.9' - (install_cross_dep_actual.results[3].content | b64decode | from_json).collection_info.version == '1.2.2' +- vars: + cache_file: "{{ remote_tmp_dir }}/galaxy_cache/api.json" + cache: "{{ lookup('file', cache_file) | from_json }}" + block: + - name: Debug GalaxyAPI cache + debug: + msg: "{{ cache }}" + + - name: Assert cache contains the expected keys + assert: + that: + - cache.keys() | length == 3 + - '"version" in cache' + - cache['version'] == 2 + - '"galaxy-pulp:/api/galaxy/content/primary/" in cache' + - '"galaxy-pulp:/api/galaxy/content/secondary/" in cache' + - name: run ansible-galaxy collection download tests include_tasks: download.yml args: diff --git a/test/integration/targets/ansible-galaxy-collection/vars/main.yml b/test/integration/targets/ansible-galaxy-collection/vars/main.yml index 855b382e5ca..015c2ffcc7e 100644 --- a/test/integration/targets/ansible-galaxy-collection/vars/main.yml +++ b/test/integration/targets/ansible-galaxy-collection/vars/main.yml @@ -2,7 +2,7 @@ galaxy_verbosity: "{{ '' if not ansible_verbosity else '-' ~ ('v' * ansible_verb gpg_homedir: "{{ galaxy_dir }}/gpg" -offline_server: https://test-hub.demolab.local/api/galaxy/content/api/ +offline_server: "{{ galaxy_ng_server }}content/broken/" # Test oldest and most recently supported, and versions with notable changes. # NOTE: If ansible-galaxy incorporates new resolvelib features, this matrix should be updated to verify the features work on all supported versions. diff --git a/test/units/galaxy/test_api.py b/test/units/galaxy/test_api.py index a2dbf0b1abc..18fb76af06a 100644 --- a/test/units/galaxy/test_api.py +++ b/test/units/galaxy/test_api.py @@ -1034,13 +1034,13 @@ def test_missing_cache_dir(cache_dir): cache_file = os.path.join(cache_dir, 'api.json') with open(cache_file) as fd: actual_cache = fd.read() - assert actual_cache == '{"version": 1}' + assert actual_cache == '{"version": 2}' assert stat.S_IMODE(os.stat(cache_file).st_mode) == 0o600 def test_existing_cache(cache_dir): cache_file = os.path.join(cache_dir, 'api.json') - cache_file_contents = '{"version": 1, "test": "json"}' + cache_file_contents = '{"version": 2, "test": "json"}' with open(cache_file, mode='w') as fd: fd.write(cache_file_contents) os.chmod(cache_file, 0o655) @@ -1059,8 +1059,8 @@ def test_existing_cache(cache_dir): 'value', '{"de" "finit" "ely" [\'invalid"]}', '[]', - '{"version": 2, "test": "json"}', - '{"version": 2, "key": "ÅÑŚÌβŁÈ"}', + '{"version": 1, "test": "json"}', + '{"version": 1, "key": "ÅÑŚÌβŁÈ"}', ]) def test_cache_invalid_cache_content(content, cache_dir): cache_file = os.path.join(cache_dir, 'api.json') @@ -1072,7 +1072,7 @@ def test_cache_invalid_cache_content(content, cache_dir): with open(cache_file) as fd: actual_cache = fd.read() - assert actual_cache == '{"version": 1}' + assert actual_cache == '{"version": 2}' assert stat.S_IMODE(os.stat(cache_file).st_mode) == 0o664 @@ -1097,7 +1097,7 @@ def test_cache_complete_pagination(cache_dir, monkeypatch): with open(cache_file) as fd: final_cache = json.loads(fd.read()) - cached_server = final_cache['galaxy.server.com:'] + cached_server = final_cache['galaxy.server.com:/api/'] cached_collection = cached_server['/api/v3/collections/namespace/collection/versions/'] cached_versions = [r['version'] for r in cached_collection['results']] @@ -1126,7 +1126,7 @@ def test_cache_complete_pagination_v3(cache_dir, monkeypatch): with open(cache_file) as fd: final_cache = json.loads(fd.read()) - cached_server = final_cache['galaxy.server.com:'] + cached_server = final_cache['galaxy.server.com:/api/'] cached_collection = cached_server['/api/v3/collections/namespace/collection/versions/'] cached_versions = [r['version'] for r in cached_collection['results']] @@ -1164,8 +1164,8 @@ def test_cache_flaky_pagination(cache_dir, monkeypatch): final_cache = json.loads(fd.read()) assert final_cache == { - 'version': 1, - 'galaxy.server.com:': { + 'version': 2, + 'galaxy.server.com:/api/': { 'modified': { 'namespace.collection': responses[0]['updated_at'] } @@ -1190,7 +1190,7 @@ def test_cache_flaky_pagination(cache_dir, monkeypatch): with open(cache_file) as fd: final_cache = json.loads(fd.read()) - cached_server = final_cache['galaxy.server.com:'] + cached_server = final_cache['galaxy.server.com:/api/'] cached_collection = cached_server['/api/v3/collections/namespace/collection/versions/'] cached_versions = [r['version'] for r in cached_collection['results']] @@ -1250,17 +1250,17 @@ def test_clear_cache(cache_dir): with open(cache_file) as fd: actual_cache = fd.read() - assert actual_cache == '{"version": 1}' + assert actual_cache == '{"version": 2}' assert stat.S_IMODE(os.stat(cache_file).st_mode) == 0o600 @pytest.mark.parametrize(['url', 'expected'], [ - ('http://hostname/path', 'hostname:'), - ('http://hostname:80/path', 'hostname:80'), + ('http://hostname/path', 'hostname:/path'), + ('http://hostname:80/path', 'hostname:80/path'), ('https://testing.com:invalid', 'testing.com:'), ('https://testing.com:1234', 'testing.com:1234'), - ('https://username:password@testing.com/path', 'testing.com:'), - ('https://username:password@testing.com:443/path', 'testing.com:443'), + ('https://username:password@testing.com/path', 'testing.com:/path'), + ('https://username:password@testing.com:443/path', 'testing.com:443/path'), ]) def test_cache_id(url, expected): actual = galaxy_api.get_cache_id(url) From 8f20125059c012c37c92cd152f0f1bd61d6825fd Mon Sep 17 00:00:00 2001 From: s-hertel <19572925+s-hertel@users.noreply.github.com> Date: Fri, 14 Nov 2025 11:53:44 -0500 Subject: [PATCH 2/2] Restore resetting expired cache Don't reset paginated responses mid-pagination --- lib/ansible/galaxy/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ansible/galaxy/api.py b/lib/ansible/galaxy/api.py index 85c4331e215..cb182f7d7d0 100644 --- a/lib/ansible/galaxy/api.py +++ b/lib/ansible/galaxy/api.py @@ -379,6 +379,7 @@ class GalaxyAPI: # The cache entry had expired or does not exist, start a new blank entry to be filled later. expires = datetime.datetime.now(datetime.timezone.utc) expires += datetime.timedelta(days=1) + server_cache.pop(cache_key, None) headers = headers or {} self._add_auth_token(headers, url, required=auth_required) @@ -400,7 +401,7 @@ class GalaxyAPI: % (resp.url, to_native(resp_data))) if cache and self._cache: - if not valid: + if not valid and not is_paginated_url: server_cache[cache_key] = { 'expires': expires.strftime(iso_datetime_format), 'paginated': False,