From 093986dfaff20295a2e8dfb90bb4063900a8c6ee Mon Sep 17 00:00:00 2001 From: Sloane Hertel <19572925+s-hertel@users.noreply.github.com> Date: Mon, 17 May 2021 17:04:16 -0400 Subject: [PATCH] [2.10] ansible-galaxy - increase page size and add retry decorator (#74649) * ansible-galaxy - increase page size and add retry decorator for throttling (#74240) * Get available collection versions with page_size=100 for v2 and limit=100 for v3 * Update unit tests for larger page sizes * Add a generic retry decorator in module_utils/api.py that accepts an Iterable of delays and a callable to determine if an exception inheriting from Exception should be retried * Use the new decorator to handle Galaxy API rate limiting * Add unit tests for new retry decorator * Preserve the decorated function's metadata with functools.wraps ci_complete Co-authored-by: Matt Martz Co-authored-by: Sviatoslav Sydorenko (cherry picked from commit ee725846f070fc6b0dd79b5e8c5199ec652faf87) * Add changelog for ansible-galaxy improvements (#74738) Changelog for #74240 (cherry picked from commit 9cfedcd9c911f994f830d494739a6bfe4de1b080) --- ...ncrease-pagesize-and-handle-throttling.yml | 6 ++ lib/ansible/galaxy/api.py | 29 ++++++- lib/ansible/module_utils/api.py | 50 +++++++++++ test/units/galaxy/test_api.py | 34 +++++--- test/units/module_utils/test_api.py | 85 +++++++++++++++++-- 5 files changed, 184 insertions(+), 20 deletions(-) create mode 100644 changelogs/fragments/74240-ansible-galaxy-increase-pagesize-and-handle-throttling.yml diff --git a/changelogs/fragments/74240-ansible-galaxy-increase-pagesize-and-handle-throttling.yml b/changelogs/fragments/74240-ansible-galaxy-increase-pagesize-and-handle-throttling.yml new file mode 100644 index 00000000000..28edf4dfb4f --- /dev/null +++ b/changelogs/fragments/74240-ansible-galaxy-increase-pagesize-and-handle-throttling.yml @@ -0,0 +1,6 @@ +bugfixes: +- >- + Improve resilience of ``ansible-galaxy collection`` by increasing the page + size to make fewer requests overall and retrying queries with a jittered + exponential backoff when rate limiting HTTP codes (520 and 429) occur. + (https://github.com/ansible/ansible/issues/74191) diff --git a/lib/ansible/galaxy/api.py b/lib/ansible/galaxy/api.py index 4dd3cded762..2db91300f9f 100644 --- a/lib/ansible/galaxy/api.py +++ b/lib/ansible/galaxy/api.py @@ -15,9 +15,11 @@ import time from ansible import constants as C from ansible.errors import AnsibleError from ansible.galaxy.user_agent import user_agent +from ansible.module_utils.api import retry_with_delays_and_condition +from ansible.module_utils.api import generate_jittered_backoff from ansible.module_utils.six import string_types from ansible.module_utils.six.moves.urllib.error import HTTPError -from ansible.module_utils.six.moves.urllib.parse import quote as urlquote, urlencode, urlparse +from ansible.module_utils.six.moves.urllib.parse import quote as urlquote, urlencode, urlparse, parse_qs, urljoin from ansible.module_utils._text import to_bytes, to_native, to_text from ansible.module_utils.urls import open_url, prepare_multipart from ansible.utils.display import Display @@ -30,6 +32,18 @@ except ImportError: from urlparse import urlparse display = Display() +COLLECTION_PAGE_SIZE = 100 +RETRY_HTTP_ERROR_CODES = [ # TODO: Allow user-configuration + 429, # Too Many Requests + 520, # Galaxy rate limit error code (Cloudflare unknown error) +] + + +def is_rate_limit_exception(exception): + # Note: cloud.redhat.com masks rate limit errors with 403 (Forbidden) error codes. + # Since 403 could reflect the actual problem (such as an expired token), we should + # not retry by default. + return isinstance(exception, GalaxyError) and exception.http_code in RETRY_HTTP_ERROR_CODES def g_connect(versions): @@ -187,6 +201,10 @@ class GalaxyAPI: # Calling g_connect will populate self._available_api_versions return self._available_api_versions + @retry_with_delays_and_condition( + backoff_iterator=generate_jittered_backoff(retries=6, delay_base=2, delay_threshold=40), + should_retry_error=is_rate_limit_exception + ) def _call_galaxy(self, url, args=None, headers=None, method=None, auth_required=False, error_context_msg=None): headers = headers or {} self._add_auth_token(headers, url, required=auth_required) @@ -554,7 +572,9 @@ class GalaxyAPI: results_key = 'results' pagination_path = ['next'] - n_url = _urljoin(self.api_server, api_path, 'collections', namespace, name, 'versions', '/') + page_size_name = 'limit' if 'v3' in self.available_api_versions else 'page_size' + n_url = _urljoin(self.api_server, api_path, 'collections', namespace, name, 'versions', '/?%s=%d' % (page_size_name, COLLECTION_PAGE_SIZE)) + n_url_info = urlparse(n_url) error_context_msg = 'Error when getting available collection versions for %s.%s from %s (%s)' \ % (namespace, name, self.name, self.api_server) @@ -573,7 +593,10 @@ class GalaxyAPI: elif relative_link: # TODO: This assumes the pagination result is relative to the root server. Will need to be verified # with someone who knows the AH API. - next_link = n_url.replace(urlparse(n_url).path, next_link) + + # Remove the query string from the versions_url to use the next_link's query + n_url = urljoin(n_url, urlparse(n_url).path) + next_link = n_url.replace(n_url_info.path, next_link) data = self._call_galaxy(to_native(next_link, errors='surrogate_or_strict'), error_context_msg=error_context_msg) diff --git a/lib/ansible/module_utils/api.py b/lib/ansible/module_utils/api.py index 46a036d3747..e780ec6b50f 100644 --- a/lib/ansible/module_utils/api.py +++ b/lib/ansible/module_utils/api.py @@ -26,6 +26,8 @@ The 'api' module provides the following common argument specs: from __future__ import (absolute_import, division, print_function) __metaclass__ = type +import functools +import random import sys import time @@ -114,3 +116,51 @@ def retry(retries=None, retry_pause=1): return retried return wrapper + + +def generate_jittered_backoff(retries=10, delay_base=3, delay_threshold=60): + """The "Full Jitter" backoff strategy. + + Ref: https://www.awsarchitectureblog.com/2015/03/backoff.html + + :param retries: The number of delays to generate. + :param delay_base: The base time in seconds used to calculate the exponential backoff. + :param delay_threshold: The maximum time in seconds for any delay. + """ + for retry in range(0, retries): + yield random.randint(0, min(delay_threshold, delay_base * 2 ** retry)) + + +def retry_never(exception_or_result): + return False + + +def retry_with_delays_and_condition(backoff_iterator, should_retry_error=None): + """Generic retry decorator. + + :param backoff_iterator: An iterable of delays in seconds. + :param should_retry_error: A callable that takes an exception of the decorated function and decides whether to retry or not (returns a bool). + """ + if should_retry_error is None: + should_retry_error = retry_never + + def function_wrapper(function): + @functools.wraps(function) + def run_function(*args, **kwargs): + """This assumes the function has not already been called. + If backoff_iterator is empty, we should still run the function a single time with no delay. + """ + call_retryable_function = functools.partial(function, *args, **kwargs) + + for delay in backoff_iterator: + try: + return call_retryable_function() + except Exception as e: + if not should_retry_error(e): + raise + time.sleep(delay) + + # Only or final attempt + return call_retryable_function() + return run_function + return function_wrapper diff --git a/test/units/galaxy/test_api.py b/test/units/galaxy/test_api.py index f333a64b57e..a4b494c91b7 100644 --- a/test/units/galaxy/test_api.py +++ b/test/units/galaxy/test_api.py @@ -727,9 +727,10 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m actual = api.get_collection_versions('namespace', 'collection') assert actual == [u'1.0.0', u'1.0.1'] + page_query = '?limit=100' if api_version == 'v3' else '?page_size=100' assert mock_open.call_count == 1 assert mock_open.mock_calls[0][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ - 'versions/' % api_version + 'versions/%s' % (api_version, page_query) if token_ins: assert mock_open.mock_calls[0][2]['headers']['Authorization'] == '%s my token' % token_type @@ -738,9 +739,9 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m ('v2', None, None, [ { 'count': 6, - 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2', + 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2&page_size=100', 'previous': None, - 'results': [ + 'results': [ # Pay no mind, using more manageable results than page_size would indicate { 'version': '1.0.0', 'href': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/1.0.0', @@ -753,7 +754,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m }, { 'count': 6, - 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=3', + 'next': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=3&page_size=100', 'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions', 'results': [ { @@ -769,7 +770,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m { 'count': 6, 'next': None, - 'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2', + 'previous': 'https://galaxy.server.com/api/v2/collections/namespace/collection/versions/?page=2&page_size=100', 'results': [ { 'version': '1.0.4', @@ -786,7 +787,8 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m { 'count': 6, 'links': { - 'next': '/api/v3/collections/namespace/collection/versions/?page=2', + # v3 links are relative and the limit is included during pagination + 'next': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=100', 'previous': None, }, 'data': [ @@ -803,7 +805,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m { 'count': 6, 'links': { - 'next': '/api/v3/collections/namespace/collection/versions/?page=3', + 'next': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=200', 'previous': '/api/v3/collections/namespace/collection/versions', }, 'data': [ @@ -821,7 +823,7 @@ def test_get_collection_versions(api_version, token_type, token_ins, response, m 'count': 6, 'links': { 'next': None, - 'previous': '/api/v3/collections/namespace/collection/versions/?page=2', + 'previous': '/api/v3/collections/namespace/collection/versions/?limit=100&offset=100', }, 'data': [ { @@ -852,12 +854,22 @@ def test_get_collection_versions_pagination(api_version, token_type, token_ins, assert actual == [u'1.0.0', u'1.0.1', u'1.0.2', u'1.0.3', u'1.0.4', u'1.0.5'] assert mock_open.call_count == 3 + + if api_version == 'v3': + query_1 = 'limit=100' + query_2 = 'limit=100&offset=100' + query_3 = 'limit=100&offset=200' + else: + query_1 = 'page_size=100' + query_2 = 'page=2&page_size=100' + query_3 = 'page=3&page_size=100' + assert mock_open.mock_calls[0][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ - 'versions/' % api_version + 'versions/?%s' % (api_version, query_1) assert mock_open.mock_calls[1][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ - 'versions/?page=2' % api_version + 'versions/?%s' % (api_version, query_2) assert mock_open.mock_calls[2][1][0] == 'https://galaxy.server.com/api/%s/collections/namespace/collection/' \ - 'versions/?page=3' % api_version + 'versions/?%s' % (api_version, query_3) if token_type: assert mock_open.mock_calls[0][2]['headers']['Authorization'] == '%s my token' % token_type diff --git a/test/units/module_utils/test_api.py b/test/units/module_utils/test_api.py index 0eaea046598..f7e768a8be5 100644 --- a/test/units/module_utils/test_api.py +++ b/test/units/module_utils/test_api.py @@ -7,11 +7,19 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type -from ansible.module_utils.api import rate_limit, retry +from ansible.module_utils.api import rate_limit, retry, retry_with_delays_and_condition import pytest +class CustomException(Exception): + pass + + +class CustomBaseException(BaseException): + pass + + class TestRateLimit: def test_ratelimit(self): @@ -26,17 +34,16 @@ class TestRateLimit: class TestRetry: def test_no_retry_required(self): - self.counter = 0 - @retry(retries=4, retry_pause=2) def login_database(): - self.counter += 1 + login_database.counter += 1 return 'success' + login_database.counter = 0 r = login_database() assert r == 'success' - assert self.counter == 1 + assert login_database.counter == 1 def test_catch_exception(self): @@ -44,5 +51,71 @@ class TestRetry: def login_database(): return 'success' - with pytest.raises(Exception): + with pytest.raises(Exception, match="Retry"): login_database() + + def test_no_retries(self): + + @retry() + def login_database(): + assert False, "Should not execute" + + login_database() + + +class TestRetryWithDelaysAndCondition: + + def test_empty_retry_iterator(self): + @retry_with_delays_and_condition(backoff_iterator=[]) + def login_database(): + login_database.counter += 1 + + login_database.counter = 0 + r = login_database() + assert login_database.counter == 1 + + def test_no_retry_exception(self): + @retry_with_delays_and_condition( + backoff_iterator=[1], + should_retry_error=lambda x: False, + ) + def login_database(): + login_database.counter += 1 + if login_database.counter == 1: + raise CustomException("Error") + + login_database.counter = 0 + with pytest.raises(CustomException, match="Error"): + login_database() + assert login_database.counter == 1 + + def test_no_retry_baseexception(self): + @retry_with_delays_and_condition( + backoff_iterator=[1], + should_retry_error=lambda x: True, # Retry all exceptions inheriting from Exception + ) + def login_database(): + login_database.counter += 1 + if login_database.counter == 1: + # Raise an exception inheriting from BaseException + raise CustomBaseException("Error") + + login_database.counter = 0 + with pytest.raises(CustomBaseException, match="Error"): + login_database() + assert login_database.counter == 1 + + def test_retry_exception(self): + @retry_with_delays_and_condition( + backoff_iterator=[1], + should_retry_error=lambda x: isinstance(x, CustomException), + ) + def login_database(): + login_database.counter += 1 + if login_database.counter == 1: + raise CustomException("Retry") + return 'success' + + login_database.counter = 0 + assert login_database() == 'success' + assert login_database.counter == 2