CloudRetry/AWSRetry backoff decorator with unit tests (#17039)

* Added aws_retry decorator function with unit tests

* Restructured the code to be used with a base class.

This base class CloudRetry can be reused by any other cloud provider.
This decorator should be used in situations where you need to implement
a backoff algorithm and want to retry based on the status code from the
exception.

* updated documentation

* fixed tabs

* added botocore and boto3 to requirements.txt

* removed cloud.py from py24 tests, as it depends on boto3

* fix relative imports

* updated test to be 2.6 compat

* updated method name from retry to backoff

* readded lxd

* Updated default backoff from 2 seconds to 1.1s.

This will be about a total of 48 seconds in 10 tries. This is
configurable.
pull/17554/head
Allen Sanabria 8 years ago committed by Ryan Brown
parent 60706cc12e
commit b510abce17

@ -0,0 +1,108 @@
#
# (c) 2016 Allen Sanabria, <asanabria@linuxdynasty.org>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
#
"""
This module adds shared support for generic cloud modules
In order to use this module, include it as part of a custom
module as shown below.
from ansible.module_utils.cloud import *
The 'cloud' module provides the following common classes:
* CloudRetry
- The base class to be used by other cloud providers, in order to
provide a backoff/retry decorator based on status codes.
- Example using the AWSRetry class which inherits from CloudRetry.
@AWSRetry.backoff(tries=20, delay=2, backoff=2)
get_ec2_security_group_ids_from_names()
"""
from functools import wraps
import syslog
import time
from ansible.module_utils.pycompat24 import get_exception
class CloudRetry(object):
    """Base class for a backoff/retry decorator driven by exception status codes.

    A cloud provider subclasses this, sets ``base_class`` to the exception
    type its SDK raises, and overrides ``status_code_from_exception`` and
    ``found``.  The ``backoff`` classmethod then builds a decorator that
    retries the wrapped callable while ``found`` reports a retryable code.
    """
    # This is the base class of the exception.
    # AWS Example botocore.exceptions.ClientError
    # While this is None no exception is considered retryable.
    base_class = None

    @staticmethod
    def status_code_from_exception(error):
        """Return the status code from the exception object.

        The base implementation returns None; subclasses override it.

        Args:
            error (object): The exception itself.
        """
        pass

    @staticmethod
    def found(response_code):
        """Return True if the Response Code to retry on was found.

        The base implementation returns None (falsy), so nothing is retried
        until a subclass overrides it.

        Args:
            response_code (str): This is the Response Code that is being
                matched against.
        """
        pass

    @classmethod
    def backoff(cls, tries=10, delay=3, backoff=1.1):
        """Retry calling the decorated function using an exponential backoff.

        Kwargs:
            tries (int): Number of times to try (not retry) before giving up
                default=10
            delay (int or float): Initial delay between retries in seconds
                default=3
            backoff (int or float): Backoff multiplier, e.g. a value of 2
                will double the delay after each retry
                default=1.1

        Returns:
            A decorator.  The decorated function returns whatever the wrapped
            function returns, and re-raises the wrapped function's exception
            when it is not an instance of ``base_class``, when ``found``
            rejects its status code, or when the final attempt fails.
        """
        def deco(f):
            @wraps(f)
            def retry_func(*args, **kwargs):
                attempts_left, current_delay = tries, delay
                # The last attempt happens after the loop so that its
                # exception propagates to the caller untouched.
                while attempts_left > 1:
                    try:
                        return f(*args, **kwargs)
                    except Exception as e:
                        # An unconfigured base_class (None) must not reach
                        # isinstance(): that would raise TypeError and hide
                        # the real error.
                        if cls.base_class is None or not isinstance(e, cls.base_class):
                            # Not the provider's exception type: re-raise
                            # with the original traceback.
                            raise
                        response_code = cls.status_code_from_exception(e)
                        if not cls.found(response_code):
                            # Provider exception, but its status code is not
                            # one we retry on.
                            raise
                        msg = "{0}: Retrying in {1} seconds...".format(str(e), current_delay)
                        syslog.syslog(syslog.LOG_INFO, msg)
                        time.sleep(current_delay)
                        attempts_left -= 1
                        current_delay *= backoff
                return f(*args, **kwargs)

            return retry_func  # true decorator

        return deco

@ -27,8 +27,11 @@
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os
import re
from time import sleep
from ansible.module_utils.cloud import CloudRetry
try:
import boto
import boto.ec2 #boto does weird import stuff
@ -55,6 +58,29 @@ class AnsibleAWSError(Exception):
pass
class AWSRetry(CloudRetry):
    """CloudRetry subclass that retries on selected botocore ClientError codes."""
    base_class = botocore.exceptions.ClientError

    @staticmethod
    def status_code_from_exception(error):
        """Return the error code stored under ``error.response['Error']['Code']``.

        Args:
            error (object): The exception itself.
        """
        return error.response['Error']['Code']

    @staticmethod
    def found(response_code):
        """Return True if the Response Code is one that should be retried.

        A code is retried when it is in the fixed ``retry_on`` list or when
        it starts with ``<word>.NotFound``.

        Args:
            response_code (str): This is the Response Code that is being
                matched against.
        """
        # This list of failures is based on this API Reference
        # http://docs.aws.amazon.com/AWSEC2/latest/APIReference/errors-overview.html
        retry_on = [
            'RequestLimitExceeded', 'Unavailable', 'ServiceUnavailable',
            'InternalFailure', 'InternalError'
        ]
        # The dot is escaped so that only "<Something>.NotFound" codes match;
        # unescaped it matched any character, e.g. "ResourceNotFoundException".
        not_found = re.compile(r'^\w+\.NotFound')
        return response_code in retry_on or bool(not_found.search(response_code))
def boto3_conn(module, conn_type=None, resource=None, region=None, endpoint=None, **params):
try:
return _boto3_conn(conn_type=conn_type, resource=resource, region=region, endpoint=endpoint, **params)

@ -0,0 +1,87 @@
# -*- coding: utf-8 -*-
# (c) 2015, Allen Sanabria <asanabria@linuxdynasty.org>
#
# This file is part of Ansible
#
# Ansible is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ansible is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
import unittest
import botocore
import boto3
from ansible.module_utils.ec2 import AWSRetry
class RetryTestCase(unittest.TestCase):
    """Unit tests for the AWSRetry.backoff decorator."""

    def test_no_failures(self):
        """A function that never raises is called exactly once."""
        self.counter = 0

        @AWSRetry.backoff(tries=2, delay=0.1)
        def no_failures():
            self.counter += 1

        no_failures()
        self.assertEqual(self.counter, 1)

    def test_retry_once(self):
        """A retryable failure on the first call is followed by one retry."""
        self.counter = 0
        err_msg = {'Error': {'Code': 'InstanceId.NotFound'}}

        @AWSRetry.backoff(tries=2, delay=0.1)
        def retry_once():
            self.counter += 1
            if self.counter < 2:
                raise botocore.exceptions.ClientError(err_msg, 'Could not find you')
            else:
                return 'success'

        r = retry_once()
        self.assertEqual(r, 'success')
        self.assertEqual(self.counter, 2)

    def test_reached_limit(self):
        """After `tries` retryable failures the last exception propagates."""
        self.counter = 0
        err_msg = {'Error': {'Code': 'RequestLimitExceeded'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def fail():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'toooo fast!!')

        # try/except/else instead of assertRaises as a context manager, which
        # Python 2.6's unittest does not provide.
        try:
            fail()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'RequestLimitExceeded')
        else:
            self.fail('ClientError was not raised after exhausting all tries')
        self.assertEqual(self.counter, 4)

    def test_unexpected_exception_does_not_retry(self):
        """A ClientError with a non-retryable code is raised on the first call."""
        self.counter = 0
        err_msg = {'Error': {'Code': 'AuthFailure'}}

        @AWSRetry.backoff(tries=4, delay=0.1)
        def raise_unexpected_error():
            self.counter += 1
            raise botocore.exceptions.ClientError(err_msg, 'unexpected error')

        try:
            raise_unexpected_error()
        except botocore.exceptions.ClientError as e:
            self.assertEqual(e.response['Error']['Code'], 'AuthFailure')
        else:
            self.fail('ClientError was not raised for a non-retryable code')
        self.assertEqual(self.counter, 1)
# Run the test cases in this module when the file is executed as a script.
if __name__ == '__main__':
unittest.main()

@ -12,7 +12,7 @@ if [ "${TOXENV}" = 'py24' ]; then
fi
python2.4 -V
python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils
python2.4 -m compileall -fq -x 'module_utils/(a10|rax|openstack|cloud|ec2|gce|lxd|docker_common|azure_rm_common|vca|vmware|gcp|gcdns).py' lib/ansible/module_utils
else
if [ "${install_deps}" != "" ]; then
pip install -r "${source_root}/test/utils/shippable/sanity-requirements.txt" --upgrade

@ -11,3 +11,5 @@ unittest2
redis
python3-memcached
python-systemd
botocore
boto3

@ -12,3 +12,5 @@ redis
python-memcached
python-systemd
pycrypto
botocore
boto3

Loading…
Cancel
Save