From c5fe40661d26094b93c40ca3fc99e0d9d72840b0 Mon Sep 17 00:00:00 2001 From: tedder Date: Mon, 29 Dec 2014 16:38:08 -0800 Subject: [PATCH] feature pull request: catch and retry recoverable errors boto can throw SSLError when timeouts occur (among other SSL errors). Catch these so proper JSON can be returned, and also add the ability to retry the operation. There's an open issue in boto for this: https://github.com/boto/boto/issues/2409 Here's a sample stacktrace that inspired me to work on this. I'm on 1.7, but there's no meaningful differences in the 1.8 release that would affect this. I've added line breaks to the trace for readability. failed to parse: Traceback (most recent call last): File "/home/ubuntu/.ansible/tmp/ansible-tmp-1419895753.17-160808281985012/s3", line 2031, in main() File "/home/ubuntu/.ansible/tmp/ansible-tmp-1419895753.17-160808281985012/s3", line 353, in main download_s3file(module, s3, bucket, obj, dest) File "/home/ubuntu/.ansible/tmp/ansible-tmp-1419895753.17-160808281985012/s3", line 234, in download_s3file key.get_contents_to_filename(dest) File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1665, in get_contents_to_filename response_headers=response_headers) File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1603, in get_contents_to_file response_headers=response_headers) File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1435, in get_file query_args=None) File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 1488, in _get_file_internal for bytes in self: File "/usr/local/lib/python2.7/dist-packages/boto/s3/key.py", line 368, in next data = self.resp.read(self.BufferSize) File "/usr/local/lib/python2.7/dist-packages/boto/connection.py", line 416, in read return httplib.HTTPResponse.read(self, amt) File "/usr/lib/python2.7/httplib.py", line 567, in read s = self.fp.read(amt) File "/usr/lib/python2.7/socket.py", line 380, in read data = self._sock.recv(left) File "/usr/lib/python2.7/ssl.py", line 341, in recv return self.read(buflen) File "/usr/lib/python2.7/ssl.py", line 260, in read return self._sslobj.read(len) ssl.SSLError: The read operation timed out --- cloud/amazon/s3.py | 45 ++++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/cloud/amazon/s3.py b/cloud/amazon/s3.py index 6438c6405e7..48b4eb73890 100644 --- a/cloud/amazon/s3.py +++ b/cloud/amazon/s3.py @@ -95,6 +95,13 @@ options: required: false default: null version_added: "1.8" + retries: + description: + - On recoverable failure, how many times to retry before actually failing. + required: false + default: 0 + version_added: "1.9" + requirements: [ "boto" ] author: Lester Wade, Ralph Tice @@ -133,6 +140,7 @@ import sys import os import urlparse import hashlib +from ssl import SSLError try: import boto @@ -237,14 +245,23 @@ def upload_s3file(module, s3, bucket, obj, src, expiry, metadata): except s3.provider.storage_copy_error, e: module.fail_json(msg= str(e)) -def download_s3file(module, s3, bucket, obj, dest): - try: - bucket = s3.lookup(bucket) - key = bucket.lookup(obj) - key.get_contents_to_filename(dest) - module.exit_json(msg="GET operation complete", changed=True) - except s3.provider.storage_copy_error, e: - module.fail_json(msg= str(e)) +def download_s3file(module, s3, bucket, obj, dest, retries): + # retries is the number of loops; range/xrange needs to be one + # more to get that count of loops. + bucket = s3.lookup(bucket) + key = bucket.lookup(obj) + for x in xrange(0, retries + 1): + try: + key.get_contents_to_filename(dest) + module.exit_json(msg="GET operation complete", changed=True) + except s3.provider.storage_copy_error, e: + module.fail_json(msg= str(e)) + except SSLError as e: + # actually fail on last pass through the loop. + if x == retries: + module.fail_json(msg="s3 download failed; %s" % e) + # otherwise, try again, this may be a transient timeout. + pass def download_s3str(module, s3, bucket, obj): try: @@ -292,7 +309,8 @@ def main(): expiry = dict(default=600, aliases=['expiration']), s3_url = dict(aliases=['S3_URL']), overwrite = dict(aliases=['force'], default=True, type='bool'), - metadata = dict(type='dict'), + metadata = dict(type='dict'), + retries = dict(aliases=['retry'], type='str', default=0), ), ) module = AnsibleModule(argument_spec=argument_spec) @@ -307,6 +325,7 @@ def main(): s3_url = module.params.get('s3_url') overwrite = module.params.get('overwrite') metadata = module.params.get('metadata') + retries = int(module.params.get('retries')) ec2_url, aws_access_key, aws_secret_key, region = get_ec2_creds(module) @@ -368,7 +387,7 @@ def main(): # If the destination path doesn't exist, no need to md5um etag check, so just download. pathrtn = path_check(dest) if pathrtn is False: - download_s3file(module, s3, bucket, obj, dest) + download_s3file(module, s3, bucket, obj, dest, retries) # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists. if pathrtn is True: @@ -377,13 +396,13 @@ def main(): if md5_local == md5_remote: sum_matches = True if overwrite is True: - download_s3file(module, s3, bucket, obj, dest) + download_s3file(module, s3, bucket, obj, dest, retries) else: module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False) else: sum_matches = False if overwrite is True: - download_s3file(module, s3, bucket, obj, dest) + download_s3file(module, s3, bucket, obj, dest, retries) else: module.fail_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.", failed=True) @@ -393,7 +412,7 @@ def main(): # At this point explicitly define the overwrite condition. if sum_matches is True and pathrtn is True and overwrite is True: - download_s3file(module, s3, bucket, obj, dest) + download_s3file(module, s3, bucket, obj, dest, retries) # If sum does not match but the destination exists, we