From ed5ea7e921c0a8a776b2cf42ef86b3e871d643f8 Mon Sep 17 00:00:00 2001 From: "Ching Yi, Chan" Date: Thu, 4 Jun 2015 14:28:57 +0800 Subject: [PATCH 1/2] Prevent memory-error from a large file --- cloud/amazon/s3.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cloud/amazon/s3.py b/cloud/amazon/s3.py index 6f8e447397d..545955e90cd 100644 --- a/cloud/amazon/s3.py +++ b/cloud/amazon/s3.py @@ -296,6 +296,15 @@ def is_walrus(s3_url): else: return False +def get_md5_digest(local_file): + md5 = hashlib.md5() + with open(local_file, 'rb') as f: + while True: + data = f.read(1024 ** 2) + if not data: break + md5.update(data) + return md5.hexdigest() + def main(): argument_spec = ec2_argument_spec() @@ -410,7 +419,7 @@ def main(): # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists. if pathrtn is True: md5_remote = keysum(module, s3, bucket, obj) - md5_local = hashlib.md5(open(dest, 'rb').read()).hexdigest() + md5_local = get_md5_digest(dest) if md5_local == md5_remote: sum_matches = True if overwrite == 'always': @@ -454,7 +463,8 @@ def main(): # Lets check key state. Does it exist and if it does, compute the etag md5sum. 
if bucketrtn is True and keyrtn is True: md5_remote = keysum(module, s3, bucket, obj) - md5_local = hashlib.md5(open(src, 'rb').read()).hexdigest() + md5_local = get_md5_digest(src) + if md5_local == md5_remote: sum_matches = True if overwrite == 'always': From 1517ae8ab27d2493a51fb1eff7cf0c30b5c54f0a Mon Sep 17 00:00:00 2001 From: "Ching Yi, Chan" Date: Fri, 5 Jun 2015 23:29:11 +0800 Subject: [PATCH 2/2] Refactoring for easier to read --- cloud/amazon/s3.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cloud/amazon/s3.py b/cloud/amazon/s3.py index 545955e90cd..d08d1b6a46c 100644 --- a/cloud/amazon/s3.py +++ b/cloud/amazon/s3.py @@ -299,9 +299,7 @@ def is_walrus(s3_url): def get_md5_digest(local_file): md5 = hashlib.md5() with open(local_file, 'rb') as f: - while True: - data = f.read(1024 ** 2) - if not data: break + for data in iter(lambda: f.read(1024 ** 2), b''): md5.update(data) return md5.hexdigest()