[cloud] Change S3 MD5 checksum for multipart support in aws_s3 (#34216)

- If file has only one part and is uploaded as multipart, it is still computed the same way as having multiple parts
pull/27012/merge
Vinay Dandekar 7 years ago committed by Ryan Brown
parent 0587aedc01
commit f20af4b909

@ -271,8 +271,9 @@ s3_keys:
- prefix1/key2 - prefix1/key2
''' '''
import os import hashlib
import mimetypes import mimetypes
import os
import traceback import traceback
from ansible.module_utils.six.moves.urllib.parse import urlparse from ansible.module_utils.six.moves.urllib.parse import urlparse
from ssl import SSLError from ssl import SSLError
@ -306,7 +307,34 @@ def key_check(module, s3, bucket, obj, version=None, validate=True):
return exists return exists
def keysum(module, s3, bucket, obj, version=None): def keysum_compare(module, local_file, s3, bucket, obj, version=None):
s3_keysum = keysum(s3, bucket, obj, version=version)
if '-' in s3_keysum: # Check for multipart, ETag is not a proper MD5 sum
parts = int(s3_keysum.split('-')[1])
md5s = []
with open(local_file, 'rb') as f:
for part_num in range(1, parts + 1):
# Get the part size for every part of the multipart uploaded object
if version:
key_head = s3.head_object(Bucket=bucket, Key=obj, VersionId=version, PartNumber=part_num)
else:
key_head = s3.head_object(Bucket=bucket, Key=obj, PartNumber=part_num)
part_size = int(key_head['ContentLength'])
data = f.read(part_size)
hash = hashlib.md5(data)
md5s.append(hash)
digests = b''.join(m.digest() for m in md5s)
digests_md5 = hashlib.md5(digests)
local_keysum = '{0}-{1}'.format(digests_md5.hexdigest(), len(md5s))
else: # Compute the MD5 sum normally
local_keysum = module.md5(local_file)
return s3_keysum == local_keysum
def keysum(s3, bucket, obj, version=None):
if version: if version:
key_check = s3.head_object(Bucket=bucket, Key=obj, VersionId=version) key_check = s3.head_object(Bucket=bucket, Key=obj, VersionId=version)
else: else:
@ -314,8 +342,6 @@ def keysum(module, s3, bucket, obj, version=None):
if not key_check: if not key_check:
return None return None
md5_remote = key_check['ETag'][1:-1] md5_remote = key_check['ETag'][1:-1]
if '-' in md5_remote: # Check for multipart, etag is not md5
return None
return md5_remote return md5_remote
@ -706,11 +732,11 @@ def main():
else: else:
module.fail_json(msg="Key %s does not exist." % obj) module.fail_json(msg="Key %s does not exist." % obj)
# If the destination path doesn't exist or overwrite is True, no need to do the md5um etag check, so just download. # If the destination path doesn't exist or overwrite is True, no need to do the md5sum ETag check, so just download.
# Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists. # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
if path_check(dest): if path_check(dest):
# Determine if the remote and local object are identical # Determine if the remote and local object are identical
if keysum(module, s3, bucket, obj, version=version) == module.md5(dest): if keysum_compare(module, dest, s3, bucket, obj, version=version):
sum_matches = True sum_matches = True
if overwrite == 'always': if overwrite == 'always':
download_s3file(module, s3, bucket, obj, dest, retries, version=version) download_s3file(module, s3, bucket, obj, dest, retries, version=version)
@ -740,10 +766,10 @@ def main():
if bucketrtn: if bucketrtn:
keyrtn = key_check(module, s3, bucket, obj, version=version, validate=validate) keyrtn = key_check(module, s3, bucket, obj, version=version, validate=validate)
# Lets check key state. Does it exist and if it does, compute the etag md5sum. # Lets check key state. Does it exist and if it does, compute the ETag md5sum.
if bucketrtn and keyrtn: if bucketrtn and keyrtn:
# Compare the local and remote object # Compare the local and remote object
if module.md5(src) == keysum(module, s3, bucket, obj): if keysum_compare(module, src, s3, bucket, obj):
sum_matches = True sum_matches = True
if overwrite == 'always': if overwrite == 'always':
# only use valid object acls for the upload_s3file function # only use valid object acls for the upload_s3file function

Loading…
Cancel
Save