Rewritten s3 module to allow for additional modes of operation.

reviewable/pr18780/r1
lwade 12 years ago
parent 36aa5943d1
commit 99ac050a43

@ -24,102 +24,223 @@ version_added: "1.1"
options: options:
bucket: bucket:
description: description:
- bucket you wish to present/absent for the key (file in path). - Bucket name.
required: true required: true
default: null default: null
aliases: [] aliases: []
state: object:
description: description:
- desired state for both bucket and file. - Keyname of the object inside the bucket. Can be used to create "virtual directories", see examples.
default: null
aliases: []
path:
description:
- path to the key (file) which you wish to be present/absent in the bucket.
required: false required: false
default: null default: null
aliases: [] aliases: []
dest: version_added: "1.3"
src:
description: description:
- the destination in S3, if different from path - The source file path when performing a PUT operation.
required: false required: false
default: null default: null
aliases: [] aliases: []
expiry: version_added: "1.3"
dest:
description: description:
- expiry period (in seconds) for returned download URL. - The destination file path when downloading an object/key with a GET operation.
required: false required: false
default: 600 default: 600
aliases: [] aliases: []
version_added: "1.3"
overwrite: overwrite:
description: description:
- force overwrite if a file with the same name already exists. Does not support files uploaded to S3 with multipart upload. - Force overwrite either locally on the filesystem or remotely with the object/key. Used with PUT and GET operations.
required: false required: false
default: false default: false
version_added: "1.2" version_added: "1.2"
mode:
description:
- Switches the module behaviour between put (upload), get (download), create (bucket) and delete (bucket).
required: true
default: null
aliases: []
expiry:
description:
- Time limit (in seconds) for the URL generated and returned by S3/Walrus when performing a mode=put or mode=geturl operation.
required: false
default: null
aliases: []
requirements: [ "boto" ] requirements: [ "boto" ]
author: Lester Wade, Ralph Tice author: Lester Wade, Ralph Tice
''' '''
EXAMPLES = ''' EXAMPLES = '''
# Simple PUT operation # Simple PUT operation
- s3: bucket: mybucket - s3: bucket=mybucket object=/my/desired/key.txt src=/usr/local/myfile.txt mode=put
path: /path/to/file # Simple GET operation
state: present - s3: bucket=mybucket object=/my/desired/key.txt dest=/usr/local/myfile.txt mode=get
# Force and overwrite if checksums don't match # GET/download and overwrite local file (trust remote)
- s3: bucket: mybucket - s3: bucket=mybucket object=/my/desired/key.txt dest=/usr/local/myfile.txt mode=get overwrite=true
path: /path/to/file # PUT/upload and overwrite remote file (trust local)
state: present - s3: bucket=mybucket object=/my/desired/key.txt src=/usr/local/myfile.txt mode=put overwrite=true
overwrite: yes # Create an empty bucket
- s3: bucket=mybucket mode=create
# Delete a bucket and all contents
- s3: bucket=mybucket mode=delete
''' '''
import sys import sys
import os import os
import urlparse import urlparse
import hashlib
try: try:
import boto import boto
import hashlib
except ImportError: except ImportError:
print "failed=True msg='boto required for this module'" print "failed=True msg='boto required for this module'"
sys.exit(1) sys.exit(1)
def upload_s3file(module, s3, bucket, key_name, path, expiry): def key_check(module, s3, bucket, obj):
try:
bucket = s3.lookup(bucket)
key_check = bucket.get_key(obj)
except s3.provider.storage_response_error, e:
module.fail_json(msg= str(e))
if key_check:
return True
else:
return False
def keysum(module, s3, bucket, obj):
    """Return the MD5 checksum S3 reports for an object, taken from its ETag.

    Looks ``bucket`` up on the ``s3`` connection and asks it for ``obj``.

    Returns the ETag with its first and last character (the surrounding
    quotation marks) stripped, or None when the bucket holds no such key.

    Fails the module through ``module.fail_json`` when the ETag contains a
    '-': that marks a multipart upload, whose ETag is not an MD5 digest.
    """
    remote_key = s3.lookup(bucket).get_key(obj)
    if not remote_key:
        # Nothing to checksum: say so explicitly rather than leaving the
        # result undefined for the caller.
        return None
    md5_remote = remote_key.etag[1:-1]
    if '-' in md5_remote:
        module.fail_json(msg="Files uploaded with multipart of s3 are not supported with checksum, unable to compute checksum.")
    return md5_remote
def bucket_check(module, s3, bucket):
    """Report whether ``bucket`` can be looked up on the ``s3`` connection.

    Returns True when the lookup yields a truthy result and False otherwise.
    A storage response error raised by the provider is reported through
    ``module.fail_json`` with the error text as the message.
    """
    try:
        found = s3.lookup(bucket)
    except s3.provider.storage_response_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
    return bool(found)
def create_bucket(module, s3, bucket):
    """Ask the ``s3`` connection to create ``bucket``.

    Returns True when the connection hands back a truthy bucket and None
    otherwise. A storage response error raised by the provider is reported
    through ``module.fail_json`` with the error text as the message.
    """
    try:
        created = s3.create_bucket(bucket)
    except s3.provider.storage_response_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
    if not created:
        return None
    return True
def delete_bucket(module, s3, bucket):
    """Empty ``bucket`` and then delete the bucket itself.

    Every key listed in the bucket is removed by name before the bucket is
    deleted. Returns True on success. A storage response error raised by the
    provider is reported through ``module.fail_json`` with the error text as
    the message.
    """
    try:
        target = s3.lookup(bucket)
        key_names = []
        for entry in target.list():
            key_names.append(entry.name)
        target.delete_keys(key_names)
        target.delete()
        return True
    except s3.provider.storage_response_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
def delete_key(module, s3, bucket, obj):
    """Delete the key ``obj`` from ``bucket`` and finish the module run.

    On success the module exits through ``module.exit_json`` with
    ``changed=True`` and a message naming the bucket. A storage response
    error raised by the provider is reported through ``module.fail_json``
    with the error text as the message.
    """
    try:
        s3.lookup(bucket).delete_key(obj)
    except s3.provider.storage_response_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
    else:
        # ``bucket`` is still the name the caller passed in, so the message
        # shows the bucket name rather than the repr of a bucket object.
        module.exit_json(msg="Object deleted from bucket %s" % bucket, changed=True)
def create_key(module, s3, bucket, obj):
    """Create an empty object named ``obj`` in ``bucket`` and finish the run.

    On success the module exits through ``module.exit_json`` with
    ``changed=True`` and a message naming the object and the bucket. A
    storage response error raised by the provider is reported through
    ``module.fail_json`` with the error text as the message.
    """
    try:
        key = s3.lookup(bucket).new_key(obj)
        # new_key() only builds a local key object; storing an empty body is
        # what actually creates the object in the bucket.
        key.set_contents_from_string('')
    except s3.provider.storage_response_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
    else:
        # ``bucket`` is still the name the caller passed in, so the message
        # shows the bucket name rather than the repr of a bucket object.
        module.exit_json(msg="Object %s created in bucket %s" % (obj, bucket), changed=True)
def upload_file_check(src, module=None):
    """Check that ``src`` is usable as the local source of an upload.

    Returns True when ``src`` exists and False when it does not (an empty or
    missing ``src`` counts as not existing).

    A directory is never a valid source. When ``module`` is supplied the
    failure is reported through ``module.fail_json``; without a module a
    ValueError carrying the same message is raised.
    """
    if not src:
        return False
    if os.path.isdir(src):
        message = "Specifying a directory is not a valid source for upload."
        if module is None:
            raise ValueError(message)
        module.fail_json(msg=message, failed=True)
    return os.path.exists(src)
def path_check(path):
    """Return True when ``path`` exists on the local filesystem, else False.

    An empty or missing ``path`` is reported as not existing, so callers can
    pass an optional parameter straight through without a type error.
    """
    if not path:
        return False
    return os.path.exists(path)
def upload_s3file(module, s3, bucket, obj, src, expiry):
try: try:
key = bucket.new_key(key_name) bucket = s3.lookup(bucket)
key.set_contents_from_filename(path) key = bucket.new_key(obj)
key.set_contents_from_filename(src)
url = key.generate_url(expiry) url = key.generate_url(expiry)
module.exit_json(msg="Put operation complete", url=url, changed=True) module.exit_json(msg="PUT operation complete", url=url, changed=True)
sys.exit(0) sys.exit(0)
except s3.provider.storage_copy_error, e: except s3.provider.storage_copy_error, e:
module.fail_json(msg= str(e)) module.fail_json(msg= str(e))
def download_s3file(module, s3, bucket, obj, dest):
    """Download the key ``obj`` from ``bucket`` into the local file ``dest``.

    On success the module exits through ``module.exit_json`` with
    ``changed=True``. A storage copy error raised by the provider is reported
    through ``module.fail_json`` with the error text as the message.
    """
    try:
        container = s3.lookup(bucket)
        remote_key = container.lookup(obj)
        remote_key.get_contents_to_filename(dest)
        module.exit_json(msg="GET operation complete", changed=True)
        sys.exit(0)
    except s3.provider.storage_copy_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
def get_download_url(module, s3, bucket, obj, expiry):
    """Generate a time-limited URL for the key ``obj`` in ``bucket``.

    ``expiry`` is handed to the key's URL generator unchanged. On success the
    module exits through ``module.exit_json`` carrying the URL and the
    expiry. A storage response error raised by the provider is reported
    through ``module.fail_json`` with the error text as the message.
    """
    try:
        container = s3.lookup(bucket)
        remote_key = container.lookup(obj)
        signed_url = remote_key.generate_url(expiry)
        module.exit_json(msg="Download url:", url=signed_url, expiry=expiry, changed=True)
        sys.exit(0)
    except s3.provider.storage_response_error:
        # sys.exc_info() is read instead of binding the exception in the
        # except clause, so the handler parses on every Python version.
        module.fail_json(msg=str(sys.exc_info()[1]))
def main(): def main():
module = AnsibleModule( module = AnsibleModule(
argument_spec = dict( argument_spec = dict(
bucket = dict(), bucket = dict(required=True),
path = dict(), object = dict(),
src = dict(),
dest = dict(), dest = dict(),
state = dict(choices=['present', 'absent']), mode = dict(choices=['get', 'put', 'delete', 'create', 'geturl'], required=True),
expiry = dict(default=600, aliases=['expiration']), expiry = dict(default=600, aliases=['expiration']),
s3_url = dict(aliases=['S3_URL']), s3_url = dict(aliases=['S3_URL']),
ec2_secret_key = dict(aliases=['EC2_SECRET_KEY']), ec2_secret_key = dict(aliases=['EC2_SECRET_KEY']),
ec2_access_key = dict(aliases=['EC2_ACCESS_KEY']), ec2_access_key = dict(aliases=['EC2_ACCESS_KEY']),
overwrite = dict(default=False, type='bool'), overwrite = dict(default=False, type='bool'),
), ),
required_together=[ ['bucket', 'path', 'state'] ],
) )
bucket_name = module.params.get('bucket') bucket = module.params.get('bucket')
path = os.path.expanduser(module.params['path']) obj = module.params.get('object')
src = module.params.get('src')
dest = module.params.get('dest') dest = module.params.get('dest')
state = module.params.get('state') mode = module.params.get('mode')
expiry = int(module.params['expiry']) expiry = int(module.params['expiry'])
s3_url = module.params.get('s3_url') s3_url = module.params.get('s3_url')
ec2_secret_key = module.params.get('ec2_secret_key') ec2_secret_key = module.params.get('ec2_secret_key')
ec2_access_key = module.params.get('ec2_access_key') ec2_access_key = module.params.get('ec2_access_key')
overwrite = module.params.get('overwrite') overwrite = module.params.get('overwrite')
if module.params.get('object'):
obj = os.path.expanduser(module.params['object'])
# allow eucarc environment variables to be used if ansible vars aren't set # allow eucarc environment variables to be used if ansible vars aren't set
if not s3_url and 'S3_URL' in os.environ: if not s3_url and 'S3_URL' in os.environ:
s3_url = os.environ['S3_URL'] s3_url = os.environ['S3_URL']
@ -141,117 +262,140 @@ def main():
except boto.exception.NoAuthHandlerFound, e: except boto.exception.NoAuthHandlerFound, e:
module.fail_json(msg = str(e)) module.fail_json(msg = str(e))
# README - Future features this module should have: # If our mode is a GET operation (download), go through the procedure as appropriate ...
# enhanced path (contents of a directory) if mode == 'get':
# md5sum check of file vs. key in bucket
# a user-friendly way to fetch the key (maybe a "fetch" parameter option)
# persistent download URL if desired
# Lets get some information from the s3 connection, including bucket check ...
bucket = s3.lookup(bucket_name)
if bucket:
bucket_exists = True
else:
bucket_exists = False
# Lets list the contents # First, we check to see if the bucket exists, we get "bucket" returned.
if bucket_exists is True: bucketrtn = bucket_check(module, s3, bucket)
bucket_contents = bucket.list() if bucketrtn is False:
module.fail_json(msg="Target bucket cannot be found", failed=True)
sys.exit(0)
# Check filename is valid, if not downloading # Next, we check to see if the key in the bucket exists. If it exists, it also returns key_matches md5sum check.
if path: keyrtn = key_check(module, s3, bucket, obj)
if not os.path.exists(path): if keyrtn is False:
failed = True module.fail_json(msg="Target key cannot be found", failed=True)
module.fail_json(msg="Source %s cannot be found" % (path), failed=failed)
sys.exit(0) sys.exit(0)
# Default to setting the key to the same as the filename if dest is not provided. # If the destination path doesn't exist, no need to md5um etag check, so just download.
if dest is None: pathrtn = path_check(dest)
key_name = os.path.basename(path) if pathrtn is False:
else: download_s3file(module, s3, bucket, obj, dest)
key_name = dest
# Check to see if the key already exists # Compare the remote MD5 sum of the object with the local dest md5sum, if it already exists.
key_exists = False if pathrtn is True:
if bucket_exists is True: md5_remote = keysum(module, s3, bucket, obj)
try: md5_local = hashlib.md5(open(dest, 'rb').read()).hexdigest()
key_check = bucket.get_key(key_name) if md5_local == md5_remote:
if key_check: sum_matches = True
key_exists = True if overwrite is True:
download_s3file(module, s3, bucket, obj, dest)
else: else:
key_exists = False module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)
except s3.provider.storage_response_error, e: else:
module.fail_json(msg= str(e)) sum_matches = False
if overwrite is True:
download_s3file(module, s3, bucket, obj, dest)
else:
module.fail_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force download.", failed=True)
if key_exists is True and overwrite: # If destination file doesn't already exist we can go ahead and download.
# Retrieve MD5 Checksums. if pathrtn is False:
md5_remote = key_check.etag[1:-1] # Strip Quotation marks from etag: https://code.google.com/p/boto/issues/detail?id=391 download_s3file(module, s3, bucket, obj, dest)
etag_multipart = md5_remote.find('-')!=-1 # Find out if this is a multipart upload -> etag is not md5: https://forums.aws.amazon.com/message.jspa?messageID=222158
if etag_multipart is True: # Firstly, if key_matches is TRUE and overwrite is not enabled, we EXIT with a helpful message.
module.fail_json(msg="Files uploaded with multipart to s3 are not supported with checksum. They do not contain a valid md5 checksum, use overwrite=no instead.") if sum_matches is True and overwrite is False:
module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)
# At this point explicitly define the overwrite condition.
if sum_matches is True and pathrtn is True and overwrite is True:
download_s3file(module, s3, bucket, obj, dest)
# If sum does not match but the destination exists, we
# if our mode is a PUT operation (upload), go through the procedure as appropriate ...
if mode == 'put':
# Use this snippet to debug through conditionals:
# module.exit_json(msg="Bucket return %s"%bucketrtn)
# sys.exit(0)
# Lets check the src path.
pathrtn = path_check(src)
if pathrtn is False:
module.fail_json(msg="Local object for PUT does not exist", failed=True)
sys.exit(0) sys.exit(0)
md5_local = hashlib.md5(open(path, 'rb').read()).hexdigest()
md5_equal = md5_local == md5_remote # Lets check to see if bucket exists to get ground truth.
bucketrtn = bucket_check(module, s3, bucket)
if state == 'present': keyrtn = key_check(module, s3, bucket, obj)
if bucket_exists is True and key_exists is True:
if overwrite is False: # Lets check key state. Does it exist and if it does, compute the etag md5sum.
exists = True if bucketrtn is True and keyrtn is True:
changed = False md5_remote = keysum(module, s3, bucket, obj)
module.exit_json(msg="Bucket and key already exist", changed=changed) md5_local = hashlib.md5(open(src, 'rb').read()).hexdigest()
if md5_local == md5_remote:
sum_matches = True
if overwrite is True: if overwrite is True:
if md5_equal is True: upload_s3file(module, s3, bucket, obj, src, expiry)
module.exit_json(msg="Remote and local file checksums identical.", changed=False) else:
if md5_equal is False: module.exit_json(msg="Local and remote object are identical, ignoring. Use overwrite parameter to force.", changed=False)
upload_s3file(module, s3, bucket, key_name, path, expiry) else:
sys.exit(0) sum_matches = False
if overwrite is True:
upload_s3file(module, s3, bucket, obj, src, expiry)
else:
module.exit_json(msg="WARNING: Checksums do not match. Use overwrite parameter to force upload.", failed=True)
# If neither exist (based on bucket existence), we can create both.
if bucketrtn is False and pathrtn is True:
create_bucket(module, s3, bucket)
upload_s3file(module, s3, bucket, obj, src, expiry)
# If bucket exists but key doesn't, just upload.
if bucketrtn is True and pathrtn is True and keyrtn is False:
upload_s3file(module, s3, bucket, obj, src, expiry)
# If bucket exists, there cannot be a key within, lets create it ... # Support for deleting an object if we have both params.
if state == 'present': if mode == 'delete':
if bucket_exists is False: if bucket:
try: bucketrtn = bucket_check(module, s3, bucket)
bucket = s3.create_bucket(bucket_name) if bucketrtn is True:
bucket_exists = True deletertn = delete_bucket(module, s3, bucket)
key_exists = False if deletertn is True:
changed = True module.exit_json(msg="Bucket %s and all keys have been deleted."%bucket, changed=True)
except s3.provider.storage_create_error, e: else:
module.fail_json(msg = str(e)) module.fail_json(msg="Bucket does not exist.", failed=True)
else:
module.fail_json(msg="Bucket parameter is required.", failed=True)
# If bucket now exists but key doesn't or overwrite is True, create the key # Need to research how to create directories without "populating" a key, so this should just do bucket creation for now.
if state == 'present': # WE SHOULD ENABLE SOME WAY OF CREATING AN EMPTY KEY TO CREATE "DIRECTORY" STRUCTURE, AWS CONSOLE DOES THIS.
if bucket_exists is True and key_exists is False: if mode == 'create':
upload_s3file(module, s3, bucket, key_name, path, expiry) if bucket:
bucketrtn = bucket_check(module, s3, bucket)
if bucketrtn is True:
module.exit_json(msg="Bucket already exists.", changed=False)
else:
created = create_bucket(module, s3, bucket)
if bucket and obj:
module.fail_json(msg="mode=create can only be used for bucket creation.", failed=True)
# If state is absent and the bucket exists (doesn't matter about key since the bucket is the container), delete it. # Support for grabbing the time-expired URL for an object in S3/Walrus.
if state == 'absent': if mode == 'geturl':
if bucket_exists is True: if bucket and obj:
try: bucketrtn = bucket_check(module, s3, bucket)
for contents in bucket.list(): if bucketrtn is False:
bucket.delete_key(contents) module.fail_json(msg="Bucket %s does not exist."%bucket, failed=True)
s3.delete_bucket(bucket)
changed = True
module.exit_json(msg="Bucket and key removed.", changed=changed)
sys.exit(0)
except s3.provider.storage_response_error, e:
module.fail_json(msg= str(e))
else: else:
changed = False keyrtn = key_check(module, s3, bucket, obj)
module.exit_json(msg="Bucket and key do not exist", changed=changed) if keyrtn is True:
get_download_url(module, s3, bucket, obj, expiry)
# TO DO - ADD BUCKET DOWNLOAD OPTION else:
# # If download is specified, fetch it module.fail_json(msg="Key %s does not exist."%obj, failed=True)
# if download: else:
# if bucket_exists is True and key_exists is True: module.fail_json(msg="Bucket and Object parameters must be set", failed=True)
# try: sys.exit(0)
# getkey = bucket.lookup(key_name)
# getkey.get_contents_to_filename(path)
# url = getkey.generate_url(expiry)
# module.exit_json(msg="GET operation complete", url=url, changed=True)
# sys.exit(0)
# except s3.provider.storage_copy_error, e:
# module.fail_json(msg= str(e))
sys.exit(0) sys.exit(0)

Loading…
Cancel
Save