From 188a13efc08fde35f29d9d93b316f63af8cd1569 Mon Sep 17 00:00:00 2001 From: Martin Joehren Date: Sun, 8 Jun 2014 00:36:55 +0200 Subject: [PATCH 1/3] azure - fix for: temporary redirect, not deleted vhds, missing locations and role types, async api calls --- library/cloud/azure | 226 ++++++++++++++++++----------- plugins/inventory/windows_azure.py | 14 +- 2 files changed, 155 insertions(+), 85 deletions(-) diff --git a/library/cloud/azure b/library/cloud/azure index 9e6b1c328b7..f9b42fa7d7e 100644 --- a/library/cloud/azure +++ b/library/cloud/azure @@ -92,6 +92,11 @@ options: wait_timeout: description: - how long before wait gives up, in seconds + default: 600 + aliases: [] + wait_timeout_redirects: + description: + - how long before wait gives up for redirects, in seconds default: 300 aliases: [] state: @@ -137,23 +142,59 @@ from urlparse import urlparse AZURE_LOCATIONS = ['East Asia', 'Southeast Asia', + 'Brazil South', 'North Europe', 'West Europe', + 'Japan West', 'East US', + 'South Central US', 'West US'] -AZURE_ROLE_SIZES = ['Small', 'Medium', 'Large', 'ExtraLarge', 'A5', 'A6', 'A7'] +AZURE_ROLE_SIZES = ['ExtraSmall', + 'Small', + 'Medium', + 'Large', + 'ExtraLarge', + 'A5', + 'A6', + 'A7', + 'A8', + 'A9', + 'Basic_A0', + 'Basic_A1', + 'Basic_A2', + 'Basic_A3', + 'Basic_A4'] try: - import azure + import azure as windows_azure + from azure import WindowsAzureError, WindowsAzureMissingResourceError from azure.servicemanagement import (ServiceManagementService, OSVirtualHardDisk, SSH, PublicKeys, PublicKey, LinuxConfigurationSet, ConfigurationSetInputEndpoints, ConfigurationSetInputEndpoint) except ImportError: - print "failed=True msg='azure required for this module'" + print + "failed=True msg='azure required for this module'" sys.exit(1) +from distutils.version import LooseVersion +from types import MethodType +import json + + +def _wait_for_completion(azure, promise, wait_timeout, msg): + if not promise: return + wait_timeout = time.time() + wait_timeout + while wait_timeout > time.time(): + operation_result = azure.get_operation_status(promise.request_id) + time.sleep(5) + if operation_result.status == "Succeeded": + return + + raise WindowsAzureError('Timed out waiting for async operation ' + msg + ' "' + str(promise.request_id) + '" to complete.') + + def get_ssh_certificate_tokens(module, ssh_cert_path): """ Returns the sha1 fingerprint and a base64-encoded PKCS12 version of the certificate. @@ -162,7 +203,7 @@ def get_ssh_certificate_tokens(module, ssh_cert_path): rc, stdout, stderr = module.run_command(['openssl', 'x509', '-in', ssh_cert_path, '-fingerprint', '-noout']) if rc != 0: module.fail_json(msg="failed to generate the key fingerprint, error was: %s" % stderr) - fingerprint = stdout.strip()[17:].replace(':','') + fingerprint = stdout.strip()[17:].replace(':', '') rc, stdout, stderr = module.run_command(['openssl', 'pkcs12', '-export', '-in', ssh_cert_path, '-nokeys', '-password', 'pass:']) if rc != 0: @@ -183,7 +224,6 @@ def create_virtual_machine(module, azure): True if a new virtual machine was created, false otherwise """ - name = module.params.get('name') hostname = module.params.get('hostname') or name + ".cloudapp.net" endpoints = module.params.get('endpoints').split(',') @@ -198,24 +238,15 @@ def create_virtual_machine(module, azure): wait_timeout = int(module.params.get('wait_timeout')) # Check if a deployment with the same name already exists - deployment = None - try: - deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) - except WindowsAzureMissingResourceError as e: - pass # no such deployment - except WindowsAzureError as e: - module.fail_json(msg="failed to create the new deployment, error was: %s" % str(e)) - - if deployment: + cloud_service_name_available = azure.check_hosted_service_name_availability(name) + if not cloud_service_name_available.result: changed = False else: changed = True - # Create cloud service if necessary try: - existing_service_names = [service.service_name for service in azure.list_hosted_services()] - if not name in existing_service_names: - azure.create_hosted_service(service_name=name, label=name, location=location) + result = azure.create_hosted_service(service_name=name, label=name, location=location) + _wait_for_completion(azure, result, wait_timeout, "create_hosted_service") except WindowsAzureError as e: module.fail_json(msg="failed to create the new service name, it already exists: %s" % str(e)) @@ -227,7 +258,9 @@ def create_virtual_machine(module, azure): if ssh_cert_path: fingerprint, pkcs12_base64 = get_ssh_certificate_tokens(module, ssh_cert_path) # Add certificate to cloud service - azure.add_service_certificate(name, pkcs12_base64, 'pfx', '') + result = azure.add_service_certificate(name, pkcs12_base64, 'pfx', '') + _wait_for_completion(azure, result, wait_timeout, "add_service_certificate") + # Create ssh config ssh_config = SSH() ssh_config.public_keys = PublicKeys() @@ -255,36 +288,22 @@ def create_virtual_machine(module, azure): # Spin up virtual machine try: - azure.create_virtual_machine_deployment(service_name=name, - deployment_name=name, - deployment_slot='production', - label=name, - role_name=name, - system_config=linux_config, - network_config=network_config, - os_virtual_hard_disk=os_hd, - role_size=role_size) + result = azure.create_virtual_machine_deployment(service_name=name, + deployment_name=name, + deployment_slot='production', + label=name, + role_name=name, + system_config=linux_config, + network_config=network_config, + os_virtual_hard_disk=os_hd, + role_size=role_size) + _wait_for_completion(azure, result, wait_timeout, "create_virtual_machine_deployment") except WindowsAzureError as e: module.fail_json(msg="failed to create the new virtual machine, error was: %s" % str(e)) - # wait here until the deployment is up - deployment = None - wait_timeout = time.time() + wait_timeout - while wait_timeout > time.time() and not deployment: - try: - deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) - except WindowsAzureMissingResourceError as e: - pass # deployment still not available - except WindowsAzureError as e: - # got a bad response from azure, wait a second and then try again - time.sleep(1) - continue - if deployment: - break - else: - time.sleep(5) - return changed + deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) + return (changed, urlparse(deployment.url).hostname, deployment) def terminate_virtual_machine(module, azure): @@ -309,10 +328,11 @@ def terminate_virtual_machine(module, azure): changed = False deployment = None + disk_names = [] try: deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) except WindowsAzureMissingResourceError as e: - pass # no such deployment + pass # no such deployment or service except WindowsAzureError as e: module.fail_json(msg="failed to find the deployment, error was: %s" % str(e)) @@ -320,25 +340,27 @@ def terminate_virtual_machine(module, azure): if deployment: changed = True try: - # TODO: Also find a way to delete old hard drives - azure.delete_deployment(service_name=name,deployment_name=name) - wait_timeout = time.time() + wait_timeout - while wait_timeout > time.time() and deployment: - try: - deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) - except WindowsAzureMissingResourceError as e: - break # successfully deleted - except WindowsAzureError as e: - # Azure api error, wait a second and retry - time.sleep(1) - continue - time.sleep(5) + # gather disk info + results = [] + for role in deployment.role_list: + role_props = azure.get_role(name, deployment.name, role.role_name) + if role_props.os_virtual_hard_disk.disk_name not in disk_names: + disk_names.append(role_props.os_virtual_hard_disk.disk_name) + + result = azure.delete_deployment(name, deployment.name) + _wait_for_completion(azure, result, wait_timeout, "delete_deployment") + + for disk_name in disk_names: + azure.delete_disk(disk_name, True) + # Now that the vm is deleted, remove the cloud service - azure.delete_hosted_service(service_name=name) + result = azure.delete_hosted_service(service_name=name) + _wait_for_completion(azure, result, wait_timeout, "delete_hosted_service") except WindowsAzureError as e: module.fail_json(msg="failed to delete the service %s, error was: %s" % (name, str(e))) - return changed + return changed, urlparse(deployment.url).hostname, deployment + def get_azure_creds(module): # Check modul args for credentials, then check environment vars @@ -351,34 +373,41 @@ def get_azure_creds(module): return subscription_id, management_cert_path + def main(): module = AnsibleModule( - argument_spec = dict( - ssh_cert_path = dict(), - name = dict(), - hostname = dict(), - location = dict(choices=AZURE_LOCATIONS), - role_size = dict(choices=AZURE_ROLE_SIZES), - subscription_id = dict(no_log=True), - storage_account = dict(), - management_cert_path = dict(), - endpoints = dict(default='22'), - user = dict(), - password = dict(), - image = dict(), - state = dict(default='present'), - wait = dict(type='bool', default=False), - wait_timeout = dict(default=300) + argument_spec=dict( + ssh_cert_path=dict(), + name=dict(), + hostname=dict(), + location=dict(choices=AZURE_LOCATIONS), + role_size=dict(choices=AZURE_ROLE_SIZES), + subscription_id=dict(no_log=True), + storage_account=dict(), + management_cert_path=dict(), + endpoints=dict(default='22'), + user=dict(), + password=dict(), + image=dict(), + state=dict(default='present'), + wait=dict(type='bool', default=False), + wait_timeout=dict(default=600), + wait_timeout_redirects=dict(default=300) ) ) - # create azure ServiceManagementService object subscription_id, management_cert_path = get_azure_creds(module) - azure = ServiceManagementService(subscription_id, management_cert_path) - if module.params.get('state') == 'absent': - changed = terminate_virtual_machine(module, azure) + wait_timeout_redirects = int(module.params.get('wait_timeout_redirects')) + if LooseVersion(windows_azure.__version__) <= "0.8.0": + #wrapper for handling redirects which the sdk <= 0.8.0 is not following + azure = Wrapper(ServiceManagementService(subscription_id, management_cert_path), wait_timeout_redirects) + else: + azure = ServiceManagementService(subscription_id, management_cert_path), wait_timeout_redirects + cloud_service_raw = None + if module.params.get('state') == 'absent': + (changed, public_dns_name, deployment) = terminate_virtual_machine(module, azure) elif module.params.get('state') == 'present': # Changed is always set to true when provisioning new instances @@ -392,10 +421,41 @@ def main(): module.fail_json(msg='location parameter is required for new instance') if not module.params.get('storage_account'): module.fail_json(msg='storage_account parameter is required for new instance') - changed = create_virtual_machine(module, azure) + (changed, public_dns_name, deployment) = create_virtual_machine(module, azure) + + module.exit_json(changed=changed, public_dns_name=public_dns_name, deployment=json.loads(json.dumps(deployment, default=lambda o: o.__dict__))) + + +class Wrapper(object): + def __init__(self, obj, wait_timeout): + self.other = obj + self.wait_timeout = wait_timeout + def __getattr__(self, name): + if hasattr(self.other, name): + func = getattr(self.other, name) + return lambda *args, **kwargs: self._wrap(func, args, kwargs) + raise AttributeError(name) + + def _wrap(self, func, args, kwargs): + if type(func) == MethodType: + result = self._handle_temporary_redirects(lambda: func(*args, **kwargs)) + else: + result = self._handle_temporary_redirects(lambda: func(self.other, *args, **kwargs)) + return result + + def _handle_temporary_redirects(self, f): + wait_timeout = time.time() + self.wait_timeout + while wait_timeout > time.time(): + try: + return f() + except WindowsAzureError as e: + if not str(e).lower().find("temporary redirect") == -1: + time.sleep(5) + pass + else: + raise e - module.exit_json(changed=changed) # import module snippets from ansible.module_utils.basic import * diff --git a/plugins/inventory/windows_azure.py b/plugins/inventory/windows_azure.py index 594e69b5c50..13029679075 100755 --- a/plugins/inventory/windows_azure.py +++ b/plugins/inventory/windows_azure.py @@ -80,8 +80,9 @@ class AzureInventory(object): elif not self.is_cache_valid(): self.do_api_calls_update_cache() - # Data to print - if self.args.list: + if self.args.list_images: + data_to_print = self.json_format_dict(self.get_images(), True) + elif self.args.list: # Display list of nodes for inventory if len(self.inventory) == 0: data_to_print = self.get_inventory_from_cache() @@ -90,6 +91,13 @@ class AzureInventory(object): print data_to_print + def get_images(self): + images = [] + for image in self.sms.list_os_images(): + if str(image.label).lower().find(self.args.list_images.lower()) >= 0: + images.append(vars(image)) + return json.loads(json.dumps(images, default=lambda o: o.__dict__)) + def is_cache_valid(self): """Determines if the cache file has expired, or if it is still valid.""" if os.path.isfile(self.cache_path_cache): @@ -131,6 +139,8 @@ class AzureInventory(object): parser = argparse.ArgumentParser(description='Produce an Ansible Inventory file based on Azure') parser.add_argument('--list', action='store_true', default=True, help='List nodes (default: True)') + parser.add_argument('--list-images', action='store', + help='Get all available images.') parser.add_argument('--refresh-cache', action='store_true', default=False, help='Force refresh of cache by making API requests to Azure (default: False - use cache files)') self.args = parser.parse_args() From 98ff559fa095e76566d178885e3a29410caacae7 Mon Sep 17 00:00:00 2001 From: Martin Joehren Date: Sun, 8 Jun 2014 21:39:31 +0200 Subject: [PATCH 2/3] azure - fixed missing return statement --- library/cloud/azure | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/library/cloud/azure b/library/cloud/azure index f9b42fa7d7e..ec9aa9ec901 100644 --- a/library/cloud/azure +++ b/library/cloud/azure @@ -223,7 +223,6 @@ def create_virtual_machine(module, azure): Returns: True if a new virtual machine was created, false otherwise """ - name = module.params.get('name') hostname = module.params.get('hostname') or name + ".cloudapp.net" endpoints = module.params.get('endpoints').split(',') @@ -302,8 +301,8 @@ def create_virtual_machine(module, azure): module.fail_json(msg="failed to create the new virtual machine, error was: %s" % str(e)) - deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) - return (changed, urlparse(deployment.url).hostname, deployment) + deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) + return (changed, urlparse(deployment.url).hostname, deployment) def terminate_virtual_machine(module, azure): From 1173a8d6af50bb48e38bf5ad307a5754e0293cd0 Mon Sep 17 00:00:00 2001 From: James Cammarata Date: Thu, 12 Jun 2014 10:25:31 -0500 Subject: [PATCH 3/3] Catch error in azure related to a failed deployment when creating a vm --- library/cloud/azure | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/library/cloud/azure b/library/cloud/azure index ec9aa9ec901..3e193e215dc 100644 --- a/library/cloud/azure +++ b/library/cloud/azure @@ -301,8 +301,11 @@ def create_virtual_machine(module, azure): module.fail_json(msg="failed to create the new virtual machine, error was: %s" % str(e)) - deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) - return (changed, urlparse(deployment.url).hostname, deployment) + try: + deployment = azure.get_deployment_by_name(service_name=name, deployment_name=name) + return (changed, urlparse(deployment.url).hostname, deployment) + except WindowsAzureError as e: + module.fail_json(msg="failed to lookup the deployment information for %s, error was: %s" % (name, str(e))) def terminate_virtual_machine(module, azure): @@ -399,7 +402,7 @@ def main(): wait_timeout_redirects = int(module.params.get('wait_timeout_redirects')) if LooseVersion(windows_azure.__version__) <= "0.8.0": - #wrapper for handling redirects which the sdk <= 0.8.0 is not following + # wrapper for handling redirects which the sdk <= 0.8.0 is not following azure = Wrapper(ServiceManagementService(subscription_id, management_cert_path), wait_timeout_redirects) else: azure = ServiceManagementService(subscription_id, management_cert_path), wait_timeout_redirects