Patch summary (free text ahead of the first file header; patch tools skip it):
  * Adds a changelog fragment for the redshift module.
  * Grants iam:CreateServiceLinkedRole for the Redshift service-linked role
    in the database testing policy.
  * Wraps the describe/create/delete/modify Redshift client calls in
    AWSRetry.jittered_backoff helpers; delete and modify additionally retry
    on 'InvalidClusterState'.
  * Builds argument_spec as a plain dict instead of via ec2_argument_spec().
  * Removes the 'unstable' alias from the redshift integration target.
Review fix folded in: the describe-after-modify error handler in
modify_cluster now calls module.fail_json_aws(e, ...) like every other
handler in this patch; it previously passed the exception positionally to
module.fail_json. Hunk line counts are unchanged by this fix.
NOTE(review): indentation was reconstructed from a whitespace-collapsed copy
of this patch; confirm column positions against the target files.

diff --git a/changelogs/fragments/66779-redshift-backoff.yml b/changelogs/fragments/66779-redshift-backoff.yml
new file mode 100644
index 00000000000..a18c0be7cd4
--- /dev/null
+++ b/changelogs/fragments/66779-redshift-backoff.yml
@@ -0,0 +1,2 @@
+minor_changes:
+- 'redshift: Add AWSRetry calls for errors outside our control'
diff --git a/hacking/aws_config/testing_policies/database-policy.json b/hacking/aws_config/testing_policies/database-policy.json
index 39fd43ac494..2f15baaf3ed 100644
--- a/hacking/aws_config/testing_policies/database-policy.json
+++ b/hacking/aws_config/testing_policies/database-policy.json
@@ -11,6 +11,15 @@
                 }
             }
         },
+        {
+            "Action": "iam:CreateServiceLinkedRole",
+            "Effect": "Allow",
+            "Resource": "arn:aws:iam::*:role/aws-service-role/redshift.amazonaws.com/AWSServiceRoleForRedshift",
+            "Condition": {
+                "StringLike": {
+                    "iam:AWSServiceName": "redshift.amazonaws.com"}
+            }
+        },
         {
             "Sid": "AllowRDSReadEverywhere",
             "Effect": "Allow",
diff --git a/lib/ansible/modules/cloud/amazon/redshift.py b/lib/ansible/modules/cloud/amazon/redshift.py
index fbc5ce0cf8a..48d1d46abca 100644
--- a/lib/ansible/modules/cloud/amazon/redshift.py
+++ b/lib/ansible/modules/cloud/amazon/redshift.py
@@ -261,8 +261,9 @@ cluster:
 try:
     import botocore
 except ImportError:
-    pass  # handled by AnsibleAWSModule
-from ansible.module_utils.ec2 import ec2_argument_spec, snake_dict_to_camel_dict
+    pass  # caught by AnsibleAWSModule
+
+from ansible.module_utils.ec2 import AWSRetry, snake_dict_to_camel_dict
 from ansible.module_utils.aws.core import AnsibleAWSModule, is_boto3_error_code
 
 
@@ -303,6 +304,45 @@ def _collect_facts(resource):
     return facts
 
 
+@AWSRetry.jittered_backoff()
+def _describe_cluster(redshift, identifier):
+    '''
+    Basic wrapper around describe_clusters with a retry applied
+    '''
+    return redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+
+
+@AWSRetry.jittered_backoff()
+def _create_cluster(redshift, **kwargs):
+    '''
+    Basic wrapper around create_cluster with a retry applied
+    '''
+    return redshift.create_cluster(**kwargs)
+
+
+# Simple wrapper around delete, try to avoid throwing an error if some other
+# operation is in progress
+@AWSRetry.jittered_backoff(catch_extra_error_codes=['InvalidClusterState'])
+def _delete_cluster(redshift, **kwargs):
+    '''
+    Basic wrapper around delete_cluster with a retry applied.
+    Explicitly catches 'InvalidClusterState' (~ Operation in progress) so that
+    we can still delete a cluster if some kind of change operation was in
+    progress.
+    '''
+    return redshift.delete_cluster(**kwargs)
+
+
+@AWSRetry.jittered_backoff(catch_extra_error_codes=['InvalidClusterState'])
+def _modify_cluster(redshift, **kwargs):
+    '''
+    Basic wrapper around modify_cluster with a retry applied.
+    Explicitly catches 'InvalidClusterState' (~ Operation in progress) for cases
+    where another modification is still in progress
+    '''
+    return redshift.modify_cluster(**kwargs)
+
+
 def create_cluster(module, redshift):
     """
     Create a new cluster
@@ -340,15 +380,16 @@ def create_cluster(module, redshift):
         params['d_b_name'] = d_b_name
 
     try:
-        redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        _describe_cluster(redshift, identifier)
         changed = False
     except is_boto3_error_code('ClusterNotFound'):
         try:
-            redshift.create_cluster(ClusterIdentifier=identifier,
-                                    NodeType=node_type,
-                                    MasterUsername=username,
-                                    MasterUserPassword=password,
-                                    **snake_dict_to_camel_dict(params, capitalize_first=True))
+            _create_cluster(redshift,
+                            ClusterIdentifier=identifier,
+                            NodeType=node_type,
+                            MasterUsername=username,
+                            MasterUserPassword=password,
+                            **snake_dict_to_camel_dict(params, capitalize_first=True))
         except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
             module.fail_json_aws(e, msg="Failed to create cluster")
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:  # pylint: disable=duplicate-except
@@ -364,7 +405,7 @@ def create_cluster(module, redshift):
         except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
             module.fail_json_aws(e, msg="Timeout waiting for the cluster creation")
     try:
-        resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        resource = _describe_cluster(redshift, identifier)
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json_aws(e, msg="Failed to describe cluster")
 
@@ -381,7 +422,7 @@ def describe_cluster(module, redshift):
     identifier = module.params.get('identifier')
 
     try:
-        resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        resource = _describe_cluster(redshift, identifier)
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json_aws(e, msg="Error describing cluster")
 
@@ -409,10 +450,10 @@ def delete_cluster(module, redshift):
             params[p] = module.params.get(p)
 
     try:
-        redshift.delete_cluster(
+        _delete_cluster(
+            redshift,
             ClusterIdentifier=identifier,
-            **snake_dict_to_camel_dict(params, capitalize_first=True)
-        )
+            **snake_dict_to_camel_dict(params, capitalize_first=True))
     except is_boto3_error_code('ClusterNotFound'):
         return(False, {})
     except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:  # pylint: disable=duplicate-except
@@ -459,8 +500,10 @@ def modify_cluster(module, redshift):
     # enhanced_vpc_routing parameter change needs an exclusive request
     if module.params.get('enhanced_vpc_routing') is not None:
         try:
-            redshift.modify_cluster(ClusterIdentifier=identifier,
-                                    EnhancedVpcRouting=module.params.get('enhanced_vpc_routing'))
+            _modify_cluster(
+                redshift,
+                ClusterIdentifier=identifier,
+                EnhancedVpcRouting=module.params.get('enhanced_vpc_routing'))
         except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
             module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
         if wait:
@@ -478,8 +521,10 @@ def modify_cluster(module, redshift):
 
     # change the rest
     try:
-        redshift.modify_cluster(ClusterIdentifier=identifier,
-                                **snake_dict_to_camel_dict(params, capitalize_first=True))
+        _modify_cluster(
+            redshift,
+            ClusterIdentifier=identifier,
+            **snake_dict_to_camel_dict(params, capitalize_first=True))
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
 
@@ -497,7 +542,7 @@ def modify_cluster(module, redshift):
         except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
             module.fail_json_aws(e, msg="Timeout waiting for cluster modification")
     try:
-        resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        resource = _describe_cluster(redshift, identifier)
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
-        module.fail_json(e, msg="Couldn't modify redshift cluster %s " % identifier)
+        module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
 
@@ -505,8 +550,7 @@ def modify_cluster(module, redshift):
 
 
 def main():
-    argument_spec = ec2_argument_spec()
-    argument_spec.update(dict(
+    argument_spec = dict(
         command=dict(choices=['create', 'facts', 'delete', 'modify'], required=True),
         identifier=dict(required=True),
         node_type=dict(choices=['ds1.xlarge', 'ds1.8xlarge', 'ds2.xlarge',
@@ -538,7 +582,7 @@ def main():
         enhanced_vpc_routing=dict(type='bool', default=False),
         wait=dict(type='bool', default=False),
         wait_timeout=dict(type='int', default=300),
-    ))
+    )
 
     required_if = [
         ('command', 'delete', ['skip_final_cluster_snapshot']),
diff --git a/test/integration/targets/redshift/aliases b/test/integration/targets/redshift/aliases
index ff9ed177c8c..a112c3d1bb2 100644
--- a/test/integration/targets/redshift/aliases
+++ b/test/integration/targets/redshift/aliases
@@ -1,3 +1,2 @@
-unstable
 cloud/aws
 shippable/aws/group1