diff --git a/changelogs/fragments/66779-redshift-backoff.yml b/changelogs/fragments/66779-redshift-backoff.yml
new file mode 100644
index 00000000000..a18c0be7cd4
--- /dev/null
+++ b/changelogs/fragments/66779-redshift-backoff.yml
@@ -0,0 +1,2 @@
+minor_changes:
+- 'redshift: Add AWSRetry calls for errors outside our control'
diff --git a/hacking/aws_config/testing_policies/database-policy.json b/hacking/aws_config/testing_policies/database-policy.json
index 39fd43ac494..2f15baaf3ed 100644
--- a/hacking/aws_config/testing_policies/database-policy.json
+++ b/hacking/aws_config/testing_policies/database-policy.json
@@ -11,6 +11,15 @@
                 }
             }
         },
+        {
+            "Action": "iam:CreateServiceLinkedRole",
+            "Effect": "Allow",
+            "Resource": "arn:aws:iam::*:role/aws-service-role/redshift.amazonaws.com/AWSServiceRoleForRedshift",
+            "Condition": {
+                "StringLike": {
+                    "iam:AWSServiceName": "redshift.amazonaws.com"}
+                }
+        },
         {
             "Sid": "AllowRDSReadEverywhere",
             "Effect": "Allow",
diff --git a/lib/ansible/modules/cloud/amazon/redshift.py b/lib/ansible/modules/cloud/amazon/redshift.py
index fbc5ce0cf8a..48d1d46abca 100644
--- a/lib/ansible/modules/cloud/amazon/redshift.py
+++ b/lib/ansible/modules/cloud/amazon/redshift.py
@@ -261,8 +261,9 @@ cluster:
 try:
     import botocore
 except ImportError:
-    pass  # handled by AnsibleAWSModule
-from ansible.module_utils.ec2 import ec2_argument_spec, snake_dict_to_camel_dict
+    pass  # caught by AnsibleAWSModule
+
+from ansible.module_utils.ec2 import AWSRetry, snake_dict_to_camel_dict
 from ansible.module_utils.aws.core import AnsibleAWSModule, is_boto3_error_code
 
 
@@ -303,6 +304,45 @@ def _collect_facts(resource):
     return facts
 
 
+@AWSRetry.jittered_backoff()
+def _describe_cluster(redshift, identifier):
+    '''
+    Basic wrapper around describe_clusters with a retry applied
+    '''
+    return redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+
+
+@AWSRetry.jittered_backoff()
+def _create_cluster(redshift, **kwargs):
+    '''
+    Basic wrapper around create_cluster with a retry applied
+    '''
+    return redshift.create_cluster(**kwargs)
+
+
+# Simple wrapper around delete, try to avoid throwing an error if some other
+# operation is in progress
+@AWSRetry.jittered_backoff(catch_extra_error_codes=['InvalidClusterState'])
+def _delete_cluster(redshift, **kwargs):
+    '''
+    Basic wrapper around delete_cluster with a retry applied.
+    Explicitly catches 'InvalidClusterState' (~ Operation in progress) so that
+    we can still delete a cluster if some kind of change operation was in
+    progress.
+    '''
+    return redshift.delete_cluster(**kwargs)
+
+
+@AWSRetry.jittered_backoff(catch_extra_error_codes=['InvalidClusterState'])
+def _modify_cluster(redshift, **kwargs):
+    '''
+    Basic wrapper around modify_cluster with a retry applied.
+    Explicitly catches 'InvalidClusterState' (~ Operation in progress) for cases
+    where another modification is still in progress
+    '''
+    return redshift.modify_cluster(**kwargs)
+
+
 def create_cluster(module, redshift):
     """
     Create a new cluster
@@ -340,15 +380,16 @@ def create_cluster(module, redshift):
         params['d_b_name'] = d_b_name
 
     try:
-        redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        _describe_cluster(redshift, identifier)
         changed = False
     except is_boto3_error_code('ClusterNotFound'):
         try:
-            redshift.create_cluster(ClusterIdentifier=identifier,
-                                    NodeType=node_type,
-                                    MasterUsername=username,
-                                    MasterUserPassword=password,
-                                    **snake_dict_to_camel_dict(params, capitalize_first=True))
+            _create_cluster(redshift,
+                            ClusterIdentifier=identifier,
+                            NodeType=node_type,
+                            MasterUsername=username,
+                            MasterUserPassword=password,
+                            **snake_dict_to_camel_dict(params, capitalize_first=True))
         except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
             module.fail_json_aws(e, msg="Failed to create cluster")
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:  # pylint: disable=duplicate-except
@@ -364,7 +405,7 @@ def create_cluster(module, redshift):
         except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
             module.fail_json_aws(e, msg="Timeout waiting for the cluster creation")
     try:
-        resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        resource = _describe_cluster(redshift, identifier)
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json_aws(e, msg="Failed to describe cluster")
 
@@ -381,7 +422,7 @@ def describe_cluster(module, redshift):
     identifier = module.params.get('identifier')
 
     try:
-        resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        resource = _describe_cluster(redshift, identifier)
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json_aws(e, msg="Error describing cluster")
 
@@ -409,10 +450,10 @@ def delete_cluster(module, redshift):
                 params[p] = module.params.get(p)
 
     try:
-        redshift.delete_cluster(
+        _delete_cluster(
+            redshift,
             ClusterIdentifier=identifier,
-            **snake_dict_to_camel_dict(params, capitalize_first=True)
-        )
+            **snake_dict_to_camel_dict(params, capitalize_first=True))
     except is_boto3_error_code('ClusterNotFound'):
         return(False, {})
     except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:  # pylint: disable=duplicate-except
@@ -459,8 +500,10 @@ def modify_cluster(module, redshift):
     # enhanced_vpc_routing parameter change needs an exclusive request
     if module.params.get('enhanced_vpc_routing') is not None:
         try:
-            redshift.modify_cluster(ClusterIdentifier=identifier,
-                                    EnhancedVpcRouting=module.params.get('enhanced_vpc_routing'))
+            _modify_cluster(
+                redshift,
+                ClusterIdentifier=identifier,
+                EnhancedVpcRouting=module.params.get('enhanced_vpc_routing'))
         except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
             module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
     if wait:
@@ -478,8 +521,10 @@ def modify_cluster(module, redshift):
 
     # change the rest
     try:
-        redshift.modify_cluster(ClusterIdentifier=identifier,
-                                **snake_dict_to_camel_dict(params, capitalize_first=True))
+        _modify_cluster(
+            redshift,
+            ClusterIdentifier=identifier,
+            **snake_dict_to_camel_dict(params, capitalize_first=True))
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json_aws(e, msg="Couldn't modify redshift cluster %s " % identifier)
 
@@ -497,7 +542,7 @@ def modify_cluster(module, redshift):
         except (botocore.exceptions.ClientError, botocore.exceptions.BotoCoreError) as e:
             module.fail_json_aws(e, msg="Timeout waiting for cluster modification")
     try:
-        resource = redshift.describe_clusters(ClusterIdentifier=identifier)['Clusters'][0]
+        resource = _describe_cluster(redshift, identifier)
     except (botocore.exceptions.BotoCoreError, botocore.exceptions.ClientError) as e:
         module.fail_json(e, msg="Couldn't modify redshift cluster %s " % identifier)
 
@@ -505,8 +550,7 @@ def modify_cluster(module, redshift):
 
 
 def main():
-    argument_spec = ec2_argument_spec()
-    argument_spec.update(dict(
+    argument_spec = dict(
         command=dict(choices=['create', 'facts', 'delete', 'modify'], required=True),
         identifier=dict(required=True),
         node_type=dict(choices=['ds1.xlarge', 'ds1.8xlarge', 'ds2.xlarge',
@@ -538,7 +582,7 @@ def main():
         enhanced_vpc_routing=dict(type='bool', default=False),
         wait=dict(type='bool', default=False),
         wait_timeout=dict(type='int', default=300),
-    ))
+    )
 
     required_if = [
         ('command', 'delete', ['skip_final_cluster_snapshot']),
diff --git a/test/integration/targets/redshift/aliases b/test/integration/targets/redshift/aliases
index ff9ed177c8c..a112c3d1bb2 100644
--- a/test/integration/targets/redshift/aliases
+++ b/test/integration/targets/redshift/aliases
@@ -1,3 +1,2 @@
-unstable
 cloud/aws
 shippable/aws/group1