From ae4c246a3c3b1eff91d5acf7d4b4e131f6ecd22f Mon Sep 17 00:00:00 2001 From: msven Date: Mon, 5 Mar 2018 10:47:31 -0600 Subject: [PATCH] ec2_asg: fix #28087 and #35993 (#36679) Fixes #35993 - Changes to update_size in commit eb4cc31 made it so the group dict passed into update_size was not modified. As a result, the 'replace' call does not see an updated min_size like it previously did and doesn't pause to wait for any new instances to spin up. Instead, it moves straight into terminating old instances. Fix is to add batch_size to min_size when calling wait_for_new_inst. Fixes #28087 - Make replace_all_instances and replace_instances behave exactly the same by setting replace_instances = current list of instances when replace_all_instances used. Root cause of issue was that without lc_check terminate_batch will terminate all instances passed to it and after updating the asg size we were querying the asg again for the list of instances - so terminate batch saw the list including new ones just spun up. When creating new asg with replace_all_instances: yes and lc_check: false the instances that are initially created are then subsequently replaced. This change makes it so replace only occurs if the asg already existed. Add integration tests for #28087 and #35993. (cherry picked from commit a2b3120e85d0d079a9d46da4b43e0e3b4e4e2c84) --- lib/ansible/modules/cloud/amazon/ec2_asg.py | 18 ++- .../targets/ec2_asg/tasks/main.yml | 116 ++++++++++++++++++ 2 files changed, 132 insertions(+), 2 deletions(-) diff --git a/lib/ansible/modules/cloud/amazon/ec2_asg.py b/lib/ansible/modules/cloud/amazon/ec2_asg.py index 517f9bc83b8..6184083fd79 100644 --- a/lib/ansible/modules/cloud/amazon/ec2_asg.py +++ b/lib/ansible/modules/cloud/amazon/ec2_asg.py @@ -1145,6 +1145,7 @@ def replace(connection): desired_capacity = module.params.get('desired_capacity') lc_check = module.params.get('lc_check') replace_instances = module.params.get('replace_instances') + replace_all_instances = module.params.get('replace_all_instances') as_group = describe_autoscaling_groups(connection, group_name)[0] if desired_capacity is None: @@ -1153,6 +1154,10 @@ def replace(connection): wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'], 'viable_instances') props = get_properties(as_group) instances = props['instances'] + if replace_all_instances: + # If replacing all instances, then set replace_instances to current set + # This allows replace_instances and replace_all_instances to behave same + replace_instances = instances if replace_instances: instances = replace_instances # check to see if instances are replaceable if checking launch configs @@ -1189,7 +1194,7 @@ def replace(connection): as_group = describe_autoscaling_groups(connection, group_name)[0] update_size(connection, as_group, max_size + batch_size, min_size + batch_size, desired_capacity + batch_size) - wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'], 'viable_instances') + wait_for_new_inst(connection, group_name, wait_timeout, as_group['MinSize'] + batch_size, 'viable_instances') wait_for_elb(connection, group_name) wait_for_target_group(connection, group_name) as_group = describe_autoscaling_groups(connection, group_name)[0] @@ -1368,6 +1373,12 @@ def wait_for_new_inst(connection, group_name, wait_timeout, desired_size, prop): return props +def asg_exists(connection): + group_name = module.params.get('name') + as_group = describe_autoscaling_groups(connection, group_name) + return bool(len(as_group)) + + def main(): argument_spec = ec2_argument_spec() argument_spec.update( @@ -1425,13 +1436,16 @@ def main(): endpoint=ec2_url, **aws_connect_params) changed = create_changed = replace_changed = False + exists = asg_exists(connection) if state == 'present': create_changed, asg_properties = create_autoscaling_group(connection) elif state == 'absent': changed = delete_autoscaling_group(connection) module.exit_json(changed=changed) - if replace_all_instances or replace_instances: + + # Only replace instances if asg existed at start of call + if exists and (replace_all_instances or replace_instances): replace_changed, asg_properties = replace(connection) if create_changed or replace_changed: changed = True diff --git a/test/integration/targets/ec2_asg/tasks/main.yml b/test/integration/targets/ec2_asg/tasks/main.yml index 9b742a7712f..094458f7fdf 100644 --- a/test/integration/targets/ec2_asg/tasks/main.yml +++ b/test/integration/targets/ec2_asg/tasks/main.yml @@ -362,6 +362,122 @@ # ============================================================ + # perform rolling replace with new launch configuration and lc_check:false + + # Note - this is done async so we can query asg_facts during + # the execution. Issues #28087 and #35993 result in correct + # end result, but spin up extraneous instances during execution. + - name: "perform rolling update to new AMI with lc_check: false" + ec2_asg: + name: "{{ resource_prefix }}-asg" + launch_config_name: "{{ resource_prefix }}-lc-2" + health_check_type: EC2 + desired_capacity: 3 + min_size: 1 + max_size: 5 + health_check_period: 900 + load_balancers: [] + vpc_zone_identifier: "{{ testing_subnet.subnet.id }}" + wait_for_instances: yes + replace_all_instances: yes + replace_batch_size: 3 + lc_check: false + wait_timeout: 1800 + state: present + <<: *aws_connection_info + async: 1800 + poll: 0 + register: asg_job + + - name: get ec2_asg facts for 3 minutes + ec2_asg_facts: + name: "{{ resource_prefix }}-asg" + <<: *aws_connection_info + register: output + loop_control: + pause: 15 + with_sequence: count=12 + + - set_fact: + inst_id_json_query: 'results[*].results[*].instances[*].instance_id' + + # Since we started with 3 servers and replace all of them. + # We should see 6 servers total. + - assert: + that: + - "lookup('flattened',output|json_query(inst_id_json_query)).split(',')|unique|length == 6" + + - name: Ensure ec2_asg task completes + async_status: jid="{{ asg_job.ansible_job_id }}" + register: status + until: status.finished + retries: 200 + delay: 15 + + # ============================================================ + + - name: kill asg + ec2_asg: + name: "{{ resource_prefix }}-asg" + state: absent + <<: *aws_connection_info + async: 300 + + # Create new asg with replace_all_instances and lc_check:false + + # Note - this is done async so we can query asg_facts during + # the execution. Issues #28087 results in correct + # end result, but spin up extraneous instances during execution. + - name: "new asg with lc_check: false" + ec2_asg: + name: "{{ resource_prefix }}-asg" + launch_config_name: "{{ resource_prefix }}-lc" + health_check_type: EC2 + desired_capacity: 3 + min_size: 1 + max_size: 5 + health_check_period: 900 + load_balancers: [] + vpc_zone_identifier: "{{ testing_subnet.subnet.id }}" + wait_for_instances: yes + replace_all_instances: yes + replace_batch_size: 3 + lc_check: false + wait_timeout: 1800 + state: present + <<: *aws_connection_info + async: 1800 + poll: 0 + register: asg_job + + # Collect ec2_asg_facts for 3 minutes + - name: get ec2_asg facts + ec2_asg_facts: + name: "{{ resource_prefix }}-asg" + <<: *aws_connection_info + register: output + loop_control: + pause: 15 + with_sequence: count=12 + + - set_fact: + inst_id_json_query: 'results[*].results[*].instances[*].instance_id' + + # Get all instance_ids we saw and assert we saw number expected + # Should only see 3 (don't replace instances we just created) + - assert: + that: + - "lookup('flattened',output|json_query(inst_id_json_query)).split(',')|unique|length == 3" + + - name: Ensure ec2_asg task completes + async_status: jid="{{ asg_job.ansible_job_id }}" + register: status + until: status.finished + retries: 200 + delay: 15 + +# ============================================================ + always: - name: kill asg