From 0ac57941c1f54e172ac22eda9f6575cf73afcec2 Mon Sep 17 00:00:00 2001 From: Mario Lenz Date: Mon, 26 Aug 2019 15:40:38 +0200 Subject: [PATCH] VMware: Add missing HA admission control policies in vmware_cluster_ha (#60233) * Add missing HA admission control policies * Add new parameter host_isolation_response * Bugfix: 'das_vm_config' referenced before assignment * Implement test cases --- .../modules/cloud/vmware/vmware_cluster_ha.py | 226 +++++++++++++++--- .../targets/vmware_cluster_ha/tasks/main.yml | 83 ++++++- 2 files changed, 269 insertions(+), 40 deletions(-) diff --git a/lib/ansible/modules/cloud/vmware/vmware_cluster_ha.py b/lib/ansible/modules/cloud/vmware/vmware_cluster_ha.py index 62e7d85b0ae..b1c7106e007 100644 --- a/lib/ansible/modules/cloud/vmware/vmware_cluster_ha.py +++ b/lib/ansible/modules/cloud/vmware/vmware_cluster_ha.py @@ -7,6 +7,7 @@ # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) from __future__ import absolute_import, division, print_function + __metaclass__ = type ANSIBLE_METADATA = { @@ -65,19 +66,66 @@ options: type: str choices: ['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'] default: 'vmMonitoringDisabled' - ha_failover_level: + host_isolation_response: description: - - Number of host failures that should be tolerated, still guaranteeing sufficient resources to - restart virtual machines on available hosts. - - Accepts integer values only. - type: int - default: 2 - ha_admission_control_enabled: + - Indicates whether or not VMs should be powered off if a host determines that it is isolated from the rest of the compute resource. + - If set to C(none), do not power off VMs in the event of a host network isolation. + - If set to C(powerOff), power off VMs in the event of a host network isolation. + - If set to C(shutdown), shut down the guest operating system of VMs in the event of a host network isolation.
+ type: str + choices: ['none', 'powerOff', 'shutdown'] + default: 'none' + slot_based_admission_control: description: - - Determines if strict admission control is enabled. - - It is recommended to set this parameter to C(True), please refer documentation for more details. - default: True - type: bool + - Configure slot based admission control policy. + - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive. + suboptions: + failover_level: + description: + - Number of host failures that should be tolerated. + type: int + required: true + type: dict + reservation_based_admission_control: + description: + - Configure reservation based admission control policy. + - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive. + suboptions: + failover_level: + description: + - Number of host failures that should be tolerated. + type: int + required: true + auto_compute_percentages: + description: + - By default, C(failover_level) is used to calculate C(cpu_failover_resources_percent) and C(memory_failover_resources_percent). + To override the percentage values, the user has to set this field to false. + type: bool + default: true + cpu_failover_resources_percent: + description: + - Percentage of CPU resources in the cluster to reserve for failover. + Ignored if C(auto_compute_percentages) is not set to false. + type: int + default: 50 + memory_failover_resources_percent: + description: + - Percentage of memory resources in the cluster to reserve for failover. + Ignored if C(auto_compute_percentages) is not set to false. + type: int + default: 50 + type: dict + failover_host_admission_control: + description: + - Configure dedicated failover hosts. + - C(slot_based_admission_control), C(reservation_based_admission_control) and C(failover_host_admission_control) are mutually exclusive.
+ suboptions: + failover_hosts: + description: + - List of dedicated failover hosts. + type: list + required: true + type: dict ha_vm_failure_interval: description: - The number of seconds after which virtual machine is declared as failed @@ -129,7 +177,7 @@ extends_documentation_fragment: vmware.documentation ''' EXAMPLES = r""" -- name: Enable HA +- name: Enable HA without admission control vmware_cluster_ha: hostname: '{{ vcenter_hostname }}' username: '{{ vcenter_username }}' @@ -139,7 +187,7 @@ EXAMPLES = r""" enable_ha: yes delegate_to: localhost -- name: Enable HA and VM monitoring +- name: Enable HA and VM monitoring without admission control vmware_cluster_ha: hostname: "{{ vcenter_hostname }}" username: "{{ vcenter_username }}" @@ -151,6 +199,21 @@ EXAMPLES = r""" ha_vm_monitoring: vmMonitoringOnly enable_vsan: True delegate_to: localhost + +- name: Enable HA with admission control reserving 50% of resources for HA + vmware_cluster_ha: + hostname: '{{ vcenter_hostname }}' + username: '{{ vcenter_username }}' + password: '{{ vcenter_password }}' + datacenter_name: datacenter + cluster_name: cluster + enable_ha: yes + reservation_based_admission_control: + auto_compute_percentages: False + failover_level: 1 + cpu_failover_resources_percent: 50 + memory_failover_resources_percent: 50 + delegate_to: localhost """ RETURN = r"""# @@ -175,6 +238,15 @@ class VMwareCluster(PyVmomi): self.enable_ha = module.params['enable_ha'] self.datacenter = None self.cluster = None + self.host_isolation_response = getattr(vim.cluster.DasVmSettings.IsolationResponse, self.params.get('host_isolation_response')) + + if self.enable_ha and ( + self.params.get('slot_based_admission_control') or + self.params.get('reservation_based_admission_control') or + self.params.get('failover_host_admission_control')): + self.ha_admission_control = True + else: + self.ha_admission_control = False self.datacenter = find_datacenter_by_name(self.content, self.datacenter_name) if self.datacenter is 
None: @@ -184,6 +256,23 @@ class VMwareCluster(PyVmomi): if self.cluster is None: self.module.fail_json(msg="Cluster %s does not exist." % self.cluster_name) + def get_failover_hosts(self): + """ + Get failover hosts for failover_host_admission_control policy + Returns: List of ESXi hosts sorted by name + + """ + policy = self.params.get('failover_host_admission_control') + hosts = [] + all_hosts = dict((h.name, h) for h in self.get_all_hosts_by_cluster(self.cluster_name)) + for host in policy.get('failover_hosts'): + if host in all_hosts: + hosts.append(all_hosts.get(host)) + else: + self.module.fail_json(msg="Host %s is not a member of cluster %s." % (host, self.cluster_name)) + hosts.sort(key=lambda h: h.name) + return hosts + def check_ha_config_diff(self): """ Check HA configuration diff @@ -191,19 +280,47 @@ class VMwareCluster(PyVmomi): """ das_config = self.cluster.configurationEx.dasConfig - if das_config.enabled != self.enable_ha or \ - das_config.admissionControlPolicy.failoverLevel != self.params.get('ha_failover_level') or \ - das_config.vmMonitoring != self.params.get('ha_vm_monitoring') or \ - das_config.hostMonitoring != self.params.get('ha_host_monitoring') or \ - das_config.admissionControlPolicy.failoverLevel != self.params.get('ha_failover_level') or \ - das_config.admissionControlEnabled != self.params.get('ha_admission_control_enabled') or \ - das_config.defaultVmSettings.restartPriority != self.params.get('ha_restart_priority') or \ - das_config.defaultVmSettings.vmToolsMonitoringSettings.vmMonitoring != self.params.get('ha_vm_monitoring') or \ - das_config.defaultVmSettings.vmToolsMonitoringSettings.failureInterval != self.params.get('ha_vm_failure_interval') or \ - das_config.defaultVmSettings.vmToolsMonitoringSettings.minUpTime != self.params.get('ha_vm_min_up_time') or \ - das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailures != self.params.get('ha_vm_max_failures') or \ - 
das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailureWindow != self.params.get('ha_vm_max_failure_window'): + if das_config.enabled != self.enable_ha: return True + + if self.enable_ha and ( + das_config.vmMonitoring != self.params.get('ha_vm_monitoring') or + das_config.hostMonitoring != self.params.get('ha_host_monitoring') or + das_config.admissionControlEnabled != self.ha_admission_control or + das_config.defaultVmSettings.restartPriority != self.params.get('ha_restart_priority') or + das_config.defaultVmSettings.isolationResponse != self.host_isolation_response or + das_config.defaultVmSettings.vmToolsMonitoringSettings.vmMonitoring != self.params.get('ha_vm_monitoring') or + das_config.defaultVmSettings.vmToolsMonitoringSettings.failureInterval != self.params.get('ha_vm_failure_interval') or + das_config.defaultVmSettings.vmToolsMonitoringSettings.minUpTime != self.params.get('ha_vm_min_up_time') or + das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailures != self.params.get('ha_vm_max_failures') or + das_config.defaultVmSettings.vmToolsMonitoringSettings.maxFailureWindow != self.params.get('ha_vm_max_failure_window')): + return True + + if self.ha_admission_control: + if self.params.get('slot_based_admission_control'): + policy = self.params.get('slot_based_admission_control') + if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverLevelAdmissionControlPolicy) or \ + das_config.admissionControlPolicy.failoverLevel != policy.get('failover_level'): + return True + elif self.params.get('reservation_based_admission_control'): + policy = self.params.get('reservation_based_admission_control') + auto_compute_percentages = policy.get('auto_compute_percentages') + if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverResourcesAdmissionControlPolicy) or \ + das_config.admissionControlPolicy.autoComputePercentages != auto_compute_percentages or \ + das_config.admissionControlPolicy.failoverLevel != 
policy.get('failover_level'): + return True + if not auto_compute_percentages: + if das_config.admissionControlPolicy.cpuFailoverResourcesPercent != policy.get('cpu_failover_resources_percent') or \ + das_config.admissionControlPolicy.memoryFailoverResourcesPercent != policy.get('memory_failover_resources_percent'): + return True + elif self.params.get('failover_host_admission_control'): + policy = self.params.get('failover_host_admission_control') + if not isinstance(das_config.admissionControlPolicy, vim.cluster.FailoverHostAdmissionControlPolicy): + return True + das_config.admissionControlPolicy.failoverHosts.sort(key=lambda h: h.name) + if das_config.admissionControlPolicy.failoverHosts != self.get_failover_hosts(): + return True + return False def configure_ha(self): @@ -218,15 +335,11 @@ class VMwareCluster(PyVmomi): cluster_config_spec = vim.cluster.ConfigSpecEx() cluster_config_spec.dasConfig = vim.cluster.DasConfigInfo() cluster_config_spec.dasConfig.enabled = self.enable_ha - cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverLevelAdmissionControlPolicy() - cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = self.params.get('ha_failover_level') - ha_vm_monitoring = self.params.get('ha_vm_monitoring') - das_vm_config = None - if ha_vm_monitoring in ['vmMonitoringOnly', 'vmAndAppMonitoring']: + if self.enable_ha: vm_tool_spec = vim.cluster.VmToolsMonitoringSettings() vm_tool_spec.enabled = True - vm_tool_spec.vmMonitoring = ha_vm_monitoring + vm_tool_spec.vmMonitoring = self.params.get('ha_vm_monitoring') vm_tool_spec.failureInterval = self.params.get('ha_vm_failure_interval') vm_tool_spec.minUpTime = self.params.get('ha_vm_min_up_time') vm_tool_spec.maxFailures = self.params.get('ha_vm_max_failures') @@ -234,14 +347,36 @@ class VMwareCluster(PyVmomi): das_vm_config = vim.cluster.DasVmSettings() das_vm_config.restartPriority = self.params.get('ha_restart_priority') - das_vm_config.isolationResponse = None + 
das_vm_config.isolationResponse = self.host_isolation_response das_vm_config.vmToolsMonitoringSettings = vm_tool_spec + cluster_config_spec.dasConfig.defaultVmSettings = das_vm_config + + cluster_config_spec.dasConfig.admissionControlEnabled = self.ha_admission_control - cluster_config_spec.dasConfig.admissionControlEnabled = self.params.get('ha_admission_control_enabled') + if self.ha_admission_control: + if self.params.get('slot_based_admission_control'): + cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverLevelAdmissionControlPolicy() + policy = self.params.get('slot_based_admission_control') + cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = policy.get('failover_level') + elif self.params.get('reservation_based_admission_control'): + cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverResourcesAdmissionControlPolicy() + policy = self.params.get('reservation_based_admission_control') + auto_compute_percentages = policy.get('auto_compute_percentages') + cluster_config_spec.dasConfig.admissionControlPolicy.autoComputePercentages = auto_compute_percentages + cluster_config_spec.dasConfig.admissionControlPolicy.failoverLevel = policy.get('failover_level') + if not auto_compute_percentages: + cluster_config_spec.dasConfig.admissionControlPolicy.cpuFailoverResourcesPercent = \ + policy.get('cpu_failover_resources_percent') + cluster_config_spec.dasConfig.admissionControlPolicy.memoryFailoverResourcesPercent = \ + policy.get('memory_failover_resources_percent') + elif self.params.get('failover_host_admission_control'): + cluster_config_spec.dasConfig.admissionControlPolicy = vim.cluster.FailoverHostAdmissionControlPolicy() + policy = self.params.get('failover_host_admission_control') + cluster_config_spec.dasConfig.admissionControlPolicy.failoverHosts = self.get_failover_hosts() cluster_config_spec.dasConfig.hostMonitoring = self.params.get('ha_host_monitoring') - 
cluster_config_spec.dasConfig.vmMonitoring = ha_vm_monitoring - cluster_config_spec.dasConfig.defaultVmSettings = das_vm_config + cluster_config_spec.dasConfig.vmMonitoring = self.params.get('ha_vm_monitoring') + try: task = self.cluster.ReconfigureComputeResource_Task(cluster_config_spec, True) changed, result = wait_for_task(task) @@ -267,10 +402,12 @@ def main(): datacenter=dict(type='str', required=True, aliases=['datacenter_name']), # HA enable_ha=dict(type='bool', default=False), - ha_failover_level=dict(type='int', default=2), ha_host_monitoring=dict(type='str', default='enabled', choices=['enabled', 'disabled']), + host_isolation_response=dict(type='str', + default='none', + choices=['none', 'powerOff', 'shutdown']), # HA VM Monitoring related parameters ha_vm_monitoring=dict(type='str', choices=['vmAndAppMonitoring', 'vmMonitoringOnly', 'vmMonitoringDisabled'], @@ -283,12 +420,27 @@ def main(): ha_restart_priority=dict(type='str', choices=['high', 'low', 'medium', 'disabled'], default='medium'), - ha_admission_control_enabled=dict(type='bool', default=True), + # HA Admission Control related parameters + slot_based_admission_control=dict(type='dict', options=dict( + failover_level=dict(type='int', required=True), + )), + reservation_based_admission_control=dict(type='dict', options=dict( + auto_compute_percentages=dict(type='bool', default=True), + failover_level=dict(type='int', required=True), + cpu_failover_resources_percent=dict(type='int', default=50), + memory_failover_resources_percent=dict(type='int', default=50), + )), + failover_host_admission_control=dict(type='dict', options=dict( + failover_hosts=dict(type='list', elements='str', required=True), + )), )) module = AnsibleModule( argument_spec=argument_spec, supports_check_mode=True, + mutually_exclusive=[ + ['slot_based_admission_control', 'reservation_based_admission_control', 'failover_host_admission_control'] + ] ) vmware_cluster_ha = VMwareCluster(module) diff --git 
a/test/integration/targets/vmware_cluster_ha/tasks/main.yml b/test/integration/targets/vmware_cluster_ha/tasks/main.yml index d7be523f469..d132a41d40e 100644 --- a/test/integration/targets/vmware_cluster_ha/tasks/main.yml +++ b/test/integration/targets/vmware_cluster_ha/tasks/main.yml @@ -33,7 +33,84 @@ that: - "{{ cluster_ha_result_0001.changed == true }}" -# Testcase 0002: Disable HA +# Testcase 0002: Enable Slot based Admission Control +- name: Enable Slot based Admission Control + vmware_cluster_ha: + validate_certs: False + hostname: "{{ vcenter_hostname }}" + username: "{{ vcenter_username }}" + password: "{{ vcenter_password }}" + datacenter_name: "{{ dc1 }}" + cluster_name: test_cluster_ha + enable_ha: yes + slot_based_admission_control: + failover_level: 1 + register: cluster_ha_result_0002 + +- name: Ensure Admission Control is enabled + assert: + that: + - "{{ cluster_ha_result_0002.changed == true }}" + +# Testcase 0003: Enable Cluster resource Percentage based Admission Control +- name: Enable Cluster resource Percentage based Admission Control + vmware_cluster_ha: + validate_certs: False + hostname: "{{ vcenter_hostname }}" + username: "{{ vcenter_username }}" + password: "{{ vcenter_password }}" + datacenter_name: "{{ dc1 }}" + cluster_name: test_cluster_ha + enable_ha: yes + reservation_based_admission_control: + auto_compute_percentages: false + failover_level: 1 + cpu_failover_resources_percent: 33 + memory_failover_resources_percent: 33 + register: cluster_ha_result_0003 + +- name: Ensure Admission Control is enabled + assert: + that: + - "{{ cluster_ha_result_0003.changed == true }}" + +# Testcase 0004: Set Isolation Response to powerOff +- name: Set Isolation Response to powerOff + vmware_cluster_ha: + validate_certs: False + hostname: "{{ vcenter_hostname }}" + username: "{{ vcenter_username }}" + password: "{{ vcenter_password }}" + datacenter_name: "{{ dc1 }}" + cluster_name: test_cluster_ha + enable_ha: yes + host_isolation_response:
'powerOff' + register: cluster_ha_result_0004 + +- name: Ensure Isolation Response is enabled + assert: + that: + - "{{ cluster_ha_result_0004.changed == true }}" + +# Testcase 0005: Set Isolation Response to shutdown +- name: Set Isolation Response to shutdown + vmware_cluster_ha: + validate_certs: False + hostname: "{{ vcenter_hostname }}" + username: "{{ vcenter_username }}" + password: "{{ vcenter_password }}" + datacenter_name: "{{ dc1 }}" + cluster_name: test_cluster_ha + enable_ha: yes + host_isolation_response: 'shutdown' + register: cluster_ha_result_0005 + +- name: Ensure Isolation Response is enabled + assert: + that: + - "{{ cluster_ha_result_0005.changed == true }}" + +# Testcase 0006: Disable HA - name: Disable HA vmware_cluster_ha: validate_certs: False @@ -43,12 +120,12 @@ datacenter_name: "{{ dc1 }}" cluster_name: test_cluster_ha enable_ha: no - register: cluster_ha_result_0002 + register: cluster_ha_result_0006 - name: Ensure HA is disabled assert: that: - - "{{ cluster_ha_result_0002.changed == true }}" + - "{{ cluster_ha_result_0006.changed == true }}" # Delete test cluster - name: Delete test cluster