From 52959ebdc18bfe96de083b5317b32c5216abf80a Mon Sep 17 00:00:00 2001 From: Dag Wieers Date: Wed, 1 Mar 2017 20:00:49 +0100 Subject: [PATCH] wait_for_connection: Wait for system to become reachable (#20011) * WIP: wait_for_connection: Wait for system to be reachable This action plugin allows checking when a system is back online and usable by Ansible. As an example, when doing a SysPrep and running Enable-WinRM.ps1, it takes between 10 and 20 seconds between the WinRM TCP port opening and the system actually being able to serve Ansible requests. This time is variable and depends on the boot process. The current implementation is specific to Windows (WinRM) only; this will be fixed shortly. This fixes #19998 * Support other transport types * Various improvements - Fix reported typo - Add transport_test support in accelerate plugin - Ensure port is an integer * Improve examples * Small fixes - Use correct ConfigureRemotingForAnsible.ps1 script name - Only use win_ping when remote shell is known to be PowerShell - Add integration tests to CI framework --- .../utilities/logic/wait_for_connection.py | 108 ++++++++++++++++ .../plugins/action/wait_for_connection.py | 119 ++++++++++++++++++ lib/ansible/plugins/connection/accelerate.py | 8 ++ .../plugins/connection/paramiko_ssh.py | 8 ++ lib/ansible/plugins/connection/ssh.py | 9 +- lib/ansible/plugins/connection/winrm.py | 9 ++ .../targets/connection/test_connection.yml | 3 + .../targets/wait_for_connection/aliases | 2 + .../wait_for_connection/tasks/main.yml | 5 + 9 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 lib/ansible/modules/utilities/logic/wait_for_connection.py create mode 100644 lib/ansible/plugins/action/wait_for_connection.py create mode 100644 test/integration/targets/wait_for_connection/aliases create mode 100644 test/integration/targets/wait_for_connection/tasks/main.yml diff --git a/lib/ansible/modules/utilities/logic/wait_for_connection.py 
b/lib/ansible/modules/utilities/logic/wait_for_connection.py new file mode 100644 index 00000000000..ecdfcdfe322 --- /dev/null +++ b/lib/ansible/modules/utilities/logic/wait_for_connection.py @@ -0,0 +1,108 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# (c) 2017, Dag Wieers +# +# This file is part of Ansible +# +# Ansible is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Ansible is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Ansible. If not, see <http://www.gnu.org/licenses/>. + +ANSIBLE_METADATA = {'status': ['stableinterface'], + 'supported_by': 'core', + 'version': '1.0'} + +DOCUMENTATION = r''' +--- +module: wait_for_connection +short_description: Waits until remote system is reachable/usable +description: +- Waits for a total of C(timeout) seconds. +- Retries the transport connection after a timeout of C(connect_timeout). +- Tests the transport connection every C(sleep) seconds. +- This module makes use of internal ansible transport (and configuration) and the ping/win_ping module to guarantee correct end-to-end functioning. +version_added: "2.3" +options: + connect_timeout: + description: + - Maximum number of seconds to wait for a connection to happen before closing and retrying. + default: 5 + delay: + description: + - Number of seconds to wait before starting to poll. + default: 0 + sleep: + default: 1 + description: + - Number of seconds to sleep between checks. + timeout: + description: + - Maximum number of seconds to wait for. 
+ default: 600 +author: "Dag Wieers (@dagwieers)" +''' + +EXAMPLES = r''' +- name: Wait 600 seconds for target connection to become reachable/usable + wait_for_connection: + +- name: Wait 600 seconds, but only start checking after 60 seconds + wait_for_connection: + delay: 60 + timeout: 600 + +# Wake desktops, wait for them to become ready and continue playbook +- hosts: all + gather_facts: no + tasks: + - name: Send magic Wake-On-Lan packet to turn on individual systems + wakeonlan: + mac: '{{ mac }}' + broadcast: 192.168.0.255 + delegate_to: localhost + + - name: Wait for system to become reachable + wait_for_connection: + + - name: Gather facts for first time + setup: + +# Build a new VM, wait for it to become ready and continue playbook +- hosts: all + gather_facts: no + tasks: + - name: Clone new VM, if missing + vmware_guest: + hostname: '{{ vcenter_ipaddress }}' + name: '{{ inventory_hostname_short }}' + template: Windows 2012R2 + customization: + hostname: '{{ vm_shortname }}' + runonce: + - powershell.exe -ExecutionPolicy Unrestricted -File C:\Windows\Temp\ConfigureRemotingForAnsible.ps1 -CertValidityDays 3650 -ForceNewSSLCert + delegate_to: localhost + + - name: Wait for system to become reachable over WinRM + wait_for_connection: + + - name: Gather facts for first time + setup: +''' + +RETURN = r''' +elapsed: + description: The number of seconds that elapsed waiting for the connection to appear. 
+  returned: always
+  type: integer
+  sample: 23
+'''
diff --git a/lib/ansible/plugins/action/wait_for_connection.py b/lib/ansible/plugins/action/wait_for_connection.py
new file mode 100644
index 00000000000..f3f856a2176
--- /dev/null
+++ b/lib/ansible/plugins/action/wait_for_connection.py
@@ -0,0 +1,147 @@
+# (c) 2017, Dag Wieers
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
+
+# CI-required python3 boilerplate
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+import time
+from datetime import datetime, timedelta
+
+from ansible.module_utils.pycompat24 import get_exception
+from ansible.plugins.action import ActionBase
+
+try:
+    from __main__ import display
+except ImportError:
+    from ansible.utils.display import Display
+    display = Display()
+
+
+class TimedOutException(Exception):
+    ''' Raised when a connectivity test did not succeed before its deadline '''
+    pass
+
+
+class ActionModule(ActionBase):
+    ''' Wait until the target is reachable and able to run a ping module '''
+
+    TRANSFERS_FILES = False
+
+    # Fallbacks used when the task does not set the corresponding argument
+    DEFAULT_CONNECT_TIMEOUT = 5
+    DEFAULT_DELAY = 0
+    DEFAULT_SLEEP = 1
+    DEFAULT_TIMEOUT = 600
+
+    def do_until_success_or_timeout(self, what, timeout, connect_timeout, what_desc, sleep=1):
+        ''' Call what(connect_timeout) until it stops raising or timeout seconds have passed
+
+        :arg what: callable taking connect_timeout; any Exception it raises counts as a failed attempt
+        :arg timeout: total number of seconds during which new attempts are started
+        :arg connect_timeout: value handed to each call of what
+        :arg what_desc: description used in debug output and in the timeout message
+        :kwarg sleep: number of seconds to pause after a failed attempt
+        :raises TimedOutException: when no attempt succeeded before the deadline
+        '''
+        max_end_time = datetime.utcnow() + timedelta(seconds=timeout)
+
+        # Pre-set so the raise below has something to report even when the
+        # loop body never runs (timeout <= 0)
+        error = 'no attempt was made before the timeout expired'
+
+        while datetime.utcnow() < max_end_time:
+            try:
+                what(connect_timeout)
+                if what_desc:
+                    display.debug("wait_for_connection: %s success" % what_desc)
+                return
+            except Exception:
+                # Failures are expected while the target is still coming up
+                error = get_exception()
+                if what_desc:
+                    display.debug("wait_for_connection: %s fail (expected), retrying in %d seconds..." % (what_desc, sleep))
+                time.sleep(sleep)
+
+        raise TimedOutException("timed out waiting for %s: %s" % (what_desc, str(error)))
+
+    def run(self, tmp=None, task_vars=None):
+        ''' Wait for the connection and return the action result
+
+        The result carries 'elapsed' (whole seconds spent, including the
+        initial delay) and, on timeout, 'failed' and 'msg'.
+        '''
+        if task_vars is None:
+            task_vars = dict()
+
+        connect_timeout = int(self._task.args.get('connect_timeout', self.DEFAULT_CONNECT_TIMEOUT))
+        delay = int(self._task.args.get('delay', self.DEFAULT_DELAY))
+        sleep = int(self._task.args.get('sleep', self.DEFAULT_SLEEP))
+        timeout = int(self._task.args.get('timeout', self.DEFAULT_TIMEOUT))
+
+        if self._play_context.check_mode:
+            display.vvv("wait_for_connection: skipping for check_mode")
+            return dict(skipped=True)
+
+        result = super(ActionModule, self).run(tmp, task_vars)
+
+        def ping_module_test(connect_timeout):
+            ''' Test ping module, if available '''
+            display.vvv("wait_for_connection: attempting ping module test")
+            # call connection reset between runs if it's there
+            try:
+                self._connection._reset()
+            except AttributeError:
+                pass
+
+            # Use win_ping on winrm/powershell, else use ping
+            if hasattr(self._connection, '_shell_type') and self._connection._shell_type == 'powershell':
+                ping_result = self._execute_module(module_name='win_ping', module_args=dict(), tmp=tmp, task_vars=task_vars)
+            else:
+                ping_result = self._execute_module(module_name='ping', module_args=dict(), tmp=tmp, task_vars=task_vars)
+
+            # Test module output; .get() so that a result without a 'ping' key
+            # is reported as a failed ping test rather than as a KeyError
+            if ping_result.get('ping') != 'pong':
+                raise Exception('ping test failed')
+
+        # UTC, like the deadline computed in do_until_success_or_timeout,
+        # so a local clock change (e.g. DST) cannot skew the elapsed time
+        start = datetime.utcnow()
+
+        if delay:
+            time.sleep(delay)
+
+        try:
+            # NOTE: each of the two phases below is given the full timeout
+            # If the connection has a transport_test method, use it first
+            if hasattr(self._connection, 'transport_test'):
+                self.do_until_success_or_timeout(self._connection.transport_test, timeout, connect_timeout, what_desc="connection port up", sleep=sleep)
+
+            # Use the ping module test to determine end-to-end connectivity
+            self.do_until_success_or_timeout(ping_module_test, timeout, connect_timeout, what_desc="ping module test success", sleep=sleep)
+
+        except TimedOutException:
+            e = get_exception()
+            result['failed'] = True
+            result['msg'] = str(e)
+
+        # timedelta.seconds alone drops whole days; add them back
+        elapsed = datetime.utcnow() - start
+        result['elapsed'] = elapsed.days * 86400 + elapsed.seconds
+
+        return result
diff --git a/lib/ansible/plugins/connection/accelerate.py b/lib/ansible/plugins/connection/accelerate.py
index 757350467ad..968304ffa5d 100644
--- a/lib/ansible/plugins/connection/accelerate.py
+++ b/lib/ansible/plugins/connection/accelerate.py
@@ -96,6 +96,14 @@ class Connection(ConnectionBase):
             self._connected = True
         return self
 
+    def transport_test(self, connect_timeout):
+        ''' Check that a TCP connection to the accelerate port can be opened within connect_timeout seconds '''
+        host = self._play_context.remote_addr
+        port = int(self._play_context.accelerate_port or 5099)
+        display.vvv("attempting transport test to %s:%s" % (host, port))
+        sock = socket.create_connection((host, port), connect_timeout)
+        sock.close()
+
     def send_data(self, data):
         packed_len = struct.pack('!Q',len(data))
         return self.conn.sendall(packed_len + data)
diff --git a/lib/ansible/plugins/connection/paramiko_ssh.py b/lib/ansible/plugins/connection/paramiko_ssh.py
index eac773620a0..790d1a22c14 100644
--- a/lib/ansible/plugins/connection/paramiko_ssh.py
+++ b/lib/ansible/plugins/connection/paramiko_ssh.py
@@ -133,6 +133,14 @@ class Connection(ConnectionBase):
 
     transport = 'paramiko'
 
+    def transport_test(self, connect_timeout):
+        ''' Check that a TCP connection to the SSH port can be opened within connect_timeout seconds '''
+        host = self._play_context.remote_addr
+        port = int(self._play_context.port or 22)
+        display.vvv("attempting transport test to %s:%s" % (host, port))
+        sock = socket.create_connection((host, port), connect_timeout)
+        sock.close()
+
     def _cache_key(self):
         return "%s__%s__" % (self._play_context.remote_addr, self._play_context.remote_user)
 
diff --git a/lib/ansible/plugins/connection/ssh.py b/lib/ansible/plugins/connection/ssh.py
index 9becda8ee07..279d77beb00 100644
--- a/lib/ansible/plugins/connection/ssh.py
+++ b/lib/ansible/plugins/connection/ssh.py
@@ -25,6 +25,7 @@ import fcntl
 import hashlib
 import os
 import pty
+import socket
 import subprocess
 import time
 
@@ -61,7 +62,7 @@ class Connection(ConnectionBase):
         super(Connection, self).__init__(*args, **kwargs)
 
         self.host = self._play_context.remote_addr
-        self.port = self._play_context.port
+        self.port = int(self._play_context.port or 22)
         self.user = self._play_context.remote_user
         self.control_path = C.ANSIBLE_SSH_CONTROL_PATH
         self.control_path_dir = C.ANSIBLE_SSH_CONTROL_PATH_DIR
@@ -73,6 +74,12 @@ class Connection(ConnectionBase):
     def _connect(self):
         return self
 
+    def transport_test(self, connect_timeout):
+        ''' Check that a TCP connection to self.host:self.port can be opened within connect_timeout seconds '''
+        display.vvv("attempting transport test to %s:%s" % (self.host, self.port))
+        sock = socket.create_connection((self.host, self.port), connect_timeout)
+        sock.close()
+
     @staticmethod
     def _create_control_path(host, port, user):
         '''Make a hash for the controlpath based on con attributes'''
diff --git a/lib/ansible/plugins/connection/winrm.py b/lib/ansible/plugins/connection/winrm.py
index c5b610b3c2a..f69ab1b3b08 100644
--- a/lib/ansible/plugins/connection/winrm.py
+++ b/lib/ansible/plugins/connection/winrm.py
@@ -23,6 +23,7 @@ import inspect
 import os
 import re
 import shlex
+import socket
 import traceback
 import json
 import tempfile
@@ -89,6 +90,14 @@ class Connection(ConnectionBase):
 
         super(Connection, self).__init__(*args, **kwargs)
 
+    def transport_test(self, connect_timeout):
+        ''' Check that a TCP connection to the WinRM port can be opened within connect_timeout seconds '''
+        host = self._play_context.remote_addr
+        port = int(self._play_context.port or 5986)
+        display.vvv("attempting transport test to %s:%s" % (host, port))
+        sock = socket.create_connection((host, port), connect_timeout)
+        sock.close()
+
     def set_host_overrides(self, host, hostvars=None):
         '''
         Override WinRM-specific options from host variables.
diff --git a/test/integration/targets/connection/test_connection.yml b/test/integration/targets/connection/test_connection.yml
index 2f0a98ccdfe..18ca2bada49 100644
--- a/test/integration/targets/connection/test_connection.yml
+++ b/test/integration/targets/connection/test_connection.yml
@@ -38,3 +38,6 @@
     local_action: file path={{ local_tmp }}-汉语 state=absent
   - name: remove remote temp file
     action: "{{ action_prefix }}file path={{ remote_tmp }}-汉语 state=absent"
+
+  ### test wait_for_connection plugin with its default arguments
+  - wait_for_connection:
diff --git a/test/integration/targets/wait_for_connection/aliases b/test/integration/targets/wait_for_connection/aliases
new file mode 100644
index 00000000000..e4b76175b1e
--- /dev/null
+++ b/test/integration/targets/wait_for_connection/aliases
@@ -0,0 +1,2 @@
+posix/ci/group1
+windows/ci/group1
diff --git a/test/integration/targets/wait_for_connection/tasks/main.yml b/test/integration/targets/wait_for_connection/tasks/main.yml
new file mode 100644
index 00000000000..07bf56f6ded
--- /dev/null
+++ b/test/integration/targets/wait_for_connection/tasks/main.yml
@@ -0,0 +1,5 @@
+- name: Test normal connection to target node
+  wait_for_connection:
+    connect_timeout: 5
+    sleep: 1
+    timeout: 10