mirror of https://github.com/ansible/ansible.git
Add tests to cover win_reboot incidental paths (#79856)
* Add tests to cover win_reboot incidental paths * Fix sanity issuespull/79857/head
parent
c33c8cf086
commit
d16ec2455d
@ -0,0 +1,101 @@
|
||||
# Copyright: (c) 2018, Matt Davis <mdavis@ansible.com>
|
||||
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
||||
|
||||
from __future__ import (absolute_import, division, print_function)
|
||||
__metaclass__ = type
|
||||
|
||||
from ansible.errors import AnsibleError
|
||||
from ansible.module_utils.common.text.converters import to_native
|
||||
from ansible.module_utils.common.validation import check_type_str, check_type_float
|
||||
from ansible.plugins.action import ActionBase
|
||||
from ansible.utils.display import Display
|
||||
|
||||
from ansible_collections.ansible.windows.plugins.plugin_utils._reboot import reboot_host
|
||||
|
||||
display = Display()
|
||||
|
||||
|
||||
def _positive_float(val):
|
||||
float_val = check_type_float(val)
|
||||
if float_val < 0:
|
||||
return 0
|
||||
|
||||
else:
|
||||
return float_val
|
||||
|
||||
|
||||
class ActionModule(ActionBase):
|
||||
TRANSFERS_FILES = False
|
||||
_VALID_ARGS = frozenset((
|
||||
'boot_time_command',
|
||||
'connect_timeout',
|
||||
'connect_timeout_sec',
|
||||
'msg',
|
||||
'post_reboot_delay',
|
||||
'post_reboot_delay_sec',
|
||||
'pre_reboot_delay',
|
||||
'pre_reboot_delay_sec',
|
||||
'reboot_timeout',
|
||||
'reboot_timeout_sec',
|
||||
'shutdown_timeout',
|
||||
'shutdown_timeout_sec',
|
||||
'test_command',
|
||||
))
|
||||
|
||||
def run(self, tmp=None, task_vars=None):
|
||||
self._supports_check_mode = True
|
||||
self._supports_async = True
|
||||
|
||||
if self._play_context.check_mode:
|
||||
return {'changed': True, 'elapsed': 0, 'rebooted': True}
|
||||
|
||||
if task_vars is None:
|
||||
task_vars = {}
|
||||
|
||||
super(ActionModule, self).run(tmp, task_vars)
|
||||
|
||||
parameters = {}
|
||||
for names, check_func in [
|
||||
(['boot_time_command'], check_type_str),
|
||||
(['connect_timeout', 'connect_timeout_sec'], _positive_float),
|
||||
(['msg'], check_type_str),
|
||||
(['post_reboot_delay', 'post_reboot_delay_sec'], _positive_float),
|
||||
(['pre_reboot_delay', 'pre_reboot_delay_sec'], _positive_float),
|
||||
(['reboot_timeout', 'reboot_timeout_sec'], _positive_float),
|
||||
(['test_command'], check_type_str),
|
||||
]:
|
||||
for name in names:
|
||||
value = self._task.args.get(name, None)
|
||||
if value:
|
||||
break
|
||||
else:
|
||||
value = None
|
||||
|
||||
# Defaults are applied in reboot_action so skip adding to kwargs if the input wasn't set (None)
|
||||
if value is not None:
|
||||
try:
|
||||
value = check_func(value)
|
||||
except TypeError as e:
|
||||
raise AnsibleError("Invalid value given for '%s': %s." % (names[0], to_native(e)))
|
||||
|
||||
# Setting a lower value and kill PowerShell when sending the shutdown command. Just use the defaults
|
||||
# if this is the case.
|
||||
if names[0] == 'pre_reboot_delay' and value < 2:
|
||||
continue
|
||||
|
||||
parameters[names[0]] = value
|
||||
|
||||
result = reboot_host(self._task.action, self._connection, **parameters)
|
||||
|
||||
# Not needed for testing and collection_name kwargs causes sanity error
|
||||
# Historical behaviour had ignore_errors=True being able to ignore unreachable hosts and not just task errors.
|
||||
# This snippet will allow that to continue but state that it will be removed in a future version and to use
|
||||
# ignore_unreachable to ignore unreachable hosts.
|
||||
# if result['unreachable'] and self._task.ignore_errors and not self._task.ignore_unreachable:
|
||||
# dep_msg = "Host was unreachable but is being skipped because ignore_errors=True is set. In the future " \
|
||||
# "only ignore_unreachable will be able to ignore an unreachable host for %s" % self._task.action
|
||||
# display.deprecated(dep_msg, date="2023-05-01", collection_name="ansible.windows")
|
||||
# result['unreachable'] = False
|
||||
# result['failed'] = True
|
||||
|
||||
return result
|
@ -0,0 +1,114 @@
|
||||
# Copyright (c) 2021 Ansible Project
|
||||
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
||||
|
||||
"""Quoting helpers for Windows
|
||||
|
||||
This contains code to help with quoting values for use in the variable Windows
|
||||
shell. Right now it should only be used in ansible.windows as the interface is
|
||||
not final and could be subject to change.
|
||||
"""
|
||||
|
||||
# FOR INTERNAL COLLECTION USE ONLY
|
||||
# The interfaces in this file are meant for use within the ansible.windows collection
|
||||
# and may not remain stable to outside uses. Changes may be made in ANY release, even a bugfix release.
|
||||
# See also: https://github.com/ansible/community/issues/539#issuecomment-780839686
|
||||
# Please open an issue if you have questions about this.
|
||||
|
||||
from __future__ import (absolute_import, division, print_function)
|
||||
__metaclass__ = type
|
||||
|
||||
import re
|
||||
|
||||
from ansible.module_utils.six import text_type
|
||||
|
||||
|
||||
_UNSAFE_C = re.compile(u'[\\s\t"]')
|
||||
_UNSAFE_CMD = re.compile(u'[\\s\\(\\)\\^\\|%!"<>&]')
|
||||
|
||||
# PowerShell has 5 characters it uses as a single quote, we need to double up on all of them.
|
||||
# https://github.com/PowerShell/PowerShell/blob/b7cb335f03fe2992d0cbd61699de9d9aafa1d7c1/src/System.Management.Automation/engine/parser/CharTraits.cs#L265-L272
|
||||
# https://github.com/PowerShell/PowerShell/blob/b7cb335f03fe2992d0cbd61699de9d9aafa1d7c1/src/System.Management.Automation/engine/parser/CharTraits.cs#L18-L21
|
||||
_UNSAFE_PWSH = re.compile(u"(['\u2018\u2019\u201a\u201b])")
|
||||
|
||||
|
||||
def quote_c(s): # type: (text_type) -> text_type
|
||||
"""Quotes a value for the raw Win32 process command line.
|
||||
|
||||
Quotes a value to be safely used by anything that calls the Win32
|
||||
CreateProcess API.
|
||||
|
||||
Args:
|
||||
s: The string to quote.
|
||||
|
||||
Returns:
|
||||
(text_type): The quoted string value.
|
||||
"""
|
||||
# https://docs.microsoft.com/en-us/archive/blogs/twistylittlepassagesallalike/everyone-quotes-command-line-arguments-the-wrong-way
|
||||
if not s:
|
||||
return u'""'
|
||||
|
||||
if not _UNSAFE_C.search(s):
|
||||
return s
|
||||
|
||||
# Replace any double quotes in an argument with '\"'.
|
||||
s = s.replace('"', '\\"')
|
||||
|
||||
# We need to double up on any '\' chars that preceded a double quote (now '\"').
|
||||
s = re.sub(r'(\\+)\\"', r'\1\1\"', s)
|
||||
|
||||
# Double up '\' at the end of the argument so it doesn't escape out end quote.
|
||||
s = re.sub(r'(\\+)$', r'\1\1', s)
|
||||
|
||||
# Finally wrap the entire argument in double quotes now we've escaped the double quotes within.
|
||||
return u'"{0}"'.format(s)
|
||||
|
||||
|
||||
def quote_cmd(s): # type: (text_type) -> text_type
|
||||
"""Quotes a value for cmd.
|
||||
|
||||
Quotes a value to be safely used by a command prompt call.
|
||||
|
||||
Args:
|
||||
s: The string to quote.
|
||||
|
||||
Returns:
|
||||
(text_type): The quoted string value.
|
||||
"""
|
||||
# https://docs.microsoft.com/en-us/archive/blogs/twistylittlepassagesallalike/everyone-quotes-command-line-arguments-the-wrong-way#a-better-method-of-quoting
|
||||
if not s:
|
||||
return u'""'
|
||||
|
||||
if not _UNSAFE_CMD.search(s):
|
||||
return s
|
||||
|
||||
# Escape the metachars as we are quoting the string to stop cmd from interpreting that metachar. For example
|
||||
# 'file &whoami.exe' would result in 'whoami.exe' being executed and then that output being used as the argument
|
||||
# instead of the literal string.
|
||||
# https://stackoverflow.com/questions/3411771/multiple-character-replace-with-python
|
||||
for c in u'^()%!"<>&|': # '^' must be the first char that we scan and replace
|
||||
if c in s:
|
||||
# I can't find any docs that explicitly say this but to escape ", it needs to be prefixed with \^.
|
||||
s = s.replace(c, (u"\\^" if c == u'"' else u"^") + c)
|
||||
|
||||
return u'^"{0}^"'.format(s)
|
||||
|
||||
|
||||
def quote_pwsh(s): # type: (text_type) -> text_type
|
||||
"""Quotes a value for PowerShell.
|
||||
|
||||
Quotes a value to be safely used by a PowerShell expression. The input
|
||||
string because something that is safely wrapped in single quotes.
|
||||
|
||||
Args:
|
||||
s: The string to quote.
|
||||
|
||||
Returns:
|
||||
(text_type): The quoted string value.
|
||||
"""
|
||||
# https://docs.microsoft.com/en-us/powershell/module/microsoft.powershell.core/about/about_quoting_rules?view=powershell-5.1
|
||||
if not s:
|
||||
return u"''"
|
||||
|
||||
# We should always quote values in PowerShell as it has conflicting rules where strings can and can't be quoted.
|
||||
# This means we quote the entire arg with single quotes and just double up on the single quote equivalent chars.
|
||||
return u"'{0}'".format(_UNSAFE_PWSH.sub(u'\\1\\1', s))
|
@ -0,0 +1,620 @@
|
||||
# Copyright: (c) 2021, Ansible Project
|
||||
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
|
||||
|
||||
"""Reboot action for Windows hosts
|
||||
|
||||
This contains the code to reboot a Windows host for use by other action plugins
|
||||
in this collection. Right now it should only be used in this collection as the
|
||||
interface is not final and count be subject to change.
|
||||
"""
|
||||
|
||||
# FOR INTERNAL COLLECTION USE ONLY
|
||||
# The interfaces in this file are meant for use within the ansible.windows collection
|
||||
# and may not remain stable to outside uses. Changes may be made in ANY release, even a bugfix release.
|
||||
# See also: https://github.com/ansible/community/issues/539#issuecomment-780839686
|
||||
# Please open an issue if you have questions about this.
|
||||
|
||||
import datetime
|
||||
import json
|
||||
import random
|
||||
import time
|
||||
import traceback
|
||||
import uuid
|
||||
import typing as t
|
||||
|
||||
from ansible.errors import AnsibleConnectionFailure, AnsibleError
|
||||
from ansible.module_utils.common.text.converters import to_text
|
||||
from ansible.plugins.connection import ConnectionBase
|
||||
from ansible.utils.display import Display
|
||||
|
||||
from ansible_collections.ansible.windows.plugins.plugin_utils._quote import quote_pwsh
|
||||
|
||||
|
||||
# This is not ideal but the psrp connection plugin doesn't catch all these exceptions as an AnsibleConnectionFailure.
|
||||
# Until we can guarantee we are using a version of psrp that handles all this we try to handle those issues.
|
||||
try:
|
||||
from requests.exceptions import (
|
||||
RequestException,
|
||||
)
|
||||
except ImportError:
|
||||
RequestException = AnsibleConnectionFailure
|
||||
|
||||
|
||||
_LOGON_UI_KEY = (
|
||||
r"HKLM:\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Winlogon\AutoLogonChecked"
|
||||
)
|
||||
|
||||
_DEFAULT_BOOT_TIME_COMMAND = (
|
||||
"(Get-CimInstance -ClassName Win32_OperatingSystem -Property LastBootUpTime)"
|
||||
".LastBootUpTime.ToFileTime()"
|
||||
)
|
||||
|
||||
T = t.TypeVar("T")
|
||||
|
||||
display = Display()
|
||||
|
||||
|
||||
class _ReturnResultException(Exception):
|
||||
"""Used to sneak results back to the return dict from an exception"""
|
||||
|
||||
def __init__(self, msg, **result):
|
||||
super().__init__(msg)
|
||||
self.result = result
|
||||
|
||||
|
||||
class _TestCommandFailure(Exception):
|
||||
"""Differentiates between a connection failure and just a command assertion failure during the reboot loop"""
|
||||
|
||||
|
||||
def reboot_host(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
boot_time_command: str = _DEFAULT_BOOT_TIME_COMMAND,
|
||||
connect_timeout: int = 5,
|
||||
msg: str = "Reboot initiated by Ansible",
|
||||
post_reboot_delay: int = 0,
|
||||
pre_reboot_delay: int = 2,
|
||||
reboot_timeout: int = 600,
|
||||
test_command: t.Optional[str] = None,
|
||||
) -> t.Dict[str, t.Any]:
|
||||
"""Reboot a Windows Host.
|
||||
|
||||
Used by action plugins in ansible.windows to reboot a Windows host. It
|
||||
takes in the connection plugin so it can run the commands on the targeted
|
||||
host and monitor the reboot process. The return dict will have the
|
||||
following keys set:
|
||||
|
||||
changed: Whether a change occurred (reboot was done)
|
||||
elapsed: Seconds elapsed between the reboot and it coming back online
|
||||
failed: Whether a failure occurred
|
||||
unreachable: Whether it failed to connect to the host on the first cmd
|
||||
rebooted: Whether the host was rebooted
|
||||
|
||||
When failed=True there may be more keys to give some information around
|
||||
the failure like msg, exception. There are other keys that might be
|
||||
returned as well but they are dependent on the failure that occurred.
|
||||
|
||||
Verbosity levels used:
|
||||
2: Message when each reboot step is completed
|
||||
4: Connection plugin operations and their results
|
||||
5: Raw commands run and the results of those commands
|
||||
Debug: Everything, very verbose
|
||||
|
||||
Args:
|
||||
task_action: The name of the action plugin that is running for logging.
|
||||
connection: The connection plugin to run the reboot commands on.
|
||||
boot_time_command: The command to run when getting the boot timeout.
|
||||
connect_timeout: Override the connection timeout of the connection
|
||||
plugin when polling the rebooted host.
|
||||
msg: The message to display to interactive users when rebooting the
|
||||
host.
|
||||
post_reboot_delay: Seconds to wait after sending the reboot command
|
||||
before checking to see if it has returned.
|
||||
pre_reboot_delay: Seconds to wait when sending the reboot command.
|
||||
reboot_timeout: Seconds to wait while polling for the host to come
|
||||
back online.
|
||||
test_command: Command to run when the host is back online and
|
||||
determines the machine is ready for management. When not defined
|
||||
the default command should wait until the reboot is complete and
|
||||
all pre-login configuration has completed.
|
||||
|
||||
Returns:
|
||||
(Dict[str, Any]): The return result as a dictionary. Use the 'failed'
|
||||
key to determine if there was a failure or not.
|
||||
"""
|
||||
result: t.Dict[str, t.Any] = {
|
||||
"changed": False,
|
||||
"elapsed": 0,
|
||||
"failed": False,
|
||||
"unreachable": False,
|
||||
"rebooted": False,
|
||||
}
|
||||
host_context = {"do_close_on_reset": True}
|
||||
|
||||
# Get current boot time. A lot of tasks that require a reboot leave the WSMan stack in a bad place. Will try to
|
||||
# get the initial boot time 3 times before giving up.
|
||||
try:
|
||||
previous_boot_time = _do_until_success_or_retry_limit(
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
"pre-reboot boot time check",
|
||||
3,
|
||||
_get_system_boot_time,
|
||||
task_action,
|
||||
connection,
|
||||
boot_time_command,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Report a the failure based on the last exception received.
|
||||
if isinstance(e, _ReturnResultException):
|
||||
result.update(e.result)
|
||||
|
||||
if isinstance(e, AnsibleConnectionFailure):
|
||||
result["unreachable"] = True
|
||||
else:
|
||||
result["failed"] = True
|
||||
|
||||
result["msg"] = str(e)
|
||||
result["exception"] = traceback.format_exc()
|
||||
return result
|
||||
|
||||
# Get the original connection_timeout option var so it can be reset after
|
||||
original_connection_timeout: t.Optional[float] = None
|
||||
try:
|
||||
original_connection_timeout = connection.get_option("connection_timeout")
|
||||
display.vvvv(
|
||||
f"{task_action}: saving original connection_timeout of {original_connection_timeout}"
|
||||
)
|
||||
except KeyError:
|
||||
display.vvvv(
|
||||
f"{task_action}: connection_timeout connection option has not been set"
|
||||
)
|
||||
|
||||
# Initiate reboot
|
||||
# This command may be wrapped in other shells or command making it hard to detect what shutdown.exe actually
|
||||
# returned. We use this hackery to return a json that contains the stdout/stderr/rc as a structured object for our
|
||||
# code to parse and detect if something went wrong.
|
||||
reboot_command = """$ErrorActionPreference = 'Continue'
|
||||
|
||||
if ($%s) {
|
||||
Remove-Item -LiteralPath '%s' -Force -ErrorAction SilentlyContinue
|
||||
}
|
||||
|
||||
$stdout = $null
|
||||
$stderr = . { shutdown.exe /r /t %s /c %s | Set-Variable stdout } 2>&1 | ForEach-Object ToString
|
||||
|
||||
ConvertTo-Json -Compress -InputObject @{
|
||||
stdout = (@($stdout) -join "`n")
|
||||
stderr = (@($stderr) -join "`n")
|
||||
rc = $LASTEXITCODE
|
||||
}
|
||||
""" % (
|
||||
str(not test_command),
|
||||
_LOGON_UI_KEY,
|
||||
int(pre_reboot_delay),
|
||||
quote_pwsh(msg),
|
||||
)
|
||||
|
||||
expected_test_result = (
|
||||
None # We cannot have an expected result if the command is user defined
|
||||
)
|
||||
if not test_command:
|
||||
# It turns out that LogonUI will create this registry key if it does not exist when it's about to show the
|
||||
# logon prompt. Normally this is a volatile key but if someone has explicitly created it that might no longer
|
||||
# be the case. We ensure it is not present on a reboot so we can wait until LogonUI creates it to determine
|
||||
# the host is actually online and ready, e.g. no configurations/updates still to be applied.
|
||||
# We echo a known successful statement to catch issues with powershell failing to start but the rc mysteriously
|
||||
# being 0 causing it to consider a successful reboot too early (seen on ssh connections).
|
||||
expected_test_result = f"success-{uuid.uuid4()}"
|
||||
test_command = f"Get-Item -LiteralPath '{_LOGON_UI_KEY}' -ErrorAction Stop; '{expected_test_result}'"
|
||||
|
||||
start = None
|
||||
try:
|
||||
_perform_reboot(task_action, connection, reboot_command)
|
||||
|
||||
start = datetime.datetime.utcnow()
|
||||
result["changed"] = True
|
||||
result["rebooted"] = True
|
||||
|
||||
if post_reboot_delay != 0:
|
||||
display.vv(
|
||||
f"{task_action}: waiting an additional {post_reboot_delay} seconds"
|
||||
)
|
||||
time.sleep(post_reboot_delay)
|
||||
|
||||
# Keep on trying to run the last boot time check until it is successful or the timeout is raised
|
||||
display.vv(f"{task_action} validating reboot")
|
||||
_do_until_success_or_timeout(
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
"last boot time check",
|
||||
reboot_timeout,
|
||||
_check_boot_time,
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
previous_boot_time,
|
||||
boot_time_command,
|
||||
connect_timeout,
|
||||
)
|
||||
|
||||
# Reset the connection plugin connection timeout back to the original
|
||||
if original_connection_timeout is not None:
|
||||
_set_connection_timeout(
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
original_connection_timeout,
|
||||
)
|
||||
|
||||
# Run test command until ti is successful or a timeout occurs
|
||||
display.vv(f"{task_action} running post reboot test command")
|
||||
_do_until_success_or_timeout(
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
"post-reboot test command",
|
||||
reboot_timeout,
|
||||
_run_test_command,
|
||||
task_action,
|
||||
connection,
|
||||
test_command,
|
||||
expected=expected_test_result,
|
||||
)
|
||||
|
||||
display.vv(f"{task_action}: system successfully rebooted")
|
||||
|
||||
except Exception as e:
|
||||
if isinstance(e, _ReturnResultException):
|
||||
result.update(e.result)
|
||||
|
||||
result["failed"] = True
|
||||
result["msg"] = str(e)
|
||||
result["exception"] = traceback.format_exc()
|
||||
|
||||
if start:
|
||||
elapsed = datetime.datetime.utcnow() - start
|
||||
result["elapsed"] = elapsed.seconds
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _check_boot_time(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
host_context: t.Dict[str, t.Any],
|
||||
previous_boot_time: int,
|
||||
boot_time_command: str,
|
||||
timeout: int,
|
||||
):
|
||||
"""Checks the system boot time has been changed or not"""
|
||||
display.vvvv("%s: attempting to get system boot time" % task_action)
|
||||
|
||||
# override connection timeout from defaults to custom value
|
||||
if timeout:
|
||||
_set_connection_timeout(task_action, connection, host_context, timeout)
|
||||
|
||||
# try and get boot time
|
||||
current_boot_time = _get_system_boot_time(
|
||||
task_action, connection, boot_time_command
|
||||
)
|
||||
if current_boot_time == previous_boot_time:
|
||||
raise _TestCommandFailure("boot time has not changed")
|
||||
|
||||
|
||||
def _do_until_success_or_retry_limit(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
host_context: t.Dict[str, t.Any],
|
||||
action_desc: str,
|
||||
retries: int,
|
||||
func: t.Callable[..., T],
|
||||
*args: t.Any,
|
||||
**kwargs: t.Any,
|
||||
) -> t.Optional[T]:
|
||||
"""Runs the function multiple times ignoring errors until the retry limit is hit"""
|
||||
|
||||
def wait_condition(idx):
|
||||
return idx < retries
|
||||
|
||||
return _do_until_success_or_condition(
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
action_desc,
|
||||
wait_condition,
|
||||
func,
|
||||
*args,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
|
||||
def _do_until_success_or_timeout(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
host_context: t.Dict[str, t.Any],
|
||||
action_desc: str,
|
||||
timeout: float,
|
||||
func: t.Callable[..., T],
|
||||
*args: t.Any,
|
||||
**kwargs: t.Any,
|
||||
) -> t.Optional[T]:
|
||||
"""Runs the function multiple times ignoring errors until a timeout occurs"""
|
||||
max_end_time = datetime.datetime.utcnow() + datetime.timedelta(seconds=timeout)
|
||||
|
||||
def wait_condition(idx):
|
||||
return datetime.datetime.utcnow() < max_end_time
|
||||
|
||||
try:
|
||||
return _do_until_success_or_condition(
|
||||
task_action,
|
||||
connection,
|
||||
host_context,
|
||||
action_desc,
|
||||
wait_condition,
|
||||
func,
|
||||
*args,
|
||||
**kwargs,
|
||||
)
|
||||
except Exception:
|
||||
raise Exception(
|
||||
"Timed out waiting for %s (timeout=%s)" % (action_desc, timeout)
|
||||
)
|
||||
|
||||
|
||||
def _do_until_success_or_condition(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
host_context: t.Dict[str, t.Any],
|
||||
action_desc: str,
|
||||
condition: t.Callable[[int], bool],
|
||||
func: t.Callable[..., T],
|
||||
*args: t.Any,
|
||||
**kwargs: t.Any,
|
||||
) -> t.Optional[T]:
|
||||
"""Runs the function multiple times ignoring errors until the condition is false"""
|
||||
fail_count = 0
|
||||
max_fail_sleep = 12
|
||||
reset_required = False
|
||||
last_error = None
|
||||
|
||||
while fail_count == 0 or condition(fail_count):
|
||||
try:
|
||||
if reset_required:
|
||||
# Keep on trying the reset until it succeeds.
|
||||
_reset_connection(task_action, connection, host_context)
|
||||
reset_required = False
|
||||
|
||||
else:
|
||||
res = func(*args, **kwargs)
|
||||
display.vvvvv("%s: %s success" % (task_action, action_desc))
|
||||
|
||||
return res
|
||||
|
||||
except Exception as e:
|
||||
last_error = e
|
||||
|
||||
if not isinstance(e, _TestCommandFailure):
|
||||
# The error may be due to a connection problem, just reset the connection just in case
|
||||
reset_required = True
|
||||
|
||||
# Use exponential backoff with a max timeout, plus a little bit of randomness
|
||||
random_int = random.randint(0, 1000) / 1000
|
||||
fail_sleep = 2**fail_count + random_int
|
||||
if fail_sleep > max_fail_sleep:
|
||||
fail_sleep = max_fail_sleep + random_int
|
||||
|
||||
try:
|
||||
error = str(e).splitlines()[-1]
|
||||
except IndexError:
|
||||
error = str(e)
|
||||
|
||||
display.vvvvv(
|
||||
"{action}: {desc} fail {e_type} '{err}', retrying in {sleep:.4} seconds...\n{tcb}".format(
|
||||
action=task_action,
|
||||
desc=action_desc,
|
||||
e_type=type(e).__name__,
|
||||
err=error,
|
||||
sleep=fail_sleep,
|
||||
tcb=traceback.format_exc(),
|
||||
)
|
||||
)
|
||||
|
||||
fail_count += 1
|
||||
time.sleep(fail_sleep)
|
||||
|
||||
if last_error:
|
||||
raise last_error
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _execute_command(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
command: str,
|
||||
) -> t.Tuple[int, str, str]:
|
||||
"""Runs a command on the Windows host and returned the result"""
|
||||
display.vvvvv(f"{task_action}: running command: {command}")
|
||||
|
||||
# Need to wrap the command in our PowerShell encoded wrapper. This is done to align the command input to a
|
||||
# common shell and to allow the psrp connection plugin to report the correct exit code without manually setting
|
||||
# $LASTEXITCODE for just that plugin.
|
||||
command = connection._shell._encode_script(command)
|
||||
|
||||
try:
|
||||
rc, stdout, stderr = connection.exec_command(
|
||||
command, in_data=None, sudoable=False
|
||||
)
|
||||
except RequestException as e:
|
||||
# The psrp connection plugin should be doing this but until we can guarantee it does we just convert it here
|
||||
# to ensure AnsibleConnectionFailure refers to actual connection errors.
|
||||
raise AnsibleConnectionFailure(f"Failed to connect to the host: {e}")
|
||||
|
||||
rc = rc or 0
|
||||
stdout = to_text(stdout, errors="surrogate_or_strict").strip()
|
||||
stderr = to_text(stderr, errors="surrogate_or_strict").strip()
|
||||
|
||||
display.vvvvv(
|
||||
f"{task_action}: command result - rc: {rc}, stdout: {stdout}, stderr: {stderr}"
|
||||
)
|
||||
|
||||
return rc, stdout, stderr
|
||||
|
||||
|
||||
def _get_system_boot_time(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
boot_time_command: str,
|
||||
) -> str:
|
||||
"""Gets a unique identifier to represent the boot time of the Windows host"""
|
||||
display.vvvv(f"{task_action}: getting boot time")
|
||||
rc, stdout, stderr = _execute_command(task_action, connection, boot_time_command)
|
||||
|
||||
if rc != 0:
|
||||
msg = f"{task_action}: failed to get host boot time info"
|
||||
raise _ReturnResultException(msg, rc=rc, stdout=stdout, stderr=stderr)
|
||||
|
||||
display.vvvv(f"{task_action}: last boot time: {stdout}")
|
||||
return stdout
|
||||
|
||||
|
||||
def _perform_reboot(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
reboot_command: str,
|
||||
handle_abort: bool = True,
|
||||
) -> None:
|
||||
"""Runs the reboot command"""
|
||||
display.vv(f"{task_action}: rebooting server...")
|
||||
|
||||
stdout = stderr = None
|
||||
try:
|
||||
rc, stdout, stderr = _execute_command(task_action, connection, reboot_command)
|
||||
|
||||
except AnsibleConnectionFailure as e:
|
||||
# If the connection is closed too quickly due to the system being shutdown, carry on
|
||||
display.vvvv(f"{task_action}: AnsibleConnectionFailure caught and handled: {e}")
|
||||
rc = 0
|
||||
|
||||
if stdout:
|
||||
try:
|
||||
reboot_result = json.loads(stdout)
|
||||
except getattr(json.decoder, "JSONDecodeError", ValueError):
|
||||
# While the reboot command should output json it may have failed for some other reason. We continue
|
||||
# reporting with that output instead
|
||||
pass
|
||||
else:
|
||||
stdout = reboot_result.get("stdout", stdout)
|
||||
stderr = reboot_result.get("stderr", stderr)
|
||||
rc = int(reboot_result.get("rc", rc))
|
||||
|
||||
# Test for "A system shutdown has already been scheduled. (1190)" and handle it gracefully
|
||||
if handle_abort and (rc == 1190 or (rc != 0 and stderr and "(1190)" in stderr)):
|
||||
display.warning("A scheduled reboot was pre-empted by Ansible.")
|
||||
|
||||
# Try to abort (this may fail if it was already aborted)
|
||||
rc, stdout, stderr = _execute_command(
|
||||
task_action, connection, "shutdown.exe /a"
|
||||
)
|
||||
display.vvvv(
|
||||
f"{task_action}: result from trying to abort existing shutdown - rc: {rc}, stdout: {stdout}, stderr: {stderr}"
|
||||
)
|
||||
|
||||
return _perform_reboot(
|
||||
task_action, connection, reboot_command, handle_abort=False
|
||||
)
|
||||
|
||||
if rc != 0:
|
||||
msg = f"{task_action}: Reboot command failed"
|
||||
raise _ReturnResultException(msg, rc=rc, stdout=stdout, stderr=stderr)
|
||||
|
||||
|
||||
def _reset_connection(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
host_context: t.Dict[str, t.Any],
|
||||
ignore_errors: bool = False,
|
||||
) -> None:
|
||||
"""Resets the connection handling any errors"""
|
||||
|
||||
def _wrap_conn_err(func, *args, **kwargs):
|
||||
try:
|
||||
func(*args, **kwargs)
|
||||
|
||||
except (AnsibleError, RequestException) as e:
|
||||
if ignore_errors:
|
||||
return False
|
||||
|
||||
raise AnsibleError(e)
|
||||
|
||||
return True
|
||||
|
||||
# While reset() should probably better handle this some connection plugins don't clear the existing connection on
|
||||
# reset() leaving resources still in use on the target (WSMan shells). Instead we try to manually close the
|
||||
# connection then call reset. If it fails once we want to skip closing to avoid a perpetual loop and just hope
|
||||
# reset() brings us back into a good state. If it's successful we still want to try it again.
|
||||
if host_context["do_close_on_reset"]:
|
||||
display.vvvv(f"{task_action}: closing connection plugin")
|
||||
try:
|
||||
success = _wrap_conn_err(connection.close)
|
||||
|
||||
except Exception:
|
||||
host_context["do_close_on_reset"] = False
|
||||
raise
|
||||
|
||||
host_context["do_close_on_reset"] = success
|
||||
|
||||
# For some connection plugins (ssh) reset actually does something more than close so we also class that
|
||||
display.vvvv(f"{task_action}: resetting connection plugin")
|
||||
try:
|
||||
_wrap_conn_err(connection.reset)
|
||||
|
||||
except AttributeError:
|
||||
# Not all connection plugins have reset so we just ignore those, close should have done our job.
|
||||
pass
|
||||
|
||||
|
||||
def _run_test_command(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
command: str,
|
||||
expected: t.Optional[str] = None,
|
||||
) -> None:
|
||||
"""Runs the user specified test command until the host is able to run it properly"""
|
||||
display.vvvv(f"{task_action}: attempting post-reboot test command")
|
||||
|
||||
rc, stdout, stderr = _execute_command(task_action, connection, command)
|
||||
|
||||
if rc != 0:
|
||||
msg = f"{task_action}: Test command failed - rc: {rc}, stdout: {stdout}, stderr: {stderr}"
|
||||
raise _TestCommandFailure(msg)
|
||||
|
||||
if expected and expected not in stdout:
|
||||
msg = f"{task_action}: Test command failed - '{expected}' was not in stdout: {stdout}"
|
||||
raise _TestCommandFailure(msg)
|
||||
|
||||
|
||||
def _set_connection_timeout(
|
||||
task_action: str,
|
||||
connection: ConnectionBase,
|
||||
host_context: t.Dict[str, t.Any],
|
||||
timeout: float,
|
||||
) -> None:
|
||||
"""Sets the connection plugin connection_timeout option and resets the connection"""
|
||||
try:
|
||||
current_connection_timeout = connection.get_option("connection_timeout")
|
||||
except KeyError:
|
||||
# Not all connection plugins implement this, just ignore the setting if it doesn't work
|
||||
return
|
||||
|
||||
if timeout == current_connection_timeout:
|
||||
return
|
||||
|
||||
display.vvvv(f"{task_action}: setting connect_timeout {timeout}")
|
||||
connection.set_option("connection_timeout", timeout)
|
||||
|
||||
_reset_connection(task_action, connection, host_context, ignore_errors=True)
|
Loading…
Reference in New Issue