Calculate max fail against all hosts in the batch

Currently the calculation only uses the 'active' hosts in the batch, which means
the percentage goes up as hosts fail instead of staying the same.
Also added debug info for max fail.

fixes #32255
pull/32357/head
Brian Coca 7 years ago committed by Toshio Kuratomi
parent 41685fb516
commit 4fb9e54c50

@ -203,7 +203,9 @@ class PlayIterator:
self._host_states = {} self._host_states = {}
start_at_matched = False start_at_matched = False
for host in inventory.get_hosts(self._play.hosts): batch = inventory.get_hosts(self._play.hosts)
self.batch_size = len(batch)
for host in batch:
self._host_states[host.name] = HostState(blocks=self._blocks) self._host_states[host.name] = HostState(blocks=self._blocks)
# if we're looking to start at a specific task, iterate through # if we're looking to start at a specific task, iterate through
# the tasks for this host until we find the specified task # the tasks for this host until we find the specified task

@ -242,22 +242,6 @@ class TaskQueueManager:
loader=self._loader, loader=self._loader,
) )
# Fork # of forks, # of hosts or serial, whichever is lowest
num_hosts = len(self._inventory.get_hosts(new_play.hosts, ignore_restrictions=True))
max_serial = 0
if new_play.serial:
# the play has not been post_validated here, so we may need
# to convert the scalar value to a list at this point
serial_items = new_play.serial
if not isinstance(serial_items, list):
serial_items = [serial_items]
max_serial = max([pct_to_int(x, num_hosts) for x in serial_items])
contenders = [self._options.forks, max_serial, num_hosts]
contenders = [v for v in contenders if v is not None and v > 0]
self._initialize_processes(min(contenders))
play_context = PlayContext(new_play, self._options, self.passwords, self._connection_lockfile.fileno()) play_context = PlayContext(new_play, self._options, self.passwords, self._connection_lockfile.fileno())
for callback_plugin in self._callback_plugins: for callback_plugin in self._callback_plugins:
if hasattr(callback_plugin, 'set_play_context'): if hasattr(callback_plugin, 'set_play_context'):
@ -268,11 +252,6 @@ class TaskQueueManager:
# initialize the shared dictionary containing the notified handlers # initialize the shared dictionary containing the notified handlers
self._initialize_notified_handlers(new_play) self._initialize_notified_handlers(new_play)
# load the specified strategy (or the default linear one)
strategy = strategy_loader.get(new_play.strategy, self)
if strategy is None:
raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds)
# build the iterator # build the iterator
iterator = PlayIterator( iterator = PlayIterator(
inventory=self._inventory, inventory=self._inventory,
@ -283,6 +262,14 @@ class TaskQueueManager:
start_at_done=self._start_at_done, start_at_done=self._start_at_done,
) )
# adjust to # of workers to configured forks or size of batch, whatever is lower
self._initialize_processes(min(self._options.forks, iterator.batch_size))
# load the specified strategy (or the default linear one)
strategy = strategy_loader.get(new_play.strategy, self)
if strategy is None:
raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds)
# Because the TQM may survive multiple play runs, we start by marking # Because the TQM may survive multiple play runs, we start by marking
# any hosts as failed in the iterator here which may have been marked # any hosts as failed in the iterator here which may have been marked
# as failed in previous runs. Then we clear the internal list of failed # as failed in previous runs. Then we clear the internal list of failed

@ -401,7 +401,7 @@ class StrategyModule(StrategyBase):
if iterator._play.max_fail_percentage is not None and len(results) > 0: if iterator._play.max_fail_percentage is not None and len(results) > 0:
percentage = iterator._play.max_fail_percentage / 100.0 percentage = iterator._play.max_fail_percentage / 100.0
if (len(self._tqm._failed_hosts) / len(results)) > percentage: if (len(self._tqm._failed_hosts) / iterator.batch_size) > percentage:
for host in hosts_left: for host in hosts_left:
# don't double-mark hosts, or the iterator will potentially # don't double-mark hosts, or the iterator will potentially
# fail them out of the rescue/always states # fail them out of the rescue/always states
@ -410,6 +410,7 @@ class StrategyModule(StrategyBase):
iterator.mark_host_failed(host) iterator.mark_host_failed(host)
self._tqm.send_callback('v2_playbook_on_no_hosts_remaining') self._tqm.send_callback('v2_playbook_on_no_hosts_remaining')
result |= self._tqm.RUN_FAILED_BREAK_PLAY result |= self._tqm.RUN_FAILED_BREAK_PLAY
display.debug('(%s failed / %s total )> %s max fail' % (len(self._tqm._failed_hosts), iterator.batch_size, percentage))
display.debug("done checking for max_fail_percentage") display.debug("done checking for max_fail_percentage")
display.debug("checking to see if all hosts have failed and the running result is not ok") display.debug("checking to see if all hosts have failed and the running result is not ok")

Loading…
Cancel
Save