From 4fb9e54c5045098e9908a1487dd18cb1eb0d15e0 Mon Sep 17 00:00:00 2001
From: Brian Coca <brian.coca+git@gmail.com>
Date: Mon, 30 Oct 2017 16:39:15 -0400
Subject: [PATCH] calculate max fail against all hosts in batch

currently it is doing only from the 'active' hosts in the batch which means
the percentage goes up as hosts fail instead of staying the same.
added debug info for max fail

fixes #32255
---
 lib/ansible/executor/play_iterator.py      |  4 ++-
 lib/ansible/executor/task_queue_manager.py | 29 ++++++----------------
 lib/ansible/plugins/strategy/linear.py     |  3 ++-
 3 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/lib/ansible/executor/play_iterator.py b/lib/ansible/executor/play_iterator.py
index 329a9f018ca..158ed0d9b98 100644
--- a/lib/ansible/executor/play_iterator.py
+++ b/lib/ansible/executor/play_iterator.py
@@ -203,7 +203,9 @@ class PlayIterator:
 
         self._host_states = {}
         start_at_matched = False
-        for host in inventory.get_hosts(self._play.hosts):
+        batch = inventory.get_hosts(self._play.hosts)
+        self.batch_size = len(batch)
+        for host in batch:
             self._host_states[host.name] = HostState(blocks=self._blocks)
             # if we're looking to start at a specific task, iterate through
             # the tasks for this host until we find the specified task
diff --git a/lib/ansible/executor/task_queue_manager.py b/lib/ansible/executor/task_queue_manager.py
index d8f41ccf658..9270d16aab2 100644
--- a/lib/ansible/executor/task_queue_manager.py
+++ b/lib/ansible/executor/task_queue_manager.py
@@ -242,22 +242,6 @@ class TaskQueueManager:
             loader=self._loader,
         )
 
-        # Fork # of forks, # of hosts or serial, whichever is lowest
-        num_hosts = len(self._inventory.get_hosts(new_play.hosts, ignore_restrictions=True))
-
-        max_serial = 0
-        if new_play.serial:
-            # the play has not been post_validated here, so we may need
-            # to convert the scalar value to a list at this point
-            serial_items = new_play.serial
-            if not isinstance(serial_items, list):
-                serial_items = [serial_items]
-            max_serial = max([pct_to_int(x, num_hosts) for x in serial_items])
-
-        contenders = [self._options.forks, max_serial, num_hosts]
-        contenders = [v for v in contenders if v is not None and v > 0]
-        self._initialize_processes(min(contenders))
-
         play_context = PlayContext(new_play, self._options, self.passwords, self._connection_lockfile.fileno())
         for callback_plugin in self._callback_plugins:
             if hasattr(callback_plugin, 'set_play_context'):
@@ -268,11 +252,6 @@ class TaskQueueManager:
         # initialize the shared dictionary containing the notified handlers
         self._initialize_notified_handlers(new_play)
 
-        # load the specified strategy (or the default linear one)
-        strategy = strategy_loader.get(new_play.strategy, self)
-        if strategy is None:
-            raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds)
-
         # build the iterator
         iterator = PlayIterator(
             inventory=self._inventory,
@@ -283,6 +262,14 @@ class TaskQueueManager:
             start_at_done=self._start_at_done,
         )
 
+        # adjust to # of workers to configured forks or size of batch, whatever is lower
+        self._initialize_processes(min(self._options.forks, iterator.batch_size))
+
+        # load the specified strategy (or the default linear one)
+        strategy = strategy_loader.get(new_play.strategy, self)
+        if strategy is None:
+            raise AnsibleError("Invalid play strategy specified: %s" % new_play.strategy, obj=play._ds)
+
         # Because the TQM may survive multiple play runs, we start by marking
         # any hosts as failed in the iterator here which may have been marked
         # as failed in previous runs. Then we clear the internal list of failed
diff --git a/lib/ansible/plugins/strategy/linear.py b/lib/ansible/plugins/strategy/linear.py
index 3db70d2c1c4..9beba1f283e 100644
--- a/lib/ansible/plugins/strategy/linear.py
+++ b/lib/ansible/plugins/strategy/linear.py
@@ -401,7 +401,7 @@ class StrategyModule(StrategyBase):
                 if iterator._play.max_fail_percentage is not None and len(results) > 0:
                     percentage = iterator._play.max_fail_percentage / 100.0
 
-                    if (len(self._tqm._failed_hosts) / len(results)) > percentage:
+                    if (len(self._tqm._failed_hosts) / iterator.batch_size) > percentage:
                         for host in hosts_left:
                             # don't double-mark hosts, or the iterator will potentially
                             # fail them out of the rescue/always states
@@ -410,6 +410,7 @@ class StrategyModule(StrategyBase):
                                 iterator.mark_host_failed(host)
                         self._tqm.send_callback('v2_playbook_on_no_hosts_remaining')
                         result |= self._tqm.RUN_FAILED_BREAK_PLAY
+                    display.debug('(%s failed / %s total )> %s max fail' % (len(self._tqm._failed_hosts), iterator.batch_size, percentage))
                 display.debug("done checking for max_fail_percentage")
 
                 display.debug("checking to see if all hosts have failed and the running result is not ok")