Tweak how strategies evaluate failed hosts via the iterator and bug fixes

* Added additional methods to the iterator code to assess host failures
  while also taking into account the block rescue/always states
* Fixed bugs in the free strategy, where results were not always being
  processed after being collected
* Added some prettier printing to the state output from iterator

Fixes #13699
pull/13709/head
James Cammarata 9 years ago
parent 6f2f7a79b3
commit 210cf06d9a

@ -57,14 +57,32 @@ class HostState:
self.always_child_state = None self.always_child_state = None
def __repr__(self): def __repr__(self):
return "HOST STATE: block=%d, task=%d, rescue=%d, always=%d, role=%s, run_state=%d, fail_state=%d, pending_setup=%s, tasks child state? %s, rescue child state? %s, always child state? %s" % ( def _run_state_to_string(n):
states = ["ITERATING_SETUP", "ITERATING_TASKS", "ITERATING_RESCUE", "ITERATING_ALWAYS", "ITERATING_COMPLETE"]
try:
return states[n]
except IndexError:
return "UNKNOWN STATE"
def _failed_state_to_string(n):
states = {1:"FAILED_SETUP", 2:"FAILED_TASKS", 4:"FAILED_RESCUE", 8:"FAILED_ALWAYS"}
if n == 0:
return "FAILED_NONE"
else:
ret = []
for i in (1, 2, 4, 8):
if n & i:
ret.append(states[i])
return "|".join(ret)
return "HOST STATE: block=%d, task=%d, rescue=%d, always=%d, role=%s, run_state=%s, fail_state=%s, pending_setup=%s, tasks child state? %s, rescue child state? %s, always child state? %s" % (
self.cur_block, self.cur_block,
self.cur_regular_task, self.cur_regular_task,
self.cur_rescue_task, self.cur_rescue_task,
self.cur_always_task, self.cur_always_task,
self.cur_role, self.cur_role,
self.run_state, _run_state_to_string(self.run_state),
self.fail_state, _failed_state_to_string(self.fail_state),
self.pending_setup, self.pending_setup,
self.tasks_child_state, self.tasks_child_state,
self.rescue_child_state, self.rescue_child_state,
@ -347,6 +365,28 @@ class PlayIterator:
def get_failed_hosts(self): def get_failed_hosts(self):
return dict((host, True) for (host, state) in iteritems(self._host_states) if state.run_state == self.ITERATING_COMPLETE and state.fail_state != self.FAILED_NONE) return dict((host, True) for (host, state) in iteritems(self._host_states) if state.run_state == self.ITERATING_COMPLETE and state.fail_state != self.FAILED_NONE)
def _check_failed_state(self, state):
    '''
    Recursively decides whether the given host state counts as failed.

    While the state is still iterating its tasks, rescue or always
    section, it is reported as failed only if the child state belonging
    to that section is itself failed. Once the state has reached
    ITERATING_COMPLETE, it is failed when its fail_state is anything
    other than FAILED_NONE.

    :arg state: an object exposing run_state, fail_state,
        tasks_child_state, rescue_child_state and always_child_state,
        or None when there is no (child) state to inspect.
    :returns: True if the state is considered failed, False otherwise
        (including when state is None).
    '''
    if state is None:
        return False

    # A section that is still running is only failed through its own
    # nested child state; the parent's fail_state is not consulted here.
    if state.run_state == self.ITERATING_TASKS and self._check_failed_state(state.tasks_child_state):
        return True
    if state.run_state == self.ITERATING_RESCUE and self._check_failed_state(state.rescue_child_state):
        return True
    if state.run_state == self.ITERATING_ALWAYS and self._check_failed_state(state.always_child_state):
        return True

    # NOTE(review): the previous version nested two extra checks inside
    # this final case (run_state == ITERATING_RESCUE / ITERATING_ALWAYS
    # combined with the FAILED_RESCUE / FAILED_ALWAYS bits).  Because the
    # case already requires run_state == ITERATING_COMPLETE, those checks
    # could never be true (assuming the ITERATING_* constants are
    # distinct), so they were dead code and have been dropped without
    # changing the result.  If the intent was to treat a host that is in
    # the middle of a rescue/always section as not failed unless that
    # section itself failed, the outer condition needs rethinking --
    # confirm against the strategies that call is_failed().
    return state.run_state == self.ITERATING_COMPLETE and state.fail_state != self.FAILED_NONE
def is_failed(self, host):
    '''
    Reports whether the given host is currently considered failed.

    The host's state is fetched with get_host_state() and handed to
    _check_failed_state(), whose verdict is returned unchanged.
    '''
    return self._check_failed_state(self.get_host_state(host))
def get_original_task(self, host, task): def get_original_task(self, host, task):
''' '''
Finds the task in the task list which matches the UUID of the given task. Finds the task in the task list which matches the UUID of the given task.

@ -78,7 +78,7 @@ class StrategyModule(StrategyBase):
(state, task) = iterator.get_next_task_for_host(host, peek=True) (state, task) = iterator.get_next_task_for_host(host, peek=True)
display.debug("free host state: %s" % state) display.debug("free host state: %s" % state)
display.debug("free host task: %s" % task) display.debug("free host task: %s" % task)
if host_name not in self._tqm._failed_hosts and host_name not in self._tqm._unreachable_hosts and task: if not iterator.is_failed(host) and host_name not in self._tqm._unreachable_hosts and task:
# set the flag so the outer loop knows we've still found # set the flag so the outer loop knows we've still found
# some work which needs to be done # some work which needs to be done
@ -135,7 +135,7 @@ class StrategyModule(StrategyBase):
if last_host == starting_host: if last_host == starting_host:
break break
results = self._process_pending_results(iterator) results = self._wait_on_pending_results(iterator)
host_results.extend(results) host_results.extend(results)
try: try:
@ -176,13 +176,7 @@ class StrategyModule(StrategyBase):
display.debug("done adding collected blocks to iterator") display.debug("done adding collected blocks to iterator")
# pause briefly so we don't spin lock # pause briefly so we don't spin lock
time.sleep(0.05) time.sleep(0.001)
try:
results = self._wait_on_pending_results(iterator)
host_results.extend(results)
except Exception as e:
pass
# run the base class run() method, which executes the cleanup function # run the base class run() method, which executes the cleanup function
# and runs any outstanding handlers which have been triggered # and runs any outstanding handlers which have been triggered

@ -54,7 +54,8 @@ class StrategyModule(StrategyBase):
host_tasks = {} host_tasks = {}
display.debug("building list of next tasks for hosts") display.debug("building list of next tasks for hosts")
for host in hosts: for host in hosts:
host_tasks[host.name] = iterator.get_next_task_for_host(host, peek=True) if not iterator.is_failed(host):
host_tasks[host.name] = iterator.get_next_task_for_host(host, peek=True)
display.debug("done building task lists") display.debug("done building task lists")
num_setups = 0 num_setups = 0
@ -98,7 +99,7 @@ class StrategyModule(StrategyBase):
rvals = [] rvals = []
display.debug("starting to advance hosts") display.debug("starting to advance hosts")
for host in hosts: for host in hosts:
host_state_task = host_tasks[host.name] host_state_task = host_tasks.get(host.name)
if host_state_task is None: if host_state_task is None:
continue continue
(s, t) = host_state_task (s, t) = host_state_task

Loading…
Cancel
Save