diff --git a/changelogs/fragments/gather_facts_fix_parallel.yml b/changelogs/fragments/gather_facts_fix_parallel.yml new file mode 100644 index 00000000000..e33571c1a20 --- /dev/null +++ b/changelogs/fragments/gather_facts_fix_parallel.yml @@ -0,0 +1,4 @@ +bugfixes: + - gather_facts parallel option was doing the reverse of what was stated, now it does run modules in parallel when True and serially when False. +minor_changes: + - gather_facts now will use gather_timeout setting to limit parallel execution of modules that do not themselves use gather_timeout. diff --git a/lib/ansible/modules/gather_facts.py b/lib/ansible/modules/gather_facts.py index b099cd87d15..aa67a0e3fd9 100644 --- a/lib/ansible/modules/gather_facts.py +++ b/lib/ansible/modules/gather_facts.py @@ -26,13 +26,15 @@ options: - A toggle that controls if the fact modules are executed in parallel or serially and in order. This can guarantee the merge order of module facts at the expense of performance. - By default it will be true if more than one fact module is used. + - For low cost/delay fact modules parallelism overhead might end up meaning the whole process takes longer. + Test your specific case to see if it is a speed improvement or not. type: bool attributes: action: support: full async: - details: multiple modules can be executed in parallel or serially, but the action itself will not be async - support: partial + details: while this action does not support the task 'async' keywords it can do its own parallel processing using the C(parallel) option. + support: none bypass_host_loop: support: none check_mode: @@ -48,6 +50,8 @@ attributes: notes: - This is mostly a wrapper around other fact gathering modules. - Options passed into this action must be supported by all the underlying fact modules configured. + - If using C(gather_timeout) and parallel execution, it will limit the total execution time of + modules that do not accept C(gather_timeout) themselves. - Facts returned by each module will be merged, conflicts will favor 'last merged'. Order is not guaranteed, when doing parallel gathering on multiple modules. author: diff --git a/lib/ansible/plugins/action/gather_facts.py b/lib/ansible/plugins/action/gather_facts.py index 3ff7beb5a84..11cdcc39cd0 100644 --- a/lib/ansible/plugins/action/gather_facts.py +++ b/lib/ansible/plugins/action/gather_facts.py @@ -6,6 +6,7 @@ __metaclass__ = type import os import time +import typing as t from ansible import constants as C from ansible.executor.module_common import get_action_args_with_defaults @@ -16,12 +17,13 @@ from ansible.utils.vars import merge_hash class ActionModule(ActionBase): - def _get_module_args(self, fact_module, task_vars): + def _get_module_args(self, fact_module: str, task_vars: dict[str, t.Any]) -> dict[str, t.Any]: mod_args = self._task.args.copy() # deal with 'setup specific arguments' if fact_module not in C._ACTION_SETUP: + # TODO: remove in favor of controller side argspec detecing valid arguments # network facts modules must support gather_subset try: @@ -30,16 +32,16 @@ class ActionModule(ActionBase): name = self._connection._load_name.split('.')[-1] if name not in ('network_cli', 'httpapi', 'netconf'): subset = mod_args.pop('gather_subset', None) - if subset not in ('all', ['all']): - self._display.warning('Ignoring subset(%s) for %s' % (subset, fact_module)) + if subset not in ('all', ['all'], None): + self._display.warning('Not passing subset(%s) to %s' % (subset, fact_module)) timeout = mod_args.pop('gather_timeout', None) if timeout is not None: - self._display.warning('Ignoring timeout(%s) for %s' % (timeout, fact_module)) + self._display.warning('Not passing timeout(%s) to %s' % (timeout, fact_module)) fact_filter = mod_args.pop('filter', None) if fact_filter is not None: - self._display.warning('Ignoring filter(%s) for %s' % (fact_filter, fact_module)) + self._display.warning('Not passing filter(%s) to %s' % (fact_filter, fact_module)) # Strip out keys with ``None`` values, effectively mimicking ``omit`` behavior # This ensures we don't pass a ``None`` value as an argument expecting a specific type @@ -57,7 +59,7 @@ class ActionModule(ActionBase): return mod_args - def _combine_task_result(self, result, task_result): + def _combine_task_result(self, result: dict[str, t.Any], task_result: dict[str, t.Any]) -> dict[str, t.Any]: filtered_res = { 'ansible_facts': task_result.get('ansible_facts', {}), 'warnings': task_result.get('warnings', []), @@ -67,7 +69,7 @@ class ActionModule(ActionBase): # on conflict the last plugin processed wins, but try to do deep merge and append to lists. return merge_hash(result, filtered_res, list_merge='append_rp') - def run(self, tmp=None, task_vars=None): + def run(self, tmp: t.Optional[str] = None, task_vars: t.Optional[dict[str, t.Any]] = None) -> dict[str, t.Any]: self._supports_check_mode = True @@ -87,16 +89,23 @@ class ActionModule(ActionBase): failed = {} skipped = {} - if parallel is None and len(modules) >= 1: - parallel = True + if parallel is None: + if len(modules) > 1: + parallel = True + else: + parallel = False else: parallel = boolean(parallel) - if parallel: + timeout = self._task.args.get('gather_timeout', None) + async_val = self._task.async_val + + if not parallel: # serially execute each module for fact_module in modules: # just one module, no need for fancy async mod_args = self._get_module_args(fact_module, task_vars) + # TODO: use gather_timeout to cut module execution if module itself does not support gather_timeout res = self._execute_module(module_name=fact_module, module_args=mod_args, task_vars=task_vars, wrap_async=False) if res.get('failed', False): failed[fact_module] = res @@ -107,10 +116,21 @@ class ActionModule(ActionBase): self._remove_tmp_path(self._connection._shell.tmpdir) else: - # do it async + # do it async, aka parallel jobs = {} + for fact_module in modules: mod_args = self._get_module_args(fact_module, task_vars) + + # if module does not handle timeout, use timeout to handle module, hijack async_val as this is what async_wrapper uses + # TODO: make this action compain about async/async settings, use parallel option instead .. or remove parallel in favor of async settings? + if timeout and 'gather_timeout' not in mod_args: + self._task.async_val = int(timeout) + elif async_val != 0: + self._task.async_val = async_val + else: + self._task.async_val = 0 + self._display.vvvv("Running %s" % fact_module) jobs[fact_module] = (self._execute_module(module_name=fact_module, module_args=mod_args, task_vars=task_vars, wrap_async=True)) @@ -132,6 +152,10 @@ class ActionModule(ActionBase): else: time.sleep(0.5) + # restore value for post processing + if self._task.async_val != async_val: + self._task.async_val = async_val + if skipped: result['msg'] = "The following modules were skipped: %s\n" % (', '.join(skipped.keys())) result['skipped_modules'] = skipped diff --git a/test/integration/targets/gathering_facts/library/dummy1 b/test/integration/targets/gathering_facts/library/dummy1 new file mode 100755 index 00000000000..5a10e2dd582 --- /dev/null +++ b/test/integration/targets/gathering_facts/library/dummy1 @@ -0,0 +1,19 @@ +#!/bin/sh + +CANARY="${OUTPUT_DIR}/canary.txt" + +echo "$0" >> "${CANARY}" +LINES=0 + +until test "${LINES}" -gt 2 +do + LINES=`wc -l "${CANARY}" |awk '{print $1}'` + sleep 1 +done + +echo '{ + "changed": false, + "ansible_facts": { + "dummy": "$0" + } +}' diff --git a/test/integration/targets/gathering_facts/library/dummy2 b/test/integration/targets/gathering_facts/library/dummy2 new file mode 120000 index 00000000000..71676cd9c0f --- /dev/null +++ b/test/integration/targets/gathering_facts/library/dummy2 @@ -0,0 +1 @@ +dummy1 \ No newline at end of file diff --git a/test/integration/targets/gathering_facts/library/dummy3 b/test/integration/targets/gathering_facts/library/dummy3 new file mode 120000 index 00000000000..71676cd9c0f --- /dev/null +++ b/test/integration/targets/gathering_facts/library/dummy3 @@ -0,0 +1 @@ +dummy1 \ No newline at end of file diff --git a/test/integration/targets/gathering_facts/library/slow b/test/integration/targets/gathering_facts/library/slow new file mode 100644 index 00000000000..3984662e77d --- /dev/null +++ b/test/integration/targets/gathering_facts/library/slow @@ -0,0 +1,26 @@ +#!/bin/sh + +sleep 10 + +echo '{ + "changed": false, + "ansible_facts": { + "factsone": "from slow module", + "common_fact": "also from slow module", + "common_dict_fact": { + "key_one": "from slow ", + "key_two": "from slow " + }, + "common_list_fact": [ + "never", + "does", + "see" + ], + "common_list_fact2": [ + "see", + "does", + "never", + "theee" + ] + } +}' diff --git a/test/integration/targets/gathering_facts/runme.sh b/test/integration/targets/gathering_facts/runme.sh index c1df560c3be..a90de0f06d5 100755 --- a/test/integration/targets/gathering_facts/runme.sh +++ b/test/integration/targets/gathering_facts/runme.sh @@ -25,3 +25,17 @@ ansible-playbook test_module_defaults.yml "$@" --tags default_fact_module ANSIBLE_FACTS_MODULES='ansible.legacy.setup' ansible-playbook test_module_defaults.yml "$@" --tags custom_fact_module ansible-playbook test_module_defaults.yml "$@" --tags networking + +# test it works by default +ANSIBLE_FACTS_MODULES='ansible.legacy.slow' ansible -m gather_facts localhost --playbook-dir ./ "$@" + +# test that gather_facts will timeout parallel modules that dont support gather_timeout when using gather_Timeout +ANSIBLE_FACTS_MODULES='ansible.legacy.slow' ansible -m gather_facts localhost --playbook-dir ./ -a 'gather_timeout=1 parallel=true' "$@" 2>&1 |grep 'Timeout exceeded' + +# test that gather_facts parallel w/o timing out +ANSIBLE_FACTS_MODULES='ansible.legacy.slow' ansible -m gather_facts localhost --playbook-dir ./ -a 'gather_timeout=30 parallel=true' "$@" 2>&1 |grep -v 'Timeout exceeded' + + +# test parallelism +ANSIBLE_FACTS_MODULES='dummy1,dummy2,dummy3' ansible -m gather_facts localhost --playbook-dir ./ -a 'gather_timeout=30 parallel=true' "$@" 2>&1 +rm "${OUTPUT_DIR}/canary.txt" diff --git a/test/sanity/ignore.txt b/test/sanity/ignore.txt index 411d370588d..d7c4f7530fa 100644 --- a/test/sanity/ignore.txt +++ b/test/sanity/ignore.txt @@ -148,8 +148,10 @@ test/integration/targets/collections_relative_imports/collection_root/ansible_co test/integration/targets/collections_relative_imports/collection_root/ansible_collections/my_ns/my_col/plugins/module_utils/my_util2.py pylint:relative-beyond-top-level test/integration/targets/fork_safe_stdio/vendored_pty.py pep8!skip # vendored code test/integration/targets/gathering_facts/library/bogus_facts shebang +test/integration/targets/gathering_facts/library/dummy1 shebang test/integration/targets/gathering_facts/library/facts_one shebang test/integration/targets/gathering_facts/library/facts_two shebang +test/integration/targets/gathering_facts/library/slow shebang test/integration/targets/incidental_win_reboot/templates/post_reboot.ps1 pslint!skip test/integration/targets/json_cleanup/library/bad_json shebang test/integration/targets/lookup_csvfile/files/crlf.csv line-endings