From 90de03be50cabdc46ba9c97905e0ac13bd4053b9 Mon Sep 17 00:00:00 2001 From: Brian Coca Date: Mon, 19 Aug 2024 10:25:29 -0400 Subject: [PATCH] Gather mount facts, fallback for when multiproc is not feasable (#83750) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fallback to 'single threaded gathering' for when multiproc fails Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) --- changelogs/fragments/gather_facts_single.yml | 2 + .../module_utils/facts/hardware/linux.py | 108 +++++++++++------- .../lib/multriprocessing/__init__.py | 1 + .../lib/multriprocessing/pool/__init__.py | 7 ++ .../targets/gathering_facts/no_threads.yml | 21 ++++ .../targets/gathering_facts/runme.sh | 4 + 6 files changed, 104 insertions(+), 39 deletions(-) create mode 100644 changelogs/fragments/gather_facts_single.yml create mode 100644 test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py create mode 100644 test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py create mode 100644 test/integration/targets/gathering_facts/no_threads.yml diff --git a/changelogs/fragments/gather_facts_single.yml b/changelogs/fragments/gather_facts_single.yml new file mode 100644 index 00000000000..65e4f57193d --- /dev/null +++ b/changelogs/fragments/gather_facts_single.yml @@ -0,0 +1,2 @@ +bugfixes: + - setup module (fact gathering), added fallbcak code path to handle mount fact gathering in linux when threading is not available diff --git a/lib/ansible/module_utils/facts/hardware/linux.py b/lib/ansible/module_utils/facts/hardware/linux.py index abd8dd5c617..a0772eff2dc 100644 --- a/lib/ansible/module_utils/facts/hardware/linux.py +++ b/lib/ansible/module_utils/facts/hardware/linux.py @@ -21,6 +21,7 @@ import glob import json import os import re +import signal import sys import time @@ -38,6 +39,10 @@ from ansible.module_utils.six import iteritems from ansible.module_utils.facts import timeout +def _timeout_handler(signum, frame): + raise TimeoutError(f"Timeout reached in:{frame}") + + def get_partition_uuid(partname): try: uuids = os.listdir("/dev/disk/by-uuid") @@ -577,7 +582,12 @@ class LinuxHardware(Hardware): # start threads to query each mount results = {} - pool = ThreadPool(processes=min(len(mtab_entries), cpu_count())) + pool = None + try: + pool = ThreadPool(processes=min(len(mtab_entries), cpu_count())) + except (IOError, OSError) as e: + self.module.warn(f"Cannot use multiprocessing, falling back on serial execution: {e}") + maxtime = timeout.GATHER_TIMEOUT or timeout.DEFAULT_GATHER_TIMEOUT for fields in mtab_entries: # Transform octal escape sequences @@ -601,47 +611,67 @@ class LinuxHardware(Hardware): if not self.MTAB_BIND_MOUNT_RE.match(options): mount_info['options'] += ",bind" - results[mount] = {'info': mount_info, - 'extra': pool.apply_async(self.get_mount_info, (mount, device, uuids)), - 'timelimit': time.time() + maxtime} + results[mount] = {'info': mount_info, 'timelimit': time.time() + maxtime} + if pool is None: + old_handler = signal.signal(signal.SIGALRM, _timeout_handler) + signal.alarm(maxtime) + try: + size, uuid = self.get_mount_info(mount, device, uuids) + except TimeoutError as e: + results[mount]['info']['note'] = 'Could not get extra information due to timeout' + self.module.log(f"Timeout while gathering mount {mount} data: {e}") + self.module.warn(f"Timeout exceeded when getting mount info for {mount}") + finally: + signal.alarm(0) + signal.signal(signal.SIGALRM, old_handler) + + if size: + results[mount]['info'].update(size) + results[mount]['info']['uuid'] = uuid or 'N/A' + else: + # use multiproc pool, handle results below + results[mount]['extra'] = pool.apply_async(self.get_mount_info, (mount, device, uuids)) - pool.close() # done with new workers, start gc + if pool is None: + # serial processing, just assing results + mounts.append(results[mount]['info']) + else: + pool.close() # done with spawing new workers, start gc - # wait for workers and get results - while results: - for mount in list(results): - done = False - res = results[mount]['extra'] - try: - if res.ready(): - done = True - if res.successful(): - mount_size, uuid = res.get() - if mount_size: - results[mount]['info'].update(mount_size) - results[mount]['info']['uuid'] = uuid or 'N/A' - else: - # failed, try to find out why, if 'res.successful' we know there are no exceptions - results[mount]['info']['note'] = 'Could not get extra information: %s.' % (to_text(res.get())) - - elif time.time() > results[mount]['timelimit']: + while results: # wait for workers and get results + for mount in list(results): + done = False + res = results[mount]['extra'] + try: + if res.ready(): + done = True + if res.successful(): + mount_size, uuid = res.get() + if mount_size: + results[mount]['info'].update(mount_size) + results[mount]['info']['uuid'] = uuid or 'N/A' + else: + # failed, try to find out why, if 'res.successful' we know there are no exceptions + results[mount]['info']['note'] = 'Could not get extra information: %s.' % (to_text(res.get())) + + elif time.time() > results[mount]['timelimit']: + done = True + self.module.warn("Timeout exceeded when getting mount info for %s" % mount) + results[mount]['info']['note'] = 'Could not get extra information due to timeout' + except Exception as e: + import traceback done = True - self.module.warn("Timeout exceeded when getting mount info for %s" % mount) - results[mount]['info']['note'] = 'Could not get extra information due to timeout' - except Exception as e: - import traceback - done = True - results[mount]['info'] = 'N/A' - self.module.warn("Error prevented getting extra info for mount %s: [%s] %s." % (mount, type(e), to_text(e))) - self.module.debug(traceback.format_exc()) - - if done: - # move results outside and make loop only handle pending - mounts.append(results[mount]['info']) - del results[mount] - - # avoid cpu churn, sleep between retrying for loop with remaining mounts - time.sleep(0.1) + results[mount]['info'] = 'N/A' + self.module.warn("Error prevented getting extra info for mount %s: [%s] %s." % (mount, type(e), to_text(e))) + self.module.debug(traceback.format_exc()) + + if done: + # move results outside and make loop only handle pending + mounts.append(results[mount]['info']) + del results[mount] + + # avoid cpu churn, sleep between retrying for loop with remaining mounts + time.sleep(0.1) return {'mounts': mounts} diff --git a/test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py b/test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py new file mode 100644 index 00000000000..9d48db4f9f8 --- /dev/null +++ b/test/integration/targets/gathering_facts/lib/multriprocessing/__init__.py @@ -0,0 +1 @@ +from __future__ import annotations diff --git a/test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py b/test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py new file mode 100644 index 00000000000..9c5a5d26a66 --- /dev/null +++ b/test/integration/targets/gathering_facts/lib/multriprocessing/pool/__init__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + + +class ThreadPool: + + def __init__(self, *args, **kwargs): + raise PermissionError("To test single proc ansible") diff --git a/test/integration/targets/gathering_facts/no_threads.yml b/test/integration/targets/gathering_facts/no_threads.yml new file mode 100644 index 00000000000..f8e21cd8078 --- /dev/null +++ b/test/integration/targets/gathering_facts/no_threads.yml @@ -0,0 +1,21 @@ +- hosts: localhost + tasks: + - block: + - set_fact: + normal_devices: "{{ ansible_facts['devices'].keys() }}" + + - name: facts already gathered normally, but now we do mounts again w/o multithreading + gather_facts: + gather_subset: mounts + register: no_multi + environment: + PYTHONPATH: "${PWD}/lib" + + - set_fact: + single_devices: "{{no_multi['ansible_facts']['ansible_devices'].keys()}}" + + - assert: + that: + - normal_devices == single_devices + when: + - ansible_facts['os_family'] not in ['FreeBSD', 'Darwin'] diff --git a/test/integration/targets/gathering_facts/runme.sh b/test/integration/targets/gathering_facts/runme.sh index a90de0f06d5..39824a4a525 100755 --- a/test/integration/targets/gathering_facts/runme.sh +++ b/test/integration/targets/gathering_facts/runme.sh @@ -38,4 +38,8 @@ ANSIBLE_FACTS_MODULES='ansible.legacy.slow' ansible -m gather_facts localhost -- # test parallelism ANSIBLE_FACTS_MODULES='dummy1,dummy2,dummy3' ansible -m gather_facts localhost --playbook-dir ./ -a 'gather_timeout=30 parallel=true' "$@" 2>&1 + +# test lack of threads +ansible-playbook no_threads.yml "$@" 2>&1 + rm "${OUTPUT_DIR}/canary.txt"