# (c) 2019 Ansible Project # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) # CAUTION: This implementation of the collection loader is used by ansible-test. # Because of this, it must be compatible with all Python versions supported on the controller or remote. from __future__ import (absolute_import, division, print_function) __metaclass__ = type import itertools import os import os.path import pkgutil import re import sys from keyword import iskeyword from tokenize import Name as _VALID_IDENTIFIER_REGEX # DO NOT add new non-stdlib import deps here, this loader is used by external tools (eg ansible-test import sanity) # that only allow stdlib and module_utils from ansible.module_utils.common.text.converters import to_native, to_text, to_bytes from ansible.module_utils.six import string_types, PY3 from ._collection_config import AnsibleCollectionConfig from contextlib import contextmanager from types import ModuleType try: from importlib import import_module except ImportError: def import_module(name): # type: ignore[misc] __import__(name) return sys.modules[name] try: from importlib import reload as reload_module except ImportError: # 2.7 has a global reload function instead... reload_module = reload # type: ignore[name-defined] # pylint:disable=undefined-variable try: from importlib.abc import TraversableResources except ImportError: TraversableResources = object # type: ignore[assignment,misc] try: from importlib.util import find_spec, spec_from_loader except ImportError: pass try: from importlib.machinery import FileFinder except ImportError: HAS_FILE_FINDER = False else: HAS_FILE_FINDER = True try: import pathlib except ImportError: pass # NB: this supports import sanity test providing a different impl try: from ._collection_meta import _meta_yml_to_dict except ImportError: _meta_yml_to_dict = None if not hasattr(__builtins__, 'ModuleNotFoundError'): # this was introduced in Python 3.6 ModuleNotFoundError = ImportError _VALID_IDENTIFIER_STRING_REGEX = re.compile( ''.join((_VALID_IDENTIFIER_REGEX, r'\Z')), ) try: # NOTE: py3/py2 compat # py2 mypy can't deal with try/excepts is_python_identifier = str.isidentifier # type: ignore[attr-defined] except AttributeError: # Python 2 def is_python_identifier(self): # type: (str) -> bool """Determine whether the given string is a Python identifier.""" # Ref: https://stackoverflow.com/a/55802320/595220 return bool(re.match(_VALID_IDENTIFIER_STRING_REGEX, self)) PB_EXTENSIONS = ('.yml', '.yaml') SYNTHETIC_PACKAGE_NAME = '' class _AnsibleNSTraversable: """Class that implements the ``importlib.resources.abc.Traversable`` interface for the following ``ansible_collections`` namespace packages:: * ``ansible_collections`` * ``ansible_collections.`` These namespace packages operate differently from a normal Python namespace package, in that the same namespace can be distributed across multiple directories on the filesystem and still function as a single namespace, such as:: * ``/usr/share/ansible/collections/ansible_collections/ansible/posix/`` * ``/home/user/.ansible/collections/ansible_collections/ansible/windows/`` This class will mimic the behavior of various ``pathlib.Path`` methods, by combining the results of multiple root paths into the output. This class does not do anything to remove duplicate collections from the list, so when traversing either namespace patterns supported by this class, it is possible to have the same collection located in multiple root paths, but precedence rules only use one. When iterating or traversing these package roots, there is the potential to see the same collection in multiple places without indication of which would be used. In such a circumstance, it is best to then call ``importlib.resources.files`` for an individual collection package rather than continuing to traverse from the namespace package. Several methods will raise ``NotImplementedError`` as they do not make sense for these namespace packages. """ def __init__(self, *paths): self._paths = [pathlib.Path(p) for p in paths] def __repr__(self): return "_AnsibleNSTraversable('%s')" % "', '".join(map(to_text, self._paths)) def iterdir(self): return itertools.chain.from_iterable(p.iterdir() for p in self._paths if p.is_dir()) def is_dir(self): return any(p.is_dir() for p in self._paths) def is_file(self): return False def glob(self, pattern): return itertools.chain.from_iterable(p.glob(pattern) for p in self._paths if p.is_dir()) def _not_implemented(self, *args, **kwargs): raise NotImplementedError('not usable on namespaces') joinpath = __truediv__ = read_bytes = read_text = _not_implemented class _AnsibleTraversableResources(TraversableResources): """Implements ``importlib.resources.abc.TraversableResources`` for the collection Python loaders. The result of ``files`` will depend on whether a particular collection, or a sub package of a collection was referenced, as opposed to ``ansible_collections`` or a particular namespace. For a collection and its subpackages, a ``pathlib.Path`` instance will be returned, whereas for the higher level namespace packages, ``_AnsibleNSTraversable`` will be returned. """ def __init__(self, package, loader): self._package = package self._loader = loader def _get_name(self, package): try: # spec return package.name except AttributeError: # module return package.__name__ def _get_package(self, package): try: # spec return package.__parent__ except AttributeError: # module return package.__package__ def _get_path(self, package): try: # spec return package.origin except AttributeError: # module return package.__file__ def _is_ansible_ns_package(self, package): origin = getattr(package, 'origin', None) if not origin: return False if origin == SYNTHETIC_PACKAGE_NAME: return True module_filename = os.path.basename(origin) return module_filename in {'__synthetic__', '__init__.py'} def _ensure_package(self, package): if self._is_ansible_ns_package(package): # Short circuit our loaders return if self._get_package(package) != package.__name__: raise TypeError('%r is not a package' % package.__name__) def files(self): package = self._package parts = package.split('.') is_ns = parts[0] == 'ansible_collections' and len(parts) < 3 if isinstance(package, string_types): if is_ns: # Don't use ``spec_from_loader`` here, because that will point # to exactly 1 location for a namespace. Use ``find_spec`` # to get a list of all locations for the namespace package = find_spec(package) else: package = spec_from_loader(package, self._loader) elif not isinstance(package, ModuleType): raise TypeError('Expected string or module, got %r' % package.__class__.__name__) self._ensure_package(package) if is_ns: return _AnsibleNSTraversable(*package.submodule_search_locations) return pathlib.Path(self._get_path(package)).parent class _AnsibleCollectionFinder: def __init__(self, paths=None, scan_sys_paths=True): # TODO: accept metadata loader override self._ansible_pkg_path = to_native(os.path.dirname(to_bytes(sys.modules['ansible'].__file__))) if isinstance(paths, string_types): paths = [paths] elif paths is None: paths = [] # expand any placeholders in configured paths paths = [os.path.expanduser(to_native(p, errors='surrogate_or_strict')) for p in paths] # add syspaths if needed if scan_sys_paths: paths.extend(sys.path) good_paths = [] # expand any placeholders in configured paths for p in paths: # ensure we always have ansible_collections if os.path.basename(p) == 'ansible_collections': p = os.path.dirname(p) if p not in good_paths and os.path.isdir(to_bytes(os.path.join(p, 'ansible_collections'), errors='surrogate_or_strict')): good_paths.append(p) self._n_configured_paths = good_paths self._n_cached_collection_paths = None self._n_cached_collection_qualified_paths = None self._n_playbook_paths = [] @classmethod def _remove(cls): for mps in sys.meta_path: if isinstance(mps, _AnsibleCollectionFinder): sys.meta_path.remove(mps) # remove any path hooks that look like ours for ph in sys.path_hooks: if hasattr(ph, '__self__') and isinstance(ph.__self__, _AnsibleCollectionFinder): sys.path_hooks.remove(ph) # zap any cached path importer cache entries that might refer to us sys.path_importer_cache.clear() AnsibleCollectionConfig._collection_finder = None # validate via the public property that we really killed it if AnsibleCollectionConfig.collection_finder is not None: raise AssertionError('_AnsibleCollectionFinder remove did not reset AnsibleCollectionConfig.collection_finder') def _install(self): self._remove() sys.meta_path.insert(0, self) sys.path_hooks.insert(0, self._ansible_collection_path_hook) AnsibleCollectionConfig.collection_finder = self def _ansible_collection_path_hook(self, path): path = to_native(path) interesting_paths = self._n_cached_collection_qualified_paths if not interesting_paths: interesting_paths = [] for p in self._n_collection_paths: if os.path.basename(p) != 'ansible_collections': p = os.path.join(p, 'ansible_collections') if p not in interesting_paths: interesting_paths.append(p) interesting_paths.insert(0, self._ansible_pkg_path) self._n_cached_collection_qualified_paths = interesting_paths if any(path.startswith(p) for p in interesting_paths): return _AnsiblePathHookFinder(self, path) raise ImportError('not interested') @property def _n_collection_paths(self): paths = self._n_cached_collection_paths if not paths: self._n_cached_collection_paths = paths = self._n_playbook_paths + self._n_configured_paths return paths def set_playbook_paths(self, playbook_paths): if isinstance(playbook_paths, string_types): playbook_paths = [playbook_paths] # track visited paths; we have to preserve the dir order as-passed in case there are duplicate collections (first one wins) added_paths = set() # de-dupe self._n_playbook_paths = [os.path.join(to_native(p), 'collections') for p in playbook_paths if not (p in added_paths or added_paths.add(p))] self._n_cached_collection_paths = None # HACK: playbook CLI sets this relatively late, so we've already loaded some packages whose paths might depend on this. Fix those up. # NB: this should NOT be used for late additions; ideally we'd fix the playbook dir setup earlier in Ansible init # to prevent this from occurring for pkg in ['ansible_collections', 'ansible_collections.ansible']: self._reload_hack(pkg) def _reload_hack(self, fullname): m = sys.modules.get(fullname) if not m: return reload_module(m) def _get_loader(self, fullname, path=None): split_name = fullname.split('.') toplevel_pkg = split_name[0] module_to_find = split_name[-1] part_count = len(split_name) if toplevel_pkg not in ['ansible', 'ansible_collections']: # not interested in anything other than ansible_collections (and limited cases under ansible) return None # sanity check what we're getting from import, canonicalize path values if part_count == 1: if path: raise ValueError('path should not be specified for top-level packages (trying to find {0})'.format(fullname)) else: # seed the path to the configured collection roots path = self._n_collection_paths if part_count > 1 and path is None: raise ValueError('path must be specified for subpackages (trying to find {0})'.format(fullname)) if toplevel_pkg == 'ansible': # something under the ansible package, delegate to our internal loader in case of redirections initialize_loader = _AnsibleInternalRedirectLoader elif part_count == 1: initialize_loader = _AnsibleCollectionRootPkgLoader elif part_count == 2: # ns pkg eg, ansible_collections, ansible_collections.somens initialize_loader = _AnsibleCollectionNSPkgLoader elif part_count == 3: # collection pkg eg, ansible_collections.somens.somecoll initialize_loader = _AnsibleCollectionPkgLoader else: # anything below the collection initialize_loader = _AnsibleCollectionLoader # NB: actual "find"ing is delegated to the constructors on the various loaders; they'll ImportError if not found try: return initialize_loader(fullname=fullname, path_list=path) except ImportError: # TODO: log attempt to load context return None def find_module(self, fullname, path=None): # Figure out what's being asked for, and delegate to a special-purpose loader return self._get_loader(fullname, path) def find_spec(self, fullname, path, target=None): loader = self._get_loader(fullname, path) if loader is None: return None spec = spec_from_loader(fullname, loader) if spec is not None and hasattr(loader, '_subpackage_search_paths'): spec.submodule_search_locations = loader._subpackage_search_paths return spec # Implements a path_hook finder for iter_modules (since it's only path based). This finder does not need to actually # function as a finder in most cases, since our meta_path finder is consulted first for *almost* everything, except # pkgutil.iter_modules, and under py2, pkgutil.get_data if the parent package passed has not been loaded yet. class _AnsiblePathHookFinder: def __init__(self, collection_finder, pathctx): # when called from a path_hook, find_module doesn't usually get the path arg, so this provides our context self._pathctx = to_native(pathctx) self._collection_finder = collection_finder if PY3: # cache the native FileFinder (take advantage of its filesystem cache for future find/load requests) self._file_finder = None # class init is fun- this method has a self arg that won't get used def _get_filefinder_path_hook(self=None): _file_finder_hook = None if PY3: # try to find the FileFinder hook to call for fallback path-based imports in Py3 _file_finder_hook = [ph for ph in sys.path_hooks if 'FileFinder' in repr(ph)] if len(_file_finder_hook) != 1: raise Exception('need exactly one FileFinder import hook (found {0})'.format(len(_file_finder_hook))) _file_finder_hook = _file_finder_hook[0] return _file_finder_hook _filefinder_path_hook = _get_filefinder_path_hook() def _get_finder(self, fullname): split_name = fullname.split('.') toplevel_pkg = split_name[0] if toplevel_pkg == 'ansible_collections': # collections content? delegate to the collection finder return self._collection_finder else: # Something else; we'd normally restrict this to `ansible` descendent modules so that any weird loader # behavior that arbitrary Python modules have can be serviced by those loaders. In some dev/test # scenarios (eg a venv under a collection) our path_hook signs us up to load non-Ansible things, and # it's too late by the time we've reached this point, but also too expensive for the path_hook to figure # out what we *shouldn't* be loading with the limited info it has. So we'll just delegate to the # normal path-based loader as best we can to service it. This also allows us to take advantage of Python's # built-in FS caching and byte-compilation for most things. if PY3: # create or consult our cached file finder for this path if not self._file_finder: try: self._file_finder = _AnsiblePathHookFinder._filefinder_path_hook(self._pathctx) except ImportError: # FUTURE: log at a high logging level? This is normal for things like python36.zip on the path, but # might not be in some other situation... return None return self._file_finder # call py2's internal loader return pkgutil.ImpImporter(self._pathctx) def find_module(self, fullname, path=None): # we ignore the passed in path here- use what we got from the path hook init finder = self._get_finder(fullname) if finder is None: return None elif HAS_FILE_FINDER and isinstance(finder, FileFinder): # this codepath is erroneously used under some cases in py3, # and the find_module method on FileFinder does not accept the path arg # see https://github.com/pypa/setuptools/pull/2918 return finder.find_module(fullname) else: return finder.find_module(fullname, path=[self._pathctx]) def find_spec(self, fullname, target=None): split_name = fullname.split('.') toplevel_pkg = split_name[0] finder = self._get_finder(fullname) if finder is None: return None elif toplevel_pkg == 'ansible_collections': return finder.find_spec(fullname, path=[self._pathctx]) else: return finder.find_spec(fullname) def iter_modules(self, prefix): # NB: this currently represents only what's on disk, and does not handle package redirection return _iter_modules_impl([self._pathctx], prefix) def __repr__(self): return "{0}(path='{1}')".format(self.__class__.__name__, self._pathctx) class _AnsibleCollectionPkgLoaderBase: _allows_package_code = False def __init__(self, fullname, path_list=None): self._fullname = fullname self._redirect_module = None self._split_name = fullname.split('.') self._rpart_name = fullname.rpartition('.') self._parent_package_name = self._rpart_name[0] # eg ansible_collections for ansible_collections.somens, '' for toplevel self._package_to_load = self._rpart_name[2] # eg somens for ansible_collections.somens self._source_code_path = None self._decoded_source = None self._compiled_code = None self._validate_args() self._candidate_paths = self._get_candidate_paths([to_native(p) for p in path_list]) self._subpackage_search_paths = self._get_subpackage_search_paths(self._candidate_paths) self._validate_final() # allow subclasses to validate args and sniff split values before we start digging around def _validate_args(self): if self._split_name[0] != 'ansible_collections': raise ImportError('this loader can only load packages from the ansible_collections package, not {0}'.format(self._fullname)) # allow subclasses to customize candidate path filtering def _get_candidate_paths(self, path_list): return [os.path.join(p, self._package_to_load) for p in path_list] # allow subclasses to customize finding paths def _get_subpackage_search_paths(self, candidate_paths): # filter candidate paths for existence (NB: silently ignoring package init code and same-named modules) return [p for p in candidate_paths if os.path.isdir(to_bytes(p))] # allow subclasses to customize state validation/manipulation before we return the loader instance def _validate_final(self): return @staticmethod @contextmanager def _new_or_existing_module(name, **kwargs): # handle all-or-nothing sys.modules creation/use-existing/delete-on-exception-if-created behavior created_module = False module = sys.modules.get(name) try: if not module: module = ModuleType(name) created_module = True sys.modules[name] = module # always override the values passed, except name (allow reference aliasing) for attr, value in kwargs.items(): setattr(module, attr, value) yield module except Exception: if created_module: if sys.modules.get(name): sys.modules.pop(name) raise # basic module/package location support # NB: this does not support distributed packages! @staticmethod def _module_file_from_path(leaf_name, path): has_code = True package_path = os.path.join(to_native(path), to_native(leaf_name)) module_path = None # if the submodule is a package, assemble valid submodule paths, but stop looking for a module if os.path.isdir(to_bytes(package_path)): # is there a package init? module_path = os.path.join(package_path, '__init__.py') if not os.path.isfile(to_bytes(module_path)): module_path = os.path.join(package_path, '__synthetic__') has_code = False else: module_path = package_path + '.py' package_path = None if not os.path.isfile(to_bytes(module_path)): raise ImportError('{0} not found at {1}'.format(leaf_name, path)) return module_path, has_code, package_path def get_resource_reader(self, fullname): return _AnsibleTraversableResources(fullname, self) def exec_module(self, module): # short-circuit redirect; avoid reinitializing existing modules if self._redirect_module: return # execute the module's code in its namespace code_obj = self.get_code(self._fullname) if code_obj is not None: # things like NS packages that can't have code on disk will return None exec(code_obj, module.__dict__) def create_module(self, spec): # short-circuit redirect; we've already imported the redirected module, so just alias it and return it if self._redirect_module: return self._redirect_module else: return None def load_module(self, fullname): # short-circuit redirect; we've already imported the redirected module, so just alias it and return it if self._redirect_module: sys.modules[self._fullname] = self._redirect_module return self._redirect_module # we're actually loading a module/package module_attrs = dict( __loader__=self, __file__=self.get_filename(fullname), __package__=self._parent_package_name # sane default for non-packages ) # eg, I am a package if self._subpackage_search_paths is not None: # empty is legal module_attrs['__path__'] = self._subpackage_search_paths module_attrs['__package__'] = fullname # per PEP366 with self._new_or_existing_module(fullname, **module_attrs) as module: # execute the module's code in its namespace code_obj = self.get_code(fullname) if code_obj is not None: # things like NS packages that can't have code on disk will return None exec(code_obj, module.__dict__) return module def is_package(self, fullname): if fullname != self._fullname: raise ValueError('this loader cannot answer is_package for {0}, only {1}'.format(fullname, self._fullname)) return self._subpackage_search_paths is not None def get_source(self, fullname): if self._decoded_source: return self._decoded_source if fullname != self._fullname: raise ValueError('this loader cannot load source for {0}, only {1}'.format(fullname, self._fullname)) if not self._source_code_path: return None # FIXME: what do we want encoding/newline requirements to be? self._decoded_source = self.get_data(self._source_code_path) return self._decoded_source def get_data(self, path): if not path: raise ValueError('a path must be specified') # TODO: ensure we're being asked for a path below something we own # TODO: try to handle redirects internally? if not path[0] == '/': # relative to current package, search package paths if possible (this may not be necessary) # candidate_paths = [os.path.join(ssp, path) for ssp in self._subpackage_search_paths] raise ValueError('relative resource paths not supported') else: candidate_paths = [path] for p in candidate_paths: b_path = to_bytes(p) if os.path.isfile(b_path): with open(b_path, 'rb') as fd: return fd.read() # HACK: if caller asks for __init__.py and the parent dir exists, return empty string (this keep consistency # with "collection subpackages don't require __init__.py" working everywhere with get_data elif b_path.endswith(b'__init__.py') and os.path.isdir(os.path.dirname(b_path)): return '' return None def _synthetic_filename(self, fullname): return SYNTHETIC_PACKAGE_NAME def get_filename(self, fullname): if fullname != self._fullname: raise ValueError('this loader cannot find files for {0}, only {1}'.format(fullname, self._fullname)) filename = self._source_code_path if not filename and self.is_package(fullname): if len(self._subpackage_search_paths) == 1: filename = os.path.join(self._subpackage_search_paths[0], '__synthetic__') else: filename = self._synthetic_filename(fullname) return filename def get_code(self, fullname): if self._compiled_code: return self._compiled_code # this may or may not be an actual filename, but it's the value we'll use for __file__ filename = self.get_filename(fullname) if not filename: filename = '' source_code = self.get_source(fullname) # for things like synthetic modules that really have no source on disk, don't return a code object at all # vs things like an empty package init (which has an empty string source on disk) if source_code is None: return None self._compiled_code = compile(source=source_code, filename=filename, mode='exec', flags=0, dont_inherit=True) return self._compiled_code def iter_modules(self, prefix): return _iter_modules_impl(self._subpackage_search_paths, prefix) def __repr__(self): return '{0}(path={1})'.format(self.__class__.__name__, self._subpackage_search_paths or self._source_code_path) class _AnsibleCollectionRootPkgLoader(_AnsibleCollectionPkgLoaderBase): def _validate_args(self): super(_AnsibleCollectionRootPkgLoader, self)._validate_args() if len(self._split_name) != 1: raise ImportError('this loader can only load the ansible_collections toplevel package, not {0}'.format(self._fullname)) # Implements Ansible's custom namespace package support. # The ansible_collections package and one level down (collections namespaces) are Python namespace packages # that search across all configured collection roots. The collection package (two levels down) is the first one found # on the configured collection root path, and Python namespace package aggregation is not allowed at or below # the collection. Implements implicit package (package dir) support for both Py2/3. Package init code is ignored # by this loader. class _AnsibleCollectionNSPkgLoader(_AnsibleCollectionPkgLoaderBase): def _validate_args(self): super(_AnsibleCollectionNSPkgLoader, self)._validate_args() if len(self._split_name) != 2: raise ImportError('this loader can only load collections namespace packages, not {0}'.format(self._fullname)) def _validate_final(self): # special-case the `ansible` namespace, since `ansible.builtin` is magical if not self._subpackage_search_paths and self._package_to_load != 'ansible': raise ImportError('no {0} found in {1}'.format(self._package_to_load, self._candidate_paths)) # handles locating the actual collection package and associated metadata class _AnsibleCollectionPkgLoader(_AnsibleCollectionPkgLoaderBase): def _validate_args(self): super(_AnsibleCollectionPkgLoader, self)._validate_args() if len(self._split_name) != 3: raise ImportError('this loader can only load collection packages, not {0}'.format(self._fullname)) def _validate_final(self): if self._split_name[1:3] == ['ansible', 'builtin']: # we don't want to allow this one to have on-disk search capability self._subpackage_search_paths = [] elif not self._subpackage_search_paths: raise ImportError('no {0} found in {1}'.format(self._package_to_load, self._candidate_paths)) else: # only search within the first collection we found self._subpackage_search_paths = [self._subpackage_search_paths[0]] def _load_module(self, module): if not _meta_yml_to_dict: raise ValueError('ansible.utils.collection_loader._meta_yml_to_dict is not set') module._collection_meta = {} # TODO: load collection metadata, cache in __loader__ state collection_name = '.'.join(self._split_name[1:3]) if collection_name == 'ansible.builtin': # ansible.builtin is a synthetic collection, get its routing config from the Ansible distro ansible_pkg_path = os.path.dirname(import_module('ansible').__file__) metadata_path = os.path.join(ansible_pkg_path, 'config/ansible_builtin_runtime.yml') with open(to_bytes(metadata_path), 'rb') as fd: raw_routing = fd.read() else: b_routing_meta_path = to_bytes(os.path.join(module.__path__[0], 'meta/runtime.yml')) if os.path.isfile(b_routing_meta_path): with open(b_routing_meta_path, 'rb') as fd: raw_routing = fd.read() else: raw_routing = '' try: if raw_routing: routing_dict = _meta_yml_to_dict(raw_routing, (collection_name, 'runtime.yml')) module._collection_meta = self._canonicalize_meta(routing_dict) except Exception as ex: raise ValueError('error parsing collection metadata: {0}'.format(to_native(ex))) AnsibleCollectionConfig.on_collection_load.fire(collection_name=collection_name, collection_path=os.path.dirname(module.__file__)) return module def exec_module(self, module): super(_AnsibleCollectionPkgLoader, self).exec_module(module) self._load_module(module) def create_module(self, spec): return None def load_module(self, fullname): module = super(_AnsibleCollectionPkgLoader, self).load_module(fullname) return self._load_module(module) def _canonicalize_meta(self, meta_dict): # TODO: rewrite import keys and all redirect targets that start with .. (current namespace) and . (current collection) # OR we could do it all on the fly? # if not meta_dict: # return {} # # ns_name = '.'.join(self._split_name[0:2]) # collection_name = '.'.join(self._split_name[0:3]) # # # # for routing_type, routing_type_dict in iteritems(meta_dict.get('plugin_routing', {})): # for plugin_key, plugin_dict in iteritems(routing_type_dict): # redirect = plugin_dict.get('redirect', '') # if redirect.startswith('..'): # redirect = redirect[2:] return meta_dict # loads everything under a collection, including handling redirections defined by the collection class _AnsibleCollectionLoader(_AnsibleCollectionPkgLoaderBase): # HACK: stash this in a better place _redirected_package_map = {} # type: dict[str, str] _allows_package_code = True def _validate_args(self): super(_AnsibleCollectionLoader, self)._validate_args() if len(self._split_name) < 4: raise ValueError('this loader is only for sub-collection modules/packages, not {0}'.format(self._fullname)) def _get_candidate_paths(self, path_list): if len(path_list) != 1 and self._split_name[1:3] != ['ansible', 'builtin']: raise ValueError('this loader requires exactly one path to search') return path_list def _get_subpackage_search_paths(self, candidate_paths): collection_name = '.'.join(self._split_name[1:3]) collection_meta = _get_collection_metadata(collection_name) # check for explicit redirection, as well as ancestor package-level redirection (only load the actual code once!) redirect = None explicit_redirect = False routing_entry = _nested_dict_get(collection_meta, ['import_redirection', self._fullname]) if routing_entry: redirect = routing_entry.get('redirect') if redirect: explicit_redirect = True else: redirect = _get_ancestor_redirect(self._redirected_package_map, self._fullname) # NB: package level redirection requires hooking all future imports beneath the redirected source package # in order to ensure sanity on future relative imports. We always import everything under its "real" name, # then add a sys.modules entry with the redirected name using the same module instance. If we naively imported # the source for each redirection, most submodules would import OK, but we'd have N runtime copies of the module # (one for each name), and relative imports that ascend above the redirected package would break (since they'd # see the redirected ancestor package contents instead of the package where they actually live). if redirect: # FIXME: wrap this so we can be explicit about a failed redirection self._redirect_module = import_module(redirect) if explicit_redirect and hasattr(self._redirect_module, '__path__') and self._redirect_module.__path__: # if the import target looks like a package, store its name so we can rewrite future descendent loads self._redirected_package_map[self._fullname] = redirect # if we redirected, don't do any further custom package logic return None # we're not doing a redirect- try to find what we need to actually load a module/package # this will raise ImportError if we can't find the requested module/package at all if not candidate_paths: # noplace to look, just ImportError raise ImportError('package has no paths') found_path, has_code, package_path = self._module_file_from_path(self._package_to_load, candidate_paths[0]) # still here? we found something to load... if has_code: self._source_code_path = found_path if package_path: return [package_path] # always needs to be a list return None # This loader only answers for intercepted Ansible Python modules. Normal imports will fail here and be picked up later # by our path_hook importer (which proxies the built-in import mechanisms, allowing normal caching etc to occur) class _AnsibleInternalRedirectLoader: def __init__(self, fullname, path_list): self._redirect = None split_name = fullname.split('.') toplevel_pkg = split_name[0] module_to_load = split_name[-1] if toplevel_pkg != 'ansible': raise ImportError('not interested') builtin_meta = _get_collection_metadata('ansible.builtin') routing_entry = _nested_dict_get(builtin_meta, ['import_redirection', fullname]) if routing_entry: self._redirect = routing_entry.get('redirect') if not self._redirect: raise ImportError('not redirected, go ask path_hook') def get_resource_reader(self, fullname): return _AnsibleTraversableResources(fullname, self) def exec_module(self, module): # should never see this if not self._redirect: raise ValueError('no redirect found for {0}'.format(module.__spec__.name)) # Replace the module with the redirect sys.modules[module.__spec__.name] = import_module(self._redirect) def create_module(self, spec): return None def load_module(self, fullname): # since we're delegating to other loaders, this should only be called for internal redirects where we answered # find_module with this loader, in which case we'll just directly import the redirection target, insert it into # sys.modules under the name it was requested by, and return the original module. # should never see this if not self._redirect: raise ValueError('no redirect found for {0}'.format(fullname)) # FIXME: smuggle redirection context, provide warning/error that we tried and failed to redirect mod = import_module(self._redirect) sys.modules[fullname] = mod return mod class AnsibleCollectionRef: # FUTURE: introspect plugin loaders to get these dynamically? VALID_REF_TYPES = frozenset(to_text(r) for r in ['action', 'become', 'cache', 'callback', 'cliconf', 'connection', 'doc_fragments', 'filter', 'httpapi', 'inventory', 'lookup', 'module_utils', 'modules', 'netconf', 'role', 'shell', 'strategy', 'terminal', 'test', 'vars', 'playbook']) # FIXME: tighten this up to match Python identifier reqs, etc VALID_SUBDIRS_RE = re.compile(to_text(r'^\w+(\.\w+)*$')) VALID_FQCR_RE = re.compile(to_text(r'^\w+(\.\w+){2,}$')) # can have 0-N included subdirs as well def __init__(self, collection_name, subdirs, resource, ref_type): """ Create an AnsibleCollectionRef from components :param collection_name: a collection name of the form 'namespace.collectionname' :param subdirs: optional subdir segments to be appended below the plugin type (eg, 'subdir1.subdir2') :param resource: the name of the resource being references (eg, 'mymodule', 'someaction', 'a_role') :param ref_type: the type of the reference, eg 'module', 'role', 'doc_fragment' """ collection_name = to_text(collection_name, errors='strict') if subdirs is not None: subdirs = to_text(subdirs, errors='strict') resource = to_text(resource, errors='strict') ref_type = to_text(ref_type, errors='strict') if not self.is_valid_collection_name(collection_name): raise ValueError('invalid collection name (must be of the form namespace.collection): {0}'.format(to_native(collection_name))) if ref_type not in self.VALID_REF_TYPES: raise ValueError('invalid collection ref_type: {0}'.format(ref_type)) self.collection = collection_name if subdirs: if not re.match(self.VALID_SUBDIRS_RE, subdirs): raise ValueError('invalid subdirs entry: {0} (must be empty/None or of the form subdir1.subdir2)'.format(to_native(subdirs))) self.subdirs = subdirs else: self.subdirs = u'' self.resource = resource self.ref_type = ref_type package_components = [u'ansible_collections', self.collection] fqcr_components = [self.collection] self.n_python_collection_package_name = to_native('.'.join(package_components)) if self.ref_type == u'role': package_components.append(u'roles') elif self.ref_type == u'playbook': package_components.append(u'playbooks') else: # we assume it's a plugin package_components += [u'plugins', self.ref_type] if self.subdirs: package_components.append(self.subdirs) fqcr_components.append(self.subdirs) if self.ref_type in (u'role', u'playbook'): # playbooks and roles are their own resource package_components.append(self.resource) fqcr_components.append(self.resource) self.n_python_package_name = to_native('.'.join(package_components)) self._fqcr = u'.'.join(fqcr_components) def __repr__(self): return 'AnsibleCollectionRef(collection={0!r}, subdirs={1!r}, resource={2!r})'.format(self.collection, self.subdirs, self.resource) @property def fqcr(self): return self._fqcr @staticmethod def from_fqcr(ref, ref_type): """ Parse a string as a fully-qualified collection reference, raises ValueError if invalid :param ref: collection reference to parse (a valid ref is of the form 'ns.coll.resource' or 'ns.coll.subdir1.subdir2.resource') :param ref_type: the type of the reference, eg 'module', 'role', 'doc_fragment' :return: a populated AnsibleCollectionRef object """ # assuming the fq_name is of the form (ns).(coll).(optional_subdir_N).(resource_name), # we split the resource name off the right, split ns and coll off the left, and we're left with any optional # subdirs that need to be added back below the plugin-specific subdir we'll add. So: # ns.coll.resource -> ansible_collections.ns.coll.plugins.(plugintype).resource # ns.coll.subdir1.resource -> ansible_collections.ns.coll.plugins.subdir1.(plugintype).resource # ns.coll.rolename -> ansible_collections.ns.coll.roles.rolename if not AnsibleCollectionRef.is_valid_fqcr(ref): raise ValueError('{0} is not a valid collection reference'.format(to_native(ref))) ref = to_text(ref, errors='strict') ref_type = to_text(ref_type, errors='strict') ext = '' if ref_type == u'playbook' and ref.endswith(PB_EXTENSIONS): resource_splitname = ref.rsplit(u'.', 2) package_remnant = resource_splitname[0] resource = resource_splitname[1] ext = '.' + resource_splitname[2] else: resource_splitname = ref.rsplit(u'.', 1) package_remnant = resource_splitname[0] resource = resource_splitname[1] # split the left two components of the collection package name off, anything remaining is plugin-type # specific subdirs to be added back on below the plugin type package_splitname = package_remnant.split(u'.', 2) if len(package_splitname) == 3: subdirs = package_splitname[2] else: subdirs = u'' collection_name = u'.'.join(package_splitname[0:2]) return AnsibleCollectionRef(collection_name, subdirs, resource + ext, ref_type) @staticmethod def try_parse_fqcr(ref, ref_type): """ Attempt to parse a string as a fully-qualified collection reference, returning None on failure (instead of raising an error) :param ref: collection reference to parse (a valid ref is of the form 'ns.coll.resource' or 'ns.coll.subdir1.subdir2.resource') :param ref_type: the type of the reference, eg 'module', 'role', 'doc_fragment' :return: a populated AnsibleCollectionRef object on successful parsing, else None """ try: return AnsibleCollectionRef.from_fqcr(ref, ref_type) except ValueError: pass @staticmethod def legacy_plugin_dir_to_plugin_type(legacy_plugin_dir_name): """ Utility method to convert from a PluginLoader dir name to a plugin ref_type :param legacy_plugin_dir_name: PluginLoader dir name (eg, 'action_plugins', 'library') :return: the corresponding plugin ref_type (eg, 'action', 'role') """ legacy_plugin_dir_name = to_text(legacy_plugin_dir_name) plugin_type = legacy_plugin_dir_name.removesuffix(u'_plugins') if plugin_type == u'library': plugin_type = u'modules' if plugin_type not in AnsibleCollectionRef.VALID_REF_TYPES: raise ValueError('{0} cannot be mapped to a valid collection ref type'.format(to_native(legacy_plugin_dir_name))) return plugin_type @staticmethod def is_valid_fqcr(ref, ref_type=None): """ Validates if is string is a well-formed fully-qualified collection reference (does not look up the collection itself) :param ref: candidate collection reference to validate (a valid ref is of the form 'ns.coll.resource' or 'ns.coll.subdir1.subdir2.resource') :param ref_type: optional reference type to enable deeper validation, eg 'module', 'role', 'doc_fragment' :return: True if the collection ref passed is well-formed, False otherwise """ ref = to_text(ref) if not ref_type: return bool(re.match(AnsibleCollectionRef.VALID_FQCR_RE, ref)) return bool(AnsibleCollectionRef.try_parse_fqcr(ref, ref_type)) @staticmethod def is_valid_collection_name(collection_name): """ Validates if the given string is a well-formed collection name (does not look up the collection itself) :param collection_name: candidate collection name to validate (a valid name is of the form 'ns.collname') :return: True if the collection name passed is well-formed, False otherwise """ collection_name = to_text(collection_name) if collection_name.count(u'.') != 1: return False return all( # NOTE: keywords and identifiers are different in different Pythons not iskeyword(ns_or_name) and is_python_identifier(ns_or_name) for ns_or_name in collection_name.split(u'.') ) def _get_collection_path(collection_name): collection_name = to_native(collection_name) if not collection_name or not isinstance(collection_name, string_types) or len(collection_name.split('.')) != 2: raise ValueError('collection_name must be a non-empty string of the form namespace.collection') try: collection_pkg = import_module('ansible_collections.' + collection_name) except ImportError: raise ValueError('unable to locate collection {0}'.format(collection_name)) return to_native(os.path.dirname(to_bytes(collection_pkg.__file__))) def _get_collection_playbook_path(playbook): acr = AnsibleCollectionRef.try_parse_fqcr(playbook, u'playbook') if acr: try: # get_collection_path pkg = import_module(acr.n_python_collection_package_name) except (IOError, ModuleNotFoundError) as e: # leaving e as debug target, even though not used in normal code pkg = None if pkg: cpath = os.path.join(sys.modules[acr.n_python_collection_package_name].__file__.replace('__synthetic__', 'playbooks')) if acr.subdirs: paths = [to_native(x) for x in acr.subdirs.split(u'.')] paths.insert(0, cpath) cpath = os.path.join(*paths) path = os.path.join(cpath, to_native(acr.resource)) if os.path.exists(to_bytes(path)): return acr.resource, path, acr.collection elif not acr.resource.endswith(PB_EXTENSIONS): for ext in PB_EXTENSIONS: path = os.path.join(cpath, to_native(acr.resource + ext)) if os.path.exists(to_bytes(path)): return acr.resource, path, acr.collection return None def _get_collection_role_path(role_name, collection_list=None): return _get_collection_resource_path(role_name, u'role', collection_list) def _get_collection_resource_path(name, ref_type, collection_list=None): if ref_type == u'playbook': # they are handled a bit diff due to 'extension variance' and no collection_list return _get_collection_playbook_path(name) acr = AnsibleCollectionRef.try_parse_fqcr(name, ref_type) if acr: # looks like a valid qualified collection ref; skip the collection_list collection_list = [acr.collection] subdirs = acr.subdirs resource = acr.resource elif not collection_list: return None # not a FQ and no collection search list spec'd, nothing to do else: resource = name # treat as unqualified, loop through the collection search list to try and resolve subdirs = '' for collection_name in collection_list: try: acr = AnsibleCollectionRef(collection_name=collection_name, subdirs=subdirs, resource=resource, ref_type=ref_type) # FIXME: error handling/logging; need to catch any import failures and move along pkg = import_module(acr.n_python_package_name) if pkg is not None: # the package is now loaded, get the collection's package and ask where it lives path = os.path.dirname(to_bytes(sys.modules[acr.n_python_package_name].__file__, errors='surrogate_or_strict')) return resource, to_text(path, errors='surrogate_or_strict'), collection_name except (IOError, ModuleNotFoundError) as e: continue except Exception as ex: # FIXME: pick out typical import errors first, then error logging continue return None def _get_collection_name_from_path(path): """ Return the containing collection name for a given path, or None if the path is not below a configured collection, or the collection cannot be loaded (eg, the collection is masked by another of the same name higher in the configured collection roots). :param path: path to evaluate for collection containment :return: collection name or None """ # ensure we compare full paths since pkg path will be abspath path = to_native(os.path.abspath(to_bytes(path))) path_parts = path.split('/') if path_parts.count('ansible_collections') != 1: return None ac_pos = path_parts.index('ansible_collections') # make sure it's followed by at least a namespace and collection name if len(path_parts) < ac_pos + 3: return None candidate_collection_name = '.'.join(path_parts[ac_pos + 1:ac_pos + 3]) try: # we've got a name for it, now see if the path prefix matches what the loader sees imported_pkg_path = to_native(os.path.dirname(to_bytes(import_module('ansible_collections.' + candidate_collection_name).__file__))) except ImportError: return None # reassemble the original path prefix up the collection name, and it should match what we just imported. If not # this is probably a collection root that's not configured. original_path_prefix = os.path.join('/', *path_parts[0:ac_pos + 3]) imported_pkg_path = to_native(os.path.abspath(to_bytes(imported_pkg_path))) if original_path_prefix != imported_pkg_path: return None return candidate_collection_name def _get_import_redirect(collection_meta_dict, fullname): if not collection_meta_dict: return None return _nested_dict_get(collection_meta_dict, ['import_redirection', fullname, 'redirect']) def _get_ancestor_redirect(redirected_package_map, fullname): # walk the requested module's ancestor packages to see if any have been previously redirected cur_pkg = fullname while cur_pkg: cur_pkg = cur_pkg.rpartition('.')[0] ancestor_redirect = redirected_package_map.get(cur_pkg) if ancestor_redirect: # rewrite the prefix on fullname so we import the target first, then alias it redirect = ancestor_redirect + fullname[len(cur_pkg):] return redirect return None def _nested_dict_get(root_dict, key_list): cur_value = root_dict for key in key_list: cur_value = cur_value.get(key) if not cur_value: return None return cur_value def _iter_modules_impl(paths, prefix=''): # NB: this currently only iterates what's on disk- redirected modules are not considered if not prefix: prefix = '' else: prefix = to_native(prefix) # yield (module_loader, name, ispkg) for each module/pkg under path # TODO: implement ignore/silent catch for unreadable? for b_path in map(to_bytes, paths): if not os.path.isdir(b_path): continue for b_basename in sorted(os.listdir(b_path)): b_candidate_module_path = os.path.join(b_path, b_basename) if os.path.isdir(b_candidate_module_path): # exclude things that obviously aren't Python package dirs # FIXME: this dir is adjustable in py3.8+, check for it if b'.' in b_basename or b_basename == b'__pycache__': continue # TODO: proper string handling? yield prefix + to_native(b_basename), True else: # FIXME: match builtin ordering for package/dir/file, support compiled? if b_basename.endswith(b'.py') and b_basename != b'__init__.py': yield prefix + to_native(os.path.splitext(b_basename)[0]), False def _get_collection_metadata(collection_name): collection_name = to_native(collection_name) if not collection_name or not isinstance(collection_name, string_types) or len(collection_name.split('.')) != 2: raise ValueError('collection_name must be a non-empty string of the form namespace.collection') try: collection_pkg = import_module('ansible_collections.' + collection_name) except ImportError: raise ValueError('unable to locate collection {0}'.format(collection_name)) _collection_meta = getattr(collection_pkg, '_collection_meta', None) if _collection_meta is None: raise ValueError('collection metadata was not loaded for collection {0}'.format(collection_name)) return _collection_meta