From bdf6f1b9a9e55dd264245463ea0b06f2c4d7b498 Mon Sep 17 00:00:00 2001 From: David Wilson Date: Sat, 17 Aug 2019 19:46:52 +0100 Subject: [PATCH] issue #590: rework ParentEnumerationMethod to recursively handle bad modules In the worst case it will start with sys.path and resolve everything from scratch. --- mitogen/master.py | 133 +++++++++++++++++++++++++----------- tests/module_finder_test.py | 39 +++++++++++ tests/responder_test.py | 10 +-- 3 files changed, 140 insertions(+), 42 deletions(-) diff --git a/mitogen/master.py b/mitogen/master.py index 11ef2b00..f9ddf3dd 100644 --- a/mitogen/master.py +++ b/mitogen/master.py @@ -535,18 +535,20 @@ class SysModulesMethod(FinderMethod): Find `fullname` using its :data:`__file__` attribute. """ module = sys.modules.get(fullname) - LOG.debug('_get_module_via_sys_modules(%r) -> %r', fullname, module) - if getattr(module, '__name__', None) != fullname: - LOG.debug('sys.modules[%r].__name__ does not match %r, assuming ' - 'this is a hacky module alias and ignoring it', - fullname, fullname) - return - if not isinstance(module, types.ModuleType): LOG.debug('%r: sys.modules[%r] absent or not a regular module', self, fullname) return + LOG.debug('_get_module_via_sys_modules(%r) -> %r', fullname, module) + alleged_name = getattr(module, '__name__', None) + if alleged_name != fullname: + LOG.debug('sys.modules[%r].__name__ is incorrect, assuming ' + 'this is a hacky module alias and ignoring it. ' + 'Got %r, module object: %r', + fullname, alleged_name, module) + return + path = _py_filename(getattr(module, '__file__', '')) if not path: return @@ -573,43 +575,57 @@ class SysModulesMethod(FinderMethod): class ParentEnumerationMethod(FinderMethod): """ Attempt to fetch source code by examining the module's (hopefully less - insane) parent package. Required for older versions of - :mod:`ansible.compat.six`, :mod:`plumbum.colors`, and Ansible 2.8 - :mod:`ansible.module_utils.distro`. + insane) parent package, and if no insane parents exist, simply use + :mod:`sys.path` to search for it from scratch on the filesystem using the + normal Python lookup mechanism. + + This is required for older versions of :mod:`ansible.compat.six`, + :mod:`plumbum.colors`, Ansible 2.8 :mod:`ansible.module_utils.distro` and + its submodule :mod:`ansible.module_utils.distro._distro`. + + When some package dynamically replaces itself in :data:`sys.modules`, but + only conditionally according to some program logic, it is possible that + children may attempt to load modules and subpackages from it that can no + longer be resolved by examining a (corrupted) parent. For cases like :mod:`ansible.module_utils.distro`, this must handle cases where a package transmuted itself into a totally unrelated module during - import and vice versa. + import and vice versa, where :data:`sys.modules` is replaced with junk that + makes it impossible to discover the loaded module using the in-memory + module object or any parent package's :data:`__path__`, since they have all + been overwritten. Some men just want to watch the world burn. """ - def find(self, fullname): + def _find_sane_parent(self, fullname): """ - See implementation for a description of how this works. + Iteratively search :data:`sys.modules` for the least indirect parent of + `fullname` that is loaded and contains a :data:`__path__` attribute. + + :return: + `(parent_name, path, modpath)` tuple, where: + + * `modname`: canonical name of the found package, or the empty + string if none is found. + * `search_path`: :data:`__path__` attribute of the least + indirect parent found, or :data:`None` if no indirect parent + was found. + * `modpath`: list of module name components leading from `path` + to the target module. """ - if fullname not in sys.modules: - # Don't attempt this unless a module really exists in sys.modules, - # else we could return junk. - return - - pkgname, _, modname = str_rpartition(to_text(fullname), u'.') - pkg = sys.modules.get(pkgname) - if pkg is None or not hasattr(pkg, '__file__'): - LOG.debug('%r: %r is not a package or lacks __file__ attribute', - self, pkgname) - return - - pkg_path = [os.path.dirname(pkg.__file__)] - try: - fp, path, (suffix, _, kind) = imp.find_module(modname, pkg_path) - except ImportError: - e = sys.exc_info()[1] - LOG.debug('%r: imp.find_module(%r, %r) -> %s', - self, modname, [pkg_path], e) - return None - - if kind == imp.PKG_DIRECTORY: - return self._found_package(fullname, path) - else: - return self._found_module(fullname, path, fp) + path = None + modpath = [] + while True: + pkgname, _, modname = str_rpartition(to_text(fullname), u'.') + modpath.insert(0, modname) + if not pkgname: + return [], None, modpath + + pkg = sys.modules.get(pkgname) + path = getattr(pkg, '__path__', None) + if pkg and path: + return pkgname.split('.'), path, modpath + + LOG.debug('%r: %r lacks __path__ attribute', self, pkgname) + fullname = pkgname def _found_package(self, fullname, path): path = os.path.join(path, '__init__.py') @@ -638,6 +654,47 @@ class ParentEnumerationMethod(FinderMethod): source = source.encode('utf-8') return path, source, is_pkg + def _find_one_component(self, modname, search_path): + try: + #fp, path, (suffix, _, kind) = imp.find_module(modname, search_path) + return imp.find_module(modname, search_path) + except ImportError: + e = sys.exc_info()[1] + LOG.debug('%r: imp.find_module(%r, %r) -> %s', + self, modname, [search_path], e) + return None + + def find(self, fullname): + """ + See implementation for a description of how this works. + """ + #if fullname not in sys.modules: + # Don't attempt this unless a module really exists in sys.modules, + # else we could return junk. + #return + + fullname = to_text(fullname) + modname, search_path, modpath = self._find_sane_parent(fullname) + while True: + tup = self._find_one_component(modpath.pop(0), search_path) + if tup is None: + return None + + fp, path, (suffix, _, kind) = tup + if modpath: + # Still more components to descent. Result must be a package + if fp: + fp.close() + if kind != imp.PKG_DIRECTORY: + LOG.debug('%r: %r appears to be child of non-package %r', + self, fullname, path) + return None + search_path = [path] + elif kind == imp.PKG_DIRECTORY: + return self._found_package(fullname, path) + else: + return self._found_module(fullname, path, fp) + class ModuleFinder(object): """ diff --git a/tests/module_finder_test.py b/tests/module_finder_test.py index e61c768f..fc3a17de 100644 --- a/tests/module_finder_test.py +++ b/tests/module_finder_test.py @@ -180,6 +180,7 @@ class GetModuleViaParentEnumerationTest(testlib.TestCase): 'pkg_like_ansible.module_utils.distro._distro' ) + # ensure we can resolve the subpackage. path, src, is_pkg = self.call('pkg_like_ansible.module_utils.distro') modpath = os.path.join(MODS_DIR, 'pkg_like_ansible/module_utils/distro/__init__.py') @@ -187,6 +188,44 @@ class GetModuleViaParentEnumerationTest(testlib.TestCase): self.assertEquals(src, open(modpath, 'rb').read()) self.assertEquals(is_pkg, True) + # ensure we can resolve a child of the subpackage. + path, src, is_pkg = self.call( + 'pkg_like_ansible.module_utils.distro._distro' + ) + modpath = os.path.join(MODS_DIR, + 'pkg_like_ansible/module_utils/distro/_distro.py') + self.assertEquals(path, modpath) + self.assertEquals(src, open(modpath, 'rb').read()) + self.assertEquals(is_pkg, False) + + def test_ansible_module_utils_system_distro_succeeds(self): + # #590: a package that turns itself into a module. + # #590: a package that turns itself into a module. + import pkg_like_ansible.module_utils.sys_distro as d + self.assertEquals(d.I_AM, "the system module that replaced the subpackage") + self.assertEquals( + sys.modules['pkg_like_ansible.module_utils.sys_distro'].__name__, + 'system_distro' + ) + + # ensure we can resolve the subpackage. + path, src, is_pkg = self.call('pkg_like_ansible.module_utils.sys_distro') + modpath = os.path.join(MODS_DIR, + 'pkg_like_ansible/module_utils/sys_distro/__init__.py') + self.assertEquals(path, modpath) + self.assertEquals(src, open(modpath, 'rb').read()) + self.assertEquals(is_pkg, True) + + # ensure we can resolve a child of the subpackage. + path, src, is_pkg = self.call( + 'pkg_like_ansible.module_utils.sys_distro._distro' + ) + modpath = os.path.join(MODS_DIR, + 'pkg_like_ansible/module_utils/sys_distro/_distro.py') + self.assertEquals(path, modpath) + self.assertEquals(src, open(modpath, 'rb').read()) + self.assertEquals(is_pkg, False) + class ResolveRelPathTest(testlib.TestCase): klass = mitogen.master.ModuleFinder diff --git a/tests/responder_test.py b/tests/responder_test.py index 285acd6f..60817747 100644 --- a/tests/responder_test.py +++ b/tests/responder_test.py @@ -158,14 +158,16 @@ class BrokenModulesTest(testlib.TestCase): self.assertEquals(1, len(router._async_route.mock_calls)) self.assertEquals(1, responder.get_module_count) - self.assertEquals(0, responder.good_load_module_count) - self.assertEquals(0, responder.good_load_module_size) - self.assertEquals(1, responder.bad_load_module_count) + self.assertEquals(1, responder.good_load_module_count) + self.assertEquals(7642, responder.good_load_module_size) + self.assertEquals(0, responder.bad_load_module_count) call = router._async_route.mock_calls[0] msg, = call[1] self.assertEquals(mitogen.core.LOAD_MODULE, msg.handle) - self.assertIsInstance(msg.unpickle(), tuple) + + tup = msg.unpickle() + self.assertIsInstance(tup, tuple) class ForwardTest(testlib.RouterMixin, testlib.TestCase):