issue #590: refactor ModuleFinder and teach it a new special case.

It is now possible to find both packages and modules when the
sys.modules[...] entry for the package/module is junk. Previously only
modules could be found in that situation.

This also refactors things to make writing better tests for all these
cases much simpler.
pull/595/head
David Wilson 6 years ago
parent 7a5c436a39
commit 875ff5c060

@ -36,6 +36,7 @@ contexts.
""" """
import dis import dis
import errno
import imp import imp
import inspect import inspect
import itertools import itertools
@ -142,6 +143,41 @@ def get_child_modules(path):
return [to_text(name) for _, name, _ in it] return [to_text(name) for _, name, _ in it]
def _looks_like_script(path):
"""
Return :data:`True` if the (possibly extensionless) file at `path`
resembles a Python script. For now we simply verify the file contains
ASCII text.
"""
try:
fp = open(path, 'rb')
except IOError:
e = sys.exc_info()[1]
if e.args[0] == errno.EISDIR:
return False
raise
try:
sample = fp.read(512).decode('latin-1')
return not set(sample).difference(string.printable)
finally:
fp.close()
def _py_filename(path):
if not path:
return None
if path[-4:] in ('.pyc', '.pyo'):
path = path.rstrip('co')
if path.endswith('.py'):
return path
if os.path.exists(path) and _looks_like_script(path):
return path
def _get_core_source(): def _get_core_source():
""" """
Master version of parent.get_core_source(). Master version of parent.get_core_source().
@ -368,56 +404,22 @@ class LogForwarder(object):
return 'LogForwarder(%r)' % (self._router,) return 'LogForwarder(%r)' % (self._router,)
class ModuleFinder(object): class FinderMethod(object):
"""
Given the name of a loaded module, make a best-effort attempt at finding
related modules likely needed by a child context requesting the original
module.
"""
def __init__(self):
#: Import machinery is expensive, keep :py:meth`:get_module_source`
#: results around.
self._found_cache = {}
#: Avoid repeated dependency scanning, which is expensive.
self._related_cache = {}
def __repr__(self): def __repr__(self):
return 'ModuleFinder()' return '%s()' % (type(self).__name__,)
def _looks_like_script(self, path): def find(self, fullname):
""" pass
Return :data:`True` if the (possibly extensionless) file at `path`
resembles a Python script. For now we simply verify the file contains
ASCII text.
"""
fp = open(path, 'rb')
try:
sample = fp.read(512).decode('latin-1')
return not set(sample).difference(string.printable)
finally:
fp.close()
def _py_filename(self, path):
if not path:
return None
if path[-4:] in ('.pyc', '.pyo'):
path = path.rstrip('co')
if path.endswith('.py'):
return path
if os.path.exists(path) and self._looks_like_script(path):
return path
def _get_main_module_defective_python_3x(self, fullname): class DefectivePython3xMainMethod(FinderMethod):
""" """
Recent versions of Python 3.x introduced an incomplete notion of Recent versions of Python 3.x introduced an incomplete notion of
importer specs, and in doing so created permanent asymmetry in the importer specs, and in doing so created permanent asymmetry in the
:mod:`pkgutil` interface handling for the `__main__` module. Therefore :mod:`pkgutil` interface handling for the `__main__` module. Therefore
we must handle `__main__` specially. we must handle `__main__` specially.
""" """
def find(self, fullname):
if fullname != '__main__': if fullname != '__main__':
return None return None
@ -426,7 +428,7 @@ class ModuleFinder(object):
return None return None
path = getattr(mod, '__file__', None) path = getattr(mod, '__file__', None)
if not (os.path.exists(path) and self._looks_like_script(path)): if not (os.path.exists(path) and _looks_like_script(path)):
return None return None
fp = open(path, 'rb') fp = open(path, 'rb')
@ -437,11 +439,13 @@ class ModuleFinder(object):
return path, source, False return path, source, False
def _get_module_via_pkgutil(self, fullname):
class PkgutilMethod(FinderMethod):
""" """
Attempt to fetch source code via pkgutil. In an ideal world, this would Attempt to fetch source code via pkgutil. In an ideal world, this would
be the only required implementation of get_module(). be the only required implementation of get_module().
""" """
def find(self, fullname):
try: try:
# Pre-'import spec' this returned None, in Python3.6 it raises # Pre-'import spec' this returned None, in Python3.6 it raises
# ImportError. # ImportError.
@ -458,7 +462,7 @@ class ModuleFinder(object):
return return
try: try:
path = self._py_filename(loader.get_filename(fullname)) path = _py_filename(loader.get_filename(fullname))
source = loader.get_source(fullname) source = loader.get_source(fullname)
is_pkg = loader.is_package(fullname) is_pkg = loader.is_package(fullname)
except (AttributeError, ImportError): except (AttributeError, ImportError):
@ -484,19 +488,27 @@ class ModuleFinder(object):
return path, source, is_pkg return path, source, is_pkg
def _get_module_via_sys_modules(self, fullname):
class SysModulesMethod(FinderMethod):
""" """
Attempt to fetch source code via sys.modules. This is specifically to Attempt to fetch source code via sys.modules. This is specifically to
support __main__, but it may catch a few more cases. support __main__, but it may catch a few more cases.
""" """
def find(self, fullname):
module = sys.modules.get(fullname) module = sys.modules.get(fullname)
LOG.debug('_get_module_via_sys_modules(%r) -> %r', fullname, module) LOG.debug('_get_module_via_sys_modules(%r) -> %r', fullname, module)
if getattr(module, '__name__', None) != fullname:
LOG.debug('sys.modules[%r].__name__ does not match %r, assuming '
'this is a hacky module alias and ignoring it',
fullname, fullname)
return
if not isinstance(module, types.ModuleType): if not isinstance(module, types.ModuleType):
LOG.debug('sys.modules[%r] absent or not a regular module', LOG.debug('sys.modules[%r] absent or not a regular module',
fullname) fullname)
return return
path = self._py_filename(getattr(module, '__file__', '')) path = _py_filename(getattr(module, '__file__', ''))
if not path: if not path:
return return
@ -517,12 +529,19 @@ class ModuleFinder(object):
return path, source, is_pkg return path, source, is_pkg
def _get_module_via_parent_enumeration(self, fullname):
class ParentEnumerationMethod(FinderMethod):
""" """
Attempt to fetch source code by examining the module's (hopefully less Attempt to fetch source code by examining the module's (hopefully less
insane) parent package. Required for older versions of insane) parent package. Required for older versions of
ansible.compat.six and plumbum.colors. ansible.compat.six and plumbum.colors, and Ansible 2.8
ansible.module_utils.distro.
For cases like module_utils.distro, this must handle cases where a package
transmuted itself into a totally unrelated module during import and vice
versa.
""" """
def find(self, fullname):
if fullname not in sys.modules: if fullname not in sys.modules:
# Don't attempt this unless a module really exists in sys.modules, # Don't attempt this unless a module really exists in sys.modules,
# else we could return junk. # else we could return junk.
@ -531,15 +550,38 @@ class ModuleFinder(object):
pkgname, _, modname = str_rpartition(to_text(fullname), u'.') pkgname, _, modname = str_rpartition(to_text(fullname), u'.')
pkg = sys.modules.get(pkgname) pkg = sys.modules.get(pkgname)
if pkg is None or not hasattr(pkg, '__file__'): if pkg is None or not hasattr(pkg, '__file__'):
LOG.debug('%r: %r is not a package or lacks __file__ attribute',
self, pkgname)
return return
pkg_path = os.path.dirname(pkg.__file__) pkg_path = [os.path.dirname(pkg.__file__)]
try: try:
fp, path, ext = imp.find_module(modname, [pkg_path]) fp, path, (suffix, _, kind) = imp.find_module(modname, pkg_path)
except ImportError:
e = sys.exc_info()[1]
LOG.debug('%r: imp.find_module(%r, %r) -> %s',
self, modname, [pkg_path], e)
return None
if kind == imp.PKG_DIRECTORY:
return self._found_package(fullname, path)
else:
return self._found_module(fullname, path, fp)
def _found_package(self, fullname, path):
    """
    Handle a package directory located by :meth:`find`: read the package's
    ``__init__.py`` and report it as a package.

    :param str fullname:
        Fully qualified name of the package being looked up.
    :param str path:
        Directory path of the package.
    :returns:
        Whatever :meth:`_found_module` returns for the package's
        ``__init__.py`` with ``is_pkg=True``, or :data:`None` if that file
        cannot be opened.
    """
    path = os.path.join(path, '__init__.py')
    LOG.debug('%r: %r is PKG_DIRECTORY: %r', self, fullname, path)
    try:
        fp = open(path, 'rb')
    except IOError:
        # A package directory need not contain __init__.py source (e.g. a
        # bytecode-only package). This finder is best-effort, so report
        # "not found" rather than letting the error escape to the caller.
        e = sys.exc_info()[1]
        LOG.debug('%r: unable to open %r: %s', self, path, e)
        return None

    # NOTE(review): _found_module is presumed to take ownership of fp and
    # close it; only part of its body is visible here -- confirm.
    return self._found_module(
        fullname=fullname,
        path=path,
        fp=fp,
        is_pkg=True,
    )
def _found_module(self, fullname, path, fp, is_pkg=False):
try: try:
path = self._py_filename(path) path = _py_filename(path)
if not path: if not path:
fp.close()
return return
source = fp.read() source = fp.read()
@ -551,10 +593,25 @@ class ModuleFinder(object):
# get_source() returns "string" according to PEP-302, which was # get_source() returns "string" according to PEP-302, which was
# reinterpreted for Python 3 to mean a Unicode string. # reinterpreted for Python 3 to mean a Unicode string.
source = source.encode('utf-8') source = source.encode('utf-8')
return path, source, False return path, source, is_pkg
except ImportError:
e = sys.exc_info()[1]
LOG.debug('imp.find_module(%r, %r) -> %s', modname, [pkg_path], e) class ModuleFinder(object):
"""
Given the name of a loaded module, make a best-effort attempt at finding
related modules likely needed by a child context requesting the original
module.
"""
def __init__(self):
#: Import machinery is expensive, keep :py:meth:`get_module_source`
#: results around.
self._found_cache = {}
#: Avoid repeated dependency scanning, which is expensive.
self._related_cache = {}
def __repr__(self):
return 'ModuleFinder()'
def add_source_override(self, fullname, path, source, is_pkg): def add_source_override(self, fullname, path, source, is_pkg):
""" """
@ -576,10 +633,10 @@ class ModuleFinder(object):
self._found_cache[fullname] = (path, source, is_pkg) self._found_cache[fullname] = (path, source, is_pkg)
get_module_methods = [ get_module_methods = [
_get_main_module_defective_python_3x, DefectivePython3xMainMethod(),
_get_module_via_pkgutil, PkgutilMethod(),
_get_module_via_sys_modules, SysModulesMethod(),
_get_module_via_parent_enumeration, ParentEnumerationMethod(),
] ]
def get_module_source(self, fullname): def get_module_source(self, fullname):
@ -595,7 +652,7 @@ class ModuleFinder(object):
return tup return tup
for method in self.get_module_methods: for method in self.get_module_methods:
tup = method(self, fullname) tup = method.find(fullname)
if tup: if tup:
#LOG.debug('%r returned %r', method, tup) #LOG.debug('%r returned %r', method, tup)
break break

Loading…
Cancel
Save