diff --git a/ansible_mitogen/module_finder.py b/ansible_mitogen/module_finder.py index a1870833..fd62afde 100644 --- a/ansible_mitogen/module_finder.py +++ b/ansible_mitogen/module_finder.py @@ -44,7 +44,7 @@ except ImportError: # Python < 3.4, PEP 302 Import Hooks import imp -import mitogen.master +import mitogen.imports LOG = logging.getLogger(__name__) @@ -146,7 +146,7 @@ def scan_fromlist(code): >>> list(scan_fromlist(code)) [(0, 'a'), (0, 'b.c'), (0, 'd.e.f'), (0, 'g.h'), (0, 'g.i')] """ - for level, modname_s, fromlist in mitogen.master.scan_code_imports(code): + for level, modname_s, fromlist in mitogen.imports.codeobj_imports(code): for name in fromlist: yield level, str('%s.%s' % (modname_s, name)) if not fromlist: @@ -172,7 +172,7 @@ def walk_imports(code, prefix=None): prefix = '' pattern = re.compile(r'(^|\.)(\w+)') start = len(prefix) - for _, name, fromlist in mitogen.master.scan_code_imports(code): + for _, name, fromlist in mitogen.imports.codeobj_imports(code): if not name.startswith(prefix): continue for match in pattern.finditer(name, start): diff --git a/docs/changelog.rst b/docs/changelog.rst index 410dee9b..407acc11 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -21,6 +21,9 @@ To avail of fixes in an unreleased version, please download a ZIP file In progress (unreleased) ------------------------ +* :gh:issue:`1325` :mod:`mitogen`: Refactor + ``mitogen.master.scan_code_imports()`` as + :func:`mitogen.imports.codeobj_imports` and speed-up by 1.5 - 2.5 x * :gh:issue:`1329` CI: Refactor and de-duplicate Github Actions workflow * :gh:issue:`1315` CI: macOS: Increase failed logins limit of test users * :gh:issue:`1325` tests: Improve ``master_test.ScanCodeImportsTest`` coverage diff --git a/mitogen/core.py b/mitogen/core.py index a548c72f..441743d4 100644 --- a/mitogen/core.py +++ b/mitogen/core.py @@ -1300,6 +1300,7 @@ class Importer(object): 'kubectl', 'fakessh', 'fork', + 'imports', 'jail', 'lxc', 'lxd', diff --git 
a/mitogen/imports/__init__.py b/mitogen/imports/__init__.py new file mode 100644 index 00000000..ecbdb795 --- /dev/null +++ b/mitogen/imports/__init__.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: 2025 Mitogen authors +# SPDX-License-Identifier: MIT +# !mitogen: minify_safe + +import sys + +if sys.version_info >= (3, 6): + from mitogen.imports._py36 import _code_imports +elif sys.version_info >= (2, 5): + from mitogen.imports._py2 import _code_imports_py25 as _code_imports +else: + from mitogen.imports._py2 import _code_imports_py24 as _code_imports + + +def codeobj_imports(co): + """ + Yield (level, modname, names) tuples by scanning the code object `co`. + + Top level `import mod` & `from mod import foo` statements are matched. + Those inside a `class ...` or `def ...` block are currently skipped. + + >>> co = compile('import a, b; from c import d, e as f', '', 'exec') + >>> list(codeobj_imports(co)) # doctest: +ELLIPSIS + [(..., 'a', ()), (..., 'b', ()), (..., 'c', ('d', 'e'))] + + :return: + Generator producing `(level, modname, names)` tuples, where: + + * `level`: + -1 implicit relative (Python 2.x default) + 0 absolute (Python 3.x, `from __future__ import absolute_import`) + >0 explicit relative (`from . import a`, `from ..b import c`) + * `modname`: Name of module to import, or to import `names` from. + * `names`: tuple of names in `from mod import ..`. 
+ """ + return _code_imports(co.co_code, co.co_consts, co.co_names) diff --git a/mitogen/imports/_py2.py b/mitogen/imports/_py2.py new file mode 100644 index 00000000..46f559e3 --- /dev/null +++ b/mitogen/imports/_py2.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: 2025 Mitogen authors +# SPDX-License-Identifier: MIT +# !mitogen: minify_safe + +import array +import itertools +import opcode + + +IMPORT_NAME = opcode.opmap['IMPORT_NAME'] +LOAD_CONST = opcode.opmap['LOAD_CONST'] + + +def _opargs(code, _have_arg=opcode.HAVE_ARGUMENT): + it = iter(array.array('B', code)) + nexti = it.next + for i in it: + if i >= _have_arg: + yield (i, nexti() | (nexti() << 8)) + else: + yield (i, None) + + +def _code_imports_py25(code, consts, names): + it1, it2, it3 = itertools.tee(_opargs(code), 3) + try: + next(it2) + next(it3) + next(it3) + except StopIteration: + return + for oparg1, oparg2, (op3, arg3) in itertools.izip(it1, it2, it3): + if op3 != IMPORT_NAME: + continue + op1, arg1 = oparg1 + op2, arg2 = oparg2 + if op1 != LOAD_CONST or op2 != LOAD_CONST: + continue + yield (consts[arg1], names[arg3], consts[arg2] or ()) + + +def _code_imports_py24(code, consts, names): + it1, it2 = itertools.tee(_opargs(code), 2) + try: + next(it2) + except StopIteration: + return + for oparg1, (op2, arg2) in itertools.izip(it1, it2): + if op2 != IMPORT_NAME: + continue + op1, arg1 = oparg1 + if op1 != LOAD_CONST: + continue + yield (-1, names[arg2], consts[arg1] or ()) diff --git a/mitogen/imports/_py36.py b/mitogen/imports/_py36.py new file mode 100644 index 00000000..19e51a6e --- /dev/null +++ b/mitogen/imports/_py36.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2025 Mitogen authors +# SPDX-License-Identifier: MIT +# !mitogen: minify_safe + +import opcode + +IMPORT_NAME = opcode.opmap['IMPORT_NAME'] +LOAD_CONST = opcode.opmap['LOAD_CONST'] + + +def _code_imports(code, consts, names): + start = 4 + while True: + op3_idx = code.find(IMPORT_NAME, start, -1) + if op3_idx < 0: + return + if 
op3_idx % 2: + start = op3_idx + 1 + continue + if code[op3_idx-4] != LOAD_CONST or code[op3_idx-2] != LOAD_CONST: + start = op3_idx + 2 + continue + start = op3_idx + 6 + arg1, arg2, arg3 = code[op3_idx-3], code[op3_idx-1], code[op3_idx+1] + yield (consts[arg1], names[arg3], consts[arg2] or ()) diff --git a/mitogen/master.py b/mitogen/master.py index 927ccaf1..f5d40025 100644 --- a/mitogen/master.py +++ b/mitogen/master.py @@ -35,10 +35,8 @@ be sent to any context that will be used to establish additional child contexts. """ -import dis import errno import inspect -import itertools import logging import os import pkgutil @@ -83,6 +81,7 @@ except ImportError: import mitogen import mitogen.core +import mitogen.imports import mitogen.minify import mitogen.parent @@ -90,14 +89,10 @@ from mitogen.core import any from mitogen.core import b from mitogen.core import IOLOG from mitogen.core import LOG -from mitogen.core import next from mitogen.core import str_partition from mitogen.core import str_rpartition from mitogen.core import to_text -imap = getattr(itertools, 'imap', map) -izip = getattr(itertools, 'izip', zip) - RLOG = logging.getLogger('mitogen.ctx') @@ -253,80 +248,6 @@ if mitogen.is_master: mitogen.parent._get_core_source = _get_core_source -LOAD_CONST = dis.opname.index('LOAD_CONST') -IMPORT_NAME = dis.opname.index('IMPORT_NAME') - - -def _getarg(nextb, c): - if c >= dis.HAVE_ARGUMENT: - return nextb() | (nextb() << 8) - - -if sys.version_info < (3, 0): - def iter_opcodes(co): - # Yield `(op, oparg)` tuples from the code object `co`. - ordit = imap(ord, co.co_code) - nextb = ordit.next - return ((c, _getarg(nextb, c)) for c in ordit) -elif sys.version_info < (3, 6): - def iter_opcodes(co): - # Yield `(op, oparg)` tuples from the code object `co`. - ordit = iter(co.co_code) - nextb = ordit.__next__ - return ((c, _getarg(nextb, c)) for c in ordit) -else: - def iter_opcodes(co): - # Yield `(op, oparg)` tuples from the code object `co`. 
- ordit = iter(co.co_code) - nextb = ordit.__next__ - # https://github.com/abarnert/cpython/blob/c095a32f/Python/wordcode.md - return ((c, nextb()) for c in ordit) - - -def scan_code_imports(co): - """ - Given a code object `co`, scan its bytecode yielding any ``IMPORT_NAME`` - and associated prior ``LOAD_CONST`` instructions representing an `Import` - statement or `ImportFrom` statement. - - :return: - Generator producing `(level, modname, namelist)` tuples, where: - - * `level`: -1 for normal import, 0, for absolute import, and >0 for - relative import. - * `modname`: Name of module to import, or from where `namelist` names - are imported. - * `namelist`: for `ImportFrom`, the list of names to be imported from - `modname`. - """ - opit = iter_opcodes(co) - opit, opit2, opit3 = itertools.tee(opit, 3) - - try: - next(opit2) - next(opit3) - next(opit3) - except StopIteration: - return - - if sys.version_info >= (2, 5): - for oparg1, oparg2, (op3, arg3) in izip(opit, opit2, opit3): - if op3 == IMPORT_NAME: - op2, arg2 = oparg2 - op1, arg1 = oparg1 - if op1 == op2 == LOAD_CONST: - yield (co.co_consts[arg1], - co.co_names[arg3], - co.co_consts[arg2] or ()) - else: - # Python 2.4 did not yet have 'level', so stack format differs. 
- for oparg1, (op2, arg2) in izip(opit, opit2): - if op2 == IMPORT_NAME: - op1, arg1 = oparg1 - if op1 == LOAD_CONST: - yield (-1, co.co_names[arg2], co.co_consts[arg1] or ()) - - class ThreadWatcher(object): """ Manage threads that wait for another thread to shut down, before invoking @@ -1029,7 +950,7 @@ class ModuleFinder(object): maybe_names = list(self.generate_parent_names(fullname)) co = compile(src, modpath, 'exec') - for level, modname, namelist in scan_code_imports(co): + for level, modname, namelist in mitogen.imports.codeobj_imports(co): if level == -1: modnames = [modname, '%s.%s' % (fullname, modname)] else: diff --git a/tests/bench/scan_code b/tests/bench/scan_code index 0b5d43ff..118dd9d7 100755 --- a/tests/bench/scan_code +++ b/tests/bench/scan_code @@ -4,10 +4,10 @@ set -o errexit set -o pipefail set -o nounset -BENCH_FUNC="scan_code_imports" +BENCH_FUNC="codeobj_imports" BENCH_EXPR="deque($BENCH_FUNC(co), maxlen=0)" BIG_MODULE_PATH="$(dirname -- "$0")/data/big_module.py" -IMPORTS="from collections import deque; from mitogen.master import $BENCH_FUNC" +IMPORTS="from collections import deque; from mitogen.imports import $BENCH_FUNC" COMPILE="co=compile(open('$BIG_MODULE_PATH').read(), '$BIG_MODULE_PATH', 'exec')" PYTHONS=( python2.7 python3.9 python3.10 python3.11 python3.12 python3.13 diff --git a/tests/master_test.py b/tests/imports_test.py similarity index 98% rename from tests/master_test.py rename to tests/imports_test.py index 519ac3af..d150356b 100644 --- a/tests/master_test.py +++ b/tests/imports_test.py @@ -2,8 +2,9 @@ import os import sys import unittest +import mitogen.imports + import testlib -import mitogen.master def testmod_compile(path): @@ -15,7 +16,7 @@ def testmod_compile(path): class ScanCodeImportsTest(testlib.TestCase): - func = staticmethod(mitogen.master.scan_code_imports) + func = staticmethod(mitogen.imports.codeobj_imports) @unittest.skipIf(sys.version_info < (3, 0), "Py is 2.x, would be relative") def 
test_default_absolute(self):