Merge pull request #1328 from moreati/issue1325-scan_code_imports-refactor

mitogen: Refactor `mitogen.master.scan_code_imports()` -> `mitogen.imports.codeobj_imports()`
pull/1331/head
Alex Willmer 3 months ago committed by GitHub
commit b8d3f86b12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -44,7 +44,7 @@ except ImportError:
# Python < 3.4, PEP 302 Import Hooks # Python < 3.4, PEP 302 Import Hooks
import imp import imp
import mitogen.master import mitogen.imports
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -146,7 +146,7 @@ def scan_fromlist(code):
>>> list(scan_fromlist(code)) >>> list(scan_fromlist(code))
[(0, 'a'), (0, 'b.c'), (0, 'd.e.f'), (0, 'g.h'), (0, 'g.i')] [(0, 'a'), (0, 'b.c'), (0, 'd.e.f'), (0, 'g.h'), (0, 'g.i')]
""" """
for level, modname_s, fromlist in mitogen.master.scan_code_imports(code): for level, modname_s, fromlist in mitogen.imports.codeobj_imports(code):
for name in fromlist: for name in fromlist:
yield level, str('%s.%s' % (modname_s, name)) yield level, str('%s.%s' % (modname_s, name))
if not fromlist: if not fromlist:
@ -172,7 +172,7 @@ def walk_imports(code, prefix=None):
prefix = '' prefix = ''
pattern = re.compile(r'(^|\.)(\w+)') pattern = re.compile(r'(^|\.)(\w+)')
start = len(prefix) start = len(prefix)
for _, name, fromlist in mitogen.master.scan_code_imports(code): for _, name, fromlist in mitogen.imports.codeobj_imports(code):
if not name.startswith(prefix): if not name.startswith(prefix):
continue continue
for match in pattern.finditer(name, start): for match in pattern.finditer(name, start):

@ -21,8 +21,12 @@ To avail of fixes in an unreleased version, please download a ZIP file
In progress (unreleased) In progress (unreleased)
------------------------ ------------------------
* :gh:issue:`1325` :mod:`mitogen`: Refactor
``mitogen.master.scan_code_imports()`` as
:func:`mitogen.imports.codeobj_imports` and speed-up by 1.5 - 2.5 x
* :gh:issue:`1329` CI: Refactor and de-duplicate Github Actions workflow * :gh:issue:`1329` CI: Refactor and de-duplicate Github Actions workflow
* :gh:issue:`1315` CI: macOS: Increase failed logins limit of test users * :gh:issue:`1315` CI: macOS: Increase failed logins limit of test users
* :gh:issue:`1325` tests: Improve ``master_test.ScanCodeImportsTest`` coverage
v0.3.26 (2025-08-04) v0.3.26 (2025-08-04)

@ -1300,6 +1300,7 @@ class Importer(object):
'kubectl', 'kubectl',
'fakessh', 'fakessh',
'fork', 'fork',
'imports',
'jail', 'jail',
'lxc', 'lxc',
'lxd', 'lxd',

@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import sys
if sys.version_info >= (3, 6):
from mitogen.imports._py36 import _code_imports
elif sys.version_info >= (2, 5):
from mitogen.imports._py2 import _code_imports_py25 as _code_imports
else:
from mitogen.imports._py2 import _code_imports_py24 as _code_imports
def codeobj_imports(co):
    """
    Yield (level, modname, names) tuples by scanning the code object `co`.

    Top level `import mod` & `from mod import foo` statements are matched.
    Those inside a `class ...` or `def ...` block are currently skipped.

    >>> co = compile('import a, b; from c import d, e as f', '<str>', 'exec')
    >>> list(codeobj_imports(co))  # doctest: +ELLIPSIS
    [(..., 'a', ()), (..., 'b', ()), (..., 'c', ('d', 'e'))]

    :param co:
        Code object to scan. Only its own `co_code`, `co_consts` and
        `co_names` are read.
    :return:
        Generator producing `(level, modname, names)` tuples, where:

        * `level`:
            -1 implicit relative (Python 2.x default)
            0 absolute (Python 3.x, `from __future__ import absolute_import`)
            >0 explicit relative (`from . import a`, `from ..b import c`)
        * `modname`: Name of module to import, or to import `names` from.
        * `names`: tuple of names in `from mod import ..`.
    """
    # The version specific scanner was selected once, at import time.
    return _code_imports(co.co_code, co.co_consts, co.co_names)

@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import array
import itertools
import opcode
IMPORT_NAME = opcode.opmap['IMPORT_NAME']
LOAD_CONST = opcode.opmap['LOAD_CONST']
def _opargs(code, _have_arg=opcode.HAVE_ARGUMENT):
    """
    Yield `(op, arg)` pairs decoded from the raw bytecode `code`.

    Each instruction is a one byte opcode. Opcodes numbered `_have_arg` or
    above are followed by a two byte little-endian argument; all others are
    paired with `None`.

    :param code:
        Bytecode string, e.g. the `co_code` of a code object.
    :param _have_arg:
        Lowest opcode number that carries an argument. Bound as a default so
        the loop reads a local rather than a module attribute.
    :return:
        Generator of `(op, arg)` tuples, `arg` is an int or `None`.
    """
    it = iter(array.array('B', code))
    # Iterators name their advance method `next` on Python 2.x and
    # `__next__` on 3.x. Accept either rather than failing with
    # AttributeError when only the latter exists.
    nexti = getattr(it, 'next', None) or it.__next__
    for op in it:
        if op < _have_arg:
            yield (op, None)
            continue
        try:
            arg = nexti() | (nexti() << 8)
        except StopIteration:
            # Argument bytes are missing (truncated input): end the scan
            # explicitly instead of leaking StopIteration out of the
            # generator body.
            return
        yield (op, arg)
def _code_imports_py25(code, consts, names):
    """
    Yield `(level, modname, names)` for each import found in `code`.

    An import is recognised as an ``IMPORT_NAME`` instruction directly
    preceded by two ``LOAD_CONST`` instructions: the first loads the level,
    the second loads the from-list.

    :param code: Raw bytecode to scan.
    :param consts: Constants indexed by the ``LOAD_CONST`` arguments.
    :param names: Names indexed by the ``IMPORT_NAME`` argument.
    :return:
        Generator of `(level, modname, fromlist)` tuples; a false from-list
        (e.g. `None`) is reported as `()`.
    """
    # `itertools.izip` only exists on Python 2.x; fall back to builtin zip
    # so the lookup itself can never raise AttributeError.
    izip = getattr(itertools, 'izip', zip)
    it1, it2, it3 = itertools.tee(_opargs(code), 3)
    # Offset the copies by one and two instructions to form a sliding window
    # of three. islice() is used rather than next(): the next() builtin does
    # not exist before Python 2.6 and nothing in this module provides one.
    # Input shorter than the window simply yields nothing.
    it2 = itertools.islice(it2, 1, None)
    it3 = itertools.islice(it3, 2, None)
    for oparg1, oparg2, (op3, arg3) in izip(it1, it2, it3):
        if op3 != IMPORT_NAME:
            continue
        op1, arg1 = oparg1
        op2, arg2 = oparg2
        if op1 != LOAD_CONST or op2 != LOAD_CONST:
            continue
        yield (consts[arg1], names[arg3], consts[arg2] or ())
def _code_imports_py24(code, consts, names):
    """
    Yield `(level, modname, names)` for each import found in `code`.

    Variant for bytecode without an import level: an import is an
    ``IMPORT_NAME`` instruction directly preceded by a single ``LOAD_CONST``
    that loads the from-list. The level is always reported as -1.

    :param code: Raw bytecode to scan.
    :param consts: Constants indexed by the ``LOAD_CONST`` argument.
    :param names: Names indexed by the ``IMPORT_NAME`` argument.
    :return:
        Generator of `(-1, modname, fromlist)` tuples; a false from-list
        (e.g. `None`) is reported as `()`.
    """
    # `itertools.izip` only exists on Python 2.x; fall back to builtin zip
    # so the lookup itself can never raise AttributeError.
    izip = getattr(itertools, 'izip', zip)
    it1, it2 = itertools.tee(_opargs(code), 2)
    # Offset the second copy by one instruction to pair each instruction with
    # its successor. islice() is used rather than next(): the next() builtin
    # does not exist before Python 2.6, so calling it here raised NameError
    # on the very interpreter this variant is selected for.
    it2 = itertools.islice(it2, 1, None)
    for oparg1, (op2, arg2) in izip(it1, it2):
        if op2 != IMPORT_NAME:
            continue
        op1, arg1 = oparg1
        if op1 != LOAD_CONST:
            continue
        yield (-1, names[arg2], consts[arg1] or ())

@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import opcode
EXTENDED_ARG = opcode.opmap['EXTENDED_ARG']
IMPORT_NAME = opcode.opmap['IMPORT_NAME']
LOAD_CONST = opcode.opmap['LOAD_CONST']
# NOTE(review): newer interpreters (3.14 per the `dis` documentation) may load
# a small integer such as the import level with LOAD_SMALL_INT, whose argument
# is the integer itself - confirm against real compiler output. Where the
# opcode is not defined this is None, which never equals an opcode byte.
LOAD_SMALL_INT = opcode.opmap.get('LOAD_SMALL_INT')


def _oparg(code, idx):
    """
    Return `(arg, first_idx)` for the instruction whose opcode is at `idx`.

    `arg` combines the instruction's own argument byte with those of any
    ``EXTENDED_ARG`` prefixes immediately before it (each prefix supplies the
    next more significant byte). `first_idx` is the offset of the first
    prefix, or `idx` when there is none.
    """
    arg = code[idx + 1]
    shift = 8
    idx -= 2
    while idx >= 0 and code[idx] == EXTENDED_ARG:
        arg |= code[idx + 1] << shift
        shift += 8
        idx -= 2
    return arg, idx + 2


def _code_imports(code, consts, names):
    """
    Yield `(level, modname, names)` for each import found in `code`.

    `code` is 2-byte wordcode: opcodes sit at even offsets, each followed by
    one argument byte. An import is an ``IMPORT_NAME`` instruction directly
    preceded by a load of the from-list (``LOAD_CONST``), itself directly
    preceded by a load of the level. Arguments wider than one byte, spelled
    with ``EXTENDED_ARG`` prefixes, are decoded rather than mis-read.

    :param bytes code: Raw bytecode to scan.
    :param consts: Constants indexed by ``LOAD_CONST`` arguments.
    :param names: Names indexed by the ``IMPORT_NAME`` argument.
    :return:
        Generator of `(level, modname, fromlist)` tuples; a false from-list
        (e.g. `None`) is reported as `()`.
    """
    # Two instructions must precede IMPORT_NAME, so it cannot occur before
    # offset 4. The end of -1 excludes the final (argument) byte.
    start = 4
    while True:
        op3_idx = code.find(IMPORT_NAME, start, -1)
        if op3_idx < 0:
            return
        if op3_idx % 2:
            # Odd offset: an argument byte that merely equals IMPORT_NAME.
            start = op3_idx + 1
            continue
        start = op3_idx + 2
        arg3, first_idx = _oparg(code, op3_idx)

        op2_idx = first_idx - 2
        # Guard against negative offsets, which would silently wrap around.
        if op2_idx < 0 or code[op2_idx] != LOAD_CONST:
            continue
        arg2, first_idx = _oparg(code, op2_idx)

        op1_idx = first_idx - 2
        if op1_idx < 0:
            continue
        op1 = code[op1_idx]
        arg1, _ = _oparg(code, op1_idx)
        if op1 == LOAD_CONST:
            level = consts[arg1]
        elif op1 == LOAD_SMALL_INT:
            level = arg1
        else:
            continue

        # The next import needs its own two loads first, so the earliest
        # possible IMPORT_NAME is three instructions further on.
        start = op3_idx + 6
        yield (level, names[arg3], consts[arg2] or ())

@ -35,10 +35,8 @@ be sent to any context that will be used to establish additional child
contexts. contexts.
""" """
import dis
import errno import errno
import inspect import inspect
import itertools
import logging import logging
import os import os
import pkgutil import pkgutil
@ -83,6 +81,7 @@ except ImportError:
import mitogen import mitogen
import mitogen.core import mitogen.core
import mitogen.imports
import mitogen.minify import mitogen.minify
import mitogen.parent import mitogen.parent
@ -90,14 +89,10 @@ from mitogen.core import any
from mitogen.core import b from mitogen.core import b
from mitogen.core import IOLOG from mitogen.core import IOLOG
from mitogen.core import LOG from mitogen.core import LOG
from mitogen.core import next
from mitogen.core import str_partition from mitogen.core import str_partition
from mitogen.core import str_rpartition from mitogen.core import str_rpartition
from mitogen.core import to_text from mitogen.core import to_text
imap = getattr(itertools, 'imap', map)
izip = getattr(itertools, 'izip', zip)
RLOG = logging.getLogger('mitogen.ctx') RLOG = logging.getLogger('mitogen.ctx')
@ -253,80 +248,6 @@ if mitogen.is_master:
mitogen.parent._get_core_source = _get_core_source mitogen.parent._get_core_source = _get_core_source
LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
def _getarg(nextb, c):
    """
    Return the two byte little-endian argument of opcode `c`, or `None`.

    :param nextb: Callable returning the next bytecode byte as an int.
    :param c: Opcode number; below `dis.HAVE_ARGUMENT` nothing is read.
    """
    if c < dis.HAVE_ARGUMENT:
        return None
    low = nextb()
    high = nextb()
    return low | (high << 8)
# Select the bytecode decoder matching the running interpreter. Each variant
# returns a generator of `(op, oparg)` tuples for the code object `co`.
if sys.version_info >= (3, 6):
    def iter_opcodes(co):
        # Wordcode: every opcode is followed by exactly one argument byte.
        # https://github.com/abarnert/cpython/blob/c095a32f/Python/wordcode.md
        byte_it = iter(co.co_code)
        pull = byte_it.__next__
        return ((op, pull()) for op in byte_it)
elif sys.version_info >= (3, 0):
    def iter_opcodes(co):
        # `co_code` already iterates as ints; arguments are read on demand.
        byte_it = iter(co.co_code)
        pull = byte_it.__next__
        return ((op, _getarg(pull, op)) for op in byte_it)
else:
    def iter_opcodes(co):
        # `co_code` is a str here, so map each character to its ordinal.
        byte_it = imap(ord, co.co_code)
        pull = byte_it.next
        return ((op, _getarg(pull, op)) for op in byte_it)
def scan_code_imports(co):
    """
    Scan the bytecode of code object `co` for ``IMPORT_NAME`` instructions
    and the ``LOAD_CONST`` instructions immediately before them, which
    together represent an `Import` or `ImportFrom` statement.

    :return:
        Generator producing `(level, modname, namelist)` tuples, where:

        * `level`: -1 for normal import, 0, for absolute import, and >0 for
          relative import.
        * `modname`: Name of module to import, or from where `namelist` names
          are imported.
        * `namelist`: for `ImportFrom`, the list of names to be imported from
          `modname`.
    """
    first, second, third = itertools.tee(iter_opcodes(co), 3)
    # Stagger the copies by one and two instructions. Fewer than three
    # instructions means there is nothing to report.
    try:
        next(second)
        next(third)
        next(third)
    except StopIteration:
        return

    if sys.version_info < (2, 5):
        # Python 2.4 did not yet have 'level', so stack format differs.
        for (op1, arg1), (op2, arg2) in izip(first, second):
            if op2 != IMPORT_NAME or op1 != LOAD_CONST:
                continue
            yield (-1, co.co_names[arg2], co.co_consts[arg1] or ())
        return

    for (op1, arg1), (op2, arg2), (op3, arg3) in izip(first, second, third):
        if op3 != IMPORT_NAME:
            continue
        if not (op1 == LOAD_CONST and op2 == LOAD_CONST):
            continue
        yield (co.co_consts[arg1],
               co.co_names[arg3],
               co.co_consts[arg2] or ())
class ThreadWatcher(object): class ThreadWatcher(object):
""" """
Manage threads that wait for another thread to shut down, before invoking Manage threads that wait for another thread to shut down, before invoking
@ -1029,7 +950,7 @@ class ModuleFinder(object):
maybe_names = list(self.generate_parent_names(fullname)) maybe_names = list(self.generate_parent_names(fullname))
co = compile(src, modpath, 'exec') co = compile(src, modpath, 'exec')
for level, modname, namelist in scan_code_imports(co): for level, modname, namelist in mitogen.imports.codeobj_imports(co):
if level == -1: if level == -1:
modnames = [modname, '%s.%s' % (fullname, modname)] modnames = [modname, '%s.%s' % (fullname, modname)]
else: else:

File diff suppressed because it is too large Load Diff

@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Time mitogen.imports.codeobj_imports() over data/big_module.py with each
# listed Python interpreter that is installed.
set -o errexit
set -o pipefail
set -o nounset

BENCH_FUNC="codeobj_imports"
# deque(..., maxlen=0) exhausts the generator without storing its items.
BENCH_EXPR="deque($BENCH_FUNC(co), maxlen=0)"
BIG_MODULE_PATH="$(dirname -- "$0")/data/big_module.py"
IMPORTS="from collections import deque; from mitogen.imports import $BENCH_FUNC"
COMPILE="co=compile(open('$BIG_MODULE_PATH').read(), '$BIG_MODULE_PATH', 'exec')"
PYTHONS=(
    python2.7 python3.9 python3.10 python3.11 python3.12 python3.13
)

for p in "${PYTHONS[@]}"; do
    # With errexit set, one missing interpreter would otherwise abort the
    # run before the remaining interpreters are measured.
    if ! command -v "$p" >/dev/null 2>&1; then
        printf '%s %s not found, skipped\n' "$BENCH_FUNC" "$p" >&2
        continue
    fi
    printf '%s %s ' "$BENCH_FUNC" "$p"
    "$p" -m timeit -s "$IMPORTS" -s "$COMPILE" "$BENCH_EXPR"
done

@ -0,0 +1,11 @@
# pyright: reportMissingImports=false
# ruff: noqa: E401 E702 F401 F403
import a
import a.b
import c as d
import e, e.f as g \
, h; import i
from j import k, l, m as n
from o import *

@ -0,0 +1,9 @@
# pyright: reportMissingImports=false
# ruff: noqa: E401 E702 F401 F403
from . import a
from .b import c, d as e
from ... import (
f,
j as k,
)

@ -0,0 +1,13 @@
# pyright: reportMissingImports=false
# ruff: noqa: E401 E702 F401 F403
from __future__ import absolute_import
import a
import a.b
import c as d
import e, e.f as g \
, h; import i
from j import k, l, m as n
from o import *

@ -0,0 +1,7 @@
# Scanner fixture: this file is only compiled, never imported. Every import
# here is nested in a `class` or method body; presumably this is the
# `scanning/scoped_class.py` module for which the scanner test expects no
# imports to be reported - verify against the test.
class C:
    import in_class
    from in_class import x as y
    def m(self):
        import in_method
        from in_method import x as y, z

@ -0,0 +1,3 @@
# Scanner fixture: this file is only compiled, never imported. The imports
# are nested in a function body; presumably this is the
# `scanning/scoped_function.py` module for which the scanner test expects no
# imports to be reported - verify against the test.
def f():
    import in_func
    from in_func import x as y, z

@ -0,0 +1,16 @@
# Scanner fixture: this file is only compiled, never imported. It places
# imports in both arms of a constant condition and of an interpreter version
# check, to show which branches survive compilation.
import sys

if True:
    import in_if_always_true
    from in_if_always_true import x as y, z
else:
    import in_else_never_true
    from in_else_never_true import x as y, z

# `sys.version` is a str; the tuple comparison needs `sys.version_info`.
if sys.version_info >= (3, 0):
    import in_if_py3
    from in_if_py3 import x as y, z
else:
    import in_else_py2
    from in_else_py2 import x as y, z

@ -0,0 +1,9 @@
# Scanner fixture: this file is only compiled, never imported. The imports
# sit in `try` and `except` suites of the module body; presumably this is the
# `scanning/scoped_try_except.py` module for which the scanner test expects
# all six imports to be reported - verify against the test.
try:
    import in_try
    from in_try import x as y, z
except ImportError:
    import in_except_importerror
    from in_except_importerror import x as y, z
except Exception:
    import in_except_exception
    from in_except_exception import x as y, z

@ -0,0 +1,130 @@
import os
import sys
import unittest
import mitogen.imports
import testlib
def testmod_compile(path):
    """
    Compile a test module and return its code object.

    :param path: Module path relative to `testlib.MODS_DIR`.
    :return: Code object compiled in 'exec' mode from the file's bytes.
    """
    path = os.path.join(testlib.MODS_DIR, path)
    f = open(path, 'rb')
    # try/finally rather than `with`, so the file is closed even when read()
    # or compile() raises, without needing newer syntax.
    try:
        return compile(f.read(), path, 'exec')
    finally:
        f.close()
class ScanCodeImportsTest(testlib.TestCase):
    # Checks mitogen.imports.codeobj_imports() against the fixture modules
    # under `scanning/`. Expected levels depend on the running interpreter:
    # -1 (implicit relative) on Python 2.x, 0 (absolute) on Python 3.x.
    func = staticmethod(mitogen.imports.codeobj_imports)

    @unittest.skipIf(sys.version_info < (3, 0), "Py is 2.x, would be relative")
    def test_default_absolute(self):
        co = testmod_compile('scanning/defaults.py')
        expected = [
            (0, 'a', ()), (0, 'a.b', ()), (0, 'c', ()),
            (0, 'e', ()), (0, 'e.f', ()), (0, 'h', ()),
            (0, 'i', ()),
            (0, 'j', ('k', 'l', 'm')),
            (0, 'o', ('*',)),
        ]
        self.assertEqual(list(self.func(co)), expected)

    @unittest.skipIf(sys.version_info >= (3, 0), "Py is 3.x, would be absolute")
    def test_default_relative(self):
        co = testmod_compile('scanning/defaults.py')
        expected = [
            (-1, 'a', ()), (-1, 'a.b', ()), (-1, 'c', ()),
            (-1, 'e', ()), (-1, 'e.f', ()), (-1, 'h', ()),
            (-1, 'i', ()),
            (-1, 'j', ('k', 'l', 'm')),
            (-1, 'o', ('*',)),
        ]
        self.assertEqual(list(self.func(co)), expected)

    @unittest.skipIf(sys.version_info < (2, 5), "Py is 2.4, no absolute_import")
    def test_explicit_absolute(self):
        co = testmod_compile('scanning/has_absolute_import.py')
        expected = [
            (0, '__future__', ('absolute_import',)),
            (0, 'a', ()), (0, 'a.b', ()), (0, 'c', ()),
            (0, 'e', ()), (0, 'e.f', ()), (0, 'h', ()),
            (0, 'i', ()),
            (0, 'j', ('k', 'l', 'm')),
            (0, 'o', ('*',)),
        ]
        self.assertEqual(list(self.func(co)), expected)

    @unittest.skipIf(sys.version_info < (2, 5), "Py is 2.4, no `from . import x`")
    def test_explicit_relative(self):
        co = testmod_compile('scanning/explicit_relative.py')
        expected = [
            (1, '', ('a',)),
            (1, 'b', ('c', 'd')),
            (3, '', ('f', 'j')),
        ]
        self.assertEqual(list(self.func(co)), expected)

    def test_scoped_class(self):
        # Imports in `class` or `def` are ignored, a bad heuristic to detect
        # lazy imports and skip sending them pre-emptively.
        # See
        # - https://github.com/mitogen-hq/mitogen/issues/682
        # - https://github.com/mitogen-hq/mitogen/issues/1325#issuecomment-3170482014
        co = testmod_compile('scanning/scoped_class.py')
        self.assertEqual(list(self.func(co)), [])

    def test_scoped_function(self):
        co = testmod_compile('scanning/scoped_function.py')
        self.assertEqual(list(self.func(co)), [])

    @unittest.skipIf(sys.version_info >= (3, 0), "Python is 3.x, which prunes")
    def test_scoped_if_else_unpruned(self):
        co = testmod_compile('scanning/scoped_if_else.py')
        level = (-1, 0)[int(sys.version_info >= (3, 0))]
        expected = [
            (level, 'sys', ()),
            (level, 'in_if_always_true', ()),
            (level, 'in_if_always_true', ('x', 'z')),
            # Python 2.x does no pruning
            (level, 'in_else_never_true', ()),
            (level, 'in_else_never_true', ('x', 'z')),
            (level, 'in_if_py3', ()),
            (level, 'in_if_py3', ('x', 'z')),
            (level, 'in_else_py2', ()),
            (level, 'in_else_py2', ('x', 'z')),
        ]
        self.assertEqual(list(self.func(co)), expected)

    @unittest.skipIf(sys.version_info < (3, 0), "Python is 2.x, which doesn't prune")
    def test_scoped_if_else_pruned(self):
        co = testmod_compile('scanning/scoped_if_else.py')
        level = (-1, 0)[int(sys.version_info >= (3, 0))]
        expected = [
            (level, 'sys', ()),
            (level, 'in_if_always_true', ()),
            (level, 'in_if_always_true', ('x', 'z')),
            # Python 3.x prunes some impossible branches ...
            (level, 'in_if_py3', ()),
            (level, 'in_if_py3', ('x', 'z')),
            # ... but not sys.version_info ones
            (level, 'in_else_py2', ()),
            (level, 'in_else_py2', ('x', 'z')),
        ]
        self.assertEqual(list(self.func(co)), expected)

    def test_scoped_try_except(self):
        co = testmod_compile('scanning/scoped_try_except.py')
        level = (-1, 0)[int(sys.version_info >= (3, 0))]
        expected = [
            (level, 'in_try', ()),
            (level, 'in_try', ('x', 'z')),
            (level, 'in_except_importerror', ()),
            (level, 'in_except_importerror', ('x', 'z')),
            (level, 'in_except_exception', ()),
            (level, 'in_except_exception', ('x', 'z')),
        ]
        self.assertEqual(list(self.func(co)), expected)

@ -1,25 +0,0 @@
import inspect
import testlib
import mitogen.master
class ScanCodeImportsTest(testlib.TestCase):
    # Scans this test module's own source and checks that exactly its three
    # top-level imports are reported.
    func = staticmethod(mitogen.master.scan_code_imports)

    # Reported import level: 0 when `mitogen.core.PY3` is true, else -1.
    level = 0 if mitogen.core.PY3 else -1

    SIMPLE_EXPECT = [
        (level, 'inspect', ()),
        (level, 'testlib', ()),
        (level, 'mitogen.master', ()),
    ]

    def test_simple(self):
        source_path = inspect.getsourcefile(ScanCodeImportsTest)
        with open(source_path) as fp:
            source = fp.read()
        code = compile(source, source_path, 'exec')
        found = list(self.func(code))
        self.assertEqual(found, self.SIMPLE_EXPECT)
Loading…
Cancel
Save