mitogen: Refactor scan_code_imports() as mitogen.imports.codeobj_imports()

This replaces `mitogen.master.scan_code_imports()` with
`mitogen.imports.codeobj_imports()`. The Python 3.x implementation now uses
`bytes.find()`, relying on the Python >= 3.6 "wordcode" format. Behaviour and
semantics should be unchanged. The new implementations are approximately
- 1.5 x faster on Python 2.x
- 2 - 3 x faster on Python 3.x

Before
```console
$ ./tests/bench/scan_code
scan_code_imports python2.7  100 loops, best of 3: 3.19 msec per loop
scan_code_imports python3.9  500 loops, best of 5: 685 usec per loop
scan_code_imports python3.10  500 loops, best of 5: 727 usec per loop
scan_code_imports python3.11  500 loops, best of 5: 601 usec per loop
scan_code_imports python3.12  500 loops, best of 5: 609 usec per loop
scan_code_imports python3.13  500 loops, best of 5: 586 usec per loop
```

After
```console
codeobj_imports python2.7  1000 loops, best of 3: 1.98 msec per loop
codeobj_imports python3.9  1000 loops, best of 5: 302 usec per loop
codeobj_imports python3.10  1000 loops, best of 5: 297 usec per loop
codeobj_imports python3.11  1000 loops, best of 5: 243 usec per loop
codeobj_imports python3.12  1000 loops, best of 5: 278 usec per loop
codeobj_imports python3.13  1000 loops, best of 5: 259 usec per loop
```
```console
$ uname -a
Darwin kintha 24.6.0 Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:29 PDT
2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T6000 arm64
```
pull/1328/head
Alex Willmer 4 months ago
parent 3093d0bb2d
commit 0e5f47f145

@ -44,7 +44,7 @@ except ImportError:
# Python < 3.4, PEP 302 Import Hooks
import imp
import mitogen.master
import mitogen.imports
LOG = logging.getLogger(__name__)
@ -146,7 +146,7 @@ def scan_fromlist(code):
>>> list(scan_fromlist(code))
[(0, 'a'), (0, 'b.c'), (0, 'd.e.f'), (0, 'g.h'), (0, 'g.i')]
"""
for level, modname_s, fromlist in mitogen.master.scan_code_imports(code):
for level, modname_s, fromlist in mitogen.imports.codeobj_imports(code):
for name in fromlist:
yield level, str('%s.%s' % (modname_s, name))
if not fromlist:
@ -172,7 +172,7 @@ def walk_imports(code, prefix=None):
prefix = ''
pattern = re.compile(r'(^|\.)(\w+)')
start = len(prefix)
for _, name, fromlist in mitogen.master.scan_code_imports(code):
for _, name, fromlist in mitogen.imports.codeobj_imports(code):
if not name.startswith(prefix):
continue
for match in pattern.finditer(name, start):

@ -21,6 +21,9 @@ To avail of fixes in an unreleased version, please download a ZIP file
In progress (unreleased)
------------------------
* :gh:issue:`1325` :mod:`mitogen`: Refactor
``mitogen.master.scan_code_imports()`` as
:func:`mitogen.imports.codeobj_imports` and speed it up by 1.5 - 2.5 x
* :gh:issue:`1329` CI: Refactor and de-duplicate Github Actions workflow
* :gh:issue:`1315` CI: macOS: Increase failed logins limit of test users
* :gh:issue:`1325` tests: Improve ``master_test.ScanCodeImportsTest`` coverage

@ -1300,6 +1300,7 @@ class Importer(object):
'kubectl',
'fakessh',
'fork',
'imports',
'jail',
'lxc',
'lxd',

@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import sys
if sys.version_info >= (3, 6):
from mitogen.imports._py36 import _code_imports
elif sys.version_info >= (2, 5):
from mitogen.imports._py2 import _code_imports_py25 as _code_imports
else:
from mitogen.imports._py2 import _code_imports_py24 as _code_imports
def codeobj_imports(co):
    """
    Scan the code object `co`, producing a tuple for each import found.

    Only module level `import mod` & `from mod import foo` statements are
    matched. Imports nested inside a `class ...` or `def ...` block are
    currently skipped.

    >>> co = compile('import a, b; from c import d, e as f', '<str>', 'exec')
    >>> list(codeobj_imports(co)) # doctest: +ELLIPSIS
    [(..., 'a', ()), (..., 'b', ()), (..., 'c', ('d', 'e'))]

    :return:
        Generator producing `(level, modname, names)` tuples, where:

        * `level`:
            -1 implicit relative (Python 2.x default)
            0 absolute (Python 3.x, `from __future__ import absolute_import`)
            >0 explicit relative (`from . import a`, `from ..b import c`)
        * `modname`: Name of module to import, or to import `names` from.
        * `names`: tuple of names in `from mod import ..`.
    """
    # The version specific scanner only needs the raw bytecode and the two
    # tables that its operands index into.
    bytecode, consts, names = co.co_code, co.co_consts, co.co_names
    return _code_imports(bytecode, consts, names)

@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import array
import itertools
import opcode
IMPORT_NAME = opcode.opmap['IMPORT_NAME']
LOAD_CONST = opcode.opmap['LOAD_CONST']

# itertools.izip() only exists on Python 2.x. On Python 3.x zip() is lazy.
_izip = getattr(itertools, 'izip', zip)

try:
    _next = next
except NameError:
    # Python 2.4 & 2.5 predate the next() builtin (added in Python 2.6).
    def _next(it):
        return it.next()


def _opargs(code, _have_arg=opcode.HAVE_ARGUMENT):
    """
    Yield `(opcode, arg)` pairs decoded from the bytecode string `code`.

    This decodes the format used before Python 3.6 "wordcode": an opcode is
    one byte, followed by a 2 byte little endian argument when the opcode is
    `>= _have_arg`. `arg` is None for opcodes that take no argument.
    """
    it = iter(array.array('B', code))
    try:
        nexti = it.next         # Python 2.x iterator protocol
    except AttributeError:
        nexti = it.__next__     # Python 3.0 - 3.5
    for i in it:
        if i >= _have_arg:
            # Low byte is consumed first, then the high byte.
            yield (i, nexti() | (nexti() << 8))
        else:
            yield (i, None)


def _code_imports_py25(code, consts, names):
    """
    Yield `(level, modname, names)` for each import found in `code`.

    An import is recognised as the instruction sequence
    `LOAD_CONST level; LOAD_CONST fromlist; IMPORT_NAME modname`.

    :param code: Bytecode string, e.g. `co.co_code`.
    :param consts: Constants indexed by `LOAD_CONST`, e.g. `co.co_consts`.
    :param names: Names indexed by `IMPORT_NAME`, e.g. `co.co_names`.
    """
    it1, it2, it3 = itertools.tee(_opargs(code), 3)
    try:
        # Stagger the iterators, so zipping them yields a sliding window of
        # 3 consecutive instructions.
        _next(it2)
        _next(it3)
        _next(it3)
    except StopIteration:
        # Fewer than 3 instructions, so there cannot be any import.
        return
    for oparg1, oparg2, (op3, arg3) in _izip(it1, it2, it3):
        if op3 != IMPORT_NAME:
            continue
        op1, arg1 = oparg1
        op2, arg2 = oparg2
        if op1 != LOAD_CONST or op2 != LOAD_CONST:
            continue
        # fromlist is None for a plain `import mod`, normalise it to ().
        yield (consts[arg1], names[arg3], consts[arg2] or ())


def _code_imports_py24(code, consts, names):
    """
    Yield `(-1, modname, names)` for each import found in `code`.

    Python 2.4 did not yet have 'level', so an import is recognised as
    `LOAD_CONST fromlist; IMPORT_NAME modname` and level is always -1.

    :param code: Bytecode string, e.g. `co.co_code`.
    :param consts: Constants indexed by `LOAD_CONST`, e.g. `co.co_consts`.
    :param names: Names indexed by `IMPORT_NAME`, e.g. `co.co_names`.
    """
    it1, it2 = itertools.tee(_opargs(code), 2)
    try:
        # Stagger the iterators, giving a window of 2 instructions.
        _next(it2)
    except StopIteration:
        return
    for oparg1, (op2, arg2) in _izip(it1, it2):
        if op2 != IMPORT_NAME:
            continue
        op1, arg1 = oparg1
        if op1 != LOAD_CONST:
            continue
        yield (-1, names[arg2], consts[arg1] or ())

@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import opcode
IMPORT_NAME = opcode.opmap['IMPORT_NAME']
LOAD_CONST = opcode.opmap['LOAD_CONST']
EXTENDED_ARG = opcode.opmap['EXTENDED_ARG']


def _operand(code, op_idx):
    """
    Decode the operand of the instruction whose opcode is `code[op_idx]`.

    Any EXTENDED_ARG instructions immediately before it supply the higher
    bytes of the operand; the nearest prefix holds the next most significant
    byte.

    :return:
        `(arg, first_idx)` where `first_idx` is the index of the earliest
        EXTENDED_ARG prefix, or `op_idx` if there is none.
    """
    arg = code[op_idx + 1]
    shift = 8
    first_idx = op_idx
    while first_idx >= 2 and code[first_idx - 2] == EXTENDED_ARG:
        arg |= code[first_idx - 1] << shift
        shift += 8
        first_idx -= 2
    return arg, first_idx


def _code_imports(code, consts, names):
    """
    Yield `(level, modname, names)` for each import found in `code`.

    `code` is Python >= 3.6 "wordcode": every instruction is 2 bytes, opcode
    at an even index and operand at the following odd index. An import is
    recognised as `LOAD_CONST level; LOAD_CONST fromlist; IMPORT_NAME modname`
    where each instruction may be preceded by EXTENDED_ARG prefixes.

    :param code: Bytecode as bytes, e.g. `co.co_code`.
    :param consts: Constants indexed by `LOAD_CONST`, e.g. `co.co_consts`.
    :param names: Names indexed by `IMPORT_NAME`, e.g. `co.co_names`.
    """
    # Two LOAD_CONST instructions must precede, so index 4 is the earliest
    # possible IMPORT_NAME.
    start = 4
    while True:
        # The final byte is always an operand, so it's excluded from search.
        op3_idx = code.find(IMPORT_NAME, start, -1)
        if op3_idx < 0:
            return
        if op3_idx % 2:
            # Matched an operand byte, not an opcode. Resume at next opcode.
            start = op3_idx + 1
            continue

        start = op3_idx + 2
        arg3, first3 = _operand(code, op3_idx)

        op2_idx = first3 - 2
        if op2_idx < 0 or code[op2_idx] != LOAD_CONST:
            continue
        arg2, first2 = _operand(code, op2_idx)

        op1_idx = first2 - 2
        if op1_idx < 0 or code[op1_idx] != LOAD_CONST:
            continue
        arg1, _ = _operand(code, op1_idx)

        # The next import needs 2 more LOAD_CONST before its IMPORT_NAME.
        start = op3_idx + 6
        # fromlist is None for a plain `import mod`, normalise it to ().
        yield (consts[arg1], names[arg3], consts[arg2] or ())

@ -35,10 +35,8 @@ be sent to any context that will be used to establish additional child
contexts.
"""
import dis
import errno
import inspect
import itertools
import logging
import os
import pkgutil
@ -83,6 +81,7 @@ except ImportError:
import mitogen
import mitogen.core
import mitogen.imports
import mitogen.minify
import mitogen.parent
@ -90,14 +89,10 @@ from mitogen.core import any
from mitogen.core import b
from mitogen.core import IOLOG
from mitogen.core import LOG
from mitogen.core import next
from mitogen.core import str_partition
from mitogen.core import str_rpartition
from mitogen.core import to_text
imap = getattr(itertools, 'imap', map)
izip = getattr(itertools, 'izip', zip)
RLOG = logging.getLogger('mitogen.ctx')
@ -253,80 +248,6 @@ if mitogen.is_master:
mitogen.parent._get_core_source = _get_core_source
LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
def _getarg(nextb, c):
if c >= dis.HAVE_ARGUMENT:
return nextb() | (nextb() << 8)
if sys.version_info < (3, 0):
def iter_opcodes(co):
# Yield `(op, oparg)` tuples from the code object `co`.
ordit = imap(ord, co.co_code)
nextb = ordit.next
return ((c, _getarg(nextb, c)) for c in ordit)
elif sys.version_info < (3, 6):
def iter_opcodes(co):
# Yield `(op, oparg)` tuples from the code object `co`.
ordit = iter(co.co_code)
nextb = ordit.__next__
return ((c, _getarg(nextb, c)) for c in ordit)
else:
def iter_opcodes(co):
# Yield `(op, oparg)` tuples from the code object `co`.
ordit = iter(co.co_code)
nextb = ordit.__next__
# https://github.com/abarnert/cpython/blob/c095a32f/Python/wordcode.md
return ((c, nextb()) for c in ordit)
def scan_code_imports(co):
"""
Given a code object `co`, scan its bytecode yielding any ``IMPORT_NAME``
and associated prior ``LOAD_CONST`` instructions representing an `Import`
statement or `ImportFrom` statement.
:return:
Generator producing `(level, modname, namelist)` tuples, where:
* `level`: -1 for normal import, 0, for absolute import, and >0 for
relative import.
* `modname`: Name of module to import, or from where `namelist` names
are imported.
* `namelist`: for `ImportFrom`, the list of names to be imported from
`modname`.
"""
opit = iter_opcodes(co)
opit, opit2, opit3 = itertools.tee(opit, 3)
try:
next(opit2)
next(opit3)
next(opit3)
except StopIteration:
return
if sys.version_info >= (2, 5):
for oparg1, oparg2, (op3, arg3) in izip(opit, opit2, opit3):
if op3 == IMPORT_NAME:
op2, arg2 = oparg2
op1, arg1 = oparg1
if op1 == op2 == LOAD_CONST:
yield (co.co_consts[arg1],
co.co_names[arg3],
co.co_consts[arg2] or ())
else:
# Python 2.4 did not yet have 'level', so stack format differs.
for oparg1, (op2, arg2) in izip(opit, opit2):
if op2 == IMPORT_NAME:
op1, arg1 = oparg1
if op1 == LOAD_CONST:
yield (-1, co.co_names[arg2], co.co_consts[arg1] or ())
class ThreadWatcher(object):
"""
Manage threads that wait for another thread to shut down, before invoking
@ -1029,7 +950,7 @@ class ModuleFinder(object):
maybe_names = list(self.generate_parent_names(fullname))
co = compile(src, modpath, 'exec')
for level, modname, namelist in scan_code_imports(co):
for level, modname, namelist in mitogen.imports.codeobj_imports(co):
if level == -1:
modnames = [modname, '%s.%s' % (fullname, modname)]
else:

@ -4,10 +4,10 @@ set -o errexit
set -o pipefail
set -o nounset
BENCH_FUNC="scan_code_imports"
BENCH_FUNC="codeobj_imports"
BENCH_EXPR="deque($BENCH_FUNC(co), maxlen=0)"
BIG_MODULE_PATH="$(dirname -- "$0")/data/big_module.py"
IMPORTS="from collections import deque; from mitogen.master import $BENCH_FUNC"
IMPORTS="from collections import deque; from mitogen.imports import $BENCH_FUNC"
COMPILE="co=compile(open('$BIG_MODULE_PATH').read(), '$BIG_MODULE_PATH', 'exec')"
PYTHONS=(
python2.7 python3.9 python3.10 python3.11 python3.12 python3.13

@ -2,8 +2,9 @@ import os
import sys
import unittest
import mitogen.imports
import testlib
import mitogen.master
def testmod_compile(path):
@ -15,7 +16,7 @@ def testmod_compile(path):
class ScanCodeImportsTest(testlib.TestCase):
func = staticmethod(mitogen.master.scan_code_imports)
func = staticmethod(mitogen.imports.codeobj_imports)
@unittest.skipIf(sys.version_info < (3, 0), "Py is 2.x, would be relative")
def test_default_absolute(self):
Loading…
Cancel
Save