mitogen: Refactor scan_code_imports() as mitogen.imports.codeobj_imports()

This replaces `mitogen.master.scan_code_imports()` with
`mitogen.imports.codeobj_imports()`. The Python 3.x implementation now uses
`str.find()`, relying on Python >= 3.6 "widecode" format. Behaviour and
semantics should be unchanged. Now implementations are approx
- 1.5 x faster on Python 2.x
- 2 - 3 x faster on Python 3.x

Before
```console
$ ./tests/bench/scan_code
scan_code_imports python2.7  100 loops, best of 3: 3.19 msec per loop
scan_code_imports python3.9  500 loops, best of 5: 685 usec per loop
scan_code_imports python3.10  500 loops, best of 5: 727 usec per loop
scan_code_imports python3.11  500 loops, best of 5: 601 usec per loop
scan_code_imports python3.12  500 loops, best of 5: 609 usec per loop
scan_code_imports python3.13  500 loops, best of 5: 586 usec per loop
```

After
```console
codeobj_imports python2.7  1000 loops, best of 3: 1.98 msec per loop
codeobj_imports python3.9  1000 loops, best of 5: 302 usec per loop
codeobj_imports python3.10  1000 loops, best of 5: 297 usec per loop
codeobj_imports python3.11  1000 loops, best of 5: 243 usec per loop
codeobj_imports python3.12  1000 loops, best of 5: 278 usec per loop
codeobj_imports python3.13  1000 loops, best of 5: 259 usec per loop
```
```console
$ uname -a
Darwin kintha 24.6.0 Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:29 PDT
2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T6000 arm64
```
pull/1328/head
Alex Willmer 4 months ago
parent 3093d0bb2d
commit 0e5f47f145

@ -44,7 +44,7 @@ except ImportError:
# Python < 3.4, PEP 302 Import Hooks # Python < 3.4, PEP 302 Import Hooks
import imp import imp
import mitogen.master import mitogen.imports
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -146,7 +146,7 @@ def scan_fromlist(code):
>>> list(scan_fromlist(code)) >>> list(scan_fromlist(code))
[(0, 'a'), (0, 'b.c'), (0, 'd.e.f'), (0, 'g.h'), (0, 'g.i')] [(0, 'a'), (0, 'b.c'), (0, 'd.e.f'), (0, 'g.h'), (0, 'g.i')]
""" """
for level, modname_s, fromlist in mitogen.master.scan_code_imports(code): for level, modname_s, fromlist in mitogen.imports.codeobj_imports(code):
for name in fromlist: for name in fromlist:
yield level, str('%s.%s' % (modname_s, name)) yield level, str('%s.%s' % (modname_s, name))
if not fromlist: if not fromlist:
@ -172,7 +172,7 @@ def walk_imports(code, prefix=None):
prefix = '' prefix = ''
pattern = re.compile(r'(^|\.)(\w+)') pattern = re.compile(r'(^|\.)(\w+)')
start = len(prefix) start = len(prefix)
for _, name, fromlist in mitogen.master.scan_code_imports(code): for _, name, fromlist in mitogen.imports.codeobj_imports(code):
if not name.startswith(prefix): if not name.startswith(prefix):
continue continue
for match in pattern.finditer(name, start): for match in pattern.finditer(name, start):

@ -21,6 +21,9 @@ To avail of fixes in an unreleased version, please download a ZIP file
In progress (unreleased) In progress (unreleased)
------------------------ ------------------------
* :gh:issue:`1325` :mod:`mitogen`: Refactor
``mitogen.master.scan_code_imports()`` as
:func:`mitogen.import.codeobj_imports` and speed-up by 1.5 - 2.5 x
* :gh:issue:`1329` CI: Refactor and de-duplicate Github Actions workflow * :gh:issue:`1329` CI: Refactor and de-duplicate Github Actions workflow
* :gh:issue:`1315` CI: macOS: Increase failed logins limit of test users * :gh:issue:`1315` CI: macOS: Increase failed logins limit of test users
* :gh:issue:`1325` tests: Improve ``master_test.ScanCodeImportsTest`` coverage * :gh:issue:`1325` tests: Improve ``master_test.ScanCodeImportsTest`` coverage

@ -1300,6 +1300,7 @@ class Importer(object):
'kubectl', 'kubectl',
'fakessh', 'fakessh',
'fork', 'fork',
'imports',
'jail', 'jail',
'lxc', 'lxc',
'lxd', 'lxd',

@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import sys
if sys.version_info >= (3, 6):
from mitogen.imports._py36 import _code_imports
elif sys.version_info >= (2, 5):
from mitogen.imports._py2 import _code_imports_py25 as _code_imports
else:
from mitogen.imports._py2 import _code_imports_py24 as _code_imports
def codeobj_imports(co):
"""
Yield (level, modname, names) tuples by scanning the code object `co`.
Top level `import mod` & `from mod import foo` statements are matched.
Those inside a `class ...` or `def ...` block are currently skipped.
>>> co = compile('import a, b; from c import d, e as f', '<str>', 'exec')
>>> list(codeobj_imports(co)) # doctest: +ELLIPSIS
[(..., 'a', ()), (..., 'b', ()), (..., 'c', ('d', 'e'))]
:return:
Generator producing `(level, modname, names)` tuples, where:
* `level`:
-1 implicit relative (Python 2.x default)
0 absolute (Python 3.x, `from __future__ import absolute_import`)
>0 explicit relative (`from . import a`, `from ..b, import c`)
* `modname`: Name of module to import, or to import `names` from.
* `names`: tuple of names in `from mod import ..`.
"""
return _code_imports(co.co_code, co.co_consts, co.co_names)

@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import array
import itertools
import opcode
IMPORT_NAME = opcode.opmap['IMPORT_NAME']
LOAD_CONST = opcode.opmap['LOAD_CONST']
def _opargs(code, _have_arg=opcode.HAVE_ARGUMENT):
it = iter(array.array('B', code))
nexti = it.next
for i in it:
if i >= _have_arg:
yield (i, nexti() | (nexti() << 8))
else:
yield (i, None)
def _code_imports_py25(code, consts, names):
it1, it2, it3 = itertools.tee(_opargs(code), 3)
try:
next(it2)
next(it3)
next(it3)
except StopIteration:
return
for oparg1, oparg2, (op3, arg3) in itertools.izip(it1, it2, it3):
if op3 != IMPORT_NAME:
continue
op1, arg1 = oparg1
op2, arg2 = oparg2
if op1 != LOAD_CONST or op2 != LOAD_CONST:
continue
yield (consts[arg1], names[arg3], consts[arg2] or ())
def _code_imports_py24(code, consts, names):
it1, it2 = itertools.tee(_opargs(code), 2)
try:
next(it2)
except StopIteration:
return
for oparg1, (op2, arg2) in itertools.izip(it1, it2):
if op2 != IMPORT_NAME:
continue
op1, arg1 = oparg1
if op1 != LOAD_CONST:
continue
yield (-1, names[arg2], consts[arg1] or ())

@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: 2025 Mitogen authors <https://github.com/mitogen-hq>
# SPDX-License-Identifier: MIT
# !mitogen: minify_safe
import opcode
IMPORT_NAME = opcode.opmap['IMPORT_NAME']
LOAD_CONST = opcode.opmap['LOAD_CONST']
def _code_imports(code, consts, names):
start = 4
while True:
op3_idx = code.find(IMPORT_NAME, start, -1)
if op3_idx < 0:
return
if op3_idx % 2:
start = op3_idx + 1
continue
if code[op3_idx-4] != LOAD_CONST or code[op3_idx-2] != LOAD_CONST:
start = op3_idx + 2
continue
start = op3_idx + 6
arg1, arg2, arg3 = code[op3_idx-3], code[op3_idx-1], code[op3_idx+1]
yield (consts[arg1], names[arg3], consts[arg2] or ())

@ -35,10 +35,8 @@ be sent to any context that will be used to establish additional child
contexts. contexts.
""" """
import dis
import errno import errno
import inspect import inspect
import itertools
import logging import logging
import os import os
import pkgutil import pkgutil
@ -83,6 +81,7 @@ except ImportError:
import mitogen import mitogen
import mitogen.core import mitogen.core
import mitogen.imports
import mitogen.minify import mitogen.minify
import mitogen.parent import mitogen.parent
@ -90,14 +89,10 @@ from mitogen.core import any
from mitogen.core import b from mitogen.core import b
from mitogen.core import IOLOG from mitogen.core import IOLOG
from mitogen.core import LOG from mitogen.core import LOG
from mitogen.core import next
from mitogen.core import str_partition from mitogen.core import str_partition
from mitogen.core import str_rpartition from mitogen.core import str_rpartition
from mitogen.core import to_text from mitogen.core import to_text
imap = getattr(itertools, 'imap', map)
izip = getattr(itertools, 'izip', zip)
RLOG = logging.getLogger('mitogen.ctx') RLOG = logging.getLogger('mitogen.ctx')
@ -253,80 +248,6 @@ if mitogen.is_master:
mitogen.parent._get_core_source = _get_core_source mitogen.parent._get_core_source = _get_core_source
LOAD_CONST = dis.opname.index('LOAD_CONST')
IMPORT_NAME = dis.opname.index('IMPORT_NAME')
def _getarg(nextb, c):
if c >= dis.HAVE_ARGUMENT:
return nextb() | (nextb() << 8)
if sys.version_info < (3, 0):
def iter_opcodes(co):
# Yield `(op, oparg)` tuples from the code object `co`.
ordit = imap(ord, co.co_code)
nextb = ordit.next
return ((c, _getarg(nextb, c)) for c in ordit)
elif sys.version_info < (3, 6):
def iter_opcodes(co):
# Yield `(op, oparg)` tuples from the code object `co`.
ordit = iter(co.co_code)
nextb = ordit.__next__
return ((c, _getarg(nextb, c)) for c in ordit)
else:
def iter_opcodes(co):
# Yield `(op, oparg)` tuples from the code object `co`.
ordit = iter(co.co_code)
nextb = ordit.__next__
# https://github.com/abarnert/cpython/blob/c095a32f/Python/wordcode.md
return ((c, nextb()) for c in ordit)
def scan_code_imports(co):
"""
Given a code object `co`, scan its bytecode yielding any ``IMPORT_NAME``
and associated prior ``LOAD_CONST`` instructions representing an `Import`
statement or `ImportFrom` statement.
:return:
Generator producing `(level, modname, namelist)` tuples, where:
* `level`: -1 for normal import, 0, for absolute import, and >0 for
relative import.
* `modname`: Name of module to import, or from where `namelist` names
are imported.
* `namelist`: for `ImportFrom`, the list of names to be imported from
`modname`.
"""
opit = iter_opcodes(co)
opit, opit2, opit3 = itertools.tee(opit, 3)
try:
next(opit2)
next(opit3)
next(opit3)
except StopIteration:
return
if sys.version_info >= (2, 5):
for oparg1, oparg2, (op3, arg3) in izip(opit, opit2, opit3):
if op3 == IMPORT_NAME:
op2, arg2 = oparg2
op1, arg1 = oparg1
if op1 == op2 == LOAD_CONST:
yield (co.co_consts[arg1],
co.co_names[arg3],
co.co_consts[arg2] or ())
else:
# Python 2.4 did not yet have 'level', so stack format differs.
for oparg1, (op2, arg2) in izip(opit, opit2):
if op2 == IMPORT_NAME:
op1, arg1 = oparg1
if op1 == LOAD_CONST:
yield (-1, co.co_names[arg2], co.co_consts[arg1] or ())
class ThreadWatcher(object): class ThreadWatcher(object):
""" """
Manage threads that wait for another thread to shut down, before invoking Manage threads that wait for another thread to shut down, before invoking
@ -1029,7 +950,7 @@ class ModuleFinder(object):
maybe_names = list(self.generate_parent_names(fullname)) maybe_names = list(self.generate_parent_names(fullname))
co = compile(src, modpath, 'exec') co = compile(src, modpath, 'exec')
for level, modname, namelist in scan_code_imports(co): for level, modname, namelist in mitogen.imports.codeobj_imports(co):
if level == -1: if level == -1:
modnames = [modname, '%s.%s' % (fullname, modname)] modnames = [modname, '%s.%s' % (fullname, modname)]
else: else:

@ -4,10 +4,10 @@ set -o errexit
set -o pipefail set -o pipefail
set -o nounset set -o nounset
BENCH_FUNC="scan_code_imports" BENCH_FUNC="codeobj_imports"
BENCH_EXPR="deque($BENCH_FUNC(co), maxlen=0)" BENCH_EXPR="deque($BENCH_FUNC(co), maxlen=0)"
BIG_MODULE_PATH="$(dirname -- "$0")/data/big_module.py" BIG_MODULE_PATH="$(dirname -- "$0")/data/big_module.py"
IMPORTS="from collections import deque; from mitogen.master import $BENCH_FUNC" IMPORTS="from collections import deque; from mitogen.imports import $BENCH_FUNC"
COMPILE="co=compile(open('$BIG_MODULE_PATH').read(), '$BIG_MODULE_PATH', 'exec')" COMPILE="co=compile(open('$BIG_MODULE_PATH').read(), '$BIG_MODULE_PATH', 'exec')"
PYTHONS=( PYTHONS=(
python2.7 python3.9 python3.10 python3.11 python3.12 python3.13 python2.7 python3.9 python3.10 python3.11 python3.12 python3.13

@ -2,8 +2,9 @@ import os
import sys import sys
import unittest import unittest
import mitogen.imports
import testlib import testlib
import mitogen.master
def testmod_compile(path): def testmod_compile(path):
@ -15,7 +16,7 @@ def testmod_compile(path):
class ScanCodeImportsTest(testlib.TestCase): class ScanCodeImportsTest(testlib.TestCase):
func = staticmethod(mitogen.master.scan_code_imports) func = staticmethod(mitogen.imports.codeobj_imports)
@unittest.skipIf(sys.version_info < (3, 0), "Py is 2.x, would be relative") @unittest.skipIf(sys.version_info < (3, 0), "Py is 2.x, would be relative")
def test_default_absolute(self): def test_default_absolute(self):
Loading…
Cancel
Save