diff --git a/mitogen/core.py b/mitogen/core.py index ccaf9ab0..66842852 100644 --- a/mitogen/core.py +++ b/mitogen/core.py @@ -555,6 +555,7 @@ class Importer(object): 'jail', 'lxc', 'master', + 'minify', 'parent', 'select', 'service', diff --git a/mitogen/master.py b/mitogen/master.py index 9a5b7e83..9665377f 100644 --- a/mitogen/master.py +++ b/mitogen/master.py @@ -52,7 +52,9 @@ if not hasattr(pkgutil, 'find_loader'): # been kept intentionally 2.3 compatible so we can reuse it. from mitogen.compat import pkgutil +import mitogen import mitogen.core +import mitogen.minify import mitogen.parent from mitogen.core import LOG @@ -79,6 +81,19 @@ def get_child_modules(path): return [name for _, name, _ in it] +def get_core_source(): + """ + Master version of parent.get_core_source(). + """ + source = inspect.getsource(mitogen.core) + return mitogen.minify.minimize_source(source) + + +if mitogen.is_master: + # TODO: find a less surprising way of installing this. + mitogen.parent.get_core_source = get_core_source + + LOAD_CONST = dis.opname.index('LOAD_CONST') IMPORT_NAME = dis.opname.index('IMPORT_NAME') diff --git a/mitogen/minify.py b/mitogen/minify.py new file mode 100644 index 00000000..1d6f8d11 --- /dev/null +++ b/mitogen/minify.py @@ -0,0 +1,134 @@ +# Copyright 2017, Alex Willmer +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the copyright holder nor the names of its contributors +# may be used to endorse or promote products derived from this software without +# specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. + +import sys + + +try: + from cStringIO import StringIO as BytesIO +except ImportError: + from io import BytesIO + +if sys.version_info < (2, 7, 11): + from mitogen.compat import tokenize +else: + import tokenize + +try: + from functools import lru_cache +except ImportError: + from mitogen.compat.functools import lru_cache + + +@lru_cache() +def minimize_source(source): + """Remove most comments and docstrings from Python source code. + """ + tokens = tokenize.generate_tokens(BytesIO(source).readline) + tokens = strip_comments(tokens) + tokens = strip_docstrings(tokens) + tokens = reindent(tokens) + return tokenize.untokenize(tokens) + + +def strip_comments(tokens): + """Drop comment tokens from a `tokenize` stream. + + Comments on lines 1-2 are kept, to preserve hashbang and encoding. + Trailing whitespace is removed from all lines. 
+ """ + prev_typ = None + prev_end_col = 0 + for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens: + if typ in (tokenize.NL, tokenize.NEWLINE): + if prev_typ in (tokenize.NL, tokenize.NEWLINE): + start_col = 0 + else: + start_col = prev_end_col + end_col = start_col + 1 + elif typ == tokenize.COMMENT and start_row > 2: + continue + prev_typ = typ + prev_end_col = end_col + yield typ, tok, (start_row, start_col), (end_row, end_col), line + + +def strip_docstrings(tokens): + """Replace docstring tokens with NL tokens in a `tokenize` stream. + + Any STRING token not part of an expression is deemed a docstring. + Indented docstrings are not yet recognised. + """ + stack = [] + state = 'wait_string' + for t in tokens: + typ = t[0] + if state == 'wait_string': + if typ in (tokenize.NL, tokenize.COMMENT): + yield t + elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING): + stack.append(t) + elif typ == tokenize.NEWLINE: + stack.append(t) + start_line, end_line = stack[0][2][0], stack[-1][3][0]+1 + for i in range(start_line, end_line): + yield tokenize.NL, '\n', (i, 0), (i,1), '\n' + for t in stack: + if t[0] in (tokenize.DEDENT, tokenize.INDENT): + yield t[0], t[1], (i+1, t[2][1]), (i+1, t[3][1]), t[4] + del stack[:] + else: + stack.append(t) + for t in stack: yield t + del stack[:] + state = 'wait_newline' + elif state == 'wait_newline': + if typ == tokenize.NEWLINE: + state = 'wait_string' + yield t + + +def reindent(tokens, indent=' '): + """Replace existing indentation in a token stream, with `indent`. 
+ """ + old_levels = [] + old_level = 0 + new_level = 0 + for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens: + if typ == tokenize.INDENT: + old_levels.append(old_level) + old_level = len(tok) + new_level += 1 + tok = indent * new_level + elif typ == tokenize.DEDENT: + old_level = old_levels.pop() + new_level -= 1 + start_col = max(0, start_col - old_level + new_level) + if start_row == end_row: + end_col = start_col + len(tok) + yield typ, tok, (start_row, start_col), (end_row, end_col), line diff --git a/mitogen/parent.py b/mitogen/parent.py index 9436591e..bec03f85 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -52,21 +52,6 @@ import zlib # Absolute imports for <2.5. select = __import__('select') -try: - from cStringIO import StringIO as BytesIO -except ImportError: - from io import BytesIO - -if sys.version_info < (2, 7, 11): - from mitogen.compat import tokenize -else: - import tokenize - -try: - from functools import lru_cache -except ImportError: - from mitogen.compat.functools import lru_cache - import mitogen.core from mitogen.core import LOG from mitogen.core import IOLOG @@ -82,101 +67,21 @@ def get_log_level(): return (LOG.level or logging.getLogger().level or logging.INFO) -def is_immediate_child(msg, stream): - """ - Handler policy that requires messages to arrive only from immediately - connected children. - """ - return msg.src_id == stream.remote_id - - -@lru_cache() -def minimize_source(source): - """Remove most comments and docstrings from Python source code. +def get_core_source(): """ - tokens = tokenize.generate_tokens(BytesIO(source).readline) - tokens = strip_comments(tokens) - tokens = strip_docstrings(tokens) - tokens = reindent(tokens) - return tokenize.untokenize(tokens) - - -def strip_comments(tokens): - """Drop comment tokens from a `tokenize` stream. - - Comments on lines 1-2 are kept, to preserve hashbang and encoding. - Trailing whitespace is remove from all lines. 
+ In non-masters, simply fetch the cached mitogen.core source code via the + import mechanism. In masters, this function is replaced with a version that + performs minification directly. """ - prev_typ = None - prev_end_col = 0 - for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens: - if typ in (tokenize.NL, tokenize.NEWLINE): - if prev_typ in (tokenize.NL, tokenize.NEWLINE): - start_col = 0 - else: - start_col = prev_end_col - end_col = start_col + 1 - elif typ == tokenize.COMMENT and start_row > 2: - continue - prev_typ = typ - prev_end_col = end_col - yield typ, tok, (start_row, start_col), (end_row, end_col), line - + return inspect.getsource(mitogen.core) -def strip_docstrings(tokens): - """Replace docstring tokens with NL tokens in a `tokenize` stream. - Any STRING token not part of an expression is deemed a docstring. - Indented docstrings are not yet recognised. +def is_immediate_child(msg, stream): """ - stack = [] - state = 'wait_string' - for t in tokens: - typ = t[0] - if state == 'wait_string': - if typ in (tokenize.NL, tokenize.COMMENT): - yield t - elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING): - stack.append(t) - elif typ == tokenize.NEWLINE: - stack.append(t) - start_line, end_line = stack[0][2][0], stack[-1][3][0]+1 - for i in range(start_line, end_line): - yield tokenize.NL, '\n', (i, 0), (i,1), '\n' - for t in stack: - if t[0] in (tokenize.DEDENT, tokenize.INDENT): - yield t[0], t[1], (i+1, t[2][1]), (i+1, t[3][1]), t[4] - del stack[:] - else: - stack.append(t) - for t in stack: yield t - del stack[:] - state = 'wait_newline' - elif state == 'wait_newline': - if typ == tokenize.NEWLINE: - state = 'wait_string' - yield t - - -def reindent(tokens, indent=' '): - """Replace existing indentation in a token steam, with `indent`. + Handler policy that requires messages to arrive only from immediately + connected children. 
""" - old_levels = [] - old_level = 0 - new_level = 0 - for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens: - if typ == tokenize.INDENT: - old_levels.append(old_level) - old_level = len(tok) - new_level += 1 - tok = indent * new_level - elif typ == tokenize.DEDENT: - old_level = old_levels.pop() - new_level -= 1 - start_col = max(0, start_col - old_level + new_level) - if start_row == end_row: - end_col = start_col + len(tok) - yield typ, tok, (start_row, start_col), (end_row, end_col), line + return msg.src_id == stream.remote_id def flags(names): @@ -498,8 +403,7 @@ def stream_by_method_name(name): @mitogen.core.takes_econtext def _proxy_connect(name, method_name, kwargs, econtext): - - mitogen.parent.upgrade_router(econtext) + upgrade_router(econtext) try: context = econtext.router._connect( klass=stream_by_method_name(method_name), @@ -921,11 +825,11 @@ class Stream(mitogen.core.Stream): } def get_preamble(self): - source = inspect.getsource(mitogen.core) + source = get_core_source() source += '\nExternalContext(%r).main()\n' % ( self.get_econtext_config(), ) - return zlib.compress(minimize_source(source), 9) + return zlib.compress(source, 9) create_child = staticmethod(create_child) create_child_args = {} diff --git a/mitogen/service.py b/mitogen/service.py index 6719f833..4d824f3d 100644 --- a/mitogen/service.py +++ b/mitogen/service.py @@ -149,6 +149,7 @@ class Error(Exception): """ Raised when an error occurs configuring a service or pool. """ + pass # cope with minify_source() bug. 
class Policy(object): diff --git a/preamble_size.py b/preamble_size.py index 6e3c7924..df1b3330 100644 --- a/preamble_size.py +++ b/preamble_size.py @@ -8,6 +8,7 @@ import zlib import mitogen.fakessh import mitogen.master +import mitogen.minify import mitogen.parent import mitogen.service import mitogen.ssh @@ -34,16 +35,16 @@ print( ) for mod in ( - mitogen.master, mitogen.parent, - mitogen.service, mitogen.ssh, mitogen.sudo, + mitogen.service, mitogen.fakessh, + mitogen.master, ): original = inspect.getsource(mod) original_size = len(original) - minimized = mitogen.parent.minimize_source(original) + minimized = mitogen.minify.minimize_source(original) minimized_size = len(minimized) compressed = zlib.compress(minimized, 9) compressed_size = len(compressed)