Merge commit 'refs/pull/178/head' of github.com:dw/mitogen

pull/193/head
David Wilson 7 years ago
commit ec720eab1a

@ -0,0 +1,117 @@
"""Selected backports from Python stdlib collections module
"""
__all__ = [
'namedtuple',
]
from operator import itemgetter as _itemgetter
from keyword import iskeyword as _iskeyword
import sys as _sys
try:
all([])
except NameError:
def all(iterable):
for element in iterable:
if not element:
return False
return True
def namedtuple(typename, field_names, verbose=False):
    """Returns a new subclass of tuple with named fields.

    >>> Point = namedtuple('Point', 'x y')
    >>> Point.__doc__           # docstring for the new class
    'Point(x, y)'
    >>> p = Point(11, y=22)     # instantiate with positional args or keywords
    >>> p[0] + p[1]             # indexable like a plain tuple
    33
    >>> x, y = p                # unpack like a regular tuple
    >>> x, y
    (11, 22)
    >>> p.x + p.y               # fields also accessible by name
    33
    >>> d = p._asdict()         # convert to a dictionary
    >>> d['x']
    11
    >>> Point(**d)              # convert from a dictionary
    Point(x=11, y=22)
    >>> p._replace(x=100)       # _replace() is like str.replace() but targets named fields
    Point(x=100, y=22)
    """

    # Parse and validate the field names.  Validation serves two purposes:
    # generating informative error messages and preventing template injection attacks.
    if isinstance(field_names, basestring):
        field_names = field_names.replace(',', ' ').split()  # names separated by whitespace and/or commas
    field_names = tuple(map(str, field_names))
    for name in (typename,) + field_names:
        if not all(c.isalnum() or c == '_' for c in name):
            raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a keyword: %r' % name)
        if name[0].isdigit():
            raise ValueError('Type names and field names cannot start with a number: %r' % name)
    seen_names = set()
    for name in field_names:
        if name.startswith('_'):
            raise ValueError('Field names cannot start with an underscore: %r' % name)
        if name in seen_names:
            raise ValueError('Encountered duplicate field name: %r' % name)
        seen_names.add(name)
    # Create and fill in the class template.
    numfields = len(field_names)
    argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
    dicttxt = ', '.join('%r: t[%d]' % (name, pos) for pos, name in enumerate(field_names))
    template = '''class %(typename)s(tuple):
        '%(typename)s(%(argtxt)s)' \n
        __slots__ = () \n
        _fields = %(field_names)r \n
        def __new__(_cls, %(argtxt)s):
            return _tuple.__new__(_cls, (%(argtxt)s)) \n
        @classmethod
        def _make(cls, iterable, new=tuple.__new__, len=len):
            'Make a new %(typename)s object from a sequence or iterable'
            result = new(cls, iterable)
            if len(result) != %(numfields)d:
                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
            return result \n
        def __repr__(self):
            return '%(typename)s(%(reprtxt)s)' %% self \n
        def _asdict(t):
            'Return a new dict which maps field names to their values'
            return {%(dicttxt)s} \n
        def _replace(_self, **kwds):
            'Return a new %(typename)s object replacing specified fields with new values'
            result = _self._make(map(kwds.pop, %(field_names)r, _self))
            if kwds:
                raise ValueError('Got unexpected field names: %%r' %% kwds.keys())
            return result \n
        def __getnewargs__(self):
            return tuple(self) \n\n''' % locals()
    for i, name in enumerate(field_names):
        template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
    if verbose:
        print template

    # Execute the template string in a temporary namespace and support
    # tracing utilities by setting a value for frame.f_globals['__name__'].
    namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
                     _property=property, _tuple=tuple)
    try:
        exec template in namespace
    except SyntaxError, e:
        raise SyntaxError(e.message + ':\n' + template)
    result = namespace[typename]

    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created.  Bypass this step in environments where
    # sys._getframe is not defined (Jython for example).
    if hasattr(_sys, '_getframe'):
        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')

    return result

@ -0,0 +1,303 @@
# encoding: utf-8
"""Selected backports from Python stdlib functools module
"""
# Written by Nick Coghlan <ncoghlan at gmail.com>,
# Raymond Hettinger <python at rcn.com>,
# and Łukasz Langa <lukasz at langa.pl>.
# Copyright (C) 2006-2013 Python Software Foundation.

__all__ = [
    'update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES',
    'lru_cache',
]

try:
    from collections import namedtuple
except ImportError:
    from mitogen.compat.collections import namedtuple

from threading import RLock
################################################################################
### update_wrapper() and wraps() decorator
################################################################################

# update_wrapper() and wraps() are tools to help write
# wrapper functions that can handle naive introspection

WRAPPER_ASSIGNMENTS = ('__module__', '__name__', '__qualname__', '__doc__',
                       '__annotations__')
WRAPPER_UPDATES = ('__dict__',)
def update_wrapper(wrapper,
                   wrapped,
                   assigned = WRAPPER_ASSIGNMENTS,
                   updated = WRAPPER_UPDATES):
    """Update a wrapper function to look like the wrapped function

       wrapper is the function to be updated
       wrapped is the original function
       assigned is a tuple naming the attributes assigned directly
       from the wrapped function to the wrapper function (defaults to
       functools.WRAPPER_ASSIGNMENTS)
       updated is a tuple naming the attributes of the wrapper that
       are updated with the corresponding attribute from the wrapped
       function (defaults to functools.WRAPPER_UPDATES)
    """
    for attr in assigned:
        try:
            value = getattr(wrapped, attr)
        except AttributeError:
            pass
        else:
            setattr(wrapper, attr, value)
    for attr in updated:
        getattr(wrapper, attr).update(getattr(wrapped, attr, {}))
    # Issue #17482: set __wrapped__ last so we don't inadvertently copy it
    # from the wrapped function when updating __dict__
    wrapper.__wrapped__ = wrapped
    # Return the wrapper so this can be used as a decorator via partial()
    return wrapper
def wraps(wrapped,
          assigned = WRAPPER_ASSIGNMENTS,
          updated = WRAPPER_UPDATES):
    """Decorator factory to apply update_wrapper() to a wrapper function

       Returns a decorator that invokes update_wrapper() with the decorated
       function as the wrapper argument and the arguments to wraps() as the
       remaining arguments. Default arguments are as for update_wrapper().
       This is a convenience function to simplify applying partial() to
       update_wrapper().
    """
    return partial(update_wrapper, wrapped=wrapped,
                   assigned=assigned, updated=updated)
################################################################################
### partial() argument application
################################################################################

# Purely functional, no descriptor behaviour
def partial(func, *args, **keywords):
    """New function with partial application of the given arguments
    and keywords.
    """
    if hasattr(func, 'func'):
        args = func.args + args
        tmpkw = func.keywords.copy()
        tmpkw.update(keywords)
        keywords = tmpkw
        del tmpkw
        func = func.func

    def newfunc(*fargs, **fkeywords):
        newkeywords = keywords.copy()
        newkeywords.update(fkeywords)
        return func(*(args + fargs), **newkeywords)
    newfunc.func = func
    newfunc.args = args
    newfunc.keywords = keywords
    return newfunc
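Note that the `hasattr(func, 'func')` branch flattens stacked partials instead of nesting closures; a small sketch of that behaviour:

def add3(a, b, c):
    return a + b + c

add_1 = partial(add3, 1)
add_1_2 = partial(add_1, 2)     # flattened: wraps add3 directly
assert add_1_2.func is add3
assert add_1_2.args == (1, 2)
assert add_1_2(3) == 6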
################################################################################
### LRU Cache function decorator
################################################################################

_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])

class _HashedSeq(list):
    """ This class guarantees that hash() will be called no more than once
        per element.  This is important because the lru_cache() will hash
        the key multiple times on a cache miss.
    """
    __slots__ = 'hashvalue'

    def __init__(self, tup, hash=hash):
        self[:] = tup
        self.hashvalue = hash(tup)

    def __hash__(self):
        return self.hashvalue
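A short sketch of the guarantee: the tuple's elements are hashed once in `__init__`, and every later `hash()` call returns the cached value:

key = _HashedSeq(('a', 1, 2))
assert hash(key) == key.hashvalue   # no re-hashing of the elements
table = {key: 'cached result'}
assert table[key] == 'cached result'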
def _make_key(args, kwds, typed,
              kwd_mark = (object(),),
              fasttypes = set([int, str, frozenset, type(None)]),
              sorted=sorted, tuple=tuple, type=type, len=len):
    """Make a cache key from optionally typed positional and keyword arguments

    The key is constructed in a way that is as flat as possible rather than
    as a nested structure that would take more memory.

    If there is only a single argument and its data type is known to cache
    its hash value, then that argument is returned without a wrapper.  This
    saves space and improves lookup speed.
    """
    key = args
    if kwds:
        sorted_items = sorted(kwds.items())
        key += kwd_mark
        for item in sorted_items:
            key += item
    if typed:
        key += tuple(type(v) for v in args)
        if kwds:
            key += tuple(type(v) for k, v in sorted_items)
    elif len(key) == 1 and type(key[0]) in fasttypes:
        return key[0]
    return _HashedSeq(key)
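To make the flat layout concrete, a sketch of the keys this produces (`kwd_mark` is the unique `object()` bound as a default above):

assert _make_key((1,), {}, False) == 1    # lone fast-type argument: no wrapper
key = _make_key((1, 2), {'a': 3}, False)
# key hashes like the flat tuple (1, 2, kwd_mark, 'a', 3) -- never nested
tkey = _make_key((1,), {}, True)
# tkey hashes like (1, int): argument types are appended when typed=True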
def lru_cache(maxsize=128, typed=False):
    """Least-recently-used cache decorator.

    If *maxsize* is set to None, the LRU features are disabled and the cache
    can grow without bound.

    If *typed* is True, arguments of different types will be cached separately.
    For example, f(3.0) and f(3) will be treated as distinct calls with
    distinct results.

    Arguments to the cached function must be hashable.

    View the cache statistics named tuple (hits, misses, maxsize, currsize)
    with f.cache_info().  Clear the cache and statistics with f.cache_clear().
    Access the underlying function with f.__wrapped__.

    See: http://en.wikipedia.org/wiki/Cache_algorithms#Least_Recently_Used
    """
    # Users should only access the lru_cache through its public API:
    # cache_info, cache_clear, and f.__wrapped__
    # The internals of the lru_cache are encapsulated for thread safety and
    # to allow the implementation to change (including a possible C version).

    # Early detection of an erroneous call to @lru_cache without any arguments
    # resulting in the inner function being passed to maxsize instead of an
    # integer or None.
    if maxsize is not None and not isinstance(maxsize, int):
        raise TypeError('Expected maxsize to be an integer or None')

    def decorating_function(user_function):
        wrapper = _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo)
        return update_wrapper(wrapper, user_function)

    return decorating_function
def _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo):
    # Constants shared by all lru cache instances:
    sentinel = object()                    # unique object used to signal cache misses
    make_key = _make_key                   # build a key from the function arguments
    PREV, NEXT, KEY, RESULT = 0, 1, 2, 3   # names for the link fields

    cache = {}
    cache_get = cache.get    # bound method to lookup a key or return None
    lock = RLock()           # because linkedlist updates aren't threadsafe
    root = []                # root of the circular doubly linked list
    root[:] = [root, root, None, None]     # initialize by pointing to self
    hits_misses_full_root = [0, 0, False, root]
    HITS, MISSES, FULL, ROOT = 0, 1, 2, 3

    if maxsize == 0:
        def wrapper(*args, **kwds):
            # No caching -- just a statistics update after a successful call
            result = user_function(*args, **kwds)
            hits_misses_full_root[MISSES] += 1
            return result

    elif maxsize is None:
        def wrapper(*args, **kwds):
            # Simple caching without ordering or size limit
            key = make_key(args, kwds, typed)
            result = cache_get(key, sentinel)
            if result is not sentinel:
                hits_misses_full_root[HITS] += 1
                return result
            result = user_function(*args, **kwds)
            cache[key] = result
            hits_misses_full_root[MISSES] += 1
            return result
def wrapper(*args, **kwds):
# Size limited caching that tracks accesses by recency
key = make_key(args, kwds, typed)
lock.acquire()
try:
link = cache_get(key)
if link is not None:
# Move the link to the front of the circular queue
root = hits_misses_full_root[ROOT]
link_prev, link_next, _key, result = link
link_prev[NEXT] = link_next
link_next[PREV] = link_prev
last = root[PREV]
last[NEXT] = root[PREV] = link
link[PREV] = last
link[NEXT] = root
hits_misses_full_root[HITS] += 1
return result
finally:
lock.release()
result = user_function(*args, **kwds)
lock.acquire()
try:
if key in cache:
# Getting here means that this same key was added to the
# cache while the lock was released. Since the link
# update is already done, we need only return the
# computed result and update the count of misses.
pass
elif hits_misses_full_root[FULL]:
# Use the old root to store the new key and result.
oldroot = root = hits_misses_full_root[ROOT]
oldroot[KEY] = key
oldroot[RESULT] = result
# Empty the oldest link and make it the new root.
# Keep a reference to the old key and old result to
# prevent their ref counts from going to zero during the
# update. That will prevent potentially arbitrary object
# clean-up code (i.e. __del__) from running while we're
# still adjusting the links.
root = hits_misses_full_root[ROOT] = oldroot[NEXT]
oldkey = root[KEY]
oldresult = root[RESULT]
root[KEY] = root[RESULT] = None
# Now update the cache dictionary.
del cache[oldkey]
# Save the potentially reentrant cache[key] assignment
# for last, after the root and links have been put in
# a consistent state.
cache[key] = oldroot
else:
# Put result in a new link at the front of the queue.
root = hits_misses_full_root[ROOT]
last = root[PREV]
link = [last, root, key, result]
last[NEXT] = root[PREV] = cache[key] = link
# Use the __len__() method instead of the len() function
# which could potentially be wrapped in an lru_cache itself.
hits_misses_full_root[FULL] = (cache.__len__() >= maxsize)
hits_misses_full_root[MISSES]
finally:
lock.release()
return result
    def cache_info():
        """Report cache statistics"""
        lock.acquire()
        try:
            return _CacheInfo(hits_misses_full_root[HITS],
                              hits_misses_full_root[MISSES],
                              maxsize, cache.__len__())
        finally:
            lock.release()

    def cache_clear():
        """Clear the cache and cache statistics"""
        lock.acquire()
        try:
            cache.clear()
            root = hits_misses_full_root[ROOT]
            root[:] = [root, root, None, None]
            hits_misses_full_root[HITS] = 0
            hits_misses_full_root[MISSES] = 0
            hits_misses_full_root[FULL] = False
        finally:
            lock.release()

    wrapper.cache_info = cache_info
    wrapper.cache_clear = cache_clear
    return wrapper
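A minimal end-to-end sketch of the decorator (again assuming the backport is importable as `mitogen.compat.functools`):

from mitogen.compat.functools import lru_cache

@lru_cache(maxsize=2)
def square(x):
    return x * x

square(2); square(2); square(3)
info = square.cache_info()
assert (info.hits, info.misses, info.currsize) == (1, 2, 2)

square(4)             # cache is full, so the LRU entry (x=2) is evicted
square.cache_clear()  # resets the cache and statistics
assert square.cache_info().currsize == 0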

@ -26,12 +26,12 @@
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import cStringIO
import fcntl
import getpass
import inspect
import logging
import os
import re
import select
import signal
import socket
@ -39,18 +39,21 @@ import sys
import termios
import textwrap
import threading
import tokenize
import time
import types
import zlib
try:
    from functools import lru_cache
except ImportError:
    from mitogen.compat.functools import lru_cache
import mitogen.core
from mitogen.core import LOG
from mitogen.core import IOLOG
DOCSTRING_RE = re.compile(r'""".+?"""', re.M | re.S)
COMMENT_RE = re.compile(r'^[ ]*#[^\n]*$', re.M)
try:
    SC_OPEN_MAX = os.sysconf('SC_OPEN_MAX')
except:
@ -86,11 +89,93 @@ def is_immediate_child(msg, stream):
    return msg.src_id == stream.remote_id
def minimize_source(source):
    subber = lambda match: '""' + ('\n' * match.group(0).count('\n'))
    source = DOCSTRING_RE.sub(subber, source)
    source = COMMENT_RE.sub('', source)
    return source.replace('    ', '\t')

@lru_cache()
def minimize_source(source):
    """Remove most comments and docstrings from Python source code.
    """
    tokens = tokenize.generate_tokens(cStringIO.StringIO(source).readline)
    tokens = strip_comments(tokens)
    tokens = strip_docstrings(tokens)
    tokens = reindent(tokens)
    return tokenize.untokenize(tokens)
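As a rough sketch of the effect (exact whitespace depends on the tokenizer round-trip): comments vanish, docstrings become blank lines so line numbers are preserved, and indentation is rewritten to one space per level:

before = (
    'def f():\n'
    '    """docstring"""\n'
    '    return 1  # comment\n'
)
# minimize_source(before) comes back roughly as:
#   'def f():\n\n return 1\n'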
def strip_comments(tokens):
    """Drop comment tokens from a `tokenize` stream.

    Comments on lines 1-2 are kept, to preserve hashbang and encoding.
    Trailing whitespace is removed from all lines.
    """
    prev_typ = None
    prev_end_col = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ in (tokenize.NL, tokenize.NEWLINE):
            if prev_typ in (tokenize.NL, tokenize.NEWLINE):
                start_col = 0
            else:
                start_col = prev_end_col
            end_col = start_col + 1
        elif typ == tokenize.COMMENT and start_row > 2:
            continue
        prev_typ = typ
        prev_end_col = end_col
        yield typ, tok, (start_row, start_col), (end_row, end_col), line
def strip_docstrings(tokens):
    """Replace docstring tokens with NL tokens in a `tokenize` stream.

    Any STRING token not part of an expression is deemed a docstring.
    Indented docstrings are not yet recognised.
    """
    stack = []
    state = 'wait_string'
    for t in tokens:
        typ = t[0]
        if state == 'wait_string':
            if typ in (tokenize.NL, tokenize.COMMENT):
                yield t
            elif typ in (tokenize.DEDENT, tokenize.INDENT, tokenize.STRING):
                stack.append(t)
            elif typ == tokenize.NEWLINE:
                stack.append(t)
                start_line, end_line = stack[0][2][0], stack[-1][3][0]+1
                for i in range(start_line, end_line):
                    yield tokenize.NL, '\n', (i, 0), (i, 1), '\n'
                for t in stack:
                    if t[0] in (tokenize.DEDENT, tokenize.INDENT):
                        yield t[0], t[1], (i+1, t[2][1]), (i+1, t[3][1]), t[4]
                del stack[:]
            else:
                stack.append(t)
                for t in stack:
                    yield t
                del stack[:]
                state = 'wait_newline'
        elif state == 'wait_newline':
            if typ == tokenize.NEWLINE:
                state = 'wait_string'
            yield t
def reindent(tokens, indent=' '):
    """Replace existing indentation in a token stream with `indent`.
    """
    old_levels = []
    old_level = 0
    new_level = 0
    for typ, tok, (start_row, start_col), (end_row, end_col), line in tokens:
        if typ == tokenize.INDENT:
            old_levels.append(old_level)
            old_level = len(tok)
            new_level += 1
            tok = indent * new_level
        elif typ == tokenize.DEDENT:
            old_level = old_levels.pop()
            new_level -= 1
        start_col = max(0, start_col - old_level + new_level)
        if start_row == end_row:
            end_col = start_col + len(tok)
        yield typ, tok, (start_row, start_col), (end_row, end_col), line
def flags(names):

@ -22,6 +22,16 @@ print 'Preamble size: %s (%.2fKiB)' % (
    len(stream.get_preamble()) / 1024.0,
)

print(
    '               '
    ' '
    '  Original   '
    ' '
    '     Minimized     '
    ' '
    '    Compressed     '
)
for mod in (
    mitogen.master,
    mitogen.parent,
@ -29,5 +39,28 @@ for mod in (
    mitogen.sudo,
    mitogen.fakessh,
):
    sz = len(zlib.compress(mitogen.parent.minimize_source(inspect.getsource(mod))))
    print '%s size: %s (%.2fKiB)' % (mod.__name__, sz, sz / 1024.0)
    original = inspect.getsource(mod)
    original_size = len(original)
    minimized = mitogen.parent.minimize_source(original)
    minimized_size = len(minimized)
    compressed = zlib.compress(minimized, 9)
    compressed_size = len(compressed)
    print(
        '%-15s'
        ' '
        '%5i %4.1fKiB'
        ' '
        '%5i %4.1fKiB %.1f%%'
        ' '
        '%5i %4.1fKiB %.1f%%'
        % (
            mod.__name__,
            original_size,
            original_size / 1024.0,
            minimized_size,
            minimized_size / 1024.0,
            100 * minimized_size / float(original_size),
            compressed_size,
            compressed_size / 1024.0,
            100 * compressed_size / float(original_size),
        ))

@ -0,0 +1,8 @@
class C:
    """docstring
    """
    def method(self):
        """docstring
        """
        pass

@ -0,0 +1,8 @@
class C:


 def method(self):


  pass

@ -0,0 +1,4 @@
def f():
    """docstring
    """
    pass

@ -0,0 +1,3 @@
#/usr/bin/python -c
# coding: utf-8
# comment

@ -0,0 +1,3 @@
#/usr/bin/python -c
# coding: utf-8

@ -0,0 +1,5 @@
"""docstring
"""
pass

@ -0,0 +1,49 @@
#!python
# coding : utf-8
# comment
"docstring"
import sys
"cc", {}
def f1(a, b= None, c=[]):  # comment
    r"""docstring
    """
    x = ""
    print(
        a,
        b,
        """arg1""",  # comment
        """arg2""",  # comment
        c )
    # comment
    'foo' > sys.stderr
    "baz"
    #comment
    sys.stdout;lambda: "justastring"
    1j
def f2():
    # comment
    '''docstring'''
    pass
class c:
    u'''docstring'''
    f='justastring'
    b'''docstring
    '''
@f1
class c2(object):
    'docstring'
    def __init__(self):
        "docstring"
        def inner(): pass
        d = {'a':0}
        for x in '':  # FIXME Removing this leaves nothing, hence a SyntaxError
            "justastring"

@ -0,0 +1,49 @@
#!python
# coding : utf-8
import sys
"cc", {}
def f1(a, b= None, c=[]):
 x = ""
 print(
     a,
     b,
     """arg1""",
     """arg2""",
     c )
 'foo' > sys.stderr
 sys.stdout;lambda: "justastring"
 1j
def f2():
 pass
class c:
 f='justastring'
@f1
class c2(object):
 def __init__(self):
  def inner(): pass
  d = {'a':0}
  for x in '':

@ -0,0 +1,54 @@
import unittest2

from mitogen.parent import minimize_source

import testlib


def read_sample(fname):
    sample_path = testlib.data_path('minimize_samples/' + fname)
    sample_file = open(sample_path)
    sample = sample_file.read()
    sample_file.close()
    return sample


class MinimizeSource(unittest2.TestCase):
    def test_class(self):
        original = read_sample('class.py')
        expected = read_sample('class_min.py')
        self.assertEqual(expected, minimize_source(original))

    def test_comment(self):
        original = read_sample('comment.py')
        expected = read_sample('comment_min.py')
        self.assertEqual(expected, minimize_source(original))

    def test_def(self):
        original = read_sample('def.py')
        expected = read_sample('def_min.py')
        self.assertEqual(expected, minimize_source(original))

    def test_hashbang(self):
        original = read_sample('hashbang.py')
        expected = read_sample('hashbang_min.py')
        self.assertEqual(expected, minimize_source(original))

    def test_mod(self):
        original = read_sample('mod.py')
        expected = read_sample('mod_min.py')
        self.assertEqual(expected, minimize_source(original))

    def test_pass(self):
        original = read_sample('pass.py')
        expected = read_sample('pass_min.py')
        self.assertEqual(expected, minimize_source(original))

    def test_obstacle_course(self):
        original = read_sample('obstacle_course.py')
        expected = read_sample('obstacle_course_min.py')
        self.assertEqual(expected, minimize_source(original))


if __name__ == '__main__':
    unittest2.main()

@ -198,6 +198,9 @@ class FindRelatedTest(testlib.TestCase):
        related = self.call('mitogen.fakessh')
        self.assertEquals(related, [
            'mitogen',
            'mitogen.compat',
            'mitogen.compat.collections',
            'mitogen.compat.functools',
            'mitogen.core',
            'mitogen.master',
            'mitogen.parent',
