diff --git a/docs/changelog.rst b/docs/changelog.rst index 296c30fa..adc5c33a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -21,6 +21,9 @@ To avail of fixes in an unreleased version, please download a ZIP file In progress (unreleased) ------------------------ +* :gh:issue:`1243` :mod:`mitogen`: Pass first stage, context name, & preamble + size as separate **argv** arguments + v0.3.31 (2025-11-05) -------------------- diff --git a/docs/howitworks.rst b/docs/howitworks.rst index d7606b11..ae1910f8 100644 --- a/docs/howitworks.rst +++ b/docs/howitworks.rst @@ -27,14 +27,13 @@ Python Command Line ################### The Python command line sent to the host is a :mod:`zlib`-compressed [#f2]_ and -base64-encoded copy of the :py:meth:`mitogen.master.Stream._first_stage` -function, which has been carefully optimized to reduce its size. Prior to -compression and encoding, ``CONTEXT_NAME`` is replaced with the desired context -name in the function's source code. +base64-encoded copy of :py:meth:`mitogen.parent.Connection._first_stage`, +which is carefully written to maximize its compatibility and minimize its size. +A simplified illustration of the bootstrap command is .. code:: - python -c 'exec "xxx".decode("base64").decode("zlib")' + python -c 'exec(sys.argv[1].decode("base64").decode("zlib"))' ... The command-line arranges for the Python interpreter to decode the base64'd component, decompress it and execute it as Python code. Base64 is used since @@ -71,8 +70,8 @@ of the large base64-encoded first stage parameter, and to replace **argv[0]** with something descriptive. After configuring its ``stdin`` to point to the read end of the pipe, the -parent half of the fork re-executes Python, with **argv[0]** taken from the -``CONTEXT_NAME`` variable earlier substituted into its source code. As no +fork parent re-executes Python with **argv[0]** composed of the Python +interpreter path and a remote name supplied by the Mitogen parent. 
As no arguments are provided to this new execution of Python, and since ``stdin`` is connected to a pipe (whose write end is connected to the first stage), the Python interpreter begins reading source code to execute from the pipe diff --git a/mitogen/parent.py b/mitogen/parent.py index 1a23df18..6e30b1c6 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1396,10 +1396,6 @@ class Connection(object): # with a custom argv. # * Optimized for minimum byte count after minification & compression. # The script preamble_size.py measures this. - # * 'CONTEXT_NAME' and 'PREAMBLE_COMPRESSED_LEN' are substituted with - # their respective values. - # * CONTEXT_NAME must be prefixed with the name of the Python binary in - # order to allow virtualenvs to detect their install prefix. # # macOS tweaks for Python 2.7 must be kept in sync with the the Ansible # module test_echo_module, used by the integration tests. @@ -1435,20 +1431,21 @@ class Connection(object): os.close(r) os.close(W) os.close(w) - if os.uname()[0]=='Darwin'and os.uname()[2][:2]<'19'and sys.executable=='/usr/bin/python':sys.executable='/usr/bin/python2.7' - if os.uname()[0]=='Darwin'and os.uname()[2][:2]in'2021'and sys.version[:3]=='2.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' + if os.uname()[0]+os.uname()[2][:2]+sys.executable=='Darwin19/usr/bin/python':sys.executable+='2.7' + if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin202.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' + if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin212.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' os.environ['ARGV0']=sys.executable - os.execl(sys.executable,sys.executable+'(mitogen:CONTEXT_NAME)') + os.execl(sys.executable,sys.executable+'(mitogen:%s)'%sys.argv[2]) os.write(1,'MITO000\n'.encode()) C=''.encode() - while PREAMBLE_COMPRESSED_LEN-len(C)and select.select([0],[],[]):C+=os.read(0,PREAMBLE_COMPRESSED_LEN-len(C)) + while int(sys.argv[3])-len(C)and 
select.select([0],[],[]):C+=os.read(0,int(sys.argv[3])-len(C)) C=zlib.decompress(C) - fp=os.fdopen(W,'wb',0) - fp.write(C) - fp.close() - fp=os.fdopen(w,'wb',0) - fp.write(C) - fp.close() + f=os.fdopen(W,'wb',0) + f.write(C) + f.close() + f=os.fdopen(w,'wb',0) + f.write(C) + f.close() os.write(1,'MITO001\n'.encode()) os.close(2) @@ -1469,11 +1466,10 @@ class Connection(object): source = inspect.getsource(self._first_stage) source = textwrap.dedent('\n'.join(source.strip().split('\n')[2:])) source = source.replace(' ', ' ') - source = source.replace('CONTEXT_NAME', self.options.remote_name) - preamble_compressed = self.get_preamble() - source = source.replace('PREAMBLE_COMPRESSED_LEN', - str(len(preamble_compressed))) - compressed = zlib.compress(source.encode(), 9) + compressor = zlib.compressobj( + zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, + ) + compressed = compressor.compress(source.encode()) + compressor.flush() encoded = binascii.b2a_base64(compressed).replace(b('\n'), b('')) # Just enough to decode, decompress, and exec the first stage. @@ -1484,7 +1480,10 @@ class Connection(object): '-c', 'import sys;sys.path=[p for p in sys.path if p];' 'import binascii,os,select,zlib;' - 'exec(zlib.decompress(binascii.a2b_base64("%s")))' % (encoded.decode(),), + 'exec(zlib.decompress(binascii.a2b_base64(sys.argv[1]),-15))', + encoded.decode(), + self.options.remote_name, + str(len(self.get_preamble())), ] def get_econtext_config(self): diff --git a/tests/first_stage_test.py b/tests/first_stage_test.py index e06f453f..2576ec14 100644 --- a/tests/first_stage_test.py +++ b/tests/first_stage_test.py @@ -26,7 +26,7 @@ class CommandLineTest(testlib.RouterMixin, testlib.TestCase): # preamble from stdin, then execute it. # This test attaches /dev/zero to stdin to create a specific failure - # 1. Fork child reads PREAMBLE_COMPRESSED_LEN bytes of junk (all `\0`) + # 1. Fork child reads bytes of NUL (`b'\0'`) # 2. 
Fork child crashes (trying to decompress the junk data) # 3. Fork child's file descriptors (write pipes) are closed by the OS # 4. Fork parent does `dup(, )` and `exec()`