From 408946adbe702d0b704722f569adde4484aea452 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Thu, 6 Nov 2025 10:39:49 +0000 Subject: [PATCH 1/4] mitogen: Golf 8 bytes from bootstrap first stage (798 -> 790) Before ``` SSH command size: 798 Preamble (mitogen.core + econtext) size: 18230 (17.80KiB) Original Minimized Compressed mitogen.core 152237 148.7KiB 68453 66.8KiB 45.0% 18130 17.7KiB 11.9% mitogen.parent 99020 96.7KiB 51247 50.0KiB 51.8% 12910 12.6KiB 13.0% mitogen.fork 8445 8.2KiB 4139 4.0KiB 49.0% 1652 1.6KiB 19.6% mitogen.ssh 10847 10.6KiB 6913 6.8KiB 63.7% 2102 2.1KiB 19.4% mitogen.sudo 12089 11.8KiB 5924 5.8KiB 49.0% 2249 2.2KiB 18.6% mitogen.select 12325 12.0KiB 2929 2.9KiB 23.8% 964 0.9KiB 7.8% mitogen.service 41581 40.6KiB 22398 21.9KiB 53.9% 5847 5.7KiB 14.1% mitogen.fakessh 15753 15.4KiB 8135 7.9KiB 51.6% 2672 2.6KiB 17.0% mitogen.master 52891 51.7KiB 27586 26.9KiB 52.2% 7129 7.0KiB 13.5% ``` After ``` SSH command size: 790 Preamble (mitogen.core + econtext) size: 18230 (17.80KiB) Original Minimized Compressed mitogen.core 152237 148.7KiB 68453 66.8KiB 45.0% 18130 17.7KiB 11.9% mitogen.parent 99020 96.7KiB 51247 50.0KiB 51.8% 12903 12.6KiB 13.0% mitogen.fork 8445 8.2KiB 4139 4.0KiB 49.0% 1652 1.6KiB 19.6% mitogen.ssh 10847 10.6KiB 6913 6.8KiB 63.7% 2102 2.1KiB 19.4% mitogen.sudo 12089 11.8KiB 5924 5.8KiB 49.0% 2249 2.2KiB 18.6% mitogen.select 12325 12.0KiB 2929 2.9KiB 23.8% 964 0.9KiB 7.8% mitogen.service 41581 40.6KiB 22398 21.9KiB 53.9% 5847 5.7KiB 14.1% mitogen.fakessh 15753 15.4KiB 8135 7.9KiB 51.6% 2672 2.6KiB 17.0% mitogen.master 52891 51.7KiB 27586 26.9KiB 52.2% 7129 7.0KiB 13.5% ``` --- mitogen/parent.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/mitogen/parent.py b/mitogen/parent.py index 1a23df18..be046bf6 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1435,20 +1435,21 @@ class Connection(object): os.close(r) os.close(W) os.close(w) - if os.uname()[0]=='Darwin'and 
os.uname()[2][:2]<'19'and sys.executable=='/usr/bin/python':sys.executable='/usr/bin/python2.7' - if os.uname()[0]=='Darwin'and os.uname()[2][:2]in'2021'and sys.version[:3]=='2.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' + if os.uname()[0]+os.uname()[2][:2]+sys.executable=='Darwin19/usr/bin/python':sys.executable+='2.7' + if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin202.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' + if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin212.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' os.environ['ARGV0']=sys.executable os.execl(sys.executable,sys.executable+'(mitogen:CONTEXT_NAME)') os.write(1,'MITO000\n'.encode()) C=''.encode() while PREAMBLE_COMPRESSED_LEN-len(C)and select.select([0],[],[]):C+=os.read(0,PREAMBLE_COMPRESSED_LEN-len(C)) C=zlib.decompress(C) - fp=os.fdopen(W,'wb',0) - fp.write(C) - fp.close() - fp=os.fdopen(w,'wb',0) - fp.write(C) - fp.close() + f=os.fdopen(W,'wb',0) + f.write(C) + f.close() + f=os.fdopen(w,'wb',0) + f.write(C) + f.close() os.write(1,'MITO001\n'.encode()) os.close(2) From 191abd492ad1535a38a5f2c624ccf3f044e4e47d Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Thu, 6 Nov 2025 11:33:54 +0000 Subject: [PATCH 2/4] mitogen: Compress first stage without header or checksum (790 bytes -> 786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ```console ➜ mitogen git:(boot-cmd--argv) ✗ ./preamble_size.py SSH command size: 786 Preamble (mitogen.core + econtext) size: 18230 (17.80KiB) Original Minimized Compressed mitogen.core 152237 148.7KiB 68453 66.8KiB 45.0% 18130 17.7KiB 11.9% mitogen.parent 99166 96.8KiB 51375 50.2KiB 51.8% 12957 12.7KiB 13.1% mitogen.fork 8445 8.2KiB 4139 4.0KiB 49.0% 1652 1.6KiB 19.6% mitogen.ssh 10847 10.6KiB 6913 6.8KiB 63.7% 2102 2.1KiB 19.4% mitogen.sudo 12089 11.8KiB 5924 5.8KiB 49.0% 2249 2.2KiB 18.6% mitogen.select 12325 12.0KiB 2929 2.9KiB 23.8% 964 0.9KiB 7.8% mitogen.service 41581 40.6KiB 22398 
21.9KiB 53.9% 5847 5.7KiB 14.1% mitogen.fakessh 15753 15.4KiB 8135 7.9KiB 51.6% 2672 2.6KiB 17.0% mitogen.master 52891 51.7KiB 27586 26.9KiB 52.2% 7129 7.0KiB 13.5% ``` Confirmed Python 2.4 supports this use of zlib.compressobj, despite lack of mention in https://docs.python.org/2.4/lib/module-zlib.html ```pycon Python 2.4.6 (#2, Apr 29 2018, 11:16:24) [GCC 7.3.0] on linux4 Type "help", "copyright", "credits" or "license" for more information. >>> import zlib >>> c=zlib.compressobj(zlib.Z_BEST_COMPRESSION,zlib.DEFLATED,-zlib.MAX_WBITS) >>> c.compress('qwertyuiop') + c.flush() '+,O-*\xa9,\xcd\xcc/\x00\x00' ``` --- mitogen/parent.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/mitogen/parent.py b/mitogen/parent.py index be046bf6..9668a85b 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1474,7 +1474,10 @@ class Connection(object): preamble_compressed = self.get_preamble() source = source.replace('PREAMBLE_COMPRESSED_LEN', str(len(preamble_compressed))) - compressed = zlib.compress(source.encode(), 9) + compressor = zlib.compressobj( + zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, + ) + compressed = compressor.compress(source.encode()) + compressor.flush() encoded = binascii.b2a_base64(compressed).replace(b('\n'), b('')) # Just enough to decode, decompress, and exec the first stage. 
@@ -1485,7 +1488,7 @@ class Connection(object): '-c', 'import sys;sys.path=[p for p in sys.path if p];' 'import binascii,os,select,zlib;' - 'exec(zlib.decompress(binascii.a2b_base64("%s")))' % (encoded.decode(),), + 'exec(zlib.decompress(binascii.a2b_base64("%s",-15)))' % (encoded.decode(),), ] def get_econtext_config(self): From 3b7a75dfafa63113bc82dcc898af11b40bc48c8a Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Thu, 6 Nov 2025 11:46:31 +0000 Subject: [PATCH 3/4] mitogen: Send first stage as argv (786 bytes -> 796 bytes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This saves one layer of quoting/quote escaping in the bootstrap command and a string interpolation per connection. The cost is increasing the bootstrap command by 10 bytes. I like the tradeoff. I could be convinced to revert it. ```console ➜ mitogen git:(boot-cmd--argv) ✗ ./preamble_size.py SSH command size: 796 Preamble (mitogen.core + econtext) size: 18230 (17.80KiB) Original Minimized Compressed mitogen.core 152237 148.7KiB 68453 66.8KiB 45.0% 18130 17.7KiB 11.9% mitogen.parent 99181 96.9KiB 51384 50.2KiB 51.8% 12956 12.7KiB 13.1% mitogen.fork 8445 8.2KiB 4139 4.0KiB 49.0% 1652 1.6KiB 19.6% mitogen.ssh 10847 10.6KiB 6913 6.8KiB 63.7% 2102 2.1KiB 19.4% mitogen.sudo 12089 11.8KiB 5924 5.8KiB 49.0% 2249 2.2KiB 18.6% mitogen.select 12325 12.0KiB 2929 2.9KiB 23.8% 964 0.9KiB 7.8% mitogen.service 41581 40.6KiB 22398 21.9KiB 53.9% 5847 5.7KiB 14.1% mitogen.fakessh 15753 15.4KiB 8135 7.9KiB 51.6% 2672 2.6KiB 17.0% mitogen.master 52891 51.7KiB 27586 26.9KiB 52.2% 7129 7.0KiB 13.5% ``` --- mitogen/parent.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mitogen/parent.py b/mitogen/parent.py index 9668a85b..ab40a87f 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1488,7 +1488,8 @@ class Connection(object): '-c', 'import sys;sys.path=[p for p in sys.path if p];' 'import binascii,os,select,zlib;' -
'exec(zlib.decompress(binascii.a2b_base64("%s",-15)))' % (encoded.decode(),), + 'exec(zlib.decompress(binascii.a2b_base64(sys.argv[1]),-15))', + encoded.decode(), ] def get_econtext_config(self): From 83c5ab190068d2ad8c6d3a027109104edba491c6 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Thu, 6 Nov 2025 13:48:54 +0000 Subject: [PATCH 4/4] mitogen: Send first stage parameters as argv (796 bytes -> 822) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benefit: The base64 lump is now static for a given Mitogen version, and the variable parts are more visible. This will make debugging, auditing, and allow-listing a bit easier. Potential benefit: generate the base64 once, at build time or startup, rather than once per connection. Cost: Bootstrap command is 26 bytes longer. ``` ➜ mitogen git:(boot-cmd--argv) ✗ ./preamble_size.py SSH command size: 822 Preamble (mitogen.core + econtext) size: 18230 (17.80KiB) Original Minimized Compressed mitogen.core 152237 148.7KiB 68453 66.8KiB 45.0% 18130 17.7KiB 11.9% mitogen.parent 98746 96.4KiB 51215 50.0KiB 51.9% 12922 12.6KiB 13.1% mitogen.fork 8445 8.2KiB 4139 4.0KiB 49.0% 1652 1.6KiB 19.6% mitogen.ssh 10847 10.6KiB 6913 6.8KiB 63.7% 2102 2.1KiB 19.4% mitogen.sudo 12089 11.8KiB 5924 5.8KiB 49.0% 2249 2.2KiB 18.6% mitogen.select 12325 12.0KiB 2929 2.9KiB 23.8% 964 0.9KiB 7.8% mitogen.service 41581 40.6KiB 22398 21.9KiB 53.9% 5847 5.7KiB 14.1% mitogen.fakessh 15753 15.4KiB 8135 7.9KiB 51.6% 2672 2.6KiB 17.0% mitogen.master 52891 51.7KiB 27586 26.9KiB 52.2% 7129 7.0KiB 13.5% ``` --- docs/changelog.rst | 3 +++ docs/howitworks.rst | 13 ++++++------- mitogen/parent.py | 14 ++++---------- tests/first_stage_test.py | 2 +- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 296c30fa..adc5c33a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -21,6 +21,9 @@ To avail of fixes in an unreleased version, please download a ZIP file
In progress (unreleased) ------------------------ +* :gh:issue:`1243` :mod:`mitogen`: Pass first stage, context name, & preamble + size as separate **argv** arguments + v0.3.31 (2025-11-05) -------------------- diff --git a/docs/howitworks.rst b/docs/howitworks.rst index d7606b11..ae1910f8 100644 --- a/docs/howitworks.rst +++ b/docs/howitworks.rst @@ -27,14 +27,13 @@ Python Command Line ################### The Python command line sent to the host is a :mod:`zlib`-compressed [#f2]_ and -base64-encoded copy of the :py:meth:`mitogen.master.Stream._first_stage` -function, which has been carefully optimized to reduce its size. Prior to -compression and encoding, ``CONTEXT_NAME`` is replaced with the desired context -name in the function's source code. +base64-encoded copy of :py:meth:`mitogen.parent.Connection._first_stage`, +which is carefully written to maximize its compatibility and minimize its size. +A simplified illustration of the bootstrap command is .. code:: - python -c 'exec "xxx".decode("base64").decode("zlib")' + python -c 'exec(sys.argv[1].decode("base64").decode("zlib"))' ... The command-line arranges for the Python interpreter to decode the base64'd component, decompress it and execute it as Python code. Base64 is used since @@ -71,8 +70,8 @@ of the large base64-encoded first stage parameter, and to replace **argv[0]** with something descriptive. After configuring its ``stdin`` to point to the read end of the pipe, the -parent half of the fork re-executes Python, with **argv[0]** taken from the -``CONTEXT_NAME`` variable earlier substituted into its source code. As no +fork parent re-executes Python with **argv[0]** composed of the Python +interpreter path and a remote name supplied by the Mitogen parent.
As no arguments are provided to this new execution of Python, and since ``stdin`` is connected to a pipe (whose write end is connected to the first stage), the Python interpreter begins reading source code to execute from the pipe diff --git a/mitogen/parent.py b/mitogen/parent.py index ab40a87f..6e30b1c6 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1396,10 +1396,6 @@ class Connection(object): # with a custom argv. # * Optimized for minimum byte count after minification & compression. # The script preamble_size.py measures this. - # * 'CONTEXT_NAME' and 'PREAMBLE_COMPRESSED_LEN' are substituted with - # their respective values. - # * CONTEXT_NAME must be prefixed with the name of the Python binary in - # order to allow virtualenvs to detect their install prefix. # # macOS tweaks for Python 2.7 must be kept in sync with the the Ansible # module test_echo_module, used by the integration tests. @@ -1439,10 +1435,10 @@ class Connection(object): if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin202.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin212.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' os.environ['ARGV0']=sys.executable - os.execl(sys.executable,sys.executable+'(mitogen:CONTEXT_NAME)') + os.execl(sys.executable,sys.executable+'(mitogen:%s)'%sys.argv[2]) os.write(1,'MITO000\n'.encode()) C=''.encode() - while PREAMBLE_COMPRESSED_LEN-len(C)and select.select([0],[],[]):C+=os.read(0,PREAMBLE_COMPRESSED_LEN-len(C)) + while int(sys.argv[3])-len(C)and select.select([0],[],[]):C+=os.read(0,int(sys.argv[3])-len(C)) C=zlib.decompress(C) f=os.fdopen(W,'wb',0) f.write(C) @@ -1470,10 +1466,6 @@ class Connection(object): source = inspect.getsource(self._first_stage) source = textwrap.dedent('\n'.join(source.strip().split('\n')[2:])) source = source.replace(' ', ' ') - source = source.replace('CONTEXT_NAME', self.options.remote_name) - preamble_compressed = self.get_preamble() - source = 
source.replace('PREAMBLE_COMPRESSED_LEN', - str(len(preamble_compressed))) compressor = zlib.compressobj( zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, ) @@ -1490,6 +1482,8 @@ class Connection(object): 'import binascii,os,select,zlib;' 'exec(zlib.decompress(binascii.a2b_base64(sys.argv[1]),-15))', encoded.decode(), + self.options.remote_name, + str(len(self.get_preamble())), ] def get_econtext_config(self): diff --git a/tests/first_stage_test.py b/tests/first_stage_test.py index e06f453f..2576ec14 100644 --- a/tests/first_stage_test.py +++ b/tests/first_stage_test.py @@ -26,7 +26,7 @@ class CommandLineTest(testlib.RouterMixin, testlib.TestCase): # preamble from stdin, then execute it. # This test attaches /dev/zero to stdin to create a specific failure - # 1. Fork child reads PREAMBLE_COMPRESSED_LEN bytes of junk (all `\0`) + # 1. Fork child reads bytes of NUL (`b'\0'`) # 2. Fork child crashes (trying to decompress the junk data) # 3. Fork child's file descriptors (write pipes) are closed by the OS # 4. Fork parent does `dup(, )` and `exec()`