From 83c5ab190068d2ad8c6d3a027109104edba491c6 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Thu, 6 Nov 2025 13:48:54 +0000 Subject: [PATCH] mitogen: Send first stage parameters as argv (796 bytes -> 822) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benefit: The base64 lump is now static for a given Mitogen version, and the variable parts are more visible. This will make debugging, auditing, and allow-listing a bit easier. Potential benefit: generate the base64 once, at build time or startup, rather than once per connection. Cost: Bootstrap command is 26 bytes longer. ``` ➜ mitogen git:(boot-cmd--argv) ✗ ./preamble_size.py SSH command size: 822 Preamble (mitogen.core + econtext) size: 18230 (17.80KiB) Original Minimized Compressed mitogen.core 152237 148.7KiB 68453 66.8KiB 45.0% 18130 17.7KiB 11.9% mitogen.parent 98746 96.4KiB 51215 50.0KiB 51.9% 12922 12.6KiB 13.1% mitogen.fork 8445 8.2KiB 4139 4.0KiB 49.0% 1652 1.6KiB 19.6% mitogen.ssh 10847 10.6KiB 6913 6.8KiB 63.7% 2102 2.1KiB 19.4% mitogen.sudo 12089 11.8KiB 5924 5.8KiB 49.0% 2249 2.2KiB 18.6% mitogen.select 12325 12.0KiB 2929 2.9KiB 23.8% 964 0.9KiB 7.8% mitogen.service 41581 40.6KiB 22398 21.9KiB 53.9% 5847 5.7KiB 14.1% mitogen.fakessh 15753 15.4KiB 8135 7.9KiB 51.6% 2672 2.6KiB 17.0% mitogen.master 52891 51.7KiB 27586 26.9KiB 52.2% 7129 7.0KiB 13.5% ``` --- docs/changelog.rst | 3 +++ docs/howitworks.rst | 13 ++++++------- mitogen/parent.py | 14 ++++---------- tests/first_stage_test.py | 2 +- 4 files changed, 14 insertions(+), 18 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 296c30fa..adc5c33a 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -21,6 +21,9 @@ To avail of fixes in an unreleased version, please download a ZIP file In progress (unreleased) ------------------------ +* :gh:issue:`1243` :mod:`mitogen`: Pass first stage, context name, & preamble + size as separate **argv** arguments + v0.3.31 (2025-11-05) 
-------------------- diff --git a/docs/howitworks.rst b/docs/howitworks.rst index d7606b11..ae1910f8 100644 --- a/docs/howitworks.rst +++ b/docs/howitworks.rst @@ -27,14 +27,13 @@ Python Command Line ################### The Python command line sent to the host is a :mod:`zlib`-compressed [#f2]_ and -base64-encoded copy of the :py:meth:`mitogen.master.Stream._first_stage` -function, which has been carefully optimized to reduce its size. Prior to -compression and encoding, ``CONTEXT_NAME`` is replaced with the desired context -name in the function's source code. +base64-encoded copy of :py:meth:`mitogen.parent.Connection._first_stage`, +which is carefully written to maximize its compatibility and minimize its size. +A simplified illustration of the bootstrap command is .. code:: - python -c 'exec "xxx".decode("base64").decode("zlib")' + python -c 'exec(sys.argv[1].decode("base64").decode("zlib"))' ... The command-line arranges for the Python interpreter to decode the base64'd component, decompress it and execute it as Python code. Base64 is used since @@ -71,8 +70,8 @@ of the large base64-encoded first stage parameter, and to replace **argv[0]** with something descriptive. After configuring its ``stdin`` to point to the read end of the pipe, the -parent half of the fork re-executes Python, with **argv[0]** taken from the -``CONTEXT_NAME`` variable earlier substituted into its source code. As no +fork parent re-executes Python with **argv[0]** composed of the Python +interpreter path and a remote name supplied by the Mitogen parent. As no arguments are provided to this new execution of Python, and since ``stdin`` is connected to a pipe (whose write end is connected to the first stage), the Python interpreter begins reading source code to execute from the pipe diff --git a/mitogen/parent.py b/mitogen/parent.py index ab40a87f..6e30b1c6 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1396,10 +1396,6 @@ class Connection(object): # with a custom argv. 
# * Optimized for minimum byte count after minification & compression. # The script preamble_size.py measures this. - # * 'CONTEXT_NAME' and 'PREAMBLE_COMPRESSED_LEN' are substituted with - # their respective values. - # * CONTEXT_NAME must be prefixed with the name of the Python binary in - # order to allow virtualenvs to detect their install prefix. # # macOS tweaks for Python 2.7 must be kept in sync with the the Ansible # module test_echo_module, used by the integration tests. @@ -1439,10 +1435,10 @@ class Connection(object): if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin202.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' if os.uname()[0]+os.uname()[2][:2]+sys.version[:3]=='Darwin212.7':os.environ['PYTHON_LAUNCHED_FROM_WRAPPER']='1' os.environ['ARGV0']=sys.executable - os.execl(sys.executable,sys.executable+'(mitogen:CONTEXT_NAME)') + os.execl(sys.executable,sys.executable+'(mitogen:%s)'%sys.argv[2]) os.write(1,'MITO000\n'.encode()) C=''.encode() - while PREAMBLE_COMPRESSED_LEN-len(C)and select.select([0],[],[]):C+=os.read(0,PREAMBLE_COMPRESSED_LEN-len(C)) + while int(sys.argv[3])-len(C)and select.select([0],[],[]):C+=os.read(0,int(sys.argv[3])-len(C)) C=zlib.decompress(C) f=os.fdopen(W,'wb',0) f.write(C) @@ -1470,10 +1466,6 @@ class Connection(object): source = inspect.getsource(self._first_stage) source = textwrap.dedent('\n'.join(source.strip().split('\n')[2:])) source = source.replace(' ', ' ') - source = source.replace('CONTEXT_NAME', self.options.remote_name) - preamble_compressed = self.get_preamble() - source = source.replace('PREAMBLE_COMPRESSED_LEN', - str(len(preamble_compressed))) compressor = zlib.compressobj( zlib.Z_BEST_COMPRESSION, zlib.DEFLATED, -zlib.MAX_WBITS, ) @@ -1490,6 +1482,8 @@ class Connection(object): 'import binascii,os,select,zlib;' 'exec(zlib.decompress(binascii.a2b_base64(sys.argv[1]),-15))', encoded.decode(), + self.options.remote_name, + str(len(self.get_preamble())), ] def get_econtext_config(self): diff --git 
a/tests/first_stage_test.py b/tests/first_stage_test.py index e06f453f..2576ec14 100644 --- a/tests/first_stage_test.py +++ b/tests/first_stage_test.py @@ -26,7 +26,7 @@ class CommandLineTest(testlib.RouterMixin, testlib.TestCase): # preamble from stdin, then execute it. # This test attaches /dev/zero to stdin to create a specific failure - # 1. Fork child reads PREAMBLE_COMPRESSED_LEN bytes of junk (all `\0`) + # 1. Fork child reads bytes of NUL (`b'\0'`) # 2. Fork child crashes (trying to decompress the junk data) # 3. Fork child's file descriptors (write pipes) are closed by the OS # 4. Fork parent does `dup(, )` and `exec()`