issue #139: eliminate quadratic behaviour on input path

Rather than slowly building up a Python string over time, we now store a deque of chunks (which, after a later commit, will be around 128KB each) and track the total buffer size in a separate integer. The tricky loop is there to ensure the header does not need to be sliced off the full message (which may be huge, causing yet another memory spike and copy), but rather only off the much smaller first 128KB-sized chunk received. One problem remains with this code: the ''.join() causes RAM usage to temporarily double, but that was true of the old solution too. I shall wait for bug reports before fixing this, as it gets very ugly very fast.
8 years ago · a3b4b459fa
parent ba9a06d0f5
commit a3b4b459fa
1 changed files with 33 additions and 18 deletions
--- a/mitogen/core.py
+++ b/mitogen/core.py
@@ -695,8 +695,6 @@ class Stream(BasicStream):
    :py:class:`BasicStream` subclass implementing mitogen's :ref:`stream
    protocol <stream-protocol>`.
    """
-    _input_buf = ''
-
    #: If not ``None``, :py:class:`Router` stamps this into
    #: :py:attr:`Message.auth_id` of every message received on this stream.
    auth_id = None
@@ -707,6 +705,8 @@ class Stream(BasicStream):
        self.name = 'default'
        self.sent_modules = set()
        self.construct(**kwargs)
+        self._input_buf = collections.deque()
+        self._input_buf_len = 0
        self._output_buf = collections.deque()

    def construct(self):
@@ -718,40 +718,55 @@ class Stream(BasicStream):
        _vv and IOLOG.debug('%r.on_receive()', self)

        buf = self.receive_side.read()
-        if buf is None:
-            buf = ''
-
-        self._input_buf += buf
-        while self._receive_one(broker):
-            pass
-
-        if not buf:
+        if buf:
+            if self._input_buf and self._input_buf_len < 128:
+                self._input_buf[0] += buf
+            else:
+                self._input_buf.append(buf)
+            self._input_buf_len += len(buf)
+            while self._receive_one(broker):
+                pass
+        else:
            return self.on_disconnect(broker)

    HEADER_FMT = '>hhhLLL'
    HEADER_LEN = struct.calcsize(HEADER_FMT)

    def _receive_one(self, broker):
-        if len(self._input_buf) < self.HEADER_LEN:
+        if self._input_buf_len < self.HEADER_LEN:
            return False

        msg = Message()
-        # To support unpickling Contexts.
        msg.router = self._router

        (msg.dst_id, msg.src_id, msg.auth_id,
         msg.handle, msg.reply_to, msg_len) = struct.unpack(
            self.HEADER_FMT,
-            self._input_buf[:self.HEADER_LEN]
+            self._input_buf[0][:self.HEADER_LEN],
        )

-        if (len(self._input_buf) - self.HEADER_LEN) < msg_len:
-            _vv and IOLOG.debug('%r: Input too short (want %d, got %d)',
-                        self, msg_len, len(self._input_buf) - self.HEADER_LEN)
+        if (self._input_buf_len - self.HEADER_LEN) < msg_len:
+            _vv and IOLOG.debug(
+                '%r: Input too short (want %d, got %d)',
+                self, msg_len, self._input_buf_len - self.HEADER_LEN
+            )
            return False

-        msg.data = self._input_buf[self.HEADER_LEN:self.HEADER_LEN+msg_len]
-        self._input_buf = self._input_buf[self.HEADER_LEN+msg_len:]
+        start = self.HEADER_LEN
+        prev_start = start
+        remain = msg_len + start
+        bits = []
+        while remain:
+            buf = self._input_buf.popleft()
+            bit = buf[start:remain]
+            bits.append(bit)
+            remain -= len(bit) + start
+            prev_start = start
+            start = 0
+
+        msg.data = ''.join(bits)
+        self._input_buf.appendleft(buf[prev_start+len(bit):])
+        self._input_buf_len -= self.HEADER_LEN + msg_len
        self._router._async_route(msg, self)
        return True