diff --git a/ansible_mitogen/connection.py b/ansible_mitogen/connection.py index c5c96f24..12626485 100644 --- a/ansible_mitogen/connection.py +++ b/ansible_mitogen/connection.py @@ -880,13 +880,11 @@ class Connection(ansible.plugins.connection.ConnectionBase): path=mitogen.utils.cast(in_path) ) - # A roundtrip is always necessary for the target to request the file - # from FileService, however, by pipelining the transfer function, the - # subsequent step (probably a module invocation) can get its - # dependencies and function call in-flight before the transfer is - # complete. This saves at least 1 RTT between the transfer completing - # and the start of the follow-up task. - self.get_chain().call_no_reply( + # For now this must remain synchronous, as the action plug-in may have + # passed us a temporary file to transfer. A future FileService could + # maintain an LRU list of open file descriptors to keep the temporary + # file alive, but that requires more work. + self.get_chain().call( ansible_mitogen.target.transfer_file, context=self.parent, in_path=in_path, diff --git a/docs/ansible.rst b/docs/ansible.rst index 7a06c120..485c24dc 100644 --- a/docs/ansible.rst +++ b/docs/ansible.rst @@ -313,9 +313,10 @@ Performance ^^^^^^^^^^^ One roundtrip initiates a transfer larger than 124 KiB, while smaller transfers -are embedded in a 0-roundtrip remote call. For tools operating via SSH -multiplexing, 4 roundtrips are required to configure the IO channel, in -addition to the time to start the local and remote processes. +are embedded in a 0-roundtrip pipelined call. For tools operating via SSH +multiplexing, 4 roundtrips are required to configure the IO channel, followed +by 6 roundtrips to transfer the file in the case of ``sftp``, in addition to +the time to start the local and remote processes. An invocation of ``scp`` with an empty ``.profile`` over a 30 ms link takes ~140 ms, wasting 110 ms per invocation, rising to ~2,000 ms over a 400 ms @@ -848,6 +849,8 @@ logging is necessary. File-based logging can be enabled by setting enabled, one file per context will be created on the local machine and every target machine, as ``/tmp/mitogen..log``. +.. _diagnosing-hangs: + Diagnosing Hangs ~~~~~~~~~~~~~~~~ diff --git a/docs/api.rst b/docs/api.rst index cb980b55..c74193e3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -792,8 +792,8 @@ Context Class .. method:: send (msg) - Arrange for `msg` to be delivered to this context. Updates the - message's `dst_id` prior to routing it via the associated router. + Arrange for `msg` to be delivered to this context. + :attr:`dst_id ` is set to the target context ID. :param mitogen.core.Message msg: The message. @@ -801,9 +801,9 @@ Context Class .. method:: send_async (msg, persist=False) Arrange for `msg` to be delivered to this context, with replies - delivered to a newly constructed Receiver. Updates the message's - `dst_id` prior to routing it via the associated router and registers a - handle which is placed in the message's `reply_to`. + directed to a newly constructed receiver. :attr:`dst_id + ` is set to the target context ID, and :attr:`reply_to + ` is set to the newly constructed receiver's handle. :param bool persist: If :data:`False`, the handler will be unregistered after a single @@ -818,15 +818,15 @@ Context Class .. method:: send_await (msg, deadline=None) - As with :meth:`send_async`, but expect a single reply - (`persist=False`) delivered within `deadline` seconds. + Like :meth:`send_async`, but expect a single reply (`persist=False`) + delivered within `deadline` seconds. :param mitogen.core.Message msg: The message. - :param float deadline: If not :data:`None`, seconds before timing out waiting for a reply. - + :returns: + The deserialized reply. :raises mitogen.core.TimeoutError: No message was received and `deadline` passed. @@ -838,9 +838,9 @@ Context Class .. class:: Context - Extend :class:`mitogen.core.Router` with functionality useful to - masters, and child contexts who later become parents. Currently when this - class is required, the target context's router is upgraded at runtime. + Extend :class:`mitogen.core.Context` with functionality useful to masters, + and child contexts who later become parents. Currently when this class is + required, the target context's router is upgraded at runtime. .. attribute:: default_call_chain @@ -1196,7 +1196,7 @@ Broker Class Responsible for handling I/O multiplexing in a private thread. **Note:** This is the somewhat limited core version of the Broker class - used by child contexts. The master subclass is documented below this one. + used by child contexts. The master subclass is documented below. .. attribute:: shutdown_timeout = 3.0 diff --git a/docs/changelog.rst b/docs/changelog.rst index 7beb043c..162a7c48 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -53,11 +53,6 @@ Enhancements a 250 ms link from 30 seconds to 10 seconds compared to v0.2.2, down from 120 seconds compared to vanilla. -* `49736b3a `_: avoid a - roundtrip when transferring files larger than 124KiB, removing a delay - between waiting for the transfer to complete and start of the follow-up - action. - * `#337 `_: To avoid a scaling limitation, a PTY is no longer allocated for an SSH connection unless the configuration specifies a password. @@ -74,6 +69,14 @@ Enhancements improvement, but is easily noticeable on short many-target runs, where startup overhead dominates runtime. +* The `faulthandler `_ module is + automatically activated if it is installed, simplifying debugging of hangs. + See :ref:`diagnosing-hangs` for more information. + +* The ``MITOGEN_DUMP_THREAD_STACKS`` environment variable's value now indicates + the number of seconds between stack dumps. See :ref:`diagnosing-hangs` for + more information. + Fixes ^^^^^ diff --git a/docs/howitworks.rst b/docs/howitworks.rst index 1e3d2768..a3b08eac 100644 --- a/docs/howitworks.rst +++ b/docs/howitworks.rst @@ -377,6 +377,13 @@ Children listen on the following handles: 6-tuples from :class:`mitogen.parent.CallChain`, imports ``mod_name``, then attempts to execute `class_name.func_name(\*args, \**kwargs)`. + * `chain_id`: if not :data:`None`, an identifier unique to the originating + :class:`mitogen.parent.CallChain`. When set, if an exception occurs + during a call, future calls with the same ID automatically fail with the + same exception without ever executing, and failed calls with no + `reply_to` set are not dumped to the logging framework as they otherwise + would. This is used to implement pipelining. + When this channel is closed (by way of receiving a dead message), the child's main thread begins graceful shutdown of its own :py:class:`Broker` and :py:class:`Router`. diff --git a/mitogen/parent.py b/mitogen/parent.py index 8e9f53a5..bb2b5d1e 100644 --- a/mitogen/parent.py +++ b/mitogen/parent.py @@ -1149,14 +1149,15 @@ class CallChain(object): calls execute until :meth:`reset` is invoked. No exception is logged for calls made with :meth:`call_no_reply`, instead - it is saved and reported as the result of subsequent :meth:`call` or - :meth:`call_async` calls. + the exception is saved and reported as the result of subsequent + :meth:`call` or :meth:`call_async` calls. Sequences of asynchronous calls can be made without wasting network round-trips to discover if prior calls succeed, and chains originating from multiple unrelated source contexts may overlap concurrently at a target - context without interference. In this example, 4 calls complete in one - round-trip:: + context without interference. + + In this example, 4 calls complete in one round-trip:: chain = mitogen.parent.CallChain(context, pipelined=True) chain.call_no_reply(os.mkdir, '/tmp/foo') diff --git a/mitogen/service.py b/mitogen/service.py index 3713bdea..ffb7308e 100644 --- a/mitogen/service.py +++ b/mitogen/service.py @@ -873,7 +873,14 @@ class FileService(Service): raise Error(self.context_mismatch_msg) LOG.debug('Serving %r', path) - fp = open(path, 'rb', self.IO_SIZE) + try: + fp = open(path, 'rb', self.IO_SIZE) + except IOError: + msg.reply(mitogen.core.CallError( + sys.exc_info()[1] + )) + return + # Response must arrive first so requestee can begin receive loop, # otherwise first ack won't arrive until all pending chunks were # delivered. In that case max BDP would always be 128KiB, aka. max diff --git a/tests/ansible/integration/action/all.yml b/tests/ansible/integration/action/all.yml index c5cb80d7..75f77243 100644 --- a/tests/ansible/integration/action/all.yml +++ b/tests/ansible/integration/action/all.yml @@ -1,3 +1,4 @@ +- import_playbook: copy.yml - import_playbook: fixup_perms2__copy.yml - import_playbook: low_level_execute_command.yml - import_playbook: make_tmp_path.yml diff --git a/tests/ansible/integration/action/copy.yml b/tests/ansible/integration/action/copy.yml new file mode 100644 index 00000000..e3fca87f --- /dev/null +++ b/tests/ansible/integration/action/copy.yml @@ -0,0 +1,66 @@ +# Verify copy module for small and large files, and inline content. + +- name: integration/action/synchronize.yml + hosts: test-targets + any_errors_fatal: true + tasks: + - copy: + dest: /tmp/copy-tiny-file + content: + this is a tiny file. + connection: local + + - copy: + dest: /tmp/copy-large-file + # Must be larger than Connection.SMALL_SIZE_LIMIT. + content: "{% for x in range(200000) %}x{% endfor %}" + connection: local + + # end of making files + + - file: + state: absent + path: "{{item}}" + with_items: + - /tmp/copy-tiny-file.out + - /tmp/copy-large-file.out + - /tmp/copy-tiny-inline-file.out + - /tmp/copy-large-inline-file.out + + # end of cleaning out files + + - copy: + dest: /tmp/copy-large-file.out + src: /tmp/copy-large-file + + - copy: + dest: /tmp/copy-tiny-file.out + src: /tmp/copy-tiny-file + + - copy: + dest: /tmp/copy-tiny-inline-file.out + content: "tiny inline content" + + - copy: + dest: /tmp/copy-large-inline-file.out + content: | + {% for x in range(200000) %}y{% endfor %} + + # stat results + + - stat: + path: "{{item}}" + with_items: + - /tmp/copy-tiny-file.out + - /tmp/copy-large-file.out + - /tmp/copy-tiny-inline-file.out + - /tmp/copy-large-inline-file.out + register: stat + + - assert: + that: + - stat.results[0].stat.checksum == "f29faa9a6f19a700a941bf2aa5b281643c4ec8a0" + - stat.results[1].stat.checksum == "62951f943c41cdd326e5ce2b53a779e7916a820d" + - stat.results[2].stat.checksum == "b26dd6444595e2bdb342aa0a91721b57478b5029" + - stat.results[3].stat.checksum == "d675f47e467eae19e49032a2cc39118e12a6ee72" + diff --git a/tests/ansible/soak/_file_service_loop.yml b/tests/ansible/soak/_file_service_loop.yml new file mode 100644 index 00000000..96111b3c --- /dev/null +++ b/tests/ansible/soak/_file_service_loop.yml @@ -0,0 +1,6 @@ + - file: + path: /tmp/foo-{{inventory_hostname}} + state: absent + - copy: + dest: /tmp/foo-{{inventory_hostname}} + content: "{{content}}" diff --git a/tests/ansible/soak/file_service.yml b/tests/ansible/soak/file_service.yml new file mode 100644 index 00000000..3b338b3c --- /dev/null +++ b/tests/ansible/soak/file_service.yml @@ -0,0 +1,6 @@ +- hosts: all + tasks: + - set_fact: + content: "{% for x in range(126977) %}x{% endfor %}" + - include_tasks: _file_service_loop.yml + with_sequence: start=1 end=100