You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mitogen/ansible_mitogen/services.py

459 lines
16 KiB
Python

# Copyright 2017, David Wilson
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
"""
Classes in this file define Mitogen 'services' that run (initially) within the
connection multiplexer process that is forked off the top-level controller
process.
Once a worker process connects to a multiplexer process
(Connection._connect()), it communicates with these services to establish new
connections, grant access to files by children, and register for notification
when a child has completed a job.
"""
from __future__ import absolute_import
import logging
import os
import os.path
import pprint
import sys
import threading
import zlib
import mitogen
import mitogen.service
import ansible_mitogen.target
LOG = logging.getLogger(__name__)
class Error(Exception):
    """
    Base class for errors raised by the services defined in this module.
    """
class ContextService(mitogen.service.Service):
    """
    Used by workers to fetch the single Context instance corresponding to a
    connection configuration, creating the matching connection if it does not
    exist.

    For connection methods and their parameters, see:
        https://mitogen.readthedocs.io/en/latest/api.html#context-factories

    This concentrates connections in the top-level process, which may become a
    bottleneck. The bottleneck can be removed using per-CPU connection
    processes and arranging for the worker to select one according to a hash of
    the connection parameters (sharding).
    """
    handle = 500
    max_message_size = 1000
    max_interpreters = int(os.getenv('MITOGEN_MAX_INTERPRETERS', '20'))

    def __init__(self, *args, **kwargs):
        super(ContextService, self).__init__(*args, **kwargs)
        #: Guards the bookkeeping dictionaries below.
        self._lock = threading.Lock()
        #: Records the :meth:`get` result dict for successful calls, returned
        #: for identical subsequent calls. Keyed by :meth:`key_from_kwargs`.
        self._response_by_key = {}
        #: List of :class:`mitogen.core.Message` waiting for the result dict
        #: for a particular connection config. Keyed as above.
        self._waiters_by_key = {}
        #: Mapping of :class:`mitogen.core.Context` -> reference count. Each
        #: call to :meth:`get` increases this by one. Calls to :meth:`put`
        #: decrease it by one.
        self._refs_by_context = {}
        #: List of contexts in creation order by via= parameter. When
        #: :attr:`max_interpreters` is reached, the most recently created
        #: context whose reference count is zero is destroyed to make room
        #: for any additional context.
        self._lru_by_via = {}
        #: :meth:`key_from_kwargs` result by Context.
        self._key_by_context = {}

    @mitogen.service.expose(mitogen.service.AllowParents())
    @mitogen.service.arg_spec({
        'context': mitogen.core.Context
    })
    def put(self, context):
        """
        Return a reference, making it eligible for recycling once its reference
        count reaches zero.

        :param mitogen.core.Context context:
            Context previously obtained via :meth:`get`.
        """
        LOG.debug('%r.put(%r)', self, context)
        # The check-and-decrement must be atomic with respect to get() and
        # _shutdown(), which update the same dictionary under the lock.
        with self._lock:
            if self._refs_by_context.get(context, 0) == 0:
                LOG.warning('%r.put(%r): refcount was 0. shutdown_all called?',
                            self, context)
                return
            self._refs_by_context[context] -= 1

    def key_from_kwargs(self, **kwargs):
        """
        Generate a deduplication key from the request. The default
        implementation returns a string based on a stable representation of the
        input dictionary generated by :py:func:`pprint.pformat`.
        """
        return pprint.pformat(kwargs)

    def _produce_response(self, key, response, record=False):
        """
        Reply to every waiting request matching a configuration key with a
        response dictionary, deleting the list of waiters when done.

        :param str key:
            Result of :meth:`key_from_kwargs`
        :param dict response:
            Response dictionary
        :param bool record:
            If :data:`True`, also cache `response` for subsequent :meth:`get`
            calls and add the number of waiters to the reference count of
            ``response['context']``. This happens while the lock is still
            held, so no concurrent :meth:`get` can observe the key with
            neither waiters nor a cached response.
        :returns:
            Number of waiters that were replied to.
        """
        with self._lock:
            waiters = self._waiters_by_key.pop(key)
            count = len(waiters)
            for msg in waiters:
                msg.reply(response)
            if record:
                self._response_by_key[key] = response
                # Set the reference count to the number of waiters.
                self._refs_by_context[response['context']] += count
        return count

    def _shutdown(self, context, lru=None, new_context=None):
        """
        Arrange for `context` to be shut down, and optionally add `new_context`
        to the LRU list while holding the lock.

        :param context:
            Context to shut down and forget.
        :param list lru:
            Optional LRU list from which `context` is removed.
        :param new_context:
            Optional context appended to `lru` in place of `context`.
        """
        LOG.info('%r._shutdown(): shutting down %r', self, context)
        context.shutdown()
        with self._lock:
            key = self._key_by_context[context]
            # The response may not have been recorded yet if the connection
            # was only just established, so tolerate its absence.
            self._response_by_key.pop(key, None)
            del self._refs_by_context[context]
            del self._key_by_context[context]
            if lru:
                lru.remove(context)
            if new_context:
                lru.append(new_context)

    def _update_lru(self, new_context, **kwargs):
        """
        Update the LRU ("MRU"?) list associated with the connection described
        by `kwargs`, destroying the most recently created unreferenced context
        if the list is full. Finally add `new_context` to the list.
        """
        via = kwargs.get('via')
        if via is None:
            # We don't have a limit on the number of direct connections.
            return

        lru = self._lru_by_via.setdefault(via, [])
        if len(lru) < self.max_interpreters:
            lru.append(new_context)
            return

        for context in reversed(lru):
            # A context is appended to the list before _connect() registers
            # its reference count; such a context is still being set up, so
            # treat a missing entry as "in use" instead of raising KeyError.
            if self._refs_by_context.get(context) == 0:
                break
        else:
            LOG.warning('via=%r reached maximum number of interpreters, '
                        'but they are all marked as in-use.', via)
            return

        self._shutdown(context, lru=lru, new_context=new_context)

    @mitogen.service.expose(mitogen.service.AllowParents())
    def shutdown_all(self):
        """
        For testing use, arrange for all connections to be shut down.
        """
        # Iterate over a copy: _shutdown() deletes from _key_by_context.
        for context in list(self._key_by_context):
            self._shutdown(context)
        self._lru_by_via = {}

    def _connect(self, key, method_name, **kwargs):
        """
        Actual connect implementation. Arranges for the Mitogen connection to
        be created and enqueues an asynchronous call to start the forked task
        parent in the remote context.

        :param key:
            Deduplication key representing the connection configuration.
        :param method_name:
            :class:`mitogen.parent.Router` method implementing the connection
            type.
        :param kwargs:
            Keyword arguments passed to the router method.
        :returns:
            Dict like::

                {
                    'context': mitogen.core.Context or None,
                    'home_dir': str or None,
                    'msg': str or None
                }

            Where either `msg` is an error message and the remaining fields are
            :data:`None`, or `msg` is :data:`None` and the remaining fields are
            set.
        :raises Error:
            The router has no attribute named `method_name`.
        """
        method = getattr(self.router, method_name, None)
        if method is None:
            raise Error('no such Router method: %s' % (method_name,))

        try:
            context = method(**kwargs)
        except mitogen.core.StreamError as e:
            return {
                'context': None,
                'home_dir': None,
                'msg': str(e),
            }

        if kwargs.get('via'):
            self._update_lru(context, method_name=method_name, **kwargs)

        home_dir = context.call(os.path.expanduser, '~')

        # We don't need to wait for the result of this. Ideally we'd check its
        # return value somewhere, but logs will catch a failure anyway.
        context.call_async(ansible_mitogen.target.start_fork_parent)
        self._key_by_context[context] = key
        self._refs_by_context[context] = 0
        return {
            'context': context,
            'home_dir': home_dir,
            'msg': None,
        }

    @mitogen.service.expose(mitogen.service.AllowParents())
    @mitogen.service.arg_spec({
        'method_name': str
    })
    def get(self, msg, **kwargs):
        """
        Return a Context referring to an established connection with the given
        configuration, establishing a new connection as necessary.

        :param mitogen.core.Message msg:
            Request message. It is queued as a waiter and replied to once the
            connection attempt completes.
        :param str method_name:
            The :class:`mitogen.parent.Router` connection method to use.
        :param dict kwargs:
            Keyword arguments passed to `mitogen.master.Router.[method_name]()`.
        :returns:
            The cached response dict when the connection already exists,
            otherwise ``self.NO_REPLY``, in which case the response is
            delivered through ``msg.reply()``. The response is a dict with the
            keys `context`, `home_dir` and `msg` as built by :meth:`_connect`,
            or a :class:`mitogen.core.CallError` if connecting raised.
        """
        key = self.key_from_kwargs(**kwargs)
        with self._lock:
            response = self._response_by_key.get(key)
            if response is not None:
                self._refs_by_context[response['context']] += 1
                return response

            waiters = self._waiters_by_key.get(key)
            if waiters is not None:
                # A connection attempt is already in flight; join its queue.
                waiters.append(msg)
                return self.NO_REPLY

            self._waiters_by_key[key] = [msg]

        # I'm the first thread to wait, so I will create the connection.
        try:
            response = self._connect(key, **kwargs)
            # Only record the response for non-error results. Recording is
            # done inside _produce_response() under the same lock acquisition
            # that removes the waiter list.
            self._produce_response(key, response,
                                   record=response['msg'] is None)
        except mitogen.core.CallError as e:
            self._produce_response(key, e)
        except Exception as e:
            self._produce_response(key, mitogen.core.CallError(e))

        return self.NO_REPLY
class FileService(mitogen.service.Service):
    """
    Primitive latency-inducing file server for old-style incantations of the
    module runner. This is to be replaced later with a scheme that forwards
    files known to be missing without the target having to ask for them,
    avoiding a corresponding roundtrip per file.

    Paths must be explicitly added to the service by a trusted context before
    they will be served to an untrusted context.
    """
    handle = 501
    max_message_size = 1000
    unregistered_msg = 'Path is not registered with FileService.'

    def __init__(self, router):
        super(FileService, self).__init__(router)
        # Mapping of registered path -> zlib-compressed file contents, filled
        # by register() and read by fetch().
        self._paths = {}

    @mitogen.service.expose(policy=mitogen.service.AllowParents())
    @mitogen.service.arg_spec({
        'path': basestring
    })
    def register(self, path):
        """
        Authorize a path for access by child contexts. Calling this repeatedly
        with the same path is harmless.

        The file is read and compressed once, at first registration; later
        calls for the same path do not re-read it.

        :param str path:
            File path.
        """
        if path not in self._paths:
            LOG.debug('%r: registering %r', self, path)
            with open(path, 'rb') as fp:
                self._paths[path] = zlib.compress(fp.read())

    @mitogen.service.expose(policy=mitogen.service.AllowAny())
    @mitogen.service.arg_spec({
        'path': basestring
    })
    def fetch(self, path):
        """
        Fetch a file's data.

        :param str path:
            File path.
        :returns:
            The zlib-compressed file contents captured by :meth:`register`.
            The caller must decompress them.
        :raises mitogen.core.CallError:
            The path was not registered.
        """
        if path not in self._paths:
            raise mitogen.core.CallError(self.unregistered_msg)

        LOG.debug('Serving %r', path)
        return self._paths[path]
class JobResultService(mitogen.service.Service):
    """
    Receive the result of a task from a child and forward it to interested
    listeners. If no listener exists, store the result until it is requested.

    Storing results in an intermediary service allows:

    * the lifetime of the worker to be decoupled from the lifetime of the job,
    * for new and unrelated workers to request the job result after the original
      worker that spawned it has exited,
    * for synchronous and asynchronous jobs to be treated identically,
    * for latency-free polling and waiting on job results, and
    * for Ansible job IDs to be used to refer to a job in preference to
      Mitogen-internal identifiers such as Sender and Context.

    Results are keyed by job ID.
    """
    handle = 502
    max_message_size = 1048576 * 64

    def __init__(self, router):
        super(JobResultService, self).__init__(router)
        # Guards both dictionaries below.
        self._lock = threading.Lock()
        # job ID -> result pushed while no listener was registered.
        self._result_by_job_id = {}
        # job ID -> Sender registered via listen() awaiting a result.
        self._sender_by_job_id = {}

    @mitogen.service.expose(mitogen.service.AllowParents())
    @mitogen.service.arg_spec({
        # Same type as get() and push(), so a job ID accepted by one method
        # is accepted by all of them.
        'job_id': basestring,
        'sender': mitogen.core.Sender,
    })
    def listen(self, job_id, sender):
        """
        Register to receive the result of a job when it becomes available.

        :param str job_id:
            Job ID to listen for.
        :param mitogen.core.Sender sender:
            Sender on which to deliver the job result.
        :raises Error:
            A listener is already registered for `job_id`.
        """
        LOG.debug('%r.listen(job_id=%r, sender=%r)', self, job_id, sender)
        with self._lock:
            if job_id in self._sender_by_job_id:
                raise Error('Listener already exists for job: %s' % (job_id,))
            # NOTE(review): a result already stored by push() is not delivered
            # to `sender` here; it remains available through get() only.
            self._sender_by_job_id[job_id] = sender

    @mitogen.service.expose(mitogen.service.AllowParents())
    @mitogen.service.arg_spec({
        'job_id': basestring,
    })
    def get(self, job_id):
        """
        Return a job's result if it is available, otherwise return immediately.
        The job result is forgotten once it has been returned by this method.

        :param str job_id:
            Job ID to return.
        :returns:
            Job result dictionary, or :data:`None`.
        """
        LOG.debug('%r.get(job_id=%r)', self, job_id)
        with self._lock:
            return self._result_by_job_id.pop(job_id, None)

    @mitogen.service.expose(mitogen.service.AllowAny())
    @mitogen.service.arg_spec({
        'job_id': basestring,
        'result': (mitogen.core.CallError, dict)
    })
    def push(self, job_id, result):
        """
        Deliver a job's result from a child context, notifying any listener
        registered via :meth:`listen` of the result.

        :param str job_id:
            Job ID whose result is being pushed.
        :param result:
            Job result dictionary, or a :class:`mitogen.core.CallError`.
        :raises Error:
            A result is already stored for `job_id`.
        """
        LOG.debug('%r.push(job_id=%r, result=%r)', self, job_id, result)
        with self._lock:
            if job_id in self._result_by_job_id:
                raise Error('Result already exists for job: %s' % (job_id,))
            # A registered listener consumes the result immediately and is
            # forgotten; otherwise the result is kept until get() collects it.
            sender = self._sender_by_job_id.pop(job_id, None)
            if sender is not None:
                sender.send(result)
            else:
                self._result_by_job_id[job_id] = result