Controller-side module caching.

In our performance measurements, this makes the recursive ast.parse code path
as fast as the pre-ziploader baseline.

Since this unittest isn't testing that the returned module data is
correct, we don't need to worry about os.rename not having any module
data. We should devise a separate test for the module and caching code.
pull/15344/head
Toshio Kuratomi 9 years ago
parent b5717ef696
commit dcc5dfdf81
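
In short, this change caches the assembled module payload on the controller: the first
worker that needs a given module builds the zip payload under a per-action lock, writes
it to a temporary file, and renames it into place, so later workers read the cached file
instead of rebuilding it. A minimal standalone sketch of that pattern (build_payload and
CACHE_DIR are illustrative stand-ins, not Ansible APIs):

    # Standalone sketch of the cache-then-rename pattern introduced by this commit.
    import os
    import tempfile
    from collections import defaultdict
    from multiprocessing import Lock

    CACHE_DIR = tempfile.mkdtemp(prefix='ansible-local-tmp')
    action_write_locks = defaultdict(Lock)

    def build_payload(module_name):
        # stand-in for zipping the module plus its module_utils snippets
        return b'payload for ' + module_name.encode('utf-8')

    def cached_payload(module_name):
        cached = os.path.join(CACHE_DIR, module_name)
        # fast path: some process already wrote the cache entry, no lock needed
        if os.path.exists(cached):
            with open(cached, 'rb') as f:
                return f.read()
        with action_write_locks[module_name]:
            # re-check under the lock in case another process beat us to it
            if not os.path.exists(cached):
                data = build_payload(module_name)
                # write to a temp name, then rename, so readers never see a partial file
                with open(cached + '-part', 'wb') as f:
                    f.write(data)
                os.rename(cached + '-part', cached)
        with open(cached, 'rb') as f:
            return f.read()

    if __name__ == '__main__':
        print(cached_payload('ping'))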

@@ -33,6 +33,7 @@ except Exception:
     pass

 import os
+import shutil
 import sys
 import traceback
@@ -40,6 +41,7 @@ import traceback
 from multiprocessing import Lock
 debug_lock = Lock()

+import ansible.constants as C
 from ansible.errors import AnsibleError, AnsibleOptionsError, AnsibleParserError
 from ansible.utils.display import Display
 from ansible.utils.unicode import to_unicode
@@ -87,28 +89,28 @@ if __name__ == '__main__':
         cli = mycli(sys.argv)
         cli.parse()
-        sys.exit(cli.run())
+        exit_code = cli.run()
     except AnsibleOptionsError as e:
         cli.parser.print_help()
         display.error(to_unicode(e), wrap_text=False)
-        sys.exit(5)
+        exit_code = 5
     except AnsibleParserError as e:
         display.error(to_unicode(e), wrap_text=False)
-        sys.exit(4)
+        exit_code = 4
     # TQM takes care of these, but leaving comment to reserve the exit codes
     #    except AnsibleHostUnreachable as e:
     #        display.error(str(e))
-    #        sys.exit(3)
+    #        exit_code = 3
     #    except AnsibleHostFailed as e:
     #        display.error(str(e))
-    #        sys.exit(2)
+    #        exit_code = 2
     except AnsibleError as e:
         display.error(to_unicode(e), wrap_text=False)
-        sys.exit(1)
+        exit_code = 1
     except KeyboardInterrupt:
         display.error("User interrupted execution")
-        sys.exit(99)
+        exit_code = 99
     except Exception as e:
         have_cli_options = cli is not None and cli.options is not None
         display.error("Unexpected Exception: %s" % to_unicode(e), wrap_text=False)
@@ -116,4 +118,9 @@ if __name__ == '__main__':
             display.display(u"the full traceback was:\n\n%s" % to_unicode(traceback.format_exc()))
         else:
             display.display("to see the full traceback, use -vvv")
-        sys.exit(250)
+        exit_code = 250
+    finally:
+        # Remove ansible tempdir
+        shutil.rmtree(C.DEFAULT_LOCAL_TMP, True)
+
+    sys.exit(exit_code)

@@ -452,6 +452,22 @@ This is the default location Ansible looks to find modules::
 Ansible knows how to look in multiple locations if you feed it a colon separated path, and it also will look for modules in the
 "./library" directory alongside a playbook.

+.. _local_tmp:
+
+local_tmp
+=========
+
+When Ansible gets ready to send a module to a remote machine it usually has to
+add a few things to the module: Some boilerplate code, the module's
+parameters, and a few constants from the config file. This combination of
+things gets stored in a temporary file until ansible exits and cleans up after
+itself. The default location is a subdirectory of the user's home directory.
+If you'd like to change that, you can do so by altering this setting::
+
+    local_tmp = $HOME/.ansible/tmp
+
+Ansible will then choose a random directory name inside this location.
+
 .. _log_path:

 log_path
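
For illustration, the new local_tmp value is resolved roughly like this (mirroring the
istmppath branch added to get_config further down in this commit; resolve_local_tmp is
an illustrative name, not an Ansible function):

    import os
    import tempfile

    def resolve_local_tmp(configured='$HOME/.ansible/tmp'):
        # expand environment variables and ~ in the configured value
        base = os.path.expanduser(os.path.expandvars(configured))
        if not os.path.exists(base):
            os.makedirs(base, 0o700)
        # each run gets its own randomly named subdirectory, removed when ansible exits
        return tempfile.mkdtemp(prefix='ansible-local-tmp', dir=base)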

@@ -14,6 +14,7 @@
 #inventory = /etc/ansible/hosts
 #library = /usr/share/my_modules/
 #remote_tmp = $HOME/.ansible/tmp
+#local_tmp = $HOME/.ansible/tmp
 #forks = 5
 #poll_interval = 15
 #sudo_user = root

@@ -29,12 +29,14 @@
 # test-module -m ../library/file/lineinfile -a "dest=/etc/exports line='/srv/home hostname1(rw,sync)'" --check
 # test-module -m ../library/commands/command -a "echo hello" -n -o "test_hello"

-import sys
 import base64
+from multiprocessing import Lock
+import optparse
 import os
 import subprocess
+import sys
 import traceback
-import optparse

 import ansible.utils.vars as utils_vars
 from ansible.parsing.dataloader import DataLoader
 from ansible.parsing.utils.jsonify import jsonify
@@ -133,10 +135,12 @@ def boilerplate_module(modfile, args, interpreter, check, destfile):
     modname = os.path.basename(modfile)
     modname = os.path.splitext(modname)[0]

+    action_write_lock = Lock()
     (module_data, module_style, shebang) = module_common.modify_module(
         modname,
         modfile,
         complex_args,
+        action_write_lock,
         task_vars=task_vars
     )

@@ -20,6 +20,7 @@ from __future__ import (absolute_import, division, print_function)
 __metaclass__ = type

 import os
+import tempfile
 from string import ascii_letters, digits

 from ansible.compat.six import string_types
@@ -47,7 +48,7 @@ def shell_expand(path):
         path = os.path.expanduser(os.path.expandvars(path))
     return path

-def get_config(p, section, key, env_var, default, boolean=False, integer=False, floating=False, islist=False, isnone=False, ispath=False, ispathlist=False):
+def get_config(p, section, key, env_var, default, boolean=False, integer=False, floating=False, islist=False, isnone=False, ispath=False, ispathlist=False, istmppath=False):
     ''' return a configuration variable with casting '''
     value = _get_config(p, section, key, env_var, default)
     if boolean:
@@ -65,6 +66,11 @@ def get_config(p, section, key, env_var, default, boolean=False, integer=False,
             value = None
     elif ispath:
         value = shell_expand(value)
+    elif istmppath:
+        value = shell_expand(value)
+        if not os.path.exists(value):
+            os.makedirs(value, 0o700)
+        value = tempfile.mkdtemp(prefix='ansible-local-tmp', dir=value)
     elif ispathlist:
         if isinstance(value, string_types):
             value = [shell_expand(x) for x in value.split(os.pathsep)]
@@ -136,6 +142,7 @@ DEFAULT_HOST_LIST = get_config(p, DEFAULTS, 'inventory', 'ANSIBLE_INVENTO
 DEFAULT_MODULE_PATH = get_config(p, DEFAULTS, 'library', 'ANSIBLE_LIBRARY', None, ispathlist=True)
 DEFAULT_ROLES_PATH = get_config(p, DEFAULTS, 'roles_path', 'ANSIBLE_ROLES_PATH', '/etc/ansible/roles', ispathlist=True)
 DEFAULT_REMOTE_TMP = get_config(p, DEFAULTS, 'remote_tmp', 'ANSIBLE_REMOTE_TEMP', '$HOME/.ansible/tmp')
+DEFAULT_LOCAL_TMP = get_config(p, DEFAULTS, 'local_tmp', 'ANSIBLE_LOCAL_TEMP', '$HOME/.ansible/tmp', istmppath=True)
 DEFAULT_MODULE_NAME = get_config(p, DEFAULTS, 'module_name', None, 'command')
 DEFAULT_FORKS = get_config(p, DEFAULTS, 'forks', 'ANSIBLE_FORKS', 5, integer=True)
 DEFAULT_MODULE_ARGS = get_config(p, DEFAULTS, 'module_args', 'ANSIBLE_MODULE_ARGS', '')

@@ -33,6 +33,7 @@ from ansible import __version__
 from ansible import constants as C
 from ansible.errors import AnsibleError
 from ansible.utils.unicode import to_bytes, to_unicode
+from ansible.plugins.strategy import action_write_locks

 try:
     from __main__ import display
@@ -275,7 +276,11 @@ def _get_facility(task_vars):
         facility = task_vars['ansible_syslog_facility']
     return facility

-def meta_finder(data, snippet_names, snippet_data, zf):
+def recursive_finder(data, snippet_names, snippet_data, zf):
+    """
+    Using ModuleDepFinder, make sure we have all of the module_utils files that
+    the module and its module_utils files need.
+    """
     tree = ast.parse(data)
     finder = ModuleDepFinder()
     finder.visit(tree)
@@ -290,7 +295,7 @@ def meta_finder(data, snippet_names, snippet_data, zf):
     snippet_names.update(new_snippets)
     for snippet_name in tuple(new_snippets):
-        meta_finder(snippet_data[snippet_name], snippet_names, snippet_data, zf)
+        recursive_finder(snippet_data[snippet_name], snippet_names, snippet_data, zf)
         del snippet_data[snippet_name]

 def _find_snippet_imports(module_name, module_data, module_path, module_args, task_vars, module_compression):
@@ -350,6 +355,24 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta
         except AttributeError:
             display.warning(u'Bad module compression string specified: %s. Using ZIP_STORED (no compression)' % module_compression)
             compression_method = zipfile.ZIP_STORED
+
+        lookup_path = os.path.join(C.DEFAULT_LOCAL_TMP, 'ziploader_cache')
+        if not os.path.exists(lookup_path):
+            os.mkdir(lookup_path)
+        cached_module_filename = os.path.join(lookup_path, "%s-%s" % (module_name, module_compression))
+
+        zipdata = None
+        # Optimization -- don't lock if the module has already been cached
+        if os.path.exists(cached_module_filename):
+            zipdata = open(cached_module_filename, 'rb').read()
+            # Fool the check later... I think we should just remove the check
+            snippet_names.add('basic')
+        else:
+            with action_write_locks[module_name]:
+                # Check that no other process has created this while we were
+                # waiting for the lock
+                if not os.path.exists(cached_module_filename):
+                    # Create the module zip data
                     zipoutput = BytesIO()
                     zf = zipfile.ZipFile(zipoutput, mode='w', compression=compression_method)
                     zf.writestr('ansible/__init__.py', b''.join((b"__version__ = '", to_bytes(__version__), b"'\n")))
@@ -360,13 +383,33 @@ def _find_snippet_imports(module_name, module_data, module_path, module_args, ta
                     zf.writestr('ansible/module_exec/%s/__main__.py' % module_name, module_data)
                     snippet_data = dict()
-                    meta_finder(module_data, snippet_names, snippet_data, zf)
+                    recursive_finder(module_data, snippet_names, snippet_data, zf)
                     zf.close()
+                    zipdata = base64.b64encode(zipoutput.getvalue())
+                    # Write the assembled module to a temp file (write to temp
+                    # so that no one looking for the file reads a partially
+                    # written file)
+                    with open(cached_module_filename + '-part', 'w') as f:
+                        f.write(zipdata)
+                    # Rename the file into its final position in the cache so
+                    # future users of this module can read it off the
+                    # filesystem instead of constructing from scratch.
+                    os.rename(cached_module_filename + '-part', cached_module_filename)
+
+        if zipdata is None:
+            # Another process wrote the file while we were waiting for
+            # the write lock. Go ahead and read the data from disk
+            # instead of re-creating it.
+            zipdata = open(cached_module_filename, 'rb').read()
+            # Fool the check later... I think we should just remove the check
+            snippet_names.add('basic')

         shebang, interpreter = _get_shebang(u'/usr/bin/python', task_vars)
         if shebang is None:
             shebang = u'#!/usr/bin/python'
         output.write(to_bytes(STRIPPED_ZIPLOADER_TEMPLATE % dict(
-            zipdata=base64.b64encode(zipoutput.getvalue()),
+            zipdata=zipdata,
             ansible_module=module_name,
             args=python_repred_args,
             constants=python_repred_constants,
@@ -450,17 +493,6 @@ def modify_module(module_name, module_path, module_args, task_vars=dict(), modul
         which results in the inclusion of the common code from powershell.ps1
     """

-    ### TODO: Optimization ideas if this code is actually a source of slowness:
-    #  * Fix comment stripping: Currently doesn't preserve shebangs and encoding info (but we unconditionally add encoding info)
-    #  * Use pyminifier if installed
-    #  * comment stripping/pyminifier needs to have config setting to turn it
-    #    off for debugging purposes (goes along with keep remote but should be
-    #    separate otherwise users wouldn't be able to get info on what the
-    #    minifier output)
-    #  * Only split into lines and recombine into strings once
-    #  * Cache the modified module? If only the args are different and we do
-    #    that as the last step we could cache all the work up to that point.
-
     with open(module_path, 'rb') as f:
         # read in the module source

@@ -19,15 +19,16 @@
 from __future__ import (absolute_import, division, print_function)
 __metaclass__ = type

-from ansible.compat.six.moves import queue as Queue
-from ansible.compat.six import iteritems, text_type, string_types
 import json
 import time
 import zlib
+from collections import defaultdict
+from multiprocessing import Lock

 from jinja2.exceptions import UndefinedError

+from ansible.compat.six.moves import queue as Queue
+from ansible.compat.six import iteritems, text_type, string_types
 from ansible import constants as C
 from ansible.errors import AnsibleError, AnsibleParserError, AnsibleUndefinedVariable
 from ansible.executor.play_iterator import PlayIterator
@@ -51,6 +52,8 @@ except ImportError:
 __all__ = ['StrategyBase']

+action_write_locks = defaultdict(Lock)
+
 # TODO: this should probably be in the plugins/__init__.py, with
 #       a smarter mechanism to set all of the attributes based on
@@ -141,6 +144,20 @@ class StrategyBase:

         display.debug("entering _queue_task() for %s/%s" % (host, task))

+        # Add a write lock for tasks.
+        # Maybe this should be added somewhere further up the call stack but
+        # this is the earliest in the code where we have task (1) extracted
+        # into its own variable and (2) there's only a single code path
+        # leading to the module being run. This is called by three
+        # functions: __init__.py::_do_handler_run(), linear.py::run(), and
+        # free.py::run() so we'd have to add to all three to do it there.
+        # The next common higher level is __init__.py::run() and that has
+        # tasks inside of play_iterator so we'd have to extract them to do it
+        # there.
+        if not action_write_locks[task.action]:
+            display.warning('Python defaultdict did not create the Lock for us. Creating manually')
+            action_write_locks[task.action] = Lock()
+
         # and then queue the new task
         display.debug("%s - putting task (%s) in queue" % (host, task))
         try:
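
A tiny standalone illustration of the per-action lock table added above (not Ansible
code): defaultdict(Lock) hands out one Lock per action name and returns the same object
on later lookups, so every task using, say, the copy module serializes its cache writes
on the same lock.

    from collections import defaultdict
    from multiprocessing import Lock

    action_write_locks = defaultdict(Lock)

    first = action_write_locks['copy']
    second = action_write_locks['copy']
    assert first is second                    # same lock object for the same action
    assert action_write_locks['copy'] is not action_write_locks['ping']   # one lock per action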

@@ -212,6 +212,7 @@ class TestActionBase(unittest.TestCase):
         # test python module formatting
         with patch.object(builtins, 'open', mock_open(read_data=to_bytes(python_module_replacers.strip(), encoding='utf-8'))) as m:
+            with patch.object(os, 'rename') as m:
                 mock_task.args = dict(a=1, foo='fö〩')
                 mock_connection.module_implementation_preferences = ('',)
                 (style, shebang, data) = action_base._configure_module(mock_task.action, mock_task.args)
