From 9b2baebe64bf31eecec995af80c0501434b3c2fe Mon Sep 17 00:00:00 2001 From: Matt Martz Date: Mon, 20 Aug 2018 15:08:29 -0500 Subject: [PATCH] Don't use copy.deepcopy in high workload areas, use naive_deepcopy (#44337) * Don't use copy.deepcopy in high workload areas, use deepishcopy. ci_complete * Add tests * Add changelog fragment * rename to naive_deepcopy and add extra docs * Rename to module_response_deepcopy and move to vars/clean --- .../fragments/deepcopy-alternative.yaml | 2 + lib/ansible/executor/task_executor.py | 2 + lib/ansible/executor/task_result.py | 6 +- lib/ansible/vars/clean.py | 59 +++++++++++++++--- lib/ansible/vars/manager.py | 1 + .../vars/test_module_response_deepcopy.py | 60 +++++++++++++++++++ 6 files changed, 119 insertions(+), 11 deletions(-) create mode 100644 changelogs/fragments/deepcopy-alternative.yaml create mode 100644 test/units/vars/test_module_response_deepcopy.py diff --git a/changelogs/fragments/deepcopy-alternative.yaml b/changelogs/fragments/deepcopy-alternative.yaml new file mode 100644 index 00000000000..3229824a5f0 --- /dev/null +++ b/changelogs/fragments/deepcopy-alternative.yaml @@ -0,0 +1,2 @@ +minor_changes: +- replace copy.deepcopy in high workload areas with a custom function to improve performance (https://github.com/ansible/ansible/pull/44337) diff --git a/lib/ansible/executor/task_executor.py b/lib/ansible/executor/task_executor.py index 94644981bc9..2b622db3440 100644 --- a/lib/ansible/executor/task_executor.py +++ b/lib/ansible/executor/task_executor.py @@ -629,6 +629,7 @@ class TaskExecutor: if self._task.action in ('set_fact', 'include_vars'): vars_copy.update(result['ansible_facts']) else: + # TODO: cleaning of facts should eventually become part of taskresults instead of vars vars_copy.update(namespace_facts(result['ansible_facts'])) if C.INJECT_FACTS_AS_VARS: vars_copy.update(clean_facts(result['ansible_facts'])) @@ -690,6 +691,7 @@ class TaskExecutor: if self._task.action in ('set_fact', 'include_vars'): variables.update(result['ansible_facts']) else: + # TODO: cleaning of facts should eventually become part of taskresults instead of vars variables.update(namespace_facts(result['ansible_facts'])) if C.INJECT_FACTS_AS_VARS: variables.update(clean_facts(result['ansible_facts'])) diff --git a/lib/ansible/executor/task_result.py b/lib/ansible/executor/task_result.py index ba5bc93f2d4..d5eb10107c4 100644 --- a/lib/ansible/executor/task_result.py +++ b/lib/ansible/executor/task_result.py @@ -5,11 +5,9 @@ from __future__ import (absolute_import, division, print_function) __metaclass__ = type -from copy import deepcopy - from ansible import constants as C from ansible.parsing.dataloader import DataLoader -from ansible.vars.clean import strip_internal_keys +from ansible.vars.clean import module_response_deepcopy, strip_internal_keys _IGNORE = ('failed', 'skipped') _PRESERVE = ('attempts', 'changed', 'retries') @@ -131,7 +129,7 @@ class TaskResult: result._result = x elif self._result: - result._result = deepcopy(self._result) + result._result = module_response_deepcopy(self._result) # actualy remove for remove_key in ignore: diff --git a/lib/ansible/vars/clean.py b/lib/ansible/vars/clean.py index 64bb042054a..7b29383c2e5 100644 --- a/lib/ansible/vars/clean.py +++ b/lib/ansible/vars/clean.py @@ -8,11 +8,9 @@ __metaclass__ = type import os import re -from copy import deepcopy - from ansible import constants as C from ansible.module_utils._text import to_text -from ansible.module_utils.six import string_types +from ansible.module_utils import six from ansible.plugins.loader import connection_loader try: @@ -22,6 +20,53 @@ except ImportError: display = Display() +def module_response_deepcopy(v): + """Function to create a deep copy of module response data + + Designed to be used within the Ansible "engine" to improve performance + issues where ``copy.deepcopy`` was used previously, largely with CPU + and memory contention. + + This only supports the following data types, and was designed to only + handle specific workloads: + + * ``dict`` + * ``list`` + + The data we pass here will come from a serialization such + as JSON, so we shouldn't have need for other data types such as + ``set`` or ``tuple``. + + Take note that this function should not be used extensively as a + replacement for ``deepcopy`` due to the naive way in which this + handles other data types. + + Do not expect uses outside of those listed below to maintain + backwards compatibility, in case we need to extend this function + to handle our specific needs: + + * ``ansible.executor.task_result.TaskResult.clean_copy`` + * ``ansible.vars.clean.clean_facts`` + * ``ansible.vars.namespace_facts`` + """ + if isinstance(v, dict): + ret = v.copy() + items = six.iteritems(ret) + elif isinstance(v, list): + ret = v[:] + items = enumerate(ret) + else: + return v + + for key, value in items: + if isinstance(value, (dict, list)): + ret[key] = module_response_deepcopy(value) + else: + ret[key] = value + + return ret + + def strip_internal_keys(dirty, exceptions=None): ''' All keys starting with _ansible_ are internal, so create a copy of the 'dirty' dict @@ -32,7 +77,7 @@ def strip_internal_keys(dirty, exceptions=None): exceptions = () clean = dirty.copy() for k in dirty.keys(): - if isinstance(k, string_types) and k.startswith('_ansible_'): + if isinstance(k, six.string_types) and k.startswith('_ansible_'): if k not in exceptions: del clean[k] elif isinstance(dirty[k], dict): @@ -57,7 +102,7 @@ def remove_internal_keys(data): def clean_facts(facts): ''' remove facts that can override internal keys or otherwise deemed unsafe ''' - data = deepcopy(facts) + data = module_response_deepcopy(facts) remove_keys = set() fact_keys = set(data.keys()) @@ -113,8 +158,8 @@ def namespace_facts(facts): for k in facts: if k in ('ansible_local',): # exceptions to 'deprefixing' - deprefixed[k] = deepcopy(facts[k]) + deprefixed[k] = module_response_deepcopy(facts[k]) else: - deprefixed[k.replace('ansible_', '', 1)] = deepcopy(facts[k]) + deprefixed[k.replace('ansible_', '', 1)] = module_response_deepcopy(facts[k]) return {'ansible_facts': deprefixed} diff --git a/lib/ansible/vars/manager.py b/lib/ansible/vars/manager.py index 9e938732986..7ac85f5a25d 100644 --- a/lib/ansible/vars/manager.py +++ b/lib/ansible/vars/manager.py @@ -307,6 +307,7 @@ class VariableManager: all_vars = combine_vars(all_vars, _plugins_play([host])) # finally, the facts caches for this host, if it exists + # TODO: cleaning of facts should eventually become part of taskresults instead of vars try: facts = wrap_var(self._fact_cache.get(host.name, {})) all_vars.update(namespace_facts(facts)) diff --git a/test/units/vars/test_module_response_deepcopy.py b/test/units/vars/test_module_response_deepcopy.py new file mode 100644 index 00000000000..1fdaf7a4b35 --- /dev/null +++ b/test/units/vars/test_module_response_deepcopy.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +# (c) 2018 Matt Martz +# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) + +from __future__ import absolute_import, division, print_function +__metaclass__ = type + +from ansible.vars.clean import module_response_deepcopy + +import pytest + + +def test_module_response_deepcopy_basic(): + x = 42 + y = module_response_deepcopy(x) + assert y == x + + +def test_module_response_deepcopy_atomic(): + tests = [None, 42, 2**100, 3.14, True, False, 1j, + "hello", "hello\u1234"] + for x in tests: + assert module_response_deepcopy(x) is x + + +def test_module_response_deepcopy_list(): + x = [[1, 2], 3] + y = module_response_deepcopy(x) + assert y == x + assert x is not y + assert x[0] is not y[0] + + +def test_module_response_deepcopy_empty_tuple(): + x = () + y = module_response_deepcopy(x) + assert x is y + + +@pytest.mark.skip(reason='No current support for this situation') +def test_module_response_deepcopy_tuple(): + x = ([1, 2], 3) + y = module_response_deepcopy(x) + assert y == x + assert x is not y + assert x[0] is not y[0] + + +def test_module_response_deepcopy_tuple_of_immutables(): + x = ((1, 2), 3) + y = module_response_deepcopy(x) + assert x is y + + +def test_module_response_deepcopy_dict(): + x = {"foo": [1, 2], "bar": 3} + y = module_response_deepcopy(x) + assert y == x + assert x is not y + assert x["foo"] is not y["foo"]