From d6ae9e2c291cdc8f60f066c7339ef28731e96d4e Mon Sep 17 00:00:00 2001 From: Tobias Wolf Date: Tue, 19 Jan 2016 17:56:07 +0100 Subject: [PATCH] Avoid recursively checking JSON inventory for Unicode by moving to en-bloc unicode conversion to act on the script's stdout Both python-json and simplejson always return unicode strings when using their loads() method on unicode strings. This has been true at least since 2009. This makes checking each substring unnecessary, because we do not need to recursively check the strings contained in the inventory dict later one-by-one. This commit makes parsing of a large dynamic inventory at least 2 seconds faster. cf: https://github.com/towolf/ansible-large-inventory-testcase --- lib/ansible/inventory/script.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/ansible/inventory/script.py b/lib/ansible/inventory/script.py index 042fa8c24a2..999e472f539 100644 --- a/lib/ansible/inventory/script.py +++ b/lib/ansible/inventory/script.py @@ -31,7 +31,7 @@ from ansible.errors import AnsibleError from ansible.inventory.host import Host from ansible.inventory.group import Group from ansible.module_utils.basic import json_dict_bytes_to_unicode -from ansible.utils.unicode import to_str +from ansible.utils.unicode import to_str, to_unicode class InventoryScript: @@ -58,7 +58,13 @@ class InventoryScript: if sp.returncode != 0: raise AnsibleError("Inventory script (%s) had an execution error: %s " % (filename,stderr)) - self.data = stdout + # make sure script output is unicode so that json loader will output + # unicode strings itself + try: + self.data = to_unicode(stdout, errors="strict") + except Exception as e: + raise AnsibleError("inventory data from {0} contained characters that cannot be interpreted as UTF-8: {1}".format(to_str(self.filename), to_str(e))) + # see comment about _meta below self.host_vars_from_top = None self._parse(stderr) @@ -78,8 +84,6 @@ class InventoryScript: sys.stderr.write(err + "\n") raise 
AnsibleError("failed to parse executable inventory script results from {0}: data needs to be formatted as a json dict".format(to_str(self.filename))) - self.raw = json_dict_bytes_to_unicode(self.raw) - group = None for (group_name, data) in self.raw.items():