Merge branch 'optimise_utils_clean_data' of https://github.com/leth/ansible into leth-optimise_utils_clean_data

10 years ago · 25e9d1197e
parent a650421e39 c47d1f5265
commit 25e9d1197e
2 changed files with 43 additions and 52 deletions
--- a/lib/ansible/utils/init.py
+++ b/lib/ansible/utils/init.py
@ -46,6 +46,7 @@ import getpass
 import sys
 import json
 import subprocess
 import contextlib
 from vault import VaultLib
@ -56,6 +57,8 @@ MAX_FILE_SIZE_FOR_DIFF=1*1024*1024
 # caching the compilation of the regex used
 # to check for lookup calls within data
 LOOKUP_REGEX = re.compile(r'lookup\s*\(')
 PRINT_CODE_REGEX = re.compile(r'(?:{[{%]|[%}]})')
 CODE_REGEX = re.compile(r'(?:{%|%})')
 try:
    import json
@ -355,64 +358,48 @@ def _clean_data(orig_data, from_remote=False, from_inventory=False):
    if not isinstance(orig_data, basestring):
        return orig_data
    data = StringIO.StringIO("")
    # when the data is marked as having come from a remote, we always
    # replace any print blocks (i.e. {{var}}), however when marked as coming
    # from inventory we only replace print blocks that contain a call to
    # a lookup plugin (i.e. {{lookup('foo','bar')}})
    replace_prints = from_remote or (from_inventory and '{{' in orig_data and LOOKUP_REGEX.search(orig_data) is not None)
    regex = PRINT_CODE_REGEX if replace_prints else CODE_REGEX
    with contextlib.closing(StringIO.StringIO(orig_data)) as data:
        # these variables keep track of opening block locations, as we only
        # want to replace matched pairs of print/block tags
        print_openings = []
        block_openings = []
-
+        for mo in regex.finditer(orig_data):
-    for idx,c in enumerate(orig_data):
+            token = mo.group(0)
-        # if the current character is an opening brace, check to
+            token_start = mo.start(0)
-        # see if this is a jinja2 token. Otherwise, if the current
+
-        # character is a closing brace, we backup one character to
+            if token[0] == '{':
-        # see if we have a closing.
+                if token == '{%':
-        if c == '{' and idx < len(orig_data) - 1:
+                    block_openings.append(token_start)
-            token = orig_data[idx:idx+2]
+                elif token == '{{':
-            # if so, and we want to replace this block, push
+                    print_openings.append(token_start)
-            # this token's location onto the appropriate array
+
-            if token == '{{' and replace_prints:
+            elif token[1] == '}':
-                print_openings.append(idx)
+                prev_idx = None
-            elif token == '{%':
+                if token == '%}' and block_openings:
                block_openings.append(idx)
            # finally we write the data to the buffer and write
            data.seek(0, os.SEEK_END)
            data.write(c)
        elif c == '}' and idx > 0:
            token = orig_data[idx-1:idx+1]
            prev_idx = -1
            if token == '%}' and len(block_openings) > 0:
                    prev_idx = block_openings.pop()
-            elif token == '}}' and len(print_openings) > 0:
+                elif token == '}}' and print_openings:
                    prev_idx = print_openings.pop()
-            # if we have a closing token, and we have previously found
+
-            # the opening to the same kind of block represented by this
+                if prev_idx is not None:
            # token, replace both occurrences, otherwise we just write
            # the current character to the buffer
            if prev_idx != -1:
                    # replace the opening
                    data.seek(prev_idx, os.SEEK_SET)
                    data.write('{#')
                    # replace the closing
-                data.seek(-1, os.SEEK_END)
+                    data.seek(token_start, os.SEEK_SET)
                    data.write('#}')
            else:
-                data.seek(0, os.SEEK_END)
+                assert False, 'Unhandled regex match'
-                data.write(c)
+
-        else:
+        return data.getvalue()
            # not a jinja2 token, so we just write the current char
            # to the output buffer
            data.seek(0, os.SEEK_END)
            data.write(c)
    return_data = data.getvalue()
    data.close()
    return return_data
 def _clean_data_struct(orig_data, from_remote=False, from_inventory=False):
    '''
--- a/test/units/TestUtils.py
+++ b/test/units/TestUtils.py
@ -729,6 +729,10 @@ class TestUtils(unittest.TestCase):
            ansible.utils._clean_data('this string has a {{variable}}', from_remote=True),
            'this string has a {#variable#}'
        )
        self.assertEqual(
            ansible.utils._clean_data('this string {{has}} two {{variables}} in it', from_remote=True),
            'this string {#has#} two {#variables#} in it'
        )
        self.assertEqual(
            ansible.utils._clean_data('this string has a {{variable with a\nnewline}}', from_remote=True),
            'this string has a {#variable with a\nnewline#}'