From 7f33580eba14ec141cb08116e98a12b7c9cc2669 Mon Sep 17 00:00:00 2001 From: Grzegorz Nosek Date: Fri, 1 Aug 2014 14:34:37 +0200 Subject: [PATCH] Fix exponential regex performance issue filter_leading_non_json_lines effectively does re.match(".*\w+=\w+.*", line) for every line of output. This has abysmal performance in case of large Base64-encoded data (which ultimately does not match the regex but does match the .*\w+= part) as returned e.g. by the template module (diffs). Replacing the match with re.search("\w=\w", line) drops the complexity back to linear and makes Ansible actually usable with large diffs from the template module (a 150 KB Base64 diff kept Ansible spinning at 100% CPU for minutes). Also, check the easy cases (line.startswith) first while we're here. Closes: #8932 --- lib/ansible/utils/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py index 474dd86f098..a1880b07c44 100644 --- a/lib/ansible/utils/__init__.py +++ b/lib/ansible/utils/__init__.py @@ -1041,11 +1041,11 @@ def filter_leading_non_json_lines(buf): filter only leading lines since multiline JSON is valid. ''' - kv_regex = re.compile(r'.*\w+=\w+.*') + kv_regex = re.compile(r'\w=\w') filtered_lines = StringIO.StringIO() stop_filtering = False for line in buf.splitlines(): - if stop_filtering or kv_regex.match(line) or line.startswith('{') or line.startswith('['): + if stop_filtering or line.startswith('{') or line.startswith('[') or kv_regex.search(line): stop_filtering = True filtered_lines.write(line + '\n') return filtered_lines.getvalue()