From 7f33580eba14ec141cb08116e98a12b7c9cc2669 Mon Sep 17 00:00:00 2001
From: Grzegorz Nosek <root@localdomain.pl>
Date: Fri, 1 Aug 2014 14:34:37 +0200
Subject: [PATCH] Fix exponential regex performance issue

filter_leading_non_json_lines effectively does

re.match(".*\w+=\w+.*", line)

for every line of output. This performs abysmally on large
Base64-encoded data (which ultimately does not match the regex but does
match the .*\w+= part), as returned e.g. by the template module (diffs).

Replacing the match with

re.search("\w=\w", line)

drops the complexity back to linear and makes Ansible actually usable
with large diffs from the template module (a 150 KB Base64 diff kept
Ansible spinning at 100% CPU for minutes).

Also, check the easy cases (line.startswith) first while we're here.

Closes: #8932
---
 lib/ansible/utils/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/ansible/utils/__init__.py b/lib/ansible/utils/__init__.py
index 474dd86f098..a1880b07c44 100644
--- a/lib/ansible/utils/__init__.py
+++ b/lib/ansible/utils/__init__.py
@@ -1041,11 +1041,11 @@ def filter_leading_non_json_lines(buf):
     filter only leading lines since multiline JSON is valid.
     '''
 
-    kv_regex = re.compile(r'.*\w+=\w+.*')
+    kv_regex = re.compile(r'\w=\w')
     filtered_lines = StringIO.StringIO()
     stop_filtering = False
     for line in buf.splitlines():
-        if stop_filtering or kv_regex.match(line) or line.startswith('{') or line.startswith('['):
+        if stop_filtering or line.startswith('{') or line.startswith('[') or kv_regex.search(line):
             stop_filtering = True
             filtered_lines.write(line + '\n')
     return filtered_lines.getvalue()