Reduce commenting as basic.py is copied to the remote host with every module transfer.

Move some comments to the unittest as we might look at that if we decide to refactor the code.
10 years ago · 8233522a7a
parent d293a46994
commit 8233522a7a
2 changed files with 46 additions and 51 deletions
--- a/lib/ansible/module_utils/basic.py
+++ b/lib/ansible/module_utils/basic.py
@ -967,66 +967,50 @@ class AnsibleModule(object):
        return (params2, args)

    def _heuristic_log_sanitize(self, data):
-        ''' Remove strings that look like passwords from log messages
-
-        Currently filters out things like:
-        * user:pass@foo/whatever
-        * http://username:pass@wherever/foo
-
-        Currently, the heuristics are subject to false positives.  This could
-        change in the future should we decide that no_log is what we want to
-        push people towards.
-        '''
-        # Regexes can be too slow for this.  Pathological cases for regexes
-        # seem to be large amounts of data with many ':' but no '@'
-        # This function gets slower when there are many replacements but not
-        # nearly as slow as the worst case for regexes. If we need the
-        # flexibility of regex's in the future, re.sub() is faster than
-        # re.match() + str.join().  We might be able to decide to use a regex
-        # strategy if we detect a large number of '@' symbolsand use this
-        # function otherwise.
-
-        # begin points to the beginning of a password containing string
-        # end points to the end of the password containing string
-        # sep points to the char in between username and password
-        # prev_begin keeps track of where in the string to start a new search
-        #   for the end of a password substring
-        # sep_search_end keeps track of where in the string to end a search
-        #   for the separator
+        ''' Remove strings that look like passwords from log messages '''
+        # Currently filters:
+        # user:pass@foo/whatever and http://username:pass@wherever/foo
+        # This code has false positives and consumes parts of logs that are
+        # not passwds
+
+        # begin: start of a passwd containing string
+        # end: end of a passwd containing string
+        # sep: char between user and passwd
+        # prev_begin: where in the overall string to start a search for
+        #   a passwd
+        # sep_search_end: where in the string to end a search for the sep
        output = []
        begin = len(data)
        prev_begin = begin
-        sep = 1  # Prime the pump with a sentinel value
+        sep = 1
        while sep:
-            # Find the potential end of a password
+            # Find the potential end of a passwd
            try:
                end = data.rindex('@', 0, begin)
            except ValueError:
-                # No end marker, so add the rest of the data
+                # No passwd in the rest of the data
                output.insert(0, data[0:begin])
                break

-            # Search for the beginning of the password
+            # Search for the beginning of a passwd
            sep = None
            sep_search_end = end
            while not sep:
-                # Search for the beginning of a URL-style username+password
+                # URL-style username+password
                try:
                    begin = data.rindex('://', 0, sep_search_end)
                except ValueError:
-                    # If we don't find that, then we default to the start of
-                    # the string (b/c ssh-style username+password could
-                    # be taking up all of this parameter).
+                    # No url style in the data, check for ssh style in the
+                    # rest of the string
                    begin = 0
-                # Search for a separator character inside of where we know the
-                # password might live.
+                # Search for separator
                try:
                    sep = data.index(':', begin + 3, end)
                except ValueError:
-                    # No separator, now we have choices:
+                    # No separator; choices:
                    if begin == 0:
-                        # We've searched the whole string so there's no
-                        # password here.  Return the remaining data
+                        # Searched the whole string so there's no password
+                        # here.  Return the remaining data
                        output.insert(0, data[0:begin])
                        break
                    # Search for a different beginning of the password field.
@ -1081,11 +1065,7 @@ class AnsibleModule(object):

        # 6655 - allow for accented characters
        if isinstance(msg, unicode):
-            # If we've done everything right up above msg should be type
-            # str, not type unicode here.  This is partial protection in case
-            # we've done something wrong.  But if we arrive here we can
-            # potentially get tracebacks in code up above (when mixing unicode
-            # and str together)
+            # We should never get here as msg should be type str, not unicode
            msg = msg.encode('utf-8')

        if (has_journal):
--- a/test/units/TestModuleUtilsBasic.py
+++ b/test/units/TestModuleUtilsBasic.py
@ -247,11 +247,21 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
    #################################################################################

    #
-    # Several speed tests -- previously, the obfuscation of passwords carried
-    # an unreasonable speed penalty for some module parameters.  We want to be
-    # sure we don't regress to that state.
+    # Speed tests
    #

+    # Previously, we used regexes which had some pathologically slow cases for
+    # parameters with large amounts of data with many ':' but no '@'.  The
+    # present function gets slower when there are many replacements so we may
+    # want to explore regexes in the future (for the speed when substituting
+    # or flexibility).  These speed tests will hopefully tell us if we're
+    # introducing code that has cases that are simply too slow.
+    #
+    # Some regex notes:
+    # * re.sub() is faster than re.match() + str.join().
+    # * We may be able to detect a large number of '@' symbols and then use
+    #   a regex else use the present function.
+
    @timed(5)
    def test_log_sanitize_speed_many_url(self):
        self.module._heuristic_log_sanitize(self.many_url)
@ -297,10 +307,15 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
        # the same:
        self.assertEqual(len(url_output), len(url_data))

-        # ssh checking is somewhat harder as the heuristic is overzealous in
-        # most cases.  Since the input will have at least one ":" present
-        # before the password we can tell some things about the beginning and
-        # end of the data, though:
+        # ssh checking is harder as the heuristic is overzealous in many
+        # cases.  Since the input will have at least one ":" present before
+        # the password we can tell some things about the beginning and end of
+        # the data, though:
        self.assertTrue(ssh_output.startswith("{'"))
        self.assertTrue(ssh_output.endswith("'}}}}"))
        self.assertIn(":********@foo.com/data',", ssh_output)
+
+        # The overzealous-ness here may lead to us changing the algorithm in
+        # the future.  We could make it consume less of the data (with the
+        # possiblity of leaving partial passwords exposed) and encourage
+        # people to use no_log instead of relying on this obfuscation.