Reduce commenting as basic.py is copied to the remote host with every module transfer.

Move some comments to the unittest as we might look at that if we decide to refactor the code.
10 years ago · 8233522a7a
parent d293a46994
commit 8233522a7a
2 changed files with 46 additions and 51 deletions
--- a/lib/ansible/module_utils/basic.py
+++ b/lib/ansible/module_utils/basic.py
@ -967,66 +967,50 @@ class AnsibleModule(object):
        return (params2, args)
    def _heuristic_log_sanitize(self, data):
-        ''' Remove strings that look like passwords from log messages
+        ''' Remove strings that look like passwords from log messages '''
-
+        # Currently filters:
-        Currently filters out things like:
+        # user:pass@foo/whatever and http://username:pass@wherever/foo
-        * user:pass@foo/whatever
+        # This code has false positives and consumes parts of logs that are
-        * http://username:pass@wherever/foo
+        # not passwds
-
+
-        Currently, the heuristics are subject to false positives.  This could
+        # begin: start of a passwd containing string
-        change in the future should we decide that no_log is what we want to
+        # end: end of a passwd containing string
-        push people towards.
+        # sep: char between user and passwd
-        '''
+        # prev_begin: where in the overall string to start a search for
-        # Regexes can be too slow for this.  Pathological cases for regexes
+        #   a passwd
-        # seem to be large amounts of data with many ':' but no '@'
+        # sep_search_end: where in the string to end a search for the sep
        # This function gets slower when there are many replacements but not
        # nearly as slow as the worst case for regexes. If we need the
        # flexibility of regex's in the future, re.sub() is faster than
        # re.match() + str.join().  We might be able to decide to use a regex
        # strategy if we detect a large number of '@' symbolsand use this
        # function otherwise.
        # begin points to the beginning of a password containing string
        # end points to the end of the password containing string
        # sep points to the char in between username and password
        # prev_begin keeps track of where in the string to start a new search
        #   for the end of a password substring
        # sep_search_end keeps track of where in the string to end a search
        #   for the separator
        output = []
        begin = len(data)
        prev_begin = begin
-        sep = 1  # Prime the pump with a sentinel value
+        sep = 1
        while sep:
-            # Find the potential end of a password
+            # Find the potential end of a passwd
            try:
                end = data.rindex('@', 0, begin)
            except ValueError:
-                # No end marker, so add the rest of the data
+                # No passwd in the rest of the data
                output.insert(0, data[0:begin])
                break
-            # Search for the beginning of the password
+            # Search for the beginning of a passwd
            sep = None
            sep_search_end = end
            while not sep:
-                # Search for the beginning of a URL-style username+password
+                # URL-style username+password
                try:
                    begin = data.rindex('://', 0, sep_search_end)
                except ValueError:
-                    # If we don't find that, then we default to the start of
+                    # No url style in the data, check for ssh style in the
-                    # the string (b/c ssh-style username+password could
+                    # rest of the string
                    # be taking up all of this parameter).
                    begin = 0
-                # Search for a separator character inside of where we know the
+                # Search for separator
                # password might live.
                try:
                    sep = data.index(':', begin + 3, end)
                except ValueError:
-                    # No separator, now we have choices:
+                    # No separator; choices:
                    if begin == 0:
-                        # We've searched the whole string so there's no
+                        # Searched the whole string so there's no password
-                        # password here.  Return the remaining data
+                        # here.  Return the remaining data
                        output.insert(0, data[0:begin])
                        break
                    # Search for a different beginning of the password field.
@ -1081,11 +1065,7 @@ class AnsibleModule(object):
        # 6655 - allow for accented characters
        if isinstance(msg, unicode):
-            # If we've done everything right up above msg should be type
+            # We should never get here as msg should be type str, not unicode
            # str, not type unicode here.  This is partial protection in case
            # we've done something wrong.  But if we arrive here we can
            # potentially get tracebacks in code up above (when mixing unicode
            # and str together)
            msg = msg.encode('utf-8')
        if (has_journal):
--- a/test/units/TestModuleUtilsBasic.py
+++ b/test/units/TestModuleUtilsBasic.py
@ -247,11 +247,21 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
    #################################################################################
    #
-    # Several speed tests -- previously, the obfuscation of passwords carried
+    # Speed tests
    # an unreasonable speed penalty for some module parameters.  We want to be
    # sure we don't regress to that state.
    #
    # Previously, we used regexes which had some pathologically slow cases for
    # parameters with large amounts of data with many ':' but no '@'.  The
    # present function gets slower when there are many replacements so we may
    # want to explore regexes in the future (for the speed when substituting
    # or flexibility).  These speed tests will hopefully tell us if we're
    # introducing code that has cases that are simply too slow.
    #
    # Some regex notes:
    # * re.sub() is faster than re.match() + str.join().
    # * We may be able to detect a large number of '@' symbols and then use
    #   a regex else use the present function.
    @timed(5)
    def test_log_sanitize_speed_many_url(self):
        self.module._heuristic_log_sanitize(self.many_url)
@ -297,10 +307,15 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
        # the same:
        self.assertEqual(len(url_output), len(url_data))
-        # ssh checking is somewhat harder as the heuristic is overzealous in
+        # ssh checking is harder as the heuristic is overzealous in many
-        # most cases.  Since the input will have at least one ":" present
+        # cases.  Since the input will have at least one ":" present before
-        # before the password we can tell some things about the beginning and
+        # the password we can tell some things about the beginning and end of
-        # end of the data, though:
+        # the data, though:
        self.assertTrue(ssh_output.startswith("{'"))
        self.assertTrue(ssh_output.endswith("'}}}}"))
        self.assertIn(":********@foo.com/data',", ssh_output)
        # The overzealous-ness here may lead to us changing the algorithm in
        # the future.  We could make it consume less of the data (with the
        # possiblity of leaving partial passwords exposed) and encourage
        # people to use no_log instead of relying on this obfuscation.