Merge pull request #10178 from ansible/password-obfuscation

Obfuscate passwords in a variety of output
10 years ago · d15eb066a1
parent 76a5dd7f62 4902c06304
commit d15eb066a1
4 changed files with 184 additions and 195 deletions
--- a/lib/ansible/module_utils/basic.py
+++ b/lib/ansible/module_utils/basic.py
@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
    directory_mode = dict(), # used by copy
 )
 PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
 def get_platform():
    ''' what's the platform?  example: Linux is a platform. '''
@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
    else:
        return d
 def heuristic_log_sanitize(data):
    ''' Remove strings that look like passwords from log messages

    Scans ``data`` from right to left for ``user:pass@host`` (ssh style)
    and ``scheme://user:pass@host`` (URL style) patterns and replaces the
    text between the ``:`` separator and the ``@`` with ``********``.

    :arg data: string to sanitize
    :returns: a new string with suspected passwords masked; text that
        contains no recognizable password is returned unchanged

    This is a heuristic: it has false positives and can mask parts of a
    log message that are not passwords.
    '''
    # Currently filters:
    # user:pass@foo/whatever and http://username:pass@wherever/foo
    # This code has false positives and consumes parts of logs that are
    # not passwds
    # begin: start of a passwd containing string
    # end: end of a passwd containing string
    # sep: char between user and passwd
    # prev_begin: where in the overall string to start a search for
    #   a passwd
    # sep_search_end: where in the string to end a search for the sep
    output = []
    begin = len(data)
    prev_begin = begin
    # Any truthy value works here; it only lets the loop run once.
    sep = 1
    while sep:
        # Find the potential end of a passwd
        try:
            end = data.rindex('@', 0, begin)
        except ValueError:
            # No passwd in the rest of the data
            output.insert(0, data[0:begin])
            break
        # Search for the beginning of a passwd
        sep = None
        sep_search_end = end
        while not sep:
            # URL-style username+password
            try:
                begin = data.rindex('://', 0, sep_search_end)
            except ValueError:
                # No url style in the data, check for ssh style in the
                # rest of the string
                begin = 0
            # Search for separator.  begin + 3 skips over the '://'
            # marker; in the ssh-style case (begin == 0) it means a ':'
            # within the first three characters is not considered.
            try:
                sep = data.index(':', begin + 3, end)
            except ValueError:
                # No separator; choices:
                if begin == 0:
                    # Searched the whole string so there's no password
                    # here.  Keep everything that has not been emitted
                    # yet.  This must slice up to prev_begin: begin is 0
                    # at this point, so data[0:begin] would be empty and
                    # the unprocessed head of the string would be lost.
                    output.insert(0, data[0:prev_begin])
                    break
                # Search for a different beginning of the password field.
                sep_search_end = begin
                continue
        if sep:
            # Password was found; remove it.
            output.insert(0, data[end:prev_begin])
            output.insert(0, '********')
            output.insert(0, data[begin:sep + 1])
            prev_begin = begin
    return ''.join(output)
 class AnsibleModule(object):
@ -1019,65 +1079,6 @@ class AnsibleModule(object):
        params2.update(params)
        return (params2, args)
    def _heuristic_log_sanitize(self, data):
        ''' Remove strings that look like passwords from log messages '''
        # Currently filters:
        # user:pass@foo/whatever and http://username:pass@wherever/foo
        # This code has false positives and consumes parts of logs that are
        # not passwds
        # begin: start of a passwd containing string
        # end: end of a passwd containing string
        # sep: char between user and passwd
        # prev_begin: where in the overall string to start a search for
        #   a passwd
        # sep_search_end: where in the string to end a search for the sep
        output = []
        begin = len(data)
        prev_begin = begin
        sep = 1
        while sep:
            # Find the potential end of a passwd
            try:
                end = data.rindex('@', 0, begin)
            except ValueError:
                # No passwd in the rest of the data
                output.insert(0, data[0:begin])
                break
            # Search for the beginning of a passwd
            sep = None
            sep_search_end = end
            while not sep:
                # URL-style username+password
                try:
                    begin = data.rindex('://', 0, sep_search_end)
                except ValueError:
                    # No url style in the data, check for ssh style in the
                    # rest of the string
                    begin = 0
                # Search for separator
                try:
                    sep = data.index(':', begin + 3, end)
                except ValueError:
                    # No separator; choices:
                    if begin == 0:
                        # Searched the whole string so there's no password
                        # here.  Return the remaining data
                        output.insert(0, data[0:begin])
                        break
                    # Search for a different beginning of the password field.
                    sep_search_end = begin
                    continue
            if sep:
                # Password was found; remove it.
                output.insert(0, data[end:prev_begin])
                output.insert(0, '********')
                output.insert(0, data[begin:sep + 1])
                prev_begin = begin
        return ''.join(output)
    def _log_invocation(self):
        ''' log that ansible ran the module '''
        # TODO: generalize a separate log function and make log_invocation use it
@ -1100,7 +1101,7 @@ class AnsibleModule(object):
                    param_val = str(param_val)
                elif isinstance(param_val, unicode):
                    param_val = param_val.encode('utf-8')
-                log_args[param] = self._heuristic_log_sanitize(param_val)
+                log_args[param] = heuristic_log_sanitize(param_val)
        module = 'ansible-%s' % os.path.basename(__file__)
        msg = []
@ -1444,27 +1445,27 @@ class AnsibleModule(object):
        # create a printable version of the command for use
        # in reporting later, which strips out things like
        # passwords from the args list
-        if isinstance(args, list):
+        if isinstance(args, basestring):
-            clean_args = " ".join(pipes.quote(arg) for arg in args)
+            to_clean_args = shlex.split(args.encode('utf-8'))
        else:
-            clean_args = args
+            to_clean_args = args
-
+
-        # all clean strings should return two match groups, 
+        clean_args = []
-        # where the first is the CLI argument and the second 
+        is_passwd = False
-        # is the password/key/phrase that will be hidden
+        for arg in to_clean_args:
-        clean_re_strings = [
+            if is_passwd:
-            # this removes things like --password, --pass, --pass-wd, etc.
+                is_passwd = False
-            # optionally followed by an '=' or a space. The password can 
+                clean_args.append('********')
-            # be quoted or not too, though it does not care about quotes
+                continue
-            # that are not balanced
+            if PASSWD_ARG_RE.match(arg):
-            # source: http://blog.stevenlevithan.com/archives/match-quoted-string
+                sep_idx = arg.find('=')
-            r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)',
+                if sep_idx > -1:
-            r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$', 
+                    clean_args.append('%s=********' % arg[:sep_idx])
-            # TODO: add more regex checks here
+                    continue
-        ]
+                else:
-        for re_str in clean_re_strings:
+                    is_passwd = True
-            r = re.compile(re_str)
+            clean_args.append(heuristic_log_sanitize(arg))
-            clean_args = r.sub(r'\1********', clean_args)
+        clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
        if data:
            st_in = subprocess.PIPE
@ -1549,7 +1550,7 @@ class AnsibleModule(object):
            self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
        if rc != 0 and check_rc:
-            msg = stderr.rstrip()
+            msg = heuristic_log_sanitize(stderr.rstrip())
            self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
        # reset the pwd
--- a/lib/ansible/utils/__init__.py
+++ b/lib/ansible/utils/__init__.py
@ -32,6 +32,7 @@ from ansible.utils.su_prompts import *
 from ansible.utils.hashing import secure_hash, secure_hash_s, checksum, checksum_s, md5, md5s
 from ansible.callbacks import display
 from ansible.module_utils.splitter import split_args, unquote
 from ansible.module_utils.basic import heuristic_log_sanitize
 import ansible.constants as C
 import ast
 import time
@ -932,34 +933,18 @@ def sanitize_output(str):
    private_keys = ['password', 'login_password']
-    filter_re = [
+    parts = parse_kv(str)
-        # filter out things like user:pass@foo/whatever
+    output = []
-        # and http://username:pass@wherever/foo
+    for (k, v) in parts.items():
-        re.compile('^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$'),
+        if k in private_keys:
-    ]
+            output.append("%s=VALUE_HIDDEN" % k)
-
+            continue
-    parts = str.split()
+        else:
-    output = ''
+            v = heuristic_log_sanitize(v)
-    for part in parts:
+        output.append('%s=%s' % (k, v))
-        try:
+    output = ' '.join(output)
-            (k,v) = part.split('=', 1)
+    return output
            if k in private_keys:
                output += " %s=VALUE_HIDDEN" % k
            else:
                found = False
                for filter in filter_re:
                    m = filter.match(v)
                    if m:
                        d = m.groupdict()
                        output += " %s=%s" % (k, d['before'] + "********" + d['after'])
                        found = True
                        break
                if not found:
                    output += " %s" % part
        except:
            output += " %s" % part
    return output.strip()
 ####################################################################
 # option handling code for /usr/bin/ansible and ansible-playbook
--- a/test/units/TestModuleUtilsBasic.py
+++ b/test/units/TestModuleUtilsBasic.py
@ -7,6 +7,7 @@ from nose.tools import timed
 from ansible import errors
 from ansible.module_common import ModuleReplacer
 from ansible.module_utils.basic import heuristic_log_sanitize
 from ansible.utils import checksum as utils_checksum
 TEST_MODULE_DATA = """
@ -264,23 +265,23 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
    @timed(5)
    def test_log_sanitize_speed_many_url(self):
-        self.module._heuristic_log_sanitize(self.many_url)
+        heuristic_log_sanitize(self.many_url)
    @timed(5)
    def test_log_sanitize_speed_many_ssh(self):
-        self.module._heuristic_log_sanitize(self.many_ssh)
+        heuristic_log_sanitize(self.many_ssh)
    @timed(5)
    def test_log_sanitize_speed_one_url(self):
-        self.module._heuristic_log_sanitize(self.one_url)
+        heuristic_log_sanitize(self.one_url)
    @timed(5)
    def test_log_sanitize_speed_one_ssh(self):
-        self.module._heuristic_log_sanitize(self.one_ssh)
+        heuristic_log_sanitize(self.one_ssh)
    @timed(5)
    def test_log_sanitize_speed_zero_secrets(self):
-        self.module._heuristic_log_sanitize(self.zero_secrets)
+        heuristic_log_sanitize(self.zero_secrets)
    #
    # Test that the password obfuscation sanitizes somewhat cleanly.
@ -290,8 +291,8 @@ class TestModuleUtilsBasicHelpers(unittest.TestCase):
        url_data = repr(self._gen_data(3, True, True, self.URL_SECRET))
        ssh_data = repr(self._gen_data(3, True, True, self.SSH_SECRET))
-        url_output = self.module._heuristic_log_sanitize(url_data)
+        url_output = heuristic_log_sanitize(url_data)
-        ssh_output = self.module._heuristic_log_sanitize(ssh_data)
+        ssh_output = heuristic_log_sanitize(ssh_data)
        # Basic functionality: Successfully hid the password
        try:
--- a/v2/ansible/module_utils/basic.py
+++ b/v2/ansible/module_utils/basic.py
@ -171,6 +171,7 @@ FILE_COMMON_ARGUMENTS=dict(
    directory_mode = dict(), # used by copy
 )
 PASSWD_ARG_RE = re.compile(r'^[-]{0,2}pass[-]?(word|wd)?')
 def get_platform():
    ''' what's the platform?  example: Linux is a platform. '''
@ -269,6 +270,65 @@ def json_dict_bytes_to_unicode(d):
    else:
        return d
 def heuristic_log_sanitize(data):
    ''' Remove strings that look like passwords from log messages

    Scans ``data`` from right to left for ``user:pass@host`` (ssh style)
    and ``scheme://user:pass@host`` (URL style) patterns and replaces the
    text between the ``:`` separator and the ``@`` with ``********``.

    :arg data: string to sanitize
    :returns: a new string with suspected passwords masked; text that
        contains no recognizable password is returned unchanged

    This is a heuristic: it has false positives and can mask parts of a
    log message that are not passwords.
    '''
    # Currently filters:
    # user:pass@foo/whatever and http://username:pass@wherever/foo
    # This code has false positives and consumes parts of logs that are
    # not passwds
    # begin: start of a passwd containing string
    # end: end of a passwd containing string
    # sep: char between user and passwd
    # prev_begin: where in the overall string to start a search for
    #   a passwd
    # sep_search_end: where in the string to end a search for the sep
    output = []
    begin = len(data)
    prev_begin = begin
    # Any truthy value works here; it only lets the loop run once.
    sep = 1
    while sep:
        # Find the potential end of a passwd
        try:
            end = data.rindex('@', 0, begin)
        except ValueError:
            # No passwd in the rest of the data
            output.insert(0, data[0:begin])
            break
        # Search for the beginning of a passwd
        sep = None
        sep_search_end = end
        while not sep:
            # URL-style username+password
            try:
                begin = data.rindex('://', 0, sep_search_end)
            except ValueError:
                # No url style in the data, check for ssh style in the
                # rest of the string
                begin = 0
            # Search for separator.  begin + 3 skips over the '://'
            # marker; in the ssh-style case (begin == 0) it means a ':'
            # within the first three characters is not considered.
            try:
                sep = data.index(':', begin + 3, end)
            except ValueError:
                # No separator; choices:
                if begin == 0:
                    # Searched the whole string so there's no password
                    # here.  Keep everything that has not been emitted
                    # yet.  This must slice up to prev_begin: begin is 0
                    # at this point, so data[0:begin] would be empty and
                    # the unprocessed head of the string would be lost.
                    output.insert(0, data[0:prev_begin])
                    break
                # Search for a different beginning of the password field.
                sep_search_end = begin
                continue
        if sep:
            # Password was found; remove it.
            output.insert(0, data[end:prev_begin])
            output.insert(0, '********')
            output.insert(0, data[begin:sep + 1])
            prev_begin = begin
    return ''.join(output)
 class AnsibleModule(object):
@ -1009,64 +1069,6 @@ class AnsibleModule(object):
            params = dict()
        return params
    def _heuristic_log_sanitize(self, data):
        ''' Remove strings that look like passwords from log messages '''
        # Currently filters:
        # user:pass@foo/whatever and http://username:pass@wherever/foo
        # This code has false positives and consumes parts of logs that are
        # not passwds
        # begin: start of a passwd containing string
        # end: end of a passwd containing string
        # sep: char between user and passwd
        # prev_begin: where in the overall string to start a search for
        #   a passwd
        # sep_search_end: where in the string to end a search for the sep
        output = []
        begin = len(data)
        prev_begin = begin
        sep = 1
        while sep:
            # Find the potential end of a passwd
            try:
                end = data.rindex('@', 0, begin)
            except ValueError:
                # No passwd in the rest of the data
                output.insert(0, data[0:begin])
                break
            # Search for the beginning of a passwd
            sep = None
            sep_search_end = end
            while not sep:
                # URL-style username+password
                try:
                    begin = data.rindex('://', 0, sep_search_end)
                except ValueError:
                    # No url style in the data, check for ssh style in the
                    # rest of the string
                    begin = 0
                # Search for separator
                try:
                    sep = data.index(':', begin + 3, end)
                except ValueError:
                    # No separator; choices:
                    if begin == 0:
                        # Searched the whole string so there's no password
                        # here.  Return the remaining data
                        output.insert(0, data[0:begin])
                        break
                    # Search for a different beginning of the password field.
                    sep_search_end = begin
                    continue
            if sep:
                # Password was found; remove it.
                output.insert(0, data[end:prev_begin])
                output.insert(0, '********')
                output.insert(0, data[begin:sep + 1])
                prev_begin = begin
        return ''.join(output)
    def _log_invocation(self):
        ''' log that ansible ran the module '''
@ -1090,7 +1092,7 @@ class AnsibleModule(object):
                    param_val = str(param_val)
                elif isinstance(param_val, unicode):
                    param_val = param_val.encode('utf-8')
-                log_args[param] = self._heuristic_log_sanitize(param_val)
+                log_args[param] = heuristic_log_sanitize(param_val)
        module = 'ansible-%s' % os.path.basename(__file__)
        msg = []
@ -1434,27 +1436,27 @@ class AnsibleModule(object):
        # create a printable version of the command for use
        # in reporting later, which strips out things like
        # passwords from the args list
-        if isinstance(args, list):
+        if isinstance(args, basestring):
-            clean_args = " ".join(pipes.quote(arg) for arg in args)
+            to_clean_args = shlex.split(args.encode('utf-8'))
        else:
-            clean_args = args
+            to_clean_args = args
-
+
-        # all clean strings should return two match groups, 
+        clean_args = []
-        # where the first is the CLI argument and the second 
+        is_passwd = False
-        # is the password/key/phrase that will be hidden
+        for arg in to_clean_args:
-        clean_re_strings = [
+            if is_passwd:
-            # this removes things like --password, --pass, --pass-wd, etc.
+                is_passwd = False
-            # optionally followed by an '=' or a space. The password can 
+                clean_args.append('********')
-            # be quoted or not too, though it does not care about quotes
+                continue
-            # that are not balanced
+            if PASSWD_ARG_RE.match(arg):
-            # source: http://blog.stevenlevithan.com/archives/match-quoted-string
+                sep_idx = arg.find('=')
-            r'([-]{0,2}pass[-]?(?:word|wd)?[=\s]?)((?:["\'])?(?:[^\s])*(?:\1)?)',
+                if sep_idx > -1:
-            r'^(?P<before>.*:)(?P<password>.*)(?P<after>\@.*)$', 
+                    clean_args.append('%s=********' % arg[:sep_idx])
-            # TODO: add more regex checks here
+                    continue
-        ]
+                else:
-        for re_str in clean_re_strings:
+                    is_passwd = True
-            r = re.compile(re_str)
+            clean_args.append(heuristic_log_sanitize(arg))
-            clean_args = r.sub(r'\1********', clean_args)
+        clean_args = ' '.join(pipes.quote(arg) for arg in clean_args)
        if data:
            st_in = subprocess.PIPE
@ -1539,7 +1541,7 @@ class AnsibleModule(object):
            self.fail_json(rc=257, msg=traceback.format_exc(), cmd=clean_args)
        if rc != 0 and check_rc:
-            msg = stderr.rstrip()
+            msg = heuristic_log_sanitize(stderr.rstrip())
            self.fail_json(cmd=clean_args, rc=rc, stdout=stdout, stderr=stderr, msg=msg)
        # reset the pwd