Ensure that the src file contents is converted to unicode in diff info (#45744)

* Ensure that the src file contents is converted to unicode in diff info. Fixes #45717

* Fix up and cleanup

* The diff functionality in the callback plugins should have the
  to_text() calls removed since we're now doing it in ActionBase
* catching of UnicodeError and warnings in the callback diff
  functionality from 61d01f549f haven't been
  needed since we switched to to_text so remove them.
* Add a note to ActionBase's diff function giving an example of when the
  diff function will be inaccurate and how to fix it

* Fix callback get_diff() tests

I believe the unittests of callback's get_diff() were wrong.  They were
sending in a list where strings were expected.  Because previous code
was transforming the lists into strings via their repr, the previous
tests did not fail but they would have formatted the test cases output
in an odd way if we had looked at it.
pull/45929/head
Matt Martz 6 years ago committed by GitHub
parent 24dd87bd0a
commit 95e77ac853
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,5 @@
bugfixes:
- copy - Ensure that the src file contents is converted to unicode in diff
information so that it is properly wrapped by AnsibleUnsafeText to prevent
unexpected templating of diff data in Python3
(https://github.com/ansible/ansible/issues/45717)

@ -1,3 +1,4 @@
# coding: utf-8
# Copyright: (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com> # Copyright: (c) 2012-2014, Michael DeHaan <michael.dehaan@gmail.com>
# Copyright: (c) 2018, Ansible Project # Copyright: (c) 2018, Ansible Project
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt) # GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
@ -1013,6 +1014,15 @@ class ActionBase(with_metaclass(ABCMeta, object)):
def _get_diff_data(self, destination, source, task_vars, source_file=True): def _get_diff_data(self, destination, source, task_vars, source_file=True):
# Note: Since we do not diff the source and destination before we transform from bytes into
# text the diff between source and destination may not be accurate. To fix this, we'd need
# to move the diffing from the callback plugins into here.
#
# Example of data which would cause trouble is src_content == b'\xff' and dest_content ==
# b'\xfe'. Neither of those are valid utf-8 so both get turned into the replacement
# character: diff['before'] = u'<27>' ; diff['after'] = u'<27>' When the callback plugin later
# diffs before and after it shows an empty diff.
diff = {} diff = {}
display.debug("Going to peek to see if file has changed permissions") display.debug("Going to peek to see if file has changed permissions")
peek_result = self._execute_module(module_name='file', module_args=dict(path=destination, _diff_peek=True), task_vars=task_vars, persist_files=True) peek_result = self._execute_module(module_name='file', module_args=dict(path=destination, _diff_peek=True), task_vars=task_vars, persist_files=True)
@ -1020,22 +1030,22 @@ class ActionBase(with_metaclass(ABCMeta, object)):
if not peek_result.get('failed', False) or peek_result.get('rc', 0) == 0: if not peek_result.get('failed', False) or peek_result.get('rc', 0) == 0:
if peek_result.get('state') == 'absent': if peek_result.get('state') == 'absent':
diff['before'] = '' diff['before'] = u''
elif peek_result.get('appears_binary'): elif peek_result.get('appears_binary'):
diff['dst_binary'] = 1 diff['dst_binary'] = 1
elif peek_result.get('size') and C.MAX_FILE_SIZE_FOR_DIFF > 0 and peek_result['size'] > C.MAX_FILE_SIZE_FOR_DIFF: elif peek_result.get('size') and C.MAX_FILE_SIZE_FOR_DIFF > 0 and peek_result['size'] > C.MAX_FILE_SIZE_FOR_DIFF:
diff['dst_larger'] = C.MAX_FILE_SIZE_FOR_DIFF diff['dst_larger'] = C.MAX_FILE_SIZE_FOR_DIFF
else: else:
display.debug("Slurping the file %s" % source) display.debug(u"Slurping the file %s" % source)
dest_result = self._execute_module(module_name='slurp', module_args=dict(path=destination), task_vars=task_vars, persist_files=True) dest_result = self._execute_module(module_name='slurp', module_args=dict(path=destination), task_vars=task_vars, persist_files=True)
if 'content' in dest_result: if 'content' in dest_result:
dest_contents = dest_result['content'] dest_contents = dest_result['content']
if dest_result['encoding'] == 'base64': if dest_result['encoding'] == u'base64':
dest_contents = base64.b64decode(dest_contents) dest_contents = base64.b64decode(dest_contents)
else: else:
raise AnsibleError("unknown encoding in content option, failed: %s" % dest_result) raise AnsibleError("unknown encoding in content option, failed: %s" % to_native(dest_result))
diff['before_header'] = destination diff['before_header'] = destination
diff['before'] = dest_contents diff['before'] = to_text(dest_contents)
if source_file: if source_file:
st = os.stat(source) st = os.stat(source)
@ -1053,17 +1063,17 @@ class ActionBase(with_metaclass(ABCMeta, object)):
diff['src_binary'] = 1 diff['src_binary'] = 1
else: else:
diff['after_header'] = source diff['after_header'] = source
diff['after'] = src_contents diff['after'] = to_text(src_contents)
else: else:
display.debug("source of file passed in") display.debug(u"source of file passed in")
diff['after_header'] = 'dynamically generated' diff['after_header'] = u'dynamically generated'
diff['after'] = source diff['after'] = source
if self._play_context.no_log: if self._play_context.no_log:
if 'before' in diff: if 'before' in diff:
diff["before"] = "" diff["before"] = u""
if 'after' in diff: if 'after' in diff:
diff["after"] = " [[ Diff output has been hidden because 'no_log: true' was specified for this result ]]\n" diff["after"] = u" [[ Diff output has been hidden because 'no_log: true' was specified for this result ]]\n"
return diff return diff

@ -31,7 +31,6 @@ from collections import MutableMapping
from ansible import constants as C from ansible import constants as C
from ansible.parsing.ajson import AnsibleJSONEncoder from ansible.parsing.ajson import AnsibleJSONEncoder
from ansible.plugins import AnsiblePlugin, get_plugin_class from ansible.plugins import AnsiblePlugin, get_plugin_class
from ansible.module_utils._text import to_text
from ansible.utils.color import stringc from ansible.utils.color import stringc
from ansible.vars.clean import strip_internal_keys from ansible.vars.clean import strip_internal_keys
@ -155,67 +154,62 @@ class CallbackBase(AnsiblePlugin):
ret = [] ret = []
for diff in difflist: for diff in difflist:
try:
with warnings.catch_warnings():
warnings.simplefilter('ignore')
if 'dst_binary' in diff: if 'dst_binary' in diff:
ret.append("diff skipped: destination file appears to be binary\n") ret.append(u"diff skipped: destination file appears to be binary\n")
if 'src_binary' in diff: if 'src_binary' in diff:
ret.append("diff skipped: source file appears to be binary\n") ret.append(u"diff skipped: source file appears to be binary\n")
if 'dst_larger' in diff: if 'dst_larger' in diff:
ret.append("diff skipped: destination file size is greater than %d\n" % diff['dst_larger']) ret.append(u"diff skipped: destination file size is greater than %d\n" % diff['dst_larger'])
if 'src_larger' in diff: if 'src_larger' in diff:
ret.append("diff skipped: source file size is greater than %d\n" % diff['src_larger']) ret.append(u"diff skipped: source file size is greater than %d\n" % diff['src_larger'])
if 'before' in diff and 'after' in diff: if 'before' in diff and 'after' in diff:
# format complex structures into 'files' # format complex structures into 'files'
for x in ['before', 'after']: for x in ['before', 'after']:
if isinstance(diff[x], MutableMapping): if isinstance(diff[x], MutableMapping):
diff[x] = json.dumps(diff[x], sort_keys=True, indent=4, separators=(',', ': ')) + '\n' diff[x] = json.dumps(diff[x], sort_keys=True, indent=4, separators=(u',', u': ')) + u'\n'
if 'before_header' in diff: if 'before_header' in diff:
before_header = "before: %s" % diff['before_header'] before_header = u"before: %s" % diff['before_header']
else: else:
before_header = 'before' before_header = u'before'
if 'after_header' in diff: if 'after_header' in diff:
after_header = "after: %s" % diff['after_header'] after_header = u"after: %s" % diff['after_header']
else: else:
after_header = 'after' after_header = u'after'
before_lines = to_text(diff['before']).splitlines(True) before_lines = diff['before'].splitlines(True)
after_lines = to_text(diff['after']).splitlines(True) after_lines = diff['after'].splitlines(True)
if before_lines and not before_lines[-1].endswith('\n'): if before_lines and not before_lines[-1].endswith(u'\n'):
before_lines[-1] += '\n\\ No newline at end of file\n' before_lines[-1] += u'\n\\ No newline at end of file\n'
if after_lines and not after_lines[-1].endswith('\n'): if after_lines and not after_lines[-1].endswith('\n'):
after_lines[-1] += '\n\\ No newline at end of file\n' after_lines[-1] += u'\n\\ No newline at end of file\n'
differ = difflib.unified_diff(before_lines, differ = difflib.unified_diff(before_lines,
after_lines, after_lines,
fromfile=before_header, fromfile=before_header,
tofile=after_header, tofile=after_header,
fromfiledate='', fromfiledate=u'',
tofiledate='', tofiledate=u'',
n=C.DIFF_CONTEXT) n=C.DIFF_CONTEXT)
difflines = list(differ) difflines = list(differ)
if len(difflines) >= 3 and sys.version_info[:2] == (2, 6): if len(difflines) >= 3 and sys.version_info[:2] == (2, 6):
# difflib in Python 2.6 adds trailing spaces after # difflib in Python 2.6 adds trailing spaces after
# filenames in the -- before/++ after headers. # filenames in the -- before/++ after headers.
difflines[0] = difflines[0].replace(' \n', '\n') difflines[0] = difflines[0].replace(u' \n', u'\n')
difflines[1] = difflines[1].replace(' \n', '\n') difflines[1] = difflines[1].replace(u' \n', u'\n')
# it also treats empty files differently # it also treats empty files differently
difflines[2] = difflines[2].replace('-1,0', '-0,0').replace('+1,0', '+0,0') difflines[2] = difflines[2].replace(u'-1,0', u'-0,0').replace(u'+1,0', u'+0,0')
has_diff = False has_diff = False
for line in difflines: for line in difflines:
has_diff = True has_diff = True
if line.startswith('+'): if line.startswith(u'+'):
line = stringc(line, C.COLOR_DIFF_ADD) line = stringc(line, C.COLOR_DIFF_ADD)
elif line.startswith('-'): elif line.startswith(u'-'):
line = stringc(line, C.COLOR_DIFF_REMOVE) line = stringc(line, C.COLOR_DIFF_REMOVE)
elif line.startswith('@@'): elif line.startswith(u'@@'):
line = stringc(line, C.COLOR_DIFF_LINES) line = stringc(line, C.COLOR_DIFF_LINES)
ret.append(line) ret.append(line)
if has_diff: if has_diff:
ret.append('\n') ret.append('\n')
if 'prepared' in diff: if 'prepared' in diff:
ret.append(to_text(diff['prepared'])) ret.append(diff['prepared'])
except UnicodeDecodeError:
ret.append(">> the files are different, but the diff library cannot compare unicode strings\n\n")
return u''.join(ret) return u''.join(ret)
def _get_item_label(self, result): def _get_item_label(self, result):

@ -175,9 +175,6 @@ class TestCallbackDumpResults(unittest.TestCase):
self.assertTrue('LEFTIN' in json_out) self.assertTrue('LEFTIN' in json_out)
# TODO: triggr the 'except UnicodeError' around _get_diff
# that try except orig appeared in 61d01f549f2143fd9adfa4ffae42f09d24649c26
# in 2013 so maybe a < py2.6 issue
class TestCallbackDiff(unittest.TestCase): class TestCallbackDiff(unittest.TestCase):
def setUp(self): def setUp(self):
@ -188,28 +185,28 @@ class TestCallbackDiff(unittest.TestCase):
def test_difflist(self): def test_difflist(self):
# TODO: split into smaller tests? # TODO: split into smaller tests?
difflist = [{'before': ['preface\nThe Before String\npostscript'], difflist = [{'before': u'preface\nThe Before String\npostscript',
'after': ['preface\nThe After String\npostscript'], 'after': u'preface\nThe After String\npostscript',
'before_header': 'just before', 'before_header': u'just before',
'after_header': 'just after' 'after_header': u'just after'
}, },
{'before': ['preface\nThe Before String\npostscript'], {'before': u'preface\nThe Before String\npostscript',
'after': ['preface\nThe After String\npostscript'], 'after': u'preface\nThe After String\npostscript',
}, },
{'src_binary': 'chicane'}, {'src_binary': 'chicane'},
{'dst_binary': 'chicanery'}, {'dst_binary': 'chicanery'},
{'dst_larger': 1}, {'dst_larger': 1},
{'src_larger': 2}, {'src_larger': 2},
{'prepared': 'what does prepared do?'}, {'prepared': u'what does prepared do?'},
{'before_header': 'just before'}, {'before_header': u'just before'},
{'after_header': 'just after'}] {'after_header': u'just after'}]
res = self.cb._get_diff(difflist) res = self.cb._get_diff(difflist)
self.assertIn('Before String', res) self.assertIn(u'Before String', res)
self.assertIn('After String', res) self.assertIn(u'After String', res)
self.assertIn('just before', res) self.assertIn(u'just before', res)
self.assertIn('just after', res) self.assertIn(u'just after', res)
def test_simple_diff(self): def test_simple_diff(self):
self.assertMultiLineEqual( self.assertMultiLineEqual(

Loading…
Cancel
Save