From 976d876e5515bdf37d03f53172e2611e76930fa2 Mon Sep 17 00:00:00 2001 From: Dag Wieers Date: Fri, 9 Sep 2016 18:26:19 +0200 Subject: [PATCH] Ensure unicode characters in zip-compressed filenames work correctly (#4702) * Ensure unicode characters in zip-compressed filenames work correctly Another corner-case we are fixing, hoping it doesn't break anything else. This fixes: - The correct encoding of unicode paths internally (so the filenames we scrape from the output match those returned by zipfile) - Disable LANG=C for the unzip command (because it breaks the unicode output, unlike on gtar) * Fix for python3 and other suggestions from @abadger --- lib/ansible/modules/files/unarchive.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/lib/ansible/modules/files/unarchive.py b/lib/ansible/modules/files/unarchive.py index c14864d4666..7f5ab13b4c4 100644 --- a/lib/ansible/modules/files/unarchive.py +++ b/lib/ansible/modules/files/unarchive.py @@ -133,6 +133,7 @@ import time import binascii import codecs from zipfile import ZipFile, BadZipfile +from ansible.module_utils._text import to_text try: # python 3.3+ from shlex import quote @@ -352,7 +353,7 @@ class ZipArchive(object): version = pcs[1] ostype = pcs[2] size = int(pcs[3]) - path = pcs[7] + path = to_text(pcs[7], errors='surrogate_or_strict') # Skip excluded files if path in self.excludes: @@ -597,7 +598,7 @@ class TgzArchive(object): if self.excludes: cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ]) cmd.extend([ '-f', self.src ]) - rc, out, err = self.module.run_command(cmd) + rc, out, err = self.module.run_command(cmd, cwd=self.dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) if rc != 0: raise UnarchiveError('Unable to list files in the archive') @@ -626,7 +627,7 @@ class TgzArchive(object): if self.excludes: cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ]) cmd.extend([ '-f', self.src ]) - rc, out, err = self.module.run_command(cmd) + rc, 
out, err = self.module.run_command(cmd, cwd=self.dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) # Check whether the differences are in something that we're # setting anyway @@ -675,7 +676,7 @@ class TgzArchive(object): if self.excludes: cmd.extend([ '--exclude=' + quote(f) for f in self.excludes ]) cmd.extend([ '-f', self.src ]) - rc, out, err = self.module.run_command(cmd, cwd=self.dest) + rc, out, err = self.module.run_command(cmd, cwd=self.dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) return dict(cmd=cmd, rc=rc, out=out, err=err) def can_handle_archive(self): @@ -746,9 +747,6 @@ def main(): supports_check_mode = True, ) - # We screenscrape a huge amount of commands so use C locale anytime we do - module.run_command_environ_update = dict(LANG='C', LC_ALL='C', LC_MESSAGES='C', LC_CTYPE='C') - src = os.path.expanduser(module.params['src']) dest = os.path.expanduser(module.params['dest']) copy = module.params['copy']