From 930ea5dd55f44ad2a79d05f1aaed136dd8db0552 Mon Sep 17 00:00:00 2001 From: Virgil Dupras Date: Sun, 1 May 2016 20:34:43 -0400 Subject: [PATCH] unarchive: use Python's tarfile module for tar listing (#3575) * unarchive: use Python's tarfile module for tar listing fixes https://github.com/ansible/ansible/issues/11348 Depending on the current active locale, `tar`'s file listing can end up spitting backslash-escaped characters. Unfortunately, when that happens, we end up with double-escaped backslashes, giving us a wrong path, making our action fail. We could try un-double-escaping our paths, but that would be complicated and, I think, error-prone. The easiest way forward seemed to be to simply use the `tarfile` module. Why use it only for listing? Because the `unarchive` option also supports the `extra_opts` option, and supporting this would require us to mimic `tar`'s interface. For listing files, however, I don't think that the loss of `extra_opts` support causes problems (well, I hope so). * unarchive: re-add xz decompression support Following the previous change to use Python's `tarfile` module for tar file listing, we lost `xz` decompression support. This commit re-adds it by adding a special case in `TarXzArchive` that pre-decompresses the source file. 
--- lib/ansible/modules/files/unarchive.py | 35 +++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/lib/ansible/modules/files/unarchive.py b/lib/ansible/modules/files/unarchive.py index cf9005a47b6..6ded1e779ba 100644 --- a/lib/ansible/modules/files/unarchive.py +++ b/lib/ansible/modules/files/unarchive.py @@ -5,6 +5,7 @@ # (c) 2013, Dylan Martin # (c) 2015, Toshio Kuratomi # (c) 2016, Dag Wieers +# (c) 2016, Virgil Dupras # # This file is part of Ansible # @@ -114,6 +115,8 @@ import datetime import time import binascii from zipfile import ZipFile +import tarfile +import subprocess # String from tar that shows the tar contents are different from the # filesystem @@ -492,22 +495,23 @@ class TgzArchive(object): self.zipflag = 'z' self._files_in_archive = [] + def _get_tar_fileobj(self): + """Returns a file object that can be read by ``tarfile.open()``.""" + return open(self.src, 'rb') + @property def files_in_archive(self, force_refresh=False): if self._files_in_archive and not force_refresh: return self._files_in_archive - cmd = '%s -t%s' % (self.cmd_path, self.zipflag) - if self.opts: - cmd += ' ' + ' '.join(self.opts) - if self.excludes: - cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' - cmd += ' -f "%s"' % self.src - rc, out, err = self.module.run_command(cmd) - if rc != 0: + # The use of Python's tarfile module here allows us to easily avoid tricky file encoding + # problems. 
Ref #11348 + try: + tf = tarfile.open(fileobj=self._get_tar_fileobj()) + except Exception: raise UnarchiveError('Unable to list files in the archive') - for filename in out.splitlines(): + for filename in tf.getnames(): if filename and filename not in self.excludes: self._files_in_archive.append(filename) return self._files_in_archive @@ -605,6 +609,19 @@ class TarXzArchive(TgzArchive): super(TarXzArchive, self).__init__(src, dest, file_args, module) self.zipflag = 'J' + def _get_tar_fileobj(self): + # Python's tarfile module doesn't support xz compression so we have to manually uncompress + # it first. + xz_bin_path = self.module.get_bin_path('xz') + xz_stdout = tempfile.TemporaryFile() + # we don't use self.module.run_command() to avoid loading the whole archive in memory. + cmd = subprocess.Popen([xz_bin_path, '-dc', self.src], stdout=xz_stdout) + rc = cmd.wait() + if rc != 0: + raise UnarchiveError("Could not uncompress with xz") + xz_stdout.seek(0) + return xz_stdout + # try handlers in order and return the one that works or bail if none work def pick_handler(src, dest, file_args, module):