unarchive: use Python's tarfile module for tar listing (#3575)

* unarchive: use Python's tarfile module for tar listing

fixes https://github.com/ansible/ansible/issues/11348

Depending on the currently active locale, `tar`'s file listing can end up
spitting out backslash-escaped characters. Unfortunately, when that happens,
we end up with double-escaped backslashes, which gives us a wrong path
and makes our action fail.

We could try un-double-escaping our paths, but that would be complicated
and, I think, error-prone. The easiest way forward seemed to simply use
the `tarfile` module.

Why use it only for listing? Because the `unarchive` module also
supports the `extra_opts` option, and supporting it through `tarfile`
would require us to mimic `tar`'s command-line interface.

For listing files, however, I don't think that the loss of `extra_opts`
support causes problems (well, I hope not).

* unarchive: re-add xz decompression support

Following the previous change to use Python's `tarfile` module for tar file
listing, we lost `xz` decompression support. This commit re-adds it by
adding a special case in `TarXzArchive` that pre-decompresses the source
file.
pull/18777/head
Virgil Dupras 8 years ago committed by Matt Clay
parent a0fe26b98b
commit 930ea5dd55

@ -5,6 +5,7 @@
# (c) 2013, Dylan Martin <dmartin@seattlecentral.edu> # (c) 2013, Dylan Martin <dmartin@seattlecentral.edu>
# (c) 2015, Toshio Kuratomi <tkuratomi@ansible.com> # (c) 2015, Toshio Kuratomi <tkuratomi@ansible.com>
# (c) 2016, Dag Wieers <dag@wieers.com> # (c) 2016, Dag Wieers <dag@wieers.com>
# (c) 2016, Virgil Dupras <hsoft@hardcoded.net>
# #
# This file is part of Ansible # This file is part of Ansible
# #
@ -114,6 +115,8 @@ import datetime
import time import time
import binascii import binascii
from zipfile import ZipFile from zipfile import ZipFile
import tarfile
import subprocess
# String from tar that shows the tar contents are different from the # String from tar that shows the tar contents are different from the
# filesystem # filesystem
@ -492,22 +495,23 @@ class TgzArchive(object):
self.zipflag = 'z' self.zipflag = 'z'
self._files_in_archive = [] self._files_in_archive = []
def _get_tar_fileobj(self):
"""Returns a file object that can be read by ``tarfile.open()``."""
return open(self.src, 'rb')
@property @property
def files_in_archive(self, force_refresh=False): def files_in_archive(self, force_refresh=False):
if self._files_in_archive and not force_refresh: if self._files_in_archive and not force_refresh:
return self._files_in_archive return self._files_in_archive
cmd = '%s -t%s' % (self.cmd_path, self.zipflag) # The use of Python's tarfile module here allows us to easily avoid tricky file encoding
if self.opts: # problems. Ref #11348
cmd += ' ' + ' '.join(self.opts) try:
if self.excludes: tf = tarfile.open(fileobj=self._get_tar_fileobj())
cmd += ' --exclude="' + '" --exclude="'.join(self.excludes) + '"' except Exception:
cmd += ' -f "%s"' % self.src
rc, out, err = self.module.run_command(cmd)
if rc != 0:
raise UnarchiveError('Unable to list files in the archive') raise UnarchiveError('Unable to list files in the archive')
for filename in out.splitlines(): for filename in tf.getnames():
if filename and filename not in self.excludes: if filename and filename not in self.excludes:
self._files_in_archive.append(filename) self._files_in_archive.append(filename)
return self._files_in_archive return self._files_in_archive
@ -605,6 +609,19 @@ class TarXzArchive(TgzArchive):
super(TarXzArchive, self).__init__(src, dest, file_args, module) super(TarXzArchive, self).__init__(src, dest, file_args, module)
self.zipflag = 'J' self.zipflag = 'J'
def _get_tar_fileobj(self):
    """Return a file object holding the xz-decompressed contents of ``self.src``.

    Python's tarfile module doesn't support xz compression, so the archive
    is decompressed with the external ``xz`` binary into an anonymous
    temporary file, which is rewound and returned.

    Raises:
        UnarchiveError: if no ``xz`` binary path is available or ``xz``
            exits with a non-zero status.
    """
    xz_bin_path = self.module.get_bin_path('xz')
    # NOTE(review): assumes get_bin_path() returns a falsy value when the
    # binary is missing -- confirm against the module helper.
    if not xz_bin_path:
        raise UnarchiveError("Could not find the xz executable")

    xz_stdout = tempfile.TemporaryFile()
    try:
        # We don't use self.module.run_command() to avoid loading the whole
        # archive in memory; xz writes straight into the temporary file.
        rc = subprocess.Popen([xz_bin_path, '-dc', self.src], stdout=xz_stdout).wait()
        if rc != 0:
            raise UnarchiveError("Could not uncompress with xz")
        xz_stdout.seek(0)
    except Exception:
        # Don't leak the temporary file when decompression fails.
        xz_stdout.close()
        raise
    return xz_stdout
# try handlers in order and return the one that works or bail if none work # try handlers in order and return the one that works or bail if none work
def pick_handler(src, dest, file_args, module): def pick_handler(src, dest, file_args, module):

Loading…
Cancel
Save