unarchive - add include option (#40522)

This should allow users to extract specific files from an archive as
desired.

Fixes #16130, #27081.

* Rebase and make a few minor changes
* Add changelog
* Improve tests

- move to separate tasks file
- change assertions to check for exactly one file
- use remote_tmp_dir for output dir

* Make exclude and include mutually exclusive
* Don't remove files needed by other tasks
* Fix sanity tests
* Improve feature documentation
* Skip tests that use map() on CentOS 6
* Use fnmatch on include for zip archives
  This matches the behavior of exclude

Co-authored-by: Sam Doran <sdoran@redhat.com>
pull/72892/head
Sijis Aviles 4 years ago committed by GitHub
parent 6608f3aab3
commit 034e9b0252
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,4 @@
minor_changes:
- >
unarchive - add ``include`` parameter to allow extracting specific files
from an archive (https://github.com/ansible/ansible/pull/40522)

@ -58,9 +58,20 @@ options:
exclude: exclude:
description: description:
- List the directory and file entries that you would like to exclude from the unarchive action. - List the directory and file entries that you would like to exclude from the unarchive action.
- Mutually exclusive with C(include).
type: list type: list
default: []
elements: str elements: str
version_added: "2.1" version_added: "2.1"
include:
description:
- List of directory and file entries that you would like to extract from the archive. If C(include)
is not empty, only files listed here will be extracted.
- Mutually exclusive with C(exclude).
type: list
default: []
elements: str
version_added: "2.11"
keep_newer: keep_newer:
description: description:
- Do not replace existing files that are newer than files from the archive. - Do not replace existing files that are newer than files from the archive.
@ -264,6 +275,7 @@ class ZipArchive(object):
self.module = module self.module = module
self.excludes = module.params['exclude'] self.excludes = module.params['exclude']
self.includes = [] self.includes = []
self.include_files = self.module.params['include']
self.cmd_path = self.module.get_bin_path('unzip') self.cmd_path = self.module.get_bin_path('unzip')
self.zipinfocmd_path = self.module.get_bin_path('zipinfo') self.zipinfocmd_path = self.module.get_bin_path('zipinfo')
self._files_in_archive = [] self._files_in_archive = []
@ -337,14 +349,19 @@ class ZipArchive(object):
else: else:
try: try:
for member in archive.namelist():
    # Decide, per archive member, whether it belongs to the extraction set.
    # The ``include`` and ``exclude`` module options are declared mutually
    # exclusive, so at most one of the two filters is active here.
    if self.include_files:
        # Keep the member when it matches any include pattern. any() stops
        # at the first hit, so a member that matches several patterns is
        # recorded only once.
        selected = any(fnmatch.fnmatch(member, pattern) for pattern in self.include_files)
    else:
        # Keep the member unless it matches one of the exclude patterns.
        # An empty (or missing) exclude list keeps every member.
        selected = not any(fnmatch.fnmatch(member, pattern) for pattern in (self.excludes or []))
    if selected:
        self._files_in_archive.append(to_native(member))
except Exception: except Exception:
archive.close() archive.close()
raise UnarchiveError('Unable to list files in the archive') raise UnarchiveError('Unable to list files in the archive')
@ -357,6 +374,8 @@ class ZipArchive(object):
cmd = [self.zipinfocmd_path, '-T', '-s', self.src] cmd = [self.zipinfocmd_path, '-T', '-s', self.src]
if self.excludes: if self.excludes:
cmd.extend(['-x', ] + self.excludes) cmd.extend(['-x', ] + self.excludes)
if self.include_files:
cmd.extend(self.include_files)
rc, out, err = self.module.run_command(cmd) rc, out, err = self.module.run_command(cmd)
old_out = out old_out = out
@ -665,6 +684,8 @@ class ZipArchive(object):
# cmd.extend(map(shell_escape, self.includes)) # cmd.extend(map(shell_escape, self.includes))
if self.excludes: if self.excludes:
cmd.extend(['-x'] + self.excludes) cmd.extend(['-x'] + self.excludes)
if self.include_files:
cmd.extend(self.include_files)
cmd.extend(['-d', self.b_dest]) cmd.extend(['-d', self.b_dest])
rc, out, err = self.module.run_command(cmd) rc, out, err = self.module.run_command(cmd)
return dict(cmd=cmd, rc=rc, out=out, err=err) return dict(cmd=cmd, rc=rc, out=out, err=err)
@ -690,6 +711,7 @@ class TgzArchive(object):
if self.module.check_mode: if self.module.check_mode:
self.module.exit_json(skipped=True, msg="remote module (%s) does not support check mode when using gtar" % self.module._name) self.module.exit_json(skipped=True, msg="remote module (%s) does not support check mode when using gtar" % self.module._name)
self.excludes = [path.rstrip('/') for path in self.module.params['exclude']] self.excludes = [path.rstrip('/') for path in self.module.params['exclude']]
self.include_files = self.module.params['include']
# Prefer gtar (GNU tar) as it supports the compression options -z, -j and -J # Prefer gtar (GNU tar) as it supports the compression options -z, -j and -J
self.cmd_path = self.module.get_bin_path('gtar', None) self.cmd_path = self.module.get_bin_path('gtar', None)
if not self.cmd_path: if not self.cmd_path:
@ -726,8 +748,10 @@ class TgzArchive(object):
if self.excludes: if self.excludes:
cmd.extend(['--exclude=' + f for f in self.excludes]) cmd.extend(['--exclude=' + f for f in self.excludes])
cmd.extend(['-f', self.src]) cmd.extend(['-f', self.src])
rc, out, err = self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) if self.include_files:
cmd.extend(self.include_files)
rc, out, err = self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C'))
if rc != 0: if rc != 0:
raise UnarchiveError('Unable to list files in the archive') raise UnarchiveError('Unable to list files in the archive')
@ -769,6 +793,8 @@ class TgzArchive(object):
if self.excludes: if self.excludes:
cmd.extend(['--exclude=' + f for f in self.excludes]) cmd.extend(['--exclude=' + f for f in self.excludes])
cmd.extend(['-f', self.src]) cmd.extend(['-f', self.src])
if self.include_files:
cmd.extend(self.include_files)
rc, out, err = self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) rc, out, err = self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C'))
# Check whether the differences are in something that we're # Check whether the differences are in something that we're
@ -820,6 +846,8 @@ class TgzArchive(object):
if self.excludes: if self.excludes:
cmd.extend(['--exclude=' + f for f in self.excludes]) cmd.extend(['--exclude=' + f for f in self.excludes])
cmd.extend(['-f', self.src]) cmd.extend(['-f', self.src])
if self.include_files:
cmd.extend(self.include_files)
rc, out, err = self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C')) rc, out, err = self.module.run_command(cmd, cwd=self.b_dest, environ_update=dict(LANG='C', LC_ALL='C', LC_MESSAGES='C'))
return dict(cmd=cmd, rc=rc, out=out, err=err) return dict(cmd=cmd, rc=rc, out=out, err=err)
@ -887,12 +915,14 @@ def main():
list_files=dict(type='bool', default=False), list_files=dict(type='bool', default=False),
keep_newer=dict(type='bool', default=False), keep_newer=dict(type='bool', default=False),
exclude=dict(type='list', elements='str', default=[]), exclude=dict(type='list', elements='str', default=[]),
include=dict(type='list', elements='str', default=[]),
extra_opts=dict(type='list', elements='str', default=[]), extra_opts=dict(type='list', elements='str', default=[]),
validate_certs=dict(type='bool', default=True), validate_certs=dict(type='bool', default=True),
), ),
add_file_common_args=True, add_file_common_args=True,
# check-mode only works for zip files, we cover that later # check-mode only works for zip files, we cover that later
supports_check_mode=True, supports_check_mode=True,
mutually_exclusive=[('include', 'exclude')],
) )
src = module.params['src'] src = module.params['src']

@ -6,6 +6,7 @@
- import_tasks: test_tar_gz_keep_newer.yml - import_tasks: test_tar_gz_keep_newer.yml
- import_tasks: test_zip.yml - import_tasks: test_zip.yml
- import_tasks: test_exclude.yml - import_tasks: test_exclude.yml
- import_tasks: test_include.yml
- import_tasks: test_parent_not_writeable.yml - import_tasks: test_parent_not_writeable.yml
- import_tasks: test_mode.yml - import_tasks: test_mode.yml
- import_tasks: test_quotable_characters.yml - import_tasks: test_quotable_characters.yml

@ -89,4 +89,4 @@
mode: preserve mode: preserve
- name: prep a tar.gz file with directory - name: prep a tar.gz file with directory
shell: tar czvf test-unarchive-dir.tar.gz unarchive-dir chdir={{remote_tmp_dir}} shell: tar czvf test-unarchive-dir.tar.gz unarchive-dir chdir={{remote_tmp_dir}}

@ -37,12 +37,3 @@
file: file:
path: '{{remote_tmp_dir}}/test-unarchive-zip' path: '{{remote_tmp_dir}}/test-unarchive-zip'
state: absent state: absent
- name: remove our test files for the archive
file:
path: '{{remote_tmp_dir}}/{{item}}'
state: absent
with_items:
- foo-unarchive.txt
- foo-unarchive-777.txt
- FOO-UNAR.TXT

@ -0,0 +1,83 @@
# Integration tests for the ``include`` option of the unarchive module.
# Relies on test-unarchive.zip, foo-unarchive.txt and foo-unarchive-777.txt
# already being present in remote_tmp_dir (they are referenced, not created, here).

- name: Create a tar file with multiple files
  shell: tar cvf test-unarchive-multi.tar foo-unarchive-777.txt foo-unarchive.txt
  args:
    chdir: "{{ remote_tmp_dir }}"

- name: Create include test directories
  file:
    state: directory
    path: "{{ remote_tmp_dir }}/{{ item }}"
  loop:
    - include-zip
    - include-tar

- name: Unpack zip file include one file
  unarchive:
    src: "{{ remote_tmp_dir }}/test-unarchive.zip"
    dest: "{{ remote_tmp_dir }}/include-zip"
    include:
      - FOO-UNAR.TXT

- name: Verify that single file was unarchived
  find:
    paths: "{{ remote_tmp_dir }}/include-zip"
  register: unarchive_dir02

# The map filter was added in Jinja2 2.7, which is newer than the version on RHEL/CentOS 6,
# so we skip this validation on those hosts.
# NOTE: as written, the condition only runs the assertion on RedHat-family hosts with
# major version >= 7; hosts of any other OS family skip it as well.
- name: Verify that zip extraction included only one file
  assert:
    that:
      - file_names == ['FOO-UNAR.TXT']
  vars:
    file_names: "{{ unarchive_dir02.files | map(attribute='path') | map('basename') }}"
  when:
    - "ansible_facts.os_family == 'RedHat'"
    - ansible_facts.distribution_major_version is version('7', '>=')

- name: Unpack tar file include one file
  unarchive:
    src: "{{ remote_tmp_dir }}/test-unarchive-multi.tar"
    dest: "{{ remote_tmp_dir }}/include-tar"
    include:
      - foo-unarchive-777.txt

- name: Verify that single file was unarchived from tar
  find:
    paths: "{{ remote_tmp_dir }}/include-tar"
  register: unarchive_dir03

# Same guard as the zip assertion above: this uses the map filter, so it only
# runs on RedHat-family hosts with major version >= 7.
- name: Verify that tar extraction included only one file
  assert:
    that:
      - file_names == ['foo-unarchive-777.txt']
  vars:
    file_names: "{{ unarchive_dir03.files | map(attribute='path') | map('basename') }}"
  when:
    - "ansible_facts.os_family == 'RedHat'"
    - ansible_facts.distribution_major_version is version('7', '>=')

# include and exclude must not be accepted together; the failure is expected
# and inspected by the next task.
- name: Check mutually exclusive parameters
  unarchive:
    src: "{{ remote_tmp_dir }}/test-unarchive-multi.tar"
    dest: "{{ remote_tmp_dir }}/include-tar"
    include:
      - foo-unarchive-777.txt
    exclude:
      - foo
  ignore_errors: yes
  register: unarchive_mutually_exclusive_check

- name: Verify that mutually exclusive parameters were rejected
  assert:
    that:
      - unarchive_mutually_exclusive_check is failed
      - "'mutually exclusive' in unarchive_mutually_exclusive_check.msg"

- name: "Remove include feature tests directory"
  file:
    state: absent
    path: "{{ remote_tmp_dir }}/{{ item }}"
  loop:
    - 'include-zip'
    - 'include-tar'
Loading…
Cancel
Save