You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ansible/lib/ansible/modules/get_url.py

707 lines
26 KiB
Python

# -*- coding: utf-8 -*-
# Copyright: (c) 2012, Jan-Piet Mens <jpmens () gmail.com>
# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
from __future__ import absolute_import, division, print_function
__metaclass__ = type
DOCUMENTATION = r'''
---
module: get_url
short_description: Downloads files from HTTP, HTTPS, or FTP to node
description:
- Downloads files from HTTP, HTTPS, or FTP to the remote server. The remote
server I(must) have direct access to the remote resource.
- By default, if an environment variable C(<protocol>_proxy) is set on
the target host, requests will be sent through that proxy. This
behaviour can be overridden by setting a variable for this task
(see R(setting the environment,playbooks_environment)),
or by using the use_proxy option.
- HTTP redirects can redirect from HTTP to HTTPS so you should be sure that
your proxy environment for both protocols is correct.
- From Ansible 2.4 when run with C(--check), it will do a HEAD request to validate the URL but
will not download the entire file or verify it against hashes and will report incorrect changed status.
- For Windows targets, use the M(ansible.windows.win_get_url) module instead.
version_added: '0.6'
options:
ciphers:
description:
- SSL/TLS Ciphers to use for the request
- 'When a list is provided, all ciphers are joined in order with C(:)'
- See the L(OpenSSL Cipher List Format,https://www.openssl.org/docs/manmaster/man1/openssl-ciphers.html#CIPHER-LIST-FORMAT)
for more details.
- The available ciphers is dependent on the Python and OpenSSL/LibreSSL versions
type: list
elements: str
version_added: '2.14'
decompress:
description:
- Whether to attempt to decompress gzip content-encoded responses
type: bool
default: true
version_added: '2.14'
url:
description:
- HTTP, HTTPS, or FTP URL in the form (http|https|ftp)://[user[:pass]]@host.domain[:port]/path
type: str
required: true
dest:
description:
- Absolute path of where to download the file to.
- If C(dest) is a directory, either the server provided filename or, if
none provided, the base name of the URL on the remote server will be
used. If a directory, C(force) has no effect.
- If C(dest) is a directory, the file will always be downloaded
(regardless of the C(force) and C(checksum) option), but
replaced only if the contents changed.
type: path
required: true
tmp_dest:
description:
- Absolute path of where temporary file is downloaded to.
- When run on Ansible 2.5 or greater, path defaults to ansible's remote_tmp setting
- When run on Ansible prior to 2.5, it defaults to C(TMPDIR), C(TEMP) or C(TMP) env variables or a platform specific value.
- U(https://docs.python.org/3/library/tempfile.html#tempfile.tempdir)
type: path
version_added: '2.1'
force:
description:
- If C(true) and C(dest) is not a directory, will download the file every
time and replace the file if the contents change. If C(false), the file
will only be downloaded if the destination does not exist. Generally
should be C(true) only for small local files.
- Prior to 0.6, this module behaved as if C(true) was the default.
type: bool
default: no
version_added: '0.7'
backup:
description:
- Create a backup file including the timestamp information so you can get
the original file back if you somehow clobbered it incorrectly.
type: bool
default: no
version_added: '2.1'
checksum:
description:
- 'If a checksum is passed to this parameter, the digest of the
destination file will be calculated after it is downloaded to ensure
its integrity and verify that the transfer completed successfully.
Format: <algorithm>:<checksum|url>, e.g. checksum="sha256:D98291AC[...]B6DC7B97",
checksum="sha256:http://example.com/path/sha256sum.txt"'
- If you worry about portability, only the sha1 algorithm is available
on all platforms and python versions.
- The third party hashlib library can be installed for access to additional algorithms.
- Additionally, if a checksum is passed to this parameter, and the file exist under
the C(dest) location, the I(destination_checksum) would be calculated, and if
checksum equals I(destination_checksum), the file download would be skipped
(unless C(force) is true). If the checksum does not equal I(destination_checksum),
the destination file is deleted.
type: str
default: ''
version_added: "2.0"
use_proxy:
description:
- if C(false), it will not use a proxy, even if one is defined in
an environment variable on the target hosts.
type: bool
default: yes
validate_certs:
description:
- If C(false), SSL certificates will not be validated.
- This should only be used on personally controlled sites using self-signed certificates.
type: bool
default: yes
timeout:
description:
- Timeout in seconds for URL request.
type: int
default: 10
version_added: '1.8'
headers:
description:
- Add custom HTTP headers to a request in hash/dict format.
- The hash/dict format was added in Ansible 2.6.
- Previous versions used a C("key:value,key:value") string format.
- The C("key:value,key:value") string format is deprecated and has been removed in version 2.10.
type: dict
version_added: '2.0'
url_username:
description:
- The username for use in HTTP basic authentication.
- This parameter can be used without C(url_password) for sites that allow empty passwords.
- Since version 2.8 you can also use the C(username) alias for this option.
type: str
aliases: ['username']
version_added: '1.6'
url_password:
description:
- The password for use in HTTP basic authentication.
- If the C(url_username) parameter is not specified, the C(url_password) parameter will not be used.
- Since version 2.8 you can also use the 'password' alias for this option.
type: str
aliases: ['password']
version_added: '1.6'
force_basic_auth:
description:
- Force the sending of the Basic authentication header upon initial request.
- httplib2, the library used by the uri module only sends authentication information when a webservice
responds to an initial request with a 401 status. Since some basic auth services do not properly
send a 401, logins will fail.
type: bool
default: no
version_added: '2.0'
client_cert:
description:
- PEM formatted certificate chain file to be used for SSL client authentication.
- This file can also include the key as well, and if the key is included, C(client_key) is not required.
type: path
version_added: '2.4'
client_key:
description:
- PEM formatted file that contains your private key to be used for SSL client authentication.
- If C(client_cert) contains both the certificate and key, this option is not required.
type: path
version_added: '2.4'
http_agent:
description:
- Header to identify as, generally appears in web server logs.
type: str
default: ansible-httpget
unredirected_headers:
description:
- A list of header names that will not be sent on subsequent redirected requests. This list is case
insensitive. By default all headers will be redirected. In some cases it may be beneficial to list
headers such as C(Authorization) here to avoid potential credential exposure.
default: []
type: list
elements: str
version_added: '2.12'
use_gssapi:
description:
- Use GSSAPI to perform the authentication, typically this is for Kerberos or Kerberos through Negotiate
authentication.
- Requires the Python library L(gssapi,https://github.com/pythongssapi/python-gssapi) to be installed.
- Credentials for GSSAPI can be specified with I(url_username)/I(url_password) or with the GSSAPI env var
C(KRB5CCNAME) that specified a custom Kerberos credential cache.
- NTLM authentication is I(not) supported even if the GSSAPI mech for NTLM has been installed.
type: bool
default: no
version_added: '2.11'
use_netrc:
description:
- Determining whether to use credentials from ``~/.netrc`` file
- By default .netrc is used with Basic authentication headers
- When set to False, .netrc credentials are ignored
type: bool
default: true
version_added: '2.14'
# informational: requirements for nodes
extends_documentation_fragment:
- files
- action_common_attributes
attributes:
check_mode:
details: the changed status will reflect comparison to an empty source file
support: partial
diff_mode:
support: none
platform:
platforms: posix
notes:
- For Windows targets, use the M(ansible.windows.win_get_url) module instead.
seealso:
- module: ansible.builtin.uri
- module: ansible.windows.win_get_url
author:
- Jan-Piet Mens (@jpmens)
'''
EXAMPLES = r'''
- name: Download foo.conf
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
mode: '0440'
- name: Download file and force basic auth
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
force_basic_auth: yes
- name: Download file with custom HTTP headers
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
headers:
key1: one
key2: two
- name: Download file with check (sha256)
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
checksum: sha256:b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c
- name: Download file with check (md5)
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
checksum: md5:66dffb5228a211e61d6d7ef4a86f5758
- name: Download file with checksum url (sha256)
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
checksum: sha256:http://example.com/path/sha256sum.txt
- name: Download file from a file path
ansible.builtin.get_url:
url: file:///tmp/afile.txt
dest: /tmp/afilecopy.txt
- name: < Fetch file that requires authentication.
username/password only available since 2.8, in older versions you need to use url_username/url_password
ansible.builtin.get_url:
url: http://example.com/path/file.conf
dest: /etc/foo.conf
username: bar
password: '{{ mysecret }}'
'''
RETURN = r'''
backup_file:
description: name of backup file created after download
returned: changed and if backup=yes
type: str
sample: /path/to/file.txt.2015-02-12@22:09~
checksum_dest:
description: sha1 checksum of the file after copy
returned: success
type: str
sample: 6e642bb8dd5c2e027bf21dd923337cbb4214f827
checksum_src:
description: sha1 checksum of the file
returned: success
type: str
sample: 6e642bb8dd5c2e027bf21dd923337cbb4214f827
dest:
description: destination file/path
returned: success
type: str
sample: /path/to/file.txt
elapsed:
description: The number of seconds that elapsed while performing the download
returned: always
type: int
sample: 23
gid:
description: group id of the file
returned: success
type: int
sample: 100
group:
description: group of the file
returned: success
type: str
sample: "httpd"
md5sum:
description: md5 checksum of the file after download
returned: when supported
type: str
sample: "2a5aeecc61dc98c4d780b14b330e3282"
mode:
description: permissions of the target
returned: success
type: str
sample: "0644"
msg:
description: the HTTP message from the request
returned: always
type: str
sample: OK (unknown bytes)
owner:
description: owner of the file
returned: success
type: str
sample: httpd
secontext:
description: the SELinux security context of the file
returned: success
type: str
sample: unconfined_u:object_r:user_tmp_t:s0
size:
description: size of the target
returned: success
type: int
sample: 1220
src:
description: source file used after download
returned: always
type: str
sample: /tmp/tmpAdFLdV
state:
description: state of the target
returned: success
type: str
sample: file
status_code:
description: the HTTP status code from the request
returned: always
type: int
sample: 200
uid:
description: owner id of the file, after execution
returned: success
type: int
sample: 100
url:
description: the actual URL used for the request
returned: always
type: str
sample: https://www.ansible.com/
'''
import datetime
import os
import re
import shutil
import tempfile
import traceback
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.six.moves.urllib.parse import urlsplit
from ansible.module_utils._text import to_native
from ansible.module_utils.urls import fetch_url, url_argument_spec
# ==============================================================
# url handling
def url_filename(url):
fn = os.path.basename(urlsplit(url)[2])
if fn == '':
return 'index.html'
return fn
def url_get(module, url, dest, use_proxy, last_mod_time, force, timeout=10, headers=None, tmp_dest='', method='GET', unredirected_headers=None,
decompress=True, ciphers=None, use_netrc=True):
"""
Download data from the url and store in a temporary file.
Return (tempfile, info about the request)
"""
start = datetime.datetime.utcnow()
rsp, info = fetch_url(module, url, use_proxy=use_proxy, force=force, last_mod_time=last_mod_time, timeout=timeout, headers=headers, method=method,
unredirected_headers=unredirected_headers, decompress=decompress, ciphers=ciphers, use_netrc=use_netrc)
elapsed = (datetime.datetime.utcnow() - start).seconds
if info['status'] == 304:
module.exit_json(url=url, dest=dest, changed=False, msg=info.get('msg', ''), status_code=info['status'], elapsed=elapsed)
# Exceptions in fetch_url may result in a status -1, the ensures a proper error to the user in all cases
if info['status'] == -1:
module.fail_json(msg=info['msg'], url=url, dest=dest, elapsed=elapsed)
if info['status'] != 200 and not url.startswith('file:/') and not (url.startswith('ftp:/') and info.get('msg', '').startswith('OK')):
module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'], url=url, dest=dest, elapsed=elapsed)
# create a temporary file and copy content to do checksum-based replacement
if tmp_dest:
# tmp_dest should be an existing dir
tmp_dest_is_dir = os.path.isdir(tmp_dest)
if not tmp_dest_is_dir:
if os.path.exists(tmp_dest):
module.fail_json(msg="%s is a file but should be a directory." % tmp_dest, elapsed=elapsed)
else:
module.fail_json(msg="%s directory does not exist." % tmp_dest, elapsed=elapsed)
else:
tmp_dest = module.tmpdir
fd, tempname = tempfile.mkstemp(dir=tmp_dest)
f = os.fdopen(fd, 'wb')
try:
shutil.copyfileobj(rsp, f)
except Exception as e:
os.remove(tempname)
module.fail_json(msg="failed to create temporary content file: %s" % to_native(e), elapsed=elapsed, exception=traceback.format_exc())
f.close()
rsp.close()
return tempname, info
def extract_filename_from_headers(headers):
"""
Extracts a filename from the given dict of HTTP headers.
Looks for the content-disposition header and applies a regex.
Returns the filename if successful, else None."""
cont_disp_regex = 'attachment; ?filename="?([^"]+)'
res = None
if 'content-disposition' in headers:
cont_disp = headers['content-disposition']
match = re.match(cont_disp_regex, cont_disp)
if match:
res = match.group(1)
# Try preventing any funny business.
res = os.path.basename(res)
return res
def is_url(checksum):
"""
Returns True if checksum value has supported URL scheme, else False."""
supported_schemes = ('http', 'https', 'ftp', 'file')
return urlsplit(checksum).scheme in supported_schemes
# ==============================================================
# main
def main():
argument_spec = url_argument_spec()
# setup aliases
argument_spec['url_username']['aliases'] = ['username']
argument_spec['url_password']['aliases'] = ['password']
argument_spec.update(
url=dict(type='str', required=True),
dest=dict(type='path', required=True),
backup=dict(type='bool', default=False),
checksum=dict(type='str', default=''),
timeout=dict(type='int', default=10),
headers=dict(type='dict'),
tmp_dest=dict(type='path'),
unredirected_headers=dict(type='list', elements='str', default=[]),
decompress=dict(type='bool', default=True),
ciphers=dict(type='list', elements='str'),
use_netrc=dict(type='bool', default=True),
)
module = AnsibleModule(
# not checking because of daisy chain to file module
argument_spec=argument_spec,
add_file_common_args=True,
supports_check_mode=True,
)
url = module.params['url']
dest = module.params['dest']
backup = module.params['backup']
force = module.params['force']
checksum = module.params['checksum']
use_proxy = module.params['use_proxy']
timeout = module.params['timeout']
headers = module.params['headers']
tmp_dest = module.params['tmp_dest']
unredirected_headers = module.params['unredirected_headers']
decompress = module.params['decompress']
ciphers = module.params['ciphers']
use_netrc = module.params['use_netrc']
result = dict(
changed=False,
checksum_dest=None,
checksum_src=None,
dest=dest,
elapsed=0,
url=url,
)
dest_is_dir = os.path.isdir(dest)
last_mod_time = None
# checksum specified, parse for algorithm and checksum
if checksum:
try:
algorithm, checksum = checksum.split(':', 1)
except ValueError:
module.fail_json(msg="The checksum parameter has to be in format <algorithm>:<checksum>", **result)
if is_url(checksum):
checksum_url = checksum
# download checksum file to checksum_tmpsrc
checksum_tmpsrc, checksum_info = url_get(module, checksum_url, dest, use_proxy, last_mod_time, force, timeout, headers, tmp_dest,
unredirected_headers=unredirected_headers, ciphers=ciphers, use_netrc=use_netrc)
with open(checksum_tmpsrc) as f:
lines = [line.rstrip('\n') for line in f]
os.remove(checksum_tmpsrc)
checksum_map = []
filename = url_filename(url)
if len(lines) == 1 and len(lines[0].split()) == 1:
# Only a single line with a single string
# treat it as a checksum only file
checksum_map.append((lines[0], filename))
else:
# The assumption here is the file is in the format of
# checksum filename
for line in lines:
# Split by one whitespace to keep the leading type char ' ' (whitespace) for text and '*' for binary
parts = line.split(" ", 1)
if len(parts) == 2:
# Remove the leading type char, we expect
if parts[1].startswith((" ", "*",)):
parts[1] = parts[1][1:]
# Append checksum and path without potential leading './'
checksum_map.append((parts[0], parts[1].lstrip("./")))
# Look through each line in the checksum file for a hash corresponding to
# the filename in the url, returning the first hash that is found.
for cksum in (s for (s, f) in checksum_map if f == filename):
checksum = cksum
break
else:
checksum = None
if checksum is None:
module.fail_json(msg="Unable to find a checksum for file '%s' in '%s'" % (filename, checksum_url))
# Remove any non-alphanumeric characters, including the infamous
# Unicode zero-width space
checksum = re.sub(r'\W+', '', checksum).lower()
# Ensure the checksum portion is a hexdigest
try:
int(checksum, 16)
except ValueError:
module.fail_json(msg='The checksum format is invalid', **result)
if not dest_is_dir and os.path.exists(dest):
checksum_mismatch = False
# If the download is not forced and there is a checksum, allow
# checksum match to skip the download.
if not force and checksum != '':
destination_checksum = module.digest_from_file(dest, algorithm)
if checksum != destination_checksum:
checksum_mismatch = True
# Not forcing redownload, unless checksum does not match
if not force and checksum and not checksum_mismatch:
# Not forcing redownload, unless checksum does not match
# allow file attribute changes
file_args = module.load_file_common_arguments(module.params, path=dest)
result['changed'] = module.set_fs_attributes_if_different(file_args, False)
if result['changed']:
module.exit_json(msg="file already exists but file attributes changed", **result)
module.exit_json(msg="file already exists", **result)
# If the file already exists, prepare the last modified time for the
# request.
mtime = os.path.getmtime(dest)
last_mod_time = datetime.datetime.utcfromtimestamp(mtime)
# If the checksum does not match we have to force the download
# because last_mod_time may be newer than on remote
if checksum_mismatch:
force = True
# download to tmpsrc
start = datetime.datetime.utcnow()
method = 'HEAD' if module.check_mode else 'GET'
tmpsrc, info = url_get(module, url, dest, use_proxy, last_mod_time, force, timeout, headers, tmp_dest, method,
unredirected_headers=unredirected_headers, decompress=decompress, use_netrc=use_netrc)
result['elapsed'] = (datetime.datetime.utcnow() - start).seconds
result['src'] = tmpsrc
# Now the request has completed, we can finally generate the final
# destination file name from the info dict.
if dest_is_dir:
filename = extract_filename_from_headers(info)
if not filename:
# Fall back to extracting the filename from the URL.
# Pluck the URL from the info, since a redirect could have changed
# it.
filename = url_filename(info['url'])
dest = os.path.join(dest, filename)
result['dest'] = dest
# raise an error if there is no tmpsrc file
if not os.path.exists(tmpsrc):
os.remove(tmpsrc)
module.fail_json(msg="Request failed", status_code=info['status'], response=info['msg'], **result)
if not os.access(tmpsrc, os.R_OK):
os.remove(tmpsrc)
module.fail_json(msg="Source %s is not readable" % (tmpsrc), **result)
result['checksum_src'] = module.sha1(tmpsrc)
# check if there is no dest file
if os.path.exists(dest):
# raise an error if copy has no permission on dest
if not os.access(dest, os.W_OK):
os.remove(tmpsrc)
module.fail_json(msg="Destination %s is not writable" % (dest), **result)
if not os.access(dest, os.R_OK):
os.remove(tmpsrc)
module.fail_json(msg="Destination %s is not readable" % (dest), **result)
result['checksum_dest'] = module.sha1(dest)
else:
if not os.path.exists(os.path.dirname(dest)):
os.remove(tmpsrc)
module.fail_json(msg="Destination %s does not exist" % (os.path.dirname(dest)), **result)
if not os.access(os.path.dirname(dest), os.W_OK):
os.remove(tmpsrc)
module.fail_json(msg="Destination %s is not writable" % (os.path.dirname(dest)), **result)
if module.check_mode:
if os.path.exists(tmpsrc):
os.remove(tmpsrc)
result['changed'] = ('checksum_dest' not in result or
result['checksum_src'] != result['checksum_dest'])
module.exit_json(msg=info.get('msg', ''), **result)
backup_file = None
if result['checksum_src'] != result['checksum_dest']:
try:
if backup:
if os.path.exists(dest):
backup_file = module.backup_local(dest)
module.atomic_move(tmpsrc, dest, unsafe_writes=module.params['unsafe_writes'])
except Exception as e:
if os.path.exists(tmpsrc):
os.remove(tmpsrc)
module.fail_json(msg="failed to copy %s to %s: %s" % (tmpsrc, dest, to_native(e)),
exception=traceback.format_exc(), **result)
result['changed'] = True
else:
result['changed'] = False
if os.path.exists(tmpsrc):
os.remove(tmpsrc)
if checksum != '':
destination_checksum = module.digest_from_file(dest, algorithm)
if checksum != destination_checksum:
os.remove(dest)
module.fail_json(msg="The checksum for %s did not match %s; it was %s." % (dest, checksum, destination_checksum), **result)
# allow file attribute changes
file_args = module.load_file_common_arguments(module.params, path=dest)
result['changed'] = module.set_fs_attributes_if_different(file_args, result['changed'])
# Backwards compat only. We'll return None on FIPS enabled systems
try:
result['md5sum'] = module.md5(dest)
except ValueError:
result['md5sum'] = None
if backup_file:
result['backup_file'] = backup_file
# Mission complete
module.exit_json(msg=info.get('msg', ''), status_code=info.get('status', ''), **result)
if __name__ == '__main__':
main()