uri module improvements (#50771)

pull/76310/head
Matt Martz 3 years ago committed by GitHub
parent c9d4b6bfb0
commit 4073a22d55
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,3 @@
minor_changes:
- uri - Eliminate multiple requests to determine the final URL for file naming with ``dest``
- uri - Avoid reading the response body when not needed

@ -72,7 +72,7 @@ import ansible.module_utils.six.moves.urllib.request as urllib_request
import ansible.module_utils.six.moves.urllib.error as urllib_error
from ansible.module_utils.common.collections import Mapping
from ansible.module_utils.six import PY3, string_types
from ansible.module_utils.six import PY2, PY3, string_types
from ansible.module_utils.six.moves import cStringIO
from ansible.module_utils.basic import get_distribution, missing_required_lib
from ansible.module_utils._text import to_bytes, to_native, to_text
@ -89,7 +89,7 @@ except ImportError:
urllib_request.HTTPRedirectHandler.http_error_308 = urllib_request.HTTPRedirectHandler.http_error_307
try:
from ansible.module_utils.six.moves.urllib.parse import urlparse, urlunparse
from ansible.module_utils.six.moves.urllib.parse import urlparse, urlunparse, unquote
HAS_URLPARSE = True
except Exception:
HAS_URLPARSE = False
@ -755,6 +755,30 @@ def extract_pem_certs(b_data):
yield match.group(0)
def get_response_filename(response):
url = response.geturl()
path = urlparse(url)[2]
filename = os.path.basename(path.rstrip('/')) or None
if filename:
filename = unquote(filename)
return response.headers.get_param('filename', header='content-disposition') or filename
def parse_content_type(response):
if PY2:
get_type = response.headers.gettype
get_param = response.headers.getparam
else:
get_type = response.headers.get_content_type
get_param = response.headers.get_param
content_type = (get_type() or 'application/octet-stream').split(',')[0]
main_type, sub_type = content_type.split('/')
charset = (get_param('charset') or 'utf-8').split(',')[0]
return content_type, main_type, sub_type, charset
class RequestWithMethod(urllib_request.Request):
'''
Workaround for using DELETE/PUT/etc with urllib2
@ -1813,10 +1837,13 @@ def fetch_url(module, url, data=None, headers=None, method=None,
except MissingModuleError as e:
module.fail_json(msg=to_text(e), exception=e.import_traceback)
except urllib_error.HTTPError as e:
r = e
try:
body = e.read()
except AttributeError:
body = ''
else:
e.close()
# Try to add exception info to the output but don't fail if we can't
try:

@ -427,7 +427,6 @@ url:
sample: https://www.ansible.com/
'''
import cgi
import datetime
import json
import os
@ -437,11 +436,11 @@ import sys
import tempfile
from ansible.module_utils.basic import AnsibleModule, sanitize_keys
from ansible.module_utils.six import PY2, iteritems, string_types
from ansible.module_utils.six import PY2, PY3, binary_type, iteritems, string_types
from ansible.module_utils.six.moves.urllib.parse import urlencode, urlsplit
from ansible.module_utils._text import to_native, to_text
from ansible.module_utils.common._collections_compat import Mapping, Sequence
from ansible.module_utils.urls import fetch_url, prepare_multipart, url_argument_spec
from ansible.module_utils.urls import fetch_url, get_response_filename, parse_content_type, prepare_multipart, url_argument_spec
JSON_CANDIDATES = ('text', 'json', 'javascript')
@ -458,12 +457,15 @@ def format_message(err, resp):
return err + (' %s' % msg if msg else '')
def write_file(module, url, dest, content, resp):
def write_file(module, dest, content, resp):
# create a tempfile with some test content
fd, tmpsrc = tempfile.mkstemp(dir=module.tmpdir)
f = open(tmpsrc, 'wb')
f = os.fdopen(fd, 'wb')
try:
if isinstance(content, binary_type):
f.write(content)
else:
shutil.copyfileobj(content, f)
except Exception as e:
os.remove(tmpsrc)
msg = format_message("Failed to create temporary content file: %s" % to_native(e), resp)
@ -513,13 +515,6 @@ def write_file(module, url, dest, content, resp):
os.remove(tmpsrc)
def url_filename(url):
fn = os.path.basename(urlsplit(url)[2])
if fn == '':
return 'index.html'
return fn
def absolute_location(url, location):
"""Attempts to create an absolute URL based on initial URL, and
next URL, specifically in the case of a ``Location`` header.
@ -577,9 +572,7 @@ def form_urlencoded(body):
def uri(module, url, dest, body, body_format, method, headers, socket_timeout, ca_path, unredirected_headers):
# is dest is set and is a directory, let's check if we get redirected and
# set the filename from that url
redirected = False
redir_info = {}
r = {}
src = module.params['src']
if src:
try:
@ -593,42 +586,15 @@ def uri(module, url, dest, body, body_format, method, headers, socket_timeout, c
data = body
kwargs = {}
if dest is not None:
# Stash follow_redirects, in this block we don't want to follow
# we'll reset back to the supplied value soon
follow_redirects = module.params['follow_redirects']
module.params['follow_redirects'] = False
if os.path.isdir(dest):
# first check if we are redirected to a file download
_, redir_info = fetch_url(module, url, data=body,
headers=headers,
method=method,
timeout=socket_timeout, unix_socket=module.params['unix_socket'])
# if we are redirected, update the url with the location header,
# and update dest with the new url filename
if redir_info['status'] in (301, 302, 303, 307):
url = redir_info['location']
redirected = True
dest = os.path.join(dest, url_filename(url))
if dest is not None and os.path.isfile(dest):
# if destination file already exist, only download if file newer
if os.path.exists(dest):
kwargs['last_mod_time'] = datetime.datetime.utcfromtimestamp(os.path.getmtime(dest))
# Reset follow_redirects back to the stashed value
module.params['follow_redirects'] = follow_redirects
resp, info = fetch_url(module, url, data=data, headers=headers,
method=method, timeout=socket_timeout, unix_socket=module.params['unix_socket'],
ca_path=ca_path, unredirected_headers=unredirected_headers,
**kwargs)
try:
content = resp.read()
except AttributeError:
# there was no content, but the error read()
# may have been stored in the info as 'body'
content = info.pop('body', '')
if src:
# Try to close the open file handle
try:
@ -636,11 +602,7 @@ def uri(module, url, dest, body, body_format, method, headers, socket_timeout, c
except Exception:
pass
r['redirected'] = redirected or info['url'] != url
r.update(redir_info)
r.update(info)
return r, content, dest
return resp, info
def main():
@ -726,16 +688,52 @@ def main():
# Make the request
start = datetime.datetime.utcnow()
resp, content, dest = uri(module, url, dest, body, body_format, method,
r, info = uri(module, url, dest, body, body_format, method,
dict_headers, socket_timeout, ca_path, unredirected_headers)
resp['elapsed'] = (datetime.datetime.utcnow() - start).seconds
elapsed = (datetime.datetime.utcnow() - start).seconds
if r and dest is not None and os.path.isdir(dest):
filename = get_response_filename(r) or 'index.html'
dest = os.path.join(dest, filename)
if r:
content_type, main_type, sub_type, content_encoding = parse_content_type(r)
else:
content_type = 'application/octet-stream'
main_type = 'aplication'
sub_type = 'octet-stream'
content_encoding = 'utf-8'
maybe_json = content_type and any(candidate in sub_type for candidate in JSON_CANDIDATES)
maybe_output = maybe_json or return_content or info['status'] not in status_code
if maybe_output:
try:
if PY3 and r.closed:
raise TypeError
content = r.read()
except (AttributeError, TypeError):
# there was no content, but the error read()
# may have been stored in the info as 'body'
content = info.pop('body', b'')
elif r:
content = r
else:
content = None
resp = {}
resp['redirected'] = info['url'] != url
resp.update(info)
resp['elapsed'] = elapsed
resp['status'] = int(resp['status'])
resp['changed'] = False
# Write the file out if requested
if dest is not None:
if r and dest is not None:
if resp['status'] in status_code and resp['status'] != 304:
write_file(module, url, dest, content, resp)
write_file(module, dest, content, resp)
# allow file attribute changes
resp['changed'] = True
module.params['path'] = dest
@ -756,34 +754,9 @@ def main():
uresp['location'] = absolute_location(url, uresp['location'])
# Default content_encoding to try
content_encoding = 'utf-8'
if 'content_type' in uresp:
# Handle multiple Content-Type headers
charsets = []
content_types = []
for value in uresp['content_type'].split(','):
ct, params = cgi.parse_header(value)
if ct not in content_types:
content_types.append(ct)
if 'charset' in params:
if params['charset'] not in charsets:
charsets.append(params['charset'])
if content_types:
content_type = content_types[0]
if len(content_types) > 1:
module.warn(
'Received multiple conflicting Content-Type values (%s), using %s' % (', '.join(content_types), content_type)
)
if charsets:
content_encoding = charsets[0]
if len(charsets) > 1:
module.warn(
'Received multiple conflicting charset values (%s), using %s' % (', '.join(charsets), content_encoding)
)
if isinstance(content, binary_type):
u_content = to_text(content, encoding=content_encoding)
if any(candidate in content_type for candidate in JSON_CANDIDATES):
if maybe_json:
try:
js = json.loads(u_content)
uresp['json'] = js
@ -791,7 +764,7 @@ def main():
if PY2:
sys.exc_clear() # Avoid false positive traceback in fail_json() on Python 2
else:
u_content = to_text(content, encoding=content_encoding)
u_content = None
if module.no_log_values:
uresp = sanitize_keys(uresp, module.no_log_values, NO_MODIFY_KEYS)

@ -59,7 +59,6 @@ lib/ansible/modules/systemd.py validate-modules:parameter-invalid
lib/ansible/modules/systemd.py validate-modules:return-syntax-error
lib/ansible/modules/sysvinit.py validate-modules:return-syntax-error
lib/ansible/modules/unarchive.py validate-modules:nonexistent-parameter-documented
lib/ansible/modules/uri.py pylint:disallowed-name
lib/ansible/modules/uri.py validate-modules:doc-required-mismatch
lib/ansible/modules/user.py validate-modules:doc-default-does-not-match-spec
lib/ansible/modules/user.py validate-modules:doc-default-incompatible-type

Loading…
Cancel
Save