diff --git a/changelogs/fragments/50771-uri-improvements.yml b/changelogs/fragments/50771-uri-improvements.yml new file mode 100644 index 00000000000..8edd8cf7263 --- /dev/null +++ b/changelogs/fragments/50771-uri-improvements.yml @@ -0,0 +1,3 @@ +minor_changes: +- uri - Eliminate multiple requests to determine the final URL for file naming with ``dest`` +- uri - Avoid reading the response body when not needed diff --git a/lib/ansible/module_utils/urls.py b/lib/ansible/module_utils/urls.py index c6b194d680b..914a963d811 100644 --- a/lib/ansible/module_utils/urls.py +++ b/lib/ansible/module_utils/urls.py @@ -72,7 +72,7 @@ import ansible.module_utils.six.moves.urllib.request as urllib_request import ansible.module_utils.six.moves.urllib.error as urllib_error from ansible.module_utils.common.collections import Mapping -from ansible.module_utils.six import PY3, string_types +from ansible.module_utils.six import PY2, PY3, string_types from ansible.module_utils.six.moves import cStringIO from ansible.module_utils.basic import get_distribution, missing_required_lib from ansible.module_utils._text import to_bytes, to_native, to_text @@ -89,7 +89,7 @@ except ImportError: urllib_request.HTTPRedirectHandler.http_error_308 = urllib_request.HTTPRedirectHandler.http_error_307 try: - from ansible.module_utils.six.moves.urllib.parse import urlparse, urlunparse + from ansible.module_utils.six.moves.urllib.parse import urlparse, urlunparse, unquote HAS_URLPARSE = True except Exception: HAS_URLPARSE = False @@ -755,6 +755,30 @@ def extract_pem_certs(b_data): yield match.group(0) +def get_response_filename(response): + url = response.geturl() + path = urlparse(url)[2] + filename = os.path.basename(path.rstrip('/')) or None + if filename: + filename = unquote(filename) + + return response.headers.get_param('filename', header='content-disposition') or filename + + +def parse_content_type(response): + if PY2: + get_type = response.headers.gettype + get_param = response.headers.getparam + else: + get_type = response.headers.get_content_type + get_param = response.headers.get_param + + content_type = (get_type() or 'application/octet-stream').split(',')[0] + main_type, sub_type = content_type.split('/') + charset = (get_param('charset') or 'utf-8').split(',')[0] + return content_type, main_type, sub_type, charset + + class RequestWithMethod(urllib_request.Request): ''' Workaround for using DELETE/PUT/etc with urllib2 @@ -1813,10 +1837,13 @@ def fetch_url(module, url, data=None, headers=None, method=None, except MissingModuleError as e: module.fail_json(msg=to_text(e), exception=e.import_traceback) except urllib_error.HTTPError as e: + r = e try: body = e.read() except AttributeError: body = '' + else: + e.close() # Try to add exception info to the output but don't fail if we can't try: diff --git a/lib/ansible/modules/uri.py b/lib/ansible/modules/uri.py index e2ccc6cdcc7..1ce2e64c2eb 100644 --- a/lib/ansible/modules/uri.py +++ b/lib/ansible/modules/uri.py @@ -427,7 +427,6 @@ url: sample: https://www.ansible.com/ ''' -import cgi import datetime import json import os @@ -437,11 +436,11 @@ import sys import tempfile from ansible.module_utils.basic import AnsibleModule, sanitize_keys -from ansible.module_utils.six import PY2, iteritems, string_types +from ansible.module_utils.six import PY2, PY3, binary_type, iteritems, string_types from ansible.module_utils.six.moves.urllib.parse import urlencode, urlsplit from ansible.module_utils._text import to_native, to_text from ansible.module_utils.common._collections_compat import Mapping, Sequence -from ansible.module_utils.urls import fetch_url, prepare_multipart, url_argument_spec +from ansible.module_utils.urls import fetch_url, get_response_filename, parse_content_type, prepare_multipart, url_argument_spec JSON_CANDIDATES = ('text', 'json', 'javascript') @@ -458,12 +457,15 @@ def format_message(err, resp): return err + (' %s' % msg if msg else '') -def write_file(module, url, dest, content, resp): +def write_file(module, dest, content, resp): # create a tempfile with some test content fd, tmpsrc = tempfile.mkstemp(dir=module.tmpdir) - f = open(tmpsrc, 'wb') + f = os.fdopen(fd, 'wb') try: - f.write(content) + if isinstance(content, binary_type): + f.write(content) + else: + shutil.copyfileobj(content, f) except Exception as e: os.remove(tmpsrc) msg = format_message("Failed to create temporary content file: %s" % to_native(e), resp) @@ -513,13 +515,6 @@ def write_file(module, url, dest, content, resp): os.remove(tmpsrc) -def url_filename(url): - fn = os.path.basename(urlsplit(url)[2]) - if fn == '': - return 'index.html' - return fn - - def absolute_location(url, location): """Attempts to create an absolute URL based on initial URL, and next URL, specifically in the case of a ``Location`` header. @@ -577,9 +572,7 @@ def form_urlencoded(body): def uri(module, url, dest, body, body_format, method, headers, socket_timeout, ca_path, unredirected_headers): # is dest is set and is a directory, let's check if we get redirected and # set the filename from that url - redirected = False - redir_info = {} - r = {} + src = module.params['src'] if src: try: @@ -593,42 +586,15 @@ def uri(module, url, dest, body, body_format, method, headers, socket_timeout, c data = body kwargs = {} - if dest is not None: - # Stash follow_redirects, in this block we don't want to follow - # we'll reset back to the supplied value soon - follow_redirects = module.params['follow_redirects'] - module.params['follow_redirects'] = False - if os.path.isdir(dest): - # first check if we are redirected to a file download - _, redir_info = fetch_url(module, url, data=body, - headers=headers, - method=method, - timeout=socket_timeout, unix_socket=module.params['unix_socket']) - # if we are redirected, update the url with the location header, - # and update dest with the new url filename - if redir_info['status'] in (301, 302, 303, 307): - url = redir_info['location'] - redirected = True - dest = os.path.join(dest, url_filename(url)) + if dest is not None and os.path.isfile(dest): # if destination file already exist, only download if file newer - if os.path.exists(dest): - kwargs['last_mod_time'] = datetime.datetime.utcfromtimestamp(os.path.getmtime(dest)) - - # Reset follow_redirects back to the stashed value - module.params['follow_redirects'] = follow_redirects + kwargs['last_mod_time'] = datetime.datetime.utcfromtimestamp(os.path.getmtime(dest)) resp, info = fetch_url(module, url, data=data, headers=headers, method=method, timeout=socket_timeout, unix_socket=module.params['unix_socket'], ca_path=ca_path, unredirected_headers=unredirected_headers, **kwargs) - try: - content = resp.read() - except AttributeError: - # there was no content, but the error read() - # may have been stored in the info as 'body' - content = info.pop('body', '') - if src: # Try to close the open file handle try: @@ -636,11 +602,7 @@ def uri(module, url, dest, body, body_format, method, headers, socket_timeout, c except Exception: pass - r['redirected'] = redirected or info['url'] != url - r.update(redir_info) - r.update(info) - - return r, content, dest + return resp, info def main(): @@ -726,16 +688,52 @@ def main(): # Make the request start = datetime.datetime.utcnow() - resp, content, dest = uri(module, url, dest, body, body_format, method, - dict_headers, socket_timeout, ca_path, unredirected_headers) - resp['elapsed'] = (datetime.datetime.utcnow() - start).seconds + r, info = uri(module, url, dest, body, body_format, method, + dict_headers, socket_timeout, ca_path, unredirected_headers) + + elapsed = (datetime.datetime.utcnow() - start).seconds + + if r and dest is not None and os.path.isdir(dest): + filename = get_response_filename(r) or 'index.html' + dest = os.path.join(dest, filename) + + if r: + content_type, main_type, sub_type, content_encoding = parse_content_type(r) + else: + content_type = 'application/octet-stream' + main_type = 'aplication' + sub_type = 'octet-stream' + content_encoding = 'utf-8' + + maybe_json = content_type and any(candidate in sub_type for candidate in JSON_CANDIDATES) + maybe_output = maybe_json or return_content or info['status'] not in status_code + + if maybe_output: + try: + if PY3 and r.closed: + raise TypeError + content = r.read() + except (AttributeError, TypeError): + # there was no content, but the error read() + # may have been stored in the info as 'body' + content = info.pop('body', b'') + elif r: + content = r + else: + content = None + + resp = {} + resp['redirected'] = info['url'] != url + resp.update(info) + + resp['elapsed'] = elapsed resp['status'] = int(resp['status']) resp['changed'] = False # Write the file out if requested - if dest is not None: + if r and dest is not None: if resp['status'] in status_code and resp['status'] != 304: - write_file(module, url, dest, content, resp) + write_file(module, dest, content, resp) # allow file attribute changes resp['changed'] = True module.params['path'] = dest @@ -756,34 +754,9 @@ def main(): uresp['location'] = absolute_location(url, uresp['location']) # Default content_encoding to try - content_encoding = 'utf-8' - if 'content_type' in uresp: - # Handle multiple Content-Type headers - charsets = [] - content_types = [] - for value in uresp['content_type'].split(','): - ct, params = cgi.parse_header(value) - if ct not in content_types: - content_types.append(ct) - if 'charset' in params: - if params['charset'] not in charsets: - charsets.append(params['charset']) - - if content_types: - content_type = content_types[0] - if len(content_types) > 1: - module.warn( - 'Received multiple conflicting Content-Type values (%s), using %s' % (', '.join(content_types), content_type) - ) - if charsets: - content_encoding = charsets[0] - if len(charsets) > 1: - module.warn( - 'Received multiple conflicting charset values (%s), using %s' % (', '.join(charsets), content_encoding) - ) - + if isinstance(content, binary_type): u_content = to_text(content, encoding=content_encoding) - if any(candidate in content_type for candidate in JSON_CANDIDATES): + if maybe_json: try: js = json.loads(u_content) uresp['json'] = js @@ -791,7 +764,7 @@ def main(): if PY2: sys.exc_clear() # Avoid false positive traceback in fail_json() on Python 2 else: - u_content = to_text(content, encoding=content_encoding) + u_content = None if module.no_log_values: uresp = sanitize_keys(uresp, module.no_log_values, NO_MODIFY_KEYS) diff --git a/test/sanity/ignore.txt b/test/sanity/ignore.txt index 9dbf85a89f2..d056a698ce0 100644 --- a/test/sanity/ignore.txt +++ b/test/sanity/ignore.txt @@ -59,7 +59,6 @@ lib/ansible/modules/systemd.py validate-modules:parameter-invalid lib/ansible/modules/systemd.py validate-modules:return-syntax-error lib/ansible/modules/sysvinit.py validate-modules:return-syntax-error lib/ansible/modules/unarchive.py validate-modules:nonexistent-parameter-documented -lib/ansible/modules/uri.py pylint:disallowed-name lib/ansible/modules/uri.py validate-modules:doc-required-mismatch lib/ansible/modules/user.py validate-modules:doc-default-does-not-match-spec lib/ansible/modules/user.py validate-modules:doc-default-incompatible-type