get_url: properly parse filename in content-disposition (#83748)

Since we don't care about the disposition type (attachment vs. inline),
we don't have to check it and can just retrieve the filename parameter.
Unfortunately, we cannot use module_utils.urls.get_response_filename
because we don't have the response object here, so we use
email.message.Message to parse the filename
instead of manually doing the work ourselves.

Fixes: #83690
pull/80637/merge
Martin Krizek 2 months ago committed by GitHub
parent b3c4154e86
commit f593eb42a3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,2 @@
bugfixes:
- get_url - fix honoring ``filename`` from the ``content-disposition`` header even when the type is ``inline`` (https://github.com/ansible/ansible/issues/83690)

@ -340,13 +340,16 @@ def extract_pem_certs(data):
def get_response_filename(response):
    """Return the filename a response suggests for the downloaded content.

    The ``filename`` parameter of the ``content-disposition`` header is
    preferred, whatever the disposition type is. When the header does not
    carry one, the last path segment of the response URL is used instead.

    :arg response: object exposing ``headers.get_param()`` and ``geturl()``
        (presumably an HTTP response whose headers behave like
        ``email.message.Message`` - confirm against callers).
    :returns: the bare filename without any directory part, or ``None`` when
        the URL path has no usable last segment.
    """
    # Imported locally so this function does not depend on a module-level
    # import that may not exist in the file.
    from email.utils import collapse_rfc2231_value

    filename = response.headers.get_param('filename', header='content-disposition')
    if isinstance(filename, tuple):
        # RFC 2231 encoded parameters (``filename*=``) are returned as a
        # (charset, language, value) tuple; collapse it into a plain string.
        filename = collapse_rfc2231_value(filename)
    if filename:
        # Never trust a server-supplied path: keep only the final component.
        return os.path.basename(filename)

    url = response.geturl()
    path = urlparse(url)[2]
    filename = os.path.basename(path.rstrip('/')) or None
    if filename:
        filename = unquote(filename)
    return filename
def parse_content_type(response):

@ -367,6 +367,7 @@ url:
sample: https://www.ansible.com/
'''
import email.message
import os
import re
import shutil
@ -439,23 +440,16 @@ def url_get(module, url, dest, use_proxy, last_mod_time, force, timeout=10, head
def extract_filename_from_headers(headers):
    """Extract a filename from the given dict of HTTP headers.

    The ``filename`` parameter of the ``content-disposition`` header is used
    regardless of the disposition type (``attachment``, ``inline``, ...).

    :arg headers: dict of HTTP headers; the ``content-disposition`` key is
        looked up in lower case.
    :returns: the bare filename without any directory part if successful,
        else ``None``.
    """
    # Imported locally so this function does not depend on a module-level
    # import that may not exist in the file.
    from email.utils import collapse_rfc2231_value

    # There is no response object at this point, so let the stdlib header
    # parser do the parameter parsing instead of a hand-written regex.
    msg = email.message.Message()
    msg['content-disposition'] = headers.get('content-disposition', '')
    filename = msg.get_param('filename', header='content-disposition')
    if isinstance(filename, tuple):
        # RFC 2231 encoded parameters (``filename*=``) are returned as a
        # (charset, language, value) tuple; collapse it into a plain string.
        filename = collapse_rfc2231_value(filename)
    if not filename:
        return None
    # Avoid directory traversal
    return os.path.basename(filename) or None
def is_url(checksum):

@ -719,3 +719,23 @@
- result is not changed
- '"did not match" in result.msg'
- stat_result_checksum_verify.stat.exists
# The filename announced in the content-disposition header must be used as
# the destination filename when ``dest`` is a directory.
- name: Test downloading to dir with content-disposition attachment
  get_url:
    url: 'https://{{ httpbin_host }}/response-headers?Content-Disposition=attachment%3B%20filename%3D%22filename.json%22'
    dest: "{{ remote_tmp_dir }}"
  register: get_dir_filename

- assert:
    that:
      - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json"

# Same check with the ``inline`` disposition type: the filename must be
# honored regardless of the type.
- name: Test downloading to dir with content-disposition inline
  get_url:
    url: 'https://{{ httpbin_host }}/response-headers?Content-Disposition=inline%3B%20filename%3D%22filename.json%22'
    dest: "{{ remote_tmp_dir }}"
  register: get_dir_filename

- assert:
    that:
      - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json"

Loading…
Cancel
Save