diff --git a/changelogs/fragments/84825-get-url-incomplete-read.yml b/changelogs/fragments/84825-get-url-incomplete-read.yml
new file mode 100644
index 00000000000..6879bcfd8c1
--- /dev/null
+++ b/changelogs/fragments/84825-get-url-incomplete-read.yml
@@ -0,0 +1,2 @@
+bugfixes:
+  - get_url - add a check to recognize incomplete data transfers.
diff --git a/lib/ansible/modules/get_url.py b/lib/ansible/modules/get_url.py
index c9e0ac8644b..f25743b9a41 100644
--- a/lib/ansible/modules/get_url.py
+++ b/lib/ansible/modules/get_url.py
@@ -436,6 +436,23 @@ def url_get(module, url, dest, use_proxy, last_mod_time, force, timeout=10, head
         module.fail_json(msg="failed to create temporary content file: %s" % to_native(e), elapsed=elapsed)
     f.close()
     rsp.close()
+
+    # Since shutil.copyfileobj() will read from HTTPResponse in chunks, HTTPResponse.read() will not recognize
+    # if the entire content-length of data was not read. We need to do that validation here, unless a 'chunked'
+    # transfer-encoding was used, in which case we will not know content-length because it will not be returned.
+    # But in that case, HTTPResponse will behave correctly and recognize an IncompleteRead.
+
+    is_gzip = info.get('content-encoding') == 'gzip'
+
+    if not module.check_mode and 'content-length' in info:
+        # If data is decompressed, then content-length won't match the amount of data we've read, so skip.
+        if not is_gzip or (is_gzip and not decompress):
+            st = os.stat(tempname)
+            cl = int(info['content-length'])
+            if st.st_size != cl:
+                diff = cl - st.st_size
+                module.fail_json(msg=f'Incomplete read, ({rsp.length=}, {cl=}, {st.st_size=}) failed to read remaining {diff} bytes')
+
     return tempname, info
 
 
diff --git a/test/integration/targets/get_url/files/testserver.py b/test/integration/targets/get_url/files/testserver.py
index 3a83724ce87..2ddb537ef4e 100644
--- a/test/integration/targets/get_url/files/testserver.py
+++ b/test/integration/targets/get_url/files/testserver.py
@@ -8,7 +8,15 @@ if __name__ == '__main__':
     PORT = int(sys.argv[1])
 
     class Handler(http.server.SimpleHTTPRequestHandler):
-        pass
+        def do_GET(self):
+            if self.path == '/incompleteRead':
+                self.send_response(200)
+                self.send_header("Content-type", "text/html")
+                self.send_header("Content-Length", "100")
+                self.end_headers()
+                self.wfile.write(b"ABCD")
+            else:
+                super().do_GET()
 
     Handler.extensions_map['.json'] = 'application/json'
     httpd = socketserver.TCPServer(("", PORT), Handler)
diff --git a/test/integration/targets/get_url/tasks/main.yml b/test/integration/targets/get_url/tasks/main.yml
index 0ec6afd2025..d9c64c5af7c 100644
--- a/test/integration/targets/get_url/tasks/main.yml
+++ b/test/integration/targets/get_url/tasks/main.yml
@@ -761,3 +761,16 @@
 - assert:
     that:
       - get_dir_filename.dest == remote_tmp_dir ~ "/filename.json"
+
+- name: Test for incomplete data read (issue 85164)
+  get_url:
+    url: 'http://localhost:{{ http_port }}/incompleteRead'
+    dest: '{{ remote_tmp_dir }}/85164.txt'
+  ignore_errors: true
+  register: result
+
+- name: Assert we have an incomplete read failure
+  assert:
+    that:
+      - result is failed
+      - '"Incomplete read" in result.msg'