From 3e01ce744a981d8f19ae77ec695005e7000f4703 Mon Sep 17 00:00:00 2001 From: coletdjnz Date: Sun, 1 Jan 2023 18:40:26 +1300 Subject: [PATCH] [extractor/generic] Use `Accept-Encoding: identity` for initial request The existing comment seems to imply this was the desired behavior from the beginning. Partial fix for https://github.com/yt-dlp/yt-dlp/issues/5855, https://github.com/yt-dlp/yt-dlp/issues/5851, https://github.com/yt-dlp/yt-dlp/issues/4748 --- yt_dlp/extractor/generic.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 2281c71f3..ffc279023 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2154,6 +2154,21 @@ class GenericIE(InfoExtractor): 'age_limit': 0, 'direct': True, } + }, { + 'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.', + 'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'info_dict': { + 'id': 'cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867', + 'ext': 'mp4', + 'title': 'čauky lidi 70 finall', + 'description': 'čauky lidi 70 finall', + 'thumbnail': 'h', + 'upload_date': '20220606', + 'timestamp': 1654513791, + 'duration': 318.0, + 'direct': True, + 'age_limit': 0, + } } ] @@ -2312,7 +2327,7 @@ class GenericIE(InfoExtractor): # It may probably better to solve this by checking Content-Type for application/octet-stream # after a HEAD request, but not sure if we can rely on this. full_response = self._request_webpage(url, video_id, headers={ - 'Accept-Encoding': '*', + 'Accept-Encoding': 'identity', **smuggled_data.get('http_headers', {}) }) new_url = full_response.geturl()