From edfd095b1917701c5046bd51f9542897c17d41a7 Mon Sep 17 00:00:00 2001 From: Simon Sawicki Date: Sun, 13 Oct 2024 03:42:43 +0200 Subject: [PATCH] [ie/generic] Impersonate browser by default (#11206) Also adds `impersonate` extractor arg Authored by: Grub4K --- README.md | 1 + yt_dlp/extractor/generic.py | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fbf50072d..4b1ada82e 100644 --- a/README.md +++ b/README.md @@ -1795,6 +1795,7 @@ The following extractors use this feature: * `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg * `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist * `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live` +* `impersonate`: Target(s) to try and impersonate with the initial webpage request; e.g. `safari,chrome-110`. By default any available target will be used. Use `false` to disable impersonation #### funimation * `language`: Audio languages to extract, e.g. `funimation:language=english,japanese` diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 592800287..9b5421e41 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -8,6 +8,7 @@ from .common import InfoExtractor from .commonprotocols import RtmpIE from .youtube import YoutubeIE from ..compat import compat_etree_fromstring +from ..networking.impersonate import ImpersonateTarget from ..utils import ( KNOWN_EXTENSIONS, MEDIA_EXTENSIONS, @@ -2373,6 +2374,12 @@ class GenericIE(InfoExtractor): else: video_id = self._generic_id(url) + # Try to impersonate a web-browser by default if possible + # Skip impersonation if not available to omit the warning + impersonate = self._configuration_arg('impersonate', ['']) + if 'false' in impersonate or not self._downloader._impersonate_target_available(ImpersonateTarget()): + impersonate = None + # Some webservers may serve compressed content of rather big size (e.g. gzipped flac) # making it impossible to download only chunk of the file (yet we need only 512kB to # test whether it's HTML or not). According to yt-dlp default Accept-Encoding @@ -2384,7 +2391,7 @@ class GenericIE(InfoExtractor): full_response = self._request_webpage(url, video_id, headers=filter_dict({ 'Accept-Encoding': 'identity', 'Referer': smuggled_data.get('referer'), - })) + }), impersonate=impersonate) new_url = full_response.url if new_url != extract_basic_auth(url)[0]: self.report_following_redirect(new_url)