From 05997b6e98e638d97d409c65bb5eb86da68f3b64 Mon Sep 17 00:00:00 2001
From: bashonly <88596187+bashonly@users.noreply.github.com>
Date: Mon, 2 Jan 2023 08:06:01 -0600
Subject: [PATCH] [extractor/generic] Decode unicode-escaped embed URLs (#5919)

Authored by: bashonly
Closes #5854
---
 yt_dlp/extractor/generic.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py
index 14d492f07..04677b23f 100644
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2135,7 +2135,8 @@ class GenericIE(InfoExtractor):
                 'age_limit': 0,
                 'direct': True,
             }
-        }, {
+        },
+        {
             'note': 'server returns data in brotli compression by default if `accept-encoding: *` is specified.',
             'url': 'https://www.extra.cz/cauky-lidi-70-dil-babis-predstavil-pohadky-prymulanek-nebo-andrejovy-nove-saty-ac867',
             'info_dict': {
@@ -2149,7 +2150,23 @@ class GenericIE(InfoExtractor):
                 'duration': 318.0,
                 'direct': True,
                 'age_limit': 0,
-            }
+            },
+        },
+        {
+            'note': 'JW Player embed with unicode-escape sequences in URL',
+            'url': 'https://www.medici.tv/en/concerts/lahav-shani-mozart-mahler-israel-philharmonic-abu-dhabi-classics',
+            'info_dict': {
+                'id': 'm',
+                'ext': 'mp4',
+                'title': 'Lahav Shani conducts the Israel Philharmonic\'s first-ever concert in Abu Dhabi',
+                'description': 'Mahler\'s ',
+                'uploader': 'www.medici.tv',
+                'age_limit': 0,
+                'thumbnail': r're:^https?://.+\.jpg',
+            },
+            'params': {
+                'skip_download': True,
+            },
         },
         {
             'url': 'https://shooshtime.com/videos/284002/just-out-of-the-shower-joi/',
@@ -2751,6 +2768,7 @@ class GenericIE(InfoExtractor):
 
         entries = []
         for video_url in orderedSet(found):
+            video_url = video_url.encode().decode('unicode-escape')
             video_url = unescapeHTML(video_url)
             video_url = video_url.replace('\\/', '/')
             video_url = urllib.parse.urljoin(url, video_url)