From 1d55ebabc93b8e422a0126fc307f2a8e50fa5a97 Mon Sep 17 00:00:00 2001 From: Simon Sawicki <37424085+Grub4K@users.noreply.github.com> Date: Sun, 9 Oct 2022 05:17:58 +0200 Subject: [PATCH] [extractor/common] Fix `json_ld` type checks (#5145) Closes #5144, #5143 Authored by: Grub4K --- yt_dlp/extractor/common.py | 12 +++++------- yt_dlp/extractor/generic.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 31a45b37a..18a52a855 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1467,10 +1467,6 @@ class InfoExtractor: if not json_ld: return {} info = {} - if not isinstance(json_ld, (list, tuple, dict)): - return info - if isinstance(json_ld, dict): - json_ld = [json_ld] INTERACTION_TYPE_MAP = { 'CommentAction': 'comment', @@ -1570,11 +1566,13 @@ class InfoExtractor: extract_chapter_information(e) def traverse_json_ld(json_ld, at_top_level=True): - for e in json_ld: + for e in variadic(json_ld): + if not isinstance(e, dict): + continue if at_top_level and '@context' not in e: continue if at_top_level and set(e.keys()) == {'@context', '@graph'}: - traverse_json_ld(variadic(e['@graph'], allowed_types=(dict,)), at_top_level=False) + traverse_json_ld(e['@graph'], at_top_level=False) break if expected_type is not None and not is_type(e, expected_type): continue @@ -1629,8 +1627,8 @@ class InfoExtractor: continue else: break - traverse_json_ld(json_ld) + traverse_json_ld(json_ld) return filter_dict(info) def _search_nextjs_data(self, webpage, video_id, *, transform_source=None, fatal=True, **kw): diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 73422f937..92390a387 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2463,6 +2463,21 @@ class GenericIE(InfoExtractor): 'duration': 111.0, } }, + { + 'note': 'JSON LD with unexpected data type', + 'url': 'https://www.autoweek.nl/autotests/artikel/porsche-911-gt3-rs-rij-impressie-2/', + 'info_dict': { + 'id': 'porsche-911-gt3-rs-rij-impressie-2', + 'ext': 'mp4', + 'title': 'Test: Porsche 911 GT3 RS', + 'description': 'Je ziet het niet, maar het is er wel. Downforce, hebben we het dan over. En in de nieuwe Porsche 911 GT3 RS is er zelfs heel veel downforce.', + 'timestamp': 1664920902, + 'upload_date': '20221004', + 'thumbnail': r're:^https://media.autoweek.nl/m/.+\.jpg$', + 'age_limit': 0, + 'direct': True, + } + } ] def report_following_redirect(self, new_url):