From 5d45484cc762861f8fe59fa42d499db5a284c2c7 Mon Sep 17 00:00:00 2001 From: "Lesmiscore (Naoya Ozaki)" Date: Fri, 1 Apr 2022 19:31:58 +0900 Subject: [PATCH] [niconico] Fix extraction of thumbnails and uploader (#3266) --- yt_dlp/extractor/niconico.py | 18 ++++++++++++++---- yt_dlp/utils.py | 7 +++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/niconico.py b/yt_dlp/extractor/niconico.py index 74828f833..a5a1a01e0 100644 --- a/yt_dlp/extractor/niconico.py +++ b/yt_dlp/extractor/niconico.py @@ -25,7 +25,10 @@ from ..utils import ( parse_duration, parse_filesize, parse_iso8601, + parse_resolution, + qualities, remove_start, + str_or_none, traverse_obj, try_get, unescapeHTML, @@ -430,18 +433,25 @@ class NiconicoIE(InfoExtractor): # find in json (logged in) tags = traverse_obj(api_data, ('tag', 'items', ..., 'name')) + thumb_prefs = qualities(['url', 'middleUrl', 'largeUrl', 'player', 'ogp']) + return { 'id': video_id, '_api_data': api_data, 'title': get_video_info(('originalTitle', 'title')) or self._og_search_title(webpage, default=None), 'formats': formats, - 'thumbnail': get_video_info('thumbnail', 'url') or self._html_search_meta( - ('image', 'og:image'), webpage, 'thumbnail', default=None), + 'thumbnails': [{ + 'id': key, + 'url': url, + 'ext': 'jpg', + 'preference': thumb_prefs(key), + **parse_resolution(url, lenient=True), + } for key, url in (get_video_info('thumbnail') or {}).items() if url], 'description': clean_html(get_video_info('description')), - 'uploader': traverse_obj(api_data, ('owner', 'nickname')), + 'uploader': traverse_obj(api_data, ('owner', 'nickname'), ('channel', 'name'), ('community', 'name')), + 'uploader_id': str_or_none(traverse_obj(api_data, ('owner', 'id'), ('channel', 'id'), ('community', 'id'))), 'timestamp': parse_iso8601(get_video_info('registeredAt')) or parse_iso8601( self._html_search_meta('video:release_date', webpage, 'date published', default=None)), - 'uploader_id': traverse_obj(api_data, 
('owner', 'id')), 'channel': traverse_obj(api_data, ('channel', 'name'), ('community', 'name')), 'channel_id': traverse_obj(api_data, ('channel', 'id'), ('community', 'id')), 'view_count': int_or_none(get_video_info('count', 'view')), diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index a2fa29afe..ce918750d 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -2418,11 +2418,14 @@ def parse_count(s): return str_to_int(mobj.group(1)) -def parse_resolution(s): +def parse_resolution(s, *, lenient=False): if s is None: return {} - mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s) + if lenient: + mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s) + else: + mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s) if mobj: return { 'width': int(mobj.group('w')),