From 0ba692acc8feffd46b6e1085fb4a2849b685945c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Thu, 15 Jul 2021 22:49:59 +0530 Subject: [PATCH] [youtube] Extract more thumbnails * The thumbnail URLs are hard-coded and their actual existence is tested lazily * Added option `--no-check-formats` to not test them Closes #340, Related: #402, #337, https://github.com/ytdl-org/youtube-dl/issues/29049 --- README.md | 4 +++- yt_dlp/YoutubeDL.py | 37 +++++++++++++++++++++--------- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/youtube.py | 45 ++++++++++++++++++++----------------- yt_dlp/options.py | 8 +++++-- 5 files changed, 61 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 8fd327f3e..6ff6d93d6 100644 --- a/README.md +++ b/README.md @@ -638,7 +638,9 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --no-prefer-free-formats Don't give any special preference to free containers (default) --check-formats Check that the formats selected are - actually downloadable (Experimental) + actually downloadable + --no-check-formats Do not check that the formats selected are + actually downloadable -F, --list-formats List all available formats of requested videos --merge-output-format FORMAT If a merge is required (e.g. diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5b603690c..d4d1af4fd 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -209,6 +209,9 @@ class YoutubeDL(object): into a single file allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file + check_formats: Whether to test if the formats are downloadable. + Can be True (check all), False (check none) + or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home' 'temp' and the keys of OUTTMPL_TYPES (in utils.py) outtmpl: Dictionary of templates for output names. 
Allowed keys @@ -1944,15 +1947,24 @@ class YoutubeDL(object): t.get('id') if t.get('id') is not None else '', t.get('url'))) - def test_thumbnail(t): - self.to_screen('[info] Testing thumbnail %s' % t['id']) - try: - self.urlopen(HEADRequest(t['url'])) - except network_exceptions as err: - self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( - t['id'], t['url'], error_to_compat_str(err))) - return False - return True + def thumbnail_tester(): + if self.params.get('check_formats'): + def to_screen(msg): + return self.to_screen(f'[info] {msg}') + else: + to_screen = self.write_debug + + def test_thumbnail(t): + to_screen('Testing thumbnail %s' % t['id']) + try: + self.urlopen(HEADRequest(t['url'])) + except network_exceptions as err: + to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % ( + t['id'], t['url'], error_to_compat_str(err))) + return False + return True + + return test_thumbnail for i, t in enumerate(thumbnails): if t.get('id') is None: @@ -1960,8 +1972,11 @@ class YoutubeDL(object): if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) - if self.params.get('check_formats'): - info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse() + + if self.params.get('check_formats') is not False: + info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse() + else: + info_dict['thumbnails'] = thumbnails def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 0ee7ee3b1..a6fc5d11a 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -229,6 +229,7 @@ class InfoExtractor(object): * "resolution" (optional, string "{width}x{height}", deprecated) * "filesize" (optional, int) + * "_test_url" (optional, bool) - If true, test the URL 
thumbnail: Full URL to a video thumbnail image. description: Full video description. uploader: Full name of the video uploader. diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index dee2dbebc..ae1c1bca5 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2645,7 +2645,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): f['stretched_ratio'] = ratio break + category = microformat.get('category') or search_meta('genre') + channel_id = video_details.get('channelId') \ + or microformat.get('externalChannelId') \ + or search_meta('channelId') + duration = int_or_none( + video_details.get('lengthSeconds') + or microformat.get('lengthSeconds')) \ + or parse_duration(search_meta('duration')) + is_live = video_details.get('isLive') + is_upcoming = video_details.get('isUpcoming') + owner_profile_url = microformat.get('ownerProfileUrl') + thumbnails = [] + thumbnail_types = ['maxresdefault', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', '1', '2', '3'] + for container in (video_details, microformat): for thumbnail in (try_get( container, @@ -2662,34 +2676,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': thumbnail_url, 'height': int_or_none(thumbnail.get('height')), 'width': int_or_none(thumbnail.get('width')), - 'preference': 1 if 'maxresdefault' in thumbnail_url else -1 }) thumbnail_url = search_meta(['og:image', 'twitter:image']) if thumbnail_url: thumbnails.append({ 'url': thumbnail_url, - 'preference': 1 if 'maxresdefault' in thumbnail_url else -1 }) - # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage - # See: https://github.com/ytdl-org/youtube-dl/issues/29049 - thumbnails.append({ - 'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id, - 'preference': 1, - }) + # The best resolution thumbnails sometimes do not appear in the webpage + # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 + 
thumbnails.extend({ + 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( + video_id=video_id, name=name, ext=ext, + webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), + '_test_url': True, + } for name in thumbnail_types for ext in ('webp', 'jpg')) + for thumb in thumbnails: + i = next((i for i, t in enumerate(thumbnail_types) if f'/{video_id}/{t}' in thumb['url']), 20) + thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i) self._remove_duplicate_formats(thumbnails) - category = microformat.get('category') or search_meta('genre') - channel_id = video_details.get('channelId') \ - or microformat.get('externalChannelId') \ - or search_meta('channelId') - duration = int_or_none( - video_details.get('lengthSeconds') - or microformat.get('lengthSeconds')) \ - or parse_duration(search_meta('duration')) - is_live = video_details.get('isLive') - is_upcoming = video_details.get('isUpcoming') - owner_profile_url = microformat.get('ownerProfileUrl') - info = { 'id': video_id, 'title': self._live_title(video_title) if is_live else video_title, diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 2a42712b6..f9201bf01 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -524,8 +524,12 @@ def parseOpts(overrideArguments=None): help="Don't give any special preference to free containers (default)") video_format.add_option( '--check-formats', - action='store_true', dest='check_formats', default=False, - help="Check that the formats selected are actually downloadable (Experimental)") + action='store_true', dest='check_formats', default=None, + help='Check that the formats selected are actually downloadable') + video_format.add_option( + '--no-check-formats', + action='store_false', dest='check_formats', + help='Do not check that the formats selected are actually downloadable') video_format.add_option( '-F', '--list-formats', action='store_true', dest='listformats',