[extractor/youtube:tab] Update tab handling for redesign (#5439)

Closes #5432, #5430, #5419
Authored by: coletdjnz, pukkandan
pull/5491/head
Matthew 2 years ago committed by GitHub
parent c61473c1d6
commit 86973308cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -88,7 +88,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t
* Supports some (but not all) age-gated content without cookies * Supports some (but not all) age-gated content without cookies
* Download livestreams from the start using `--live-from-start` (*experimental*) * Download livestreams from the start using `--live-from-start` (*experimental*)
* `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given * `255kbps` audio is extracted (if available) from YouTube Music when premium cookies are given
* Redirect channel's home URL automatically to `/video` to preserve the old behaviour * Channel URLs download all uploads of the channel, including shorts and live
* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]` * **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`
@ -142,7 +142,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
* YouTube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections * YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this * Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date. * The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date.
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this * If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this

@ -10,6 +10,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from test.helper import FakeYDL, is_download_test from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import YoutubeIE, YoutubeTabIE from yt_dlp.extractor import YoutubeIE, YoutubeTabIE
from yt_dlp.utils import ExtractorError
@is_download_test @is_download_test
@ -53,6 +54,18 @@ class TestYoutubeLists(unittest.TestCase):
self.assertEqual(video['duration'], 10) self.assertEqual(video['duration'], 10)
self.assertEqual(video['uploader'], 'Philipp Hagemeister') self.assertEqual(video['uploader'], 'Philipp Hagemeister')
def test_youtube_channel_no_uploads(self):
    """Channels without any uploads must raise an ExtractorError mentioning 'no uploads'."""
    ydl = FakeYDL()
    ydl.params['extract_flat'] = True
    extractor = YoutubeTabIE(ydl)

    channel_urls = (
        # no uploads
        'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
        # no uploads and no UCID given
        'https://www.youtube.com/news',
    )
    # Plain loop (no subTest) so the first failure aborts the test, as before
    for channel_url in channel_urls:
        with self.assertRaisesRegex(ExtractorError, r'no uploads'):
            extractor.extract(channel_url)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -4589,13 +4589,16 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
@staticmethod @staticmethod
def _extract_selected_tab(tabs, fatal=True): def _extract_selected_tab(tabs, fatal=True):
for tab in tabs: for tab_renderer in tabs:
renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} if tab_renderer.get('selected'):
if renderer.get('selected') is True: return tab_renderer
return renderer if fatal:
else: raise ExtractorError('Unable to find selected tab')
if fatal:
raise ExtractorError('Unable to find selected tab') @staticmethod
def _extract_tab_renderers(response):
return traverse_obj(
response, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs', ..., ('tabRenderer', 'expandableTabRenderer')), expected_type=dict)
def _extract_from_tabs(self, item_id, ytcfg, data, tabs): def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
playlist_id = title = description = channel_url = channel_name = channel_id = None playlist_id = title = description = channel_url = channel_name = channel_id = None
@ -4897,8 +4900,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
# Reject webpage data if redirected to home page without explicitly requesting # Reject webpage data if redirected to home page without explicitly requesting
selected_tab = self._extract_selected_tab(traverse_obj( selected_tab = self._extract_selected_tab(self._extract_tab_renderers(data), fatal=False) or {}
data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {}
if (url != 'https://www.youtube.com/feed/recommended' if (url != 'https://www.youtube.com/feed/recommended'
and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page
and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])): and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])):
@ -5392,18 +5394,19 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'categories': ['News & Politics'], 'categories': ['News & Politics'],
'tags': list, 'tags': list,
'like_count': int, 'like_count': int,
'release_timestamp': 1642502819, 'release_timestamp': int,
'channel': 'Sky News', 'channel': 'Sky News',
'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ', 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ',
'age_limit': 0, 'age_limit': 0,
'view_count': int, 'view_count': int,
'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg', 'thumbnail': r're:https?://i\.ytimg\.com/vi/[^/]+/maxresdefault(?:_live)?\.jpg',
'playable_in_embed': True, 'playable_in_embed': True,
'release_date': '20220118', 'release_date': r're:\d+',
'availability': 'public', 'availability': 'public',
'live_status': 'is_live', 'live_status': 'is_live',
'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ', 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ',
'channel_follower_count': int 'channel_follower_count': int,
'concurrent_view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -5538,16 +5541,14 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
], ],
'playlist_mincount': 101, 'playlist_mincount': 101,
}, { }, {
'note': 'Topic without a UU playlist', # Destination channel with only a hidden self tab (tab id is UCtFRv9O2AHqOZjjynzrv-xg)
# Treat as a general feed
'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg', 'url': 'https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg',
'info_dict': { 'info_dict': {
'id': 'UCtFRv9O2AHqOZjjynzrv-xg', 'id': 'UCtFRv9O2AHqOZjjynzrv-xg',
'title': 'UCtFRv9O2AHqOZjjynzrv-xg', 'title': 'UCtFRv9O2AHqOZjjynzrv-xg',
'tags': [], 'tags': [],
}, },
'expected_warnings': [
'the playlist redirect gave error',
],
'playlist_mincount': 9, 'playlist_mincount': 9,
}, { }, {
'note': 'Youtube music Album', 'note': 'Youtube music Album',
@ -5615,6 +5616,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
'extractor_args': {'youtubetab': {'skip': ['webpage']}} 'extractor_args': {'youtubetab': {'skip': ['webpage']}}
}, },
'skip': 'Query for sorting no longer works',
}, { }, {
'note': 'API Fallback: Topic, should redirect to playlist?list=UU...', 'note': 'API Fallback: Topic, should redirect to playlist?list=UU...',
'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw', 'url': 'https://music.youtube.com/browse/UC9ALqqC4aIeG5iDs7i90Bfw',
@ -5633,10 +5635,6 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw', 'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
'availability': 'public', 'availability': 'public',
}, },
'expected_warnings': [
'does not have a videos tab',
r'[Uu]navailable videos (are|will be) hidden',
],
'playlist_mincount': 101, 'playlist_mincount': 101,
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -5715,13 +5713,155 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
}, },
'playlist_mincount': 50, 'playlist_mincount': 50,
}, {
# Channel with a real live tab (not to be mistaken with streams tab)
# Do not treat like it should redirect to live stream
'url': 'https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live',
'info_dict': {
'id': 'UCEH7P7kyJIkS_gJf93VYbmg',
'title': 'UCEH7P7kyJIkS_gJf93VYbmg - Live',
'tags': [],
},
'playlist_mincount': 20,
}, {
# Tab name is not the same as tab id
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/letsplay',
'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Let\'s play',
'tags': [],
},
'playlist_mincount': 8,
}, {
# Home tab id is literally home. Not to get mistaken with featured
'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/home',
'info_dict': {
'id': 'UCQvWX73GQygcwXOTSf_VDVg',
'title': 'UCQvWX73GQygcwXOTSf_VDVg - Home',
'tags': [],
},
'playlist_mincount': 8,
}, {
# Should get three playlists for videos, shorts and streams tabs
'url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA',
'info_dict': {
'id': 'UCK9V2B22uJYu3N7eR_BT9QA',
'title': 'Uploads for UCK9V2B22uJYu3N7eR_BT9QA'
},
'playlist_count': 3,
}, {
# Shorts tab with channel with handle
'url': 'https://www.youtube.com/@NotJustBikes/shorts',
'info_dict': {
'id': 'UC0intLFzLaudFG-xAvUEO-A',
'title': 'Not Just Bikes - Shorts',
'tags': 'count:12',
'uploader': 'Not Just Bikes',
'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'description': 'md5:7513148b1f02b924783157d84c4ea555',
'channel_follower_count': int,
'uploader_id': 'UC0intLFzLaudFG-xAvUEO-A',
'channel_id': 'UC0intLFzLaudFG-xAvUEO-A',
'uploader_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A',
'channel': 'Not Just Bikes',
},
'playlist_mincount': 10,
}, {
# Streams tab
'url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig/streams',
'info_dict': {
'id': 'UC3eYAvjCVwNHgkaGbXX3sig',
'title': '中村悠一 - Live',
'tags': 'count:7',
'channel_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
'channel_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'uploader_id': 'UC3eYAvjCVwNHgkaGbXX3sig',
'channel': '中村悠一',
'uploader_url': 'https://www.youtube.com/channel/UC3eYAvjCVwNHgkaGbXX3sig',
'channel_follower_count': int,
'uploader': '中村悠一',
'description': 'md5:e744f6c93dafa7a03c0c6deecb157300',
},
'playlist_mincount': 60,
}, {
# Channel with no uploads and hence no videos, streams, shorts tabs or uploads playlist. This should fail.
# See test_youtube_lists
'url': 'https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA',
'only_matching': True,
}, {
# No uploads and no UCID given. Should fail with no uploads error
# See test_youtube_lists
'url': 'https://www.youtube.com/news',
'only_matching': True
}, {
# No videos tab but has a shorts tab
'url': 'https://www.youtube.com/c/TKFShorts',
'info_dict': {
'id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
'title': 'Shorts Break - Shorts',
'tags': 'count:32',
'channel_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
'channel': 'Shorts Break',
'description': 'md5:a6c234cf3d50d878ef8721e34457cd11',
'uploader': 'Shorts Break',
'channel_follower_count': int,
'uploader_id': 'UCgJ5_1F6yJhYLnyMszUdmUg',
'uploader_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
'channel_url': 'https://www.youtube.com/channel/UCgJ5_1F6yJhYLnyMszUdmUg',
},
'playlist_mincount': 30,
}, {
# Trending Now Tab. tab id is empty
'url': 'https://www.youtube.com/feed/trending',
'info_dict': {
'id': 'trending',
'title': 'trending - Now',
'tags': [],
},
'playlist_mincount': 30,
}, {
# Trending Gaming Tab. tab id is empty
'url': 'https://www.youtube.com/feed/trending?bp=4gIcGhpnYW1pbmdfY29ycHVzX21vc3RfcG9wdWxhcg%3D%3D',
'info_dict': {
'id': 'trending',
'title': 'trending - Gaming',
'tags': [],
},
'playlist_mincount': 30,
}] }]
@classmethod @classmethod
def suitable(cls, url): def suitable(cls, url):
return False if YoutubeIE.suitable(url) else super().suitable(url) return False if YoutubeIE.suitable(url) else super().suitable(url)
_URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$') _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/[^?#/]+))?(?P<post>.*)$')
def _get_url_mobj(self, url):
mobj = self._URL_RE.match(url).groupdict()
mobj.update((k, '') for k, v in mobj.items() if v is None)
return mobj
def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'):
    """Return a ``(tab_id, tab_name)`` pair for a tab renderer dict.

    The name is the lower-cased ``title`` of the renderer (empty string if
    missing). The id is taken, in order of preference, from the renderer's
    ``tabIdentifier``, from the tab segment of the renderer's endpoint URL
    (resolved against *base_url*), or finally from the tab name itself.
    """
    tab_name = (tab.get('title') or '').lower()

    endpoint_path = traverse_obj(
        tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))
    endpoint_url = urljoin(base_url, endpoint_path)

    identifier = traverse_obj(tab, 'tabIdentifier', expected_type=str)
    if not identifier and endpoint_url:
        # Drop the leading "/" of the matched tab segment
        identifier = self._get_url_mobj(endpoint_url)['tab'][1:]
    if identifier:
        return identifier, tab_name

    # Fallback to tab name if we cannot get the tab id.
    # XXX: should we strip non-ascii letters? e.g. in the case of the "let's play" tab on the special gaming channel
    # Note that for a translated tab name this may result in an empty string, which we don't want.
    self.write_debug(f'Falling back to selected tab name: {tab_name}')
    name_to_id = {
        'home': 'featured',
        'live': 'streams',
    }
    return name_to_id.get(tab_name, tab_name), tab_name
def _has_tab(self, tabs, tab_id):
return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs)
@YoutubeTabBaseInfoExtractor.passthrough_smuggled_data @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
def _real_extract(self, url, smuggled_data): def _real_extract(self, url, smuggled_data):
@ -5730,14 +5870,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
urllib.parse.urlparse(url)._replace(netloc='www.youtube.com')) urllib.parse.urlparse(url)._replace(netloc='www.youtube.com'))
compat_opts = self.get_param('compat_opts', []) compat_opts = self.get_param('compat_opts', [])
def get_mobj(url): mobj = self._get_url_mobj(url)
mobj = self._URL_RE.match(url).groupdict() pre, tab, post, is_channel = mobj['pre'], mobj['tab'], mobj['post'], not mobj['not_channel']
mobj.update((k, '') for k, v in mobj.items() if v is None)
return mobj
mobj, redirect_warning = get_mobj(url), None
# Youtube returns incomplete data if tabname is not lower case
pre, tab, post, is_channel = mobj['pre'], mobj['tab'].lower(), mobj['post'], not mobj['not_channel']
if is_channel: if is_channel:
if smuggled_data.get('is_music_url'): if smuggled_data.get('is_music_url'):
if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist if item_id[:2] == 'VL': # Youtube music VL channels have an equivalent playlist
@ -5750,19 +5884,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
get_all=False, expected_type=str) get_all=False, expected_type=str)
if not murl: if not murl:
raise ExtractorError('Failed to resolve album to playlist') raise ExtractorError('Failed to resolve album to playlist')
return self.url_result(murl, ie=YoutubeTabIE.ie_key()) return self.url_result(murl, YoutubeTabIE)
elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/ elif mobj['channel_type'] == 'browse': # Youtube music /browse/ should be changed to /channel/
pre = f'https://www.youtube.com/channel/{item_id}' pre = f'https://www.youtube.com/channel/{item_id}'
original_tab_name = tab original_tab_id = tab[1:]
if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts: if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
# Home URLs should redirect to /videos/
redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
'To download only the videos in the home page, add a "/featured" to the URL')
tab = '/videos' tab = '/videos'
url = ''.join((pre, tab, post)) url = ''.join((pre, tab, post))
mobj = get_mobj(url) mobj = self._get_url_mobj(url)
# Handle both video/playlist URLs # Handle both video/playlist URLs
qs = parse_qs(url) qs = parse_qs(url)
@ -5775,77 +5906,94 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
# Common mistake: https://www.youtube.com/watch?list=playlist_id # Common mistake: https://www.youtube.com/watch?list=playlist_id
self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}') self.report_warning(f'A video URL was given without video ID. Trying to download playlist {playlist_id}')
url = f'https://www.youtube.com/playlist?list={playlist_id}' url = f'https://www.youtube.com/playlist?list={playlist_id}'
mobj = get_mobj(url) mobj = self._get_url_mobj(url)
if video_id and playlist_id: if not self._yes_playlist(playlist_id, video_id):
if self.get_param('noplaylist'): return self.url_result(
self.to_screen(f'Downloading just video {video_id} because of --no-playlist') f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
return self.url_result(f'https://www.youtube.com/watch?v={video_id}',
ie=YoutubeIE.ie_key(), video_id=video_id)
self.to_screen(f'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
data, ytcfg = self._extract_data(url, item_id) data, ytcfg = self._extract_data(url, item_id)
# YouTube may provide a non-standard redirect to the regional channel # YouTube may provide a non-standard redirect to the regional channel
# See: https://github.com/yt-dlp/yt-dlp/issues/2694 # See: https://github.com/yt-dlp/yt-dlp/issues/2694
# https://support.google.com/youtube/answer/2976814#zippy=,conditional-redirects
redirect_url = traverse_obj( redirect_url = traverse_obj(
data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False) data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
if redirect_url and 'no-youtube-channel-redirect' not in compat_opts: if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
redirect_url = ''.join(( redirect_url = ''.join((
urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post'])) urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}') self.to_screen(f'This playlist is likely not available in your region. Following conditional redirect to {redirect_url}')
return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key()) return self.url_result(redirect_url, YoutubeTabIE)
tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list) tab_results = []
if tabs: tabs = self._extract_tab_renderers(data)
if is_channel and tabs and 'no-youtube-channel-redirect' not in compat_opts:
selected_tab = self._extract_selected_tab(tabs) selected_tab = self._extract_selected_tab(tabs)
selected_tab_url = urljoin( selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated
url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'))) self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}')
translated_tab_name = selected_tab.get('title', '').lower()
if not original_tab_id and selected_tab_name:
# Prefer tab name from tab url as it is always in en, self.to_screen('Channel URLs download all uploads of the channel. '
# but only when preferred lang is set as it may not extract reliably in all cases. 'To download only the videos in a specific tab, pass the tab\'s URL')
selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name if self._has_tab(tabs, 'streams'):
or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:] # primary tab_results.append(self.url_result(''.join((pre, '/streams', post))))
or translated_tab_name) if self._has_tab(tabs, 'shorts'):
tab_results.append(self.url_result(''.join((pre, '/shorts', post))))
if selected_tab_name == 'home': # XXX: Members-only tab should also be extracted
selected_tab_name = 'featured'
requested_tab_name = mobj['tab'][1:] if not tab_results and selected_tab_id != 'videos':
# Channel does not have streams, shorts or videos tabs
if 'no-youtube-channel-redirect' not in compat_opts: if item_id[:2] != 'UC':
if requested_tab_name == 'live': # Live tab should have redirected to the video raise ExtractorError('This channel has no uploads', expected=True)
raise UserNotLive(video_id=mobj['id'])
if requested_tab_name not in ('', selected_tab_name): # Topic channels don't have /videos. Use the equivalent playlist instead
redirect_warning = f'The channel does not have a {requested_tab_name} tab' pl_id = f'UU{item_id[2:]}'
if not original_tab_name: pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
if item_id[:2] == 'UC': try:
# Topic channels don't have /videos. Use the equivalent playlist instead data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
pl_id = f'UU{item_id[2:]}' except ExtractorError:
pl_url = f'https://www.youtube.com/playlist?list={pl_id}' raise ExtractorError('This channel has no uploads', expected=True)
try:
data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
except ExtractorError:
redirect_warning += ' and the playlist redirect gave error'
else:
item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
redirect_warning += f'. Redirecting to playlist {pl_id} instead'
if selected_tab_name and selected_tab_name != requested_tab_name:
redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
else: else:
raise ExtractorError(redirect_warning, expected=True) item_id, url = pl_id, pl_url
self.to_screen(
f'The channel does not have a videos, shorts, or live tab. Redirecting to playlist {pl_id} instead')
elif tab_results and selected_tab_id != 'videos':
# When there are shorts/live tabs but not videos tab
url, data = ''.join((pre, post)), None
elif (original_tab_id or 'videos') != selected_tab_id:
if original_tab_id == 'live':
# Live tab should have redirected to the video
# Except in the case the channel has an actual live tab
# Example: https://www.youtube.com/channel/UCEH7P7kyJIkS_gJf93VYbmg/live
raise UserNotLive(video_id=mobj['id'])
elif selected_tab_name:
raise ExtractorError(f'This channel does not have a {original_tab_id} tab', expected=True)
# For channels such as https://www.youtube.com/channel/UCtFRv9O2AHqOZjjynzrv-xg
url = f'{pre}{post}'
if redirect_warning:
self.to_screen(redirect_warning)
self.write_debug(f'Final URL: {url}') self.write_debug(f'Final URL: {url}')
# YouTube sometimes provides a button to reload playlist with unavailable videos. # YouTube sometimes provides a button to reload playlist with unavailable videos.
if 'no-youtube-unavailable-videos' not in compat_opts: if 'no-youtube-unavailable-videos' not in compat_opts:
data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data data = self._reload_with_unavailable_videos(item_id, data, ytcfg) or data
self._extract_and_report_alerts(data, only_once=True) self._extract_and_report_alerts(data, only_once=True)
tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
tabs = self._extract_tab_renderers(data)
if tabs: if tabs:
return self._extract_from_tabs(item_id, ytcfg, data, tabs) tab_results[:0] = [self._extract_from_tabs(item_id, ytcfg, data, tabs)]
tab_results[0].update({
'extractor_key': YoutubeTabIE.ie_key(),
'extractor': YoutubeTabIE.IE_NAME,
'webpage_url': url,
})
if len(tab_results) == 1:
return tab_results[0]
elif len(tab_results) > 1:
return self.playlist_result(tab_results, item_id, title=f'Uploads for {item_id}')
playlist = traverse_obj( playlist = traverse_obj(
data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict) data, ('contents', 'twoColumnWatchNextResults', 'playlist', 'playlist'), expected_type=dict)
@ -5857,8 +6005,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
if video_id: if video_id:
if mobj['tab'] != '/live': # live tab is expected to redirect to video if mobj['tab'] != '/live': # live tab is expected to redirect to video
self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}') self.report_warning(f'Unable to recognize playlist. Downloading just video {video_id}')
return self.url_result(f'https://www.youtube.com/watch?v={video_id}', return self.url_result(f'https://www.youtube.com/watch?v={video_id}', YoutubeIE, video_id)
ie=YoutubeIE.ie_key(), video_id=video_id)
raise ExtractorError('Unable to recognize tab page') raise ExtractorError('Unable to recognize tab page')
@ -5891,12 +6038,13 @@ class YoutubePlaylistIE(InfoExtractor):
'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q', 'uploader_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2', 'description': 'md5:8fa6f52abb47a9552002fa3ddfc57fc2',
'view_count': int, 'view_count': int,
'uploader_url': 'https://www.youtube.com/user/Wickydoo', 'uploader_url': 'https://www.youtube.com/c/WickmanVT',
'modified_date': r're:\d{8}', 'modified_date': r're:\d{8}',
'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q', 'channel_id': 'UCKSpbfbl5kRQpTdL7kMc-1Q',
'channel': 'Wickman', 'channel': 'Wickman',
'tags': [], 'tags': [],
'channel_url': 'https://www.youtube.com/user/Wickydoo', 'channel_url': 'https://www.youtube.com/c/WickmanVT',
'availability': 'public',
}, },
'playlist_mincount': 29, 'playlist_mincount': 29,
}, { }, {
@ -5926,7 +6074,7 @@ class YoutubePlaylistIE(InfoExtractor):
'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw', 'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
'availability': 'public', 'availability': 'public',
}, },
'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'],
}, { }, {
'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
'playlist_mincount': 455, 'playlist_mincount': 455,

Loading…
Cancel
Save