diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index cdf19c4d6..94e113d44 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -131,6 +131,8 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, }, + # ios has HLS live streams + # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680 'ios': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { @@ -162,6 +164,8 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26 }, + # mweb has 'ultralow' formats + # See: https://github.com/yt-dlp/yt-dlp/pull/557 'mweb': { 'INNERTUBE_API_KEY': 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8', 'INNERTUBE_CONTEXT': { @@ -205,23 +209,28 @@ build_innertube_clients() class YoutubeBaseInfoExtractor(InfoExtractor): """Provide base functions for Youtube extractors""" - _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' - _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' - - _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' - _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' - _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' _RESERVED_NAMES = ( - r'channel|c|user|browse|playlist|watch|w|v|embed|e|watch_popup|shorts|' - r'movies|results|shared|hashtag|trending|feed|feeds|oembed|get_video_info|' + r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|' + r'shorts|movies|results|shared|hashtag|trending|feed|feeds|' + r'browse|oembed|get_video_info|iframe_api|s/player|' r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout') + _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' + _NETRC_MACHINE = 'youtube' + # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False - _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' + r''' # Unused since login is broken + _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' + _TWOFACTOR_URL = 'https://accounts.google.com/signin/challenge' + + _LOOKUP_URL = 'https://accounts.google.com/_/signin/sl/lookup' + _CHALLENGE_URL = 'https://accounts.google.com/_/signin/sl/challenge' + _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' + ''' def _login(self): """ @@ -460,10 +469,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return _func(ytcfg) or _func(self._get_default_ytcfg(default_client)) def _extract_client_name(self, ytcfg, default_client='web'): - return ( - try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_NAME'], compat_str) - or self._ytcfg_get_safe( - ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientName'], compat_str, default_client)) + return self._ytcfg_get_safe( + ytcfg, (lambda x: x['INNERTUBE_CLIENT_NAME'], + lambda x: x['INNERTUBE_CONTEXT']['client']['clientName']), compat_str, default_client) @staticmethod def _extract_session_index(*data): @@ -473,10 +481,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return session_index def _extract_client_version(self, ytcfg, default_client='web'): - return ( - try_get(ytcfg, lambda x: x['INNERTUBE_CLIENT_VERSION'], compat_str) - or self._ytcfg_get_safe( - ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion'], compat_str, default_client)) + return self._ytcfg_get_safe( + ytcfg, (lambda x: x['INNERTUBE_CLIENT_VERSION'], + lambda x: x['INNERTUBE_CONTEXT']['client']['clientVersion']), compat_str, default_client) def _extract_api_key(self, ytcfg=None, default_client='web'): return self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_API_KEY'], compat_str, default_client) @@ -1181,7 +1188,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'Tq92D6wQ1mg', 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', - 'ext': 'mp4','upload_date': '20191227', + 'ext': 'mp4', + 'upload_date': '20191227', 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', 'uploader': 'Projekt Melody', 'description': 'md5:17eccca93a786d51bc67646756894066', @@ -2741,8 +2749,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for f in formats: if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled f['source_preference'] = -10 - note = f.get('format_note') - f['format_note'] = f'{note} (throttled)' if note else '(throttled)' + # TODO: this method is not reliable + f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)' # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this