@ -2,6 +2,7 @@ import base64
import calendar
import calendar
import copy
import copy
import datetime
import datetime
import enum
import hashlib
import hashlib
import itertools
import itertools
import json
import json
@ -275,6 +276,15 @@ def build_innertube_clients():
build_innertube_clients ( )
build_innertube_clients ( )
class BadgeType(enum.Enum):
    """Language-independent identifiers for the badges recognised on YouTube renderers."""

    # Availability / privacy badges
    AVAILABILITY_UNLISTED = enum.auto()
    AVAILABILITY_PRIVATE = enum.auto()
    AVAILABILITY_PUBLIC = enum.auto()
    AVAILABILITY_PREMIUM = enum.auto()
    AVAILABILITY_SUBSCRIPTION = enum.auto()

    # Live-status badge
    LIVE_NOW = enum.auto()
class YoutubeBaseInfoExtractor ( InfoExtractor ) :
class YoutubeBaseInfoExtractor ( InfoExtractor ) :
""" Provide base functions for Youtube extractors """
""" Provide base functions for Youtube extractors """
@ -367,6 +377,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r ' (?:www \ .)?piped \ .privacy \ .com \ .de ' ,
r ' (?:www \ .)?piped \ .privacy \ .com \ .de ' ,
)
)
# extracted from account/account_menu ep
# XXX: These are the supported YouTube UI and API languages,
# which differ slightly from the languages supported for translation in YouTube Studio
_SUPPORTED_LANG_CODES = [
' af ' , ' az ' , ' id ' , ' ms ' , ' bs ' , ' ca ' , ' cs ' , ' da ' , ' de ' , ' et ' , ' en-IN ' , ' en-GB ' , ' en ' , ' es ' ,
' es-419 ' , ' es-US ' , ' eu ' , ' fil ' , ' fr ' , ' fr-CA ' , ' gl ' , ' hr ' , ' zu ' , ' is ' , ' it ' , ' sw ' , ' lv ' ,
' lt ' , ' hu ' , ' nl ' , ' no ' , ' uz ' , ' pl ' , ' pt-PT ' , ' pt ' , ' ro ' , ' sq ' , ' sk ' , ' sl ' , ' sr-Latn ' , ' fi ' ,
' sv ' , ' vi ' , ' tr ' , ' be ' , ' bg ' , ' ky ' , ' kk ' , ' mk ' , ' mn ' , ' ru ' , ' sr ' , ' uk ' , ' el ' , ' hy ' , ' iw ' ,
' ur ' , ' ar ' , ' fa ' , ' ne ' , ' mr ' , ' hi ' , ' as ' , ' bn ' , ' pa ' , ' gu ' , ' or ' , ' ta ' , ' te ' , ' kn ' , ' ml ' ,
' si ' , ' th ' , ' lo ' , ' my ' , ' ka ' , ' am ' , ' km ' , ' zh-CN ' , ' zh-TW ' , ' zh-HK ' , ' ja ' , ' ko '
]
@functools.cached_property
def _preferred_lang(self):
    """
    Language code taken from the `lang` extractor argument (ie_key 'Youtube', case-sensitive).

    Returns the code if it is listed in _SUPPORTED_LANG_CODES.
    Returns None if the argument is unset or empty.
    Raises ExtractorError (expected=True) if the code is not in _SUPPORTED_LANG_CODES.
    Reports a warning for any accepted code other than 'en'.
    """
    lang_code = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]

    # No preference configured
    if not lang_code:
        return None

    if lang_code not in self._SUPPORTED_LANG_CODES:
        supported_codes = join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")
        raise ExtractorError(
            f'Unsupported language code: {lang_code}. Supported language codes (case-sensitive): {supported_codes}.',
            expected=True)

    if lang_code != 'en':
        self.report_warning(
            f'Preferring "{lang_code}" translated fields. Note that some metadata extraction may fail or be incorrect.')

    return lang_code
def _initialize_consent ( self ) :
def _initialize_consent ( self ) :
cookies = self . _get_cookies ( ' https://www.youtube.com/ ' )
cookies = self . _get_cookies ( ' https://www.youtube.com/ ' )
if cookies . get ( ' __Secure-3PSID ' ) :
if cookies . get ( ' __Secure-3PSID ' ) :
@ -391,7 +431,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
pref = dict ( urllib . parse . parse_qsl ( pref_cookie . value ) )
pref = dict ( urllib . parse . parse_qsl ( pref_cookie . value ) )
except ValueError :
except ValueError :
self . report_warning ( ' Failed to parse user PREF cookie ' + bug_reports_message ( ) )
self . report_warning ( ' Failed to parse user PREF cookie ' + bug_reports_message ( ) )
pref . update ( { ' hl ' : ' en ' , ' tz ' : ' UTC ' } )
pref . update ( { ' hl ' : self . _preferred_lang or ' en ' , ' tz ' : ' UTC ' } )
self . _set_cookie ( ' .youtube.com ' , name = ' PREF ' , value = urllib . parse . urlencode ( pref ) )
self . _set_cookie ( ' .youtube.com ' , name = ' PREF ' , value = urllib . parse . urlencode ( pref ) )
def _real_initialize ( self ) :
def _real_initialize ( self ) :
@ -439,7 +479,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
( ytcfg , self . _get_default_ytcfg ( default_client ) ) , ' INNERTUBE_CONTEXT ' , expected_type = dict )
( ytcfg , self . _get_default_ytcfg ( default_client ) ) , ' INNERTUBE_CONTEXT ' , expected_type = dict )
# Enforce language and tz for extraction
# Enforce language and tz for extraction
client_context = traverse_obj ( context , ' client ' , expected_type = dict , default = { } )
client_context = traverse_obj ( context , ' client ' , expected_type = dict , default = { } )
client_context . update ( { ' hl ' : ' en ' , ' timeZone ' : ' UTC ' , ' utcOffsetMinutes ' : 0 } )
client_context . update ( { ' hl ' : self . _preferred_lang or ' en ' , ' timeZone ' : ' UTC ' , ' utcOffsetMinutes ' : 0 } )
return context
return context
_SAPISID = None
_SAPISID = None
@ -678,13 +718,49 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
return self . _report_alerts ( self . _extract_alerts ( data ) , * args , * * kwargs )
return self . _report_alerts ( self . _extract_alerts ( data ) , * args , * * kwargs )
def _extract_badges(self, renderer: dict):
    """
    Collect the badges attached to a renderer as a list of {'type': BadgeType} dicts.

    Every 'metadataBadgeRenderer' found under renderer['badges'] is classified by,
    in order of preference:
      1. its privacy icon type ('icon' -> 'iconType'),
      2. its badge style ('style'),
      3. a substring match on its lowercased 'label' (fallback).

    Badges that match none of these are skipped.

    @param renderer  dict that may contain a 'badges' list
    @returns         list of dicts, each with a single 'type' key holding a BadgeType
    """
    privacy_icon_map = {
        'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
        'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
        'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
    }

    badge_style_map = {
        'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
        'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
    }

    label_map = {
        'unlisted': BadgeType.AVAILABILITY_UNLISTED,
        'private': BadgeType.AVAILABILITY_PRIVATE,
        'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
        'live': BadgeType.LIVE_NOW,
        'premium': BadgeType.AVAILABILITY_PREMIUM
    }

    badges = []
    for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
        # expected_type=str on both lookups keeps unhashable values out of dict.get()
        badge_type = (
            privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
            or badge_style_map.get(traverse_obj(badge, 'style', expected_type=str))
        )
        if badge_type:
            badges.append({'type': badge_type})
            continue

        # fallback, won't work in some languages
        label = traverse_obj(badge, 'label', expected_type=str, default='').lower()
        for match, label_badge_type in label_map.items():
            if match in label:
                # Record the type found via the label; `badge_type` is always falsy here
                badges.append({'type': label_badge_type})
                # One badge yields at most one entry
                break

    return badges
@staticmethod
def _has_badge(badges, badge_type):
    """Return whether traverse_obj finds any entry in `badges` whose 'type' equals `badge_type`."""
    def is_requested_type(_, badge):
        return badge['type'] == badge_type

    matching_badges = traverse_obj(badges, is_requested_type)
    return bool(matching_badges)
@staticmethod
@staticmethod
def _get_text ( data , * path_list , max_runs = None ) :
def _get_text ( data , * path_list , max_runs = None ) :
for path in path_list or [ None ] :
for path in path_list or [ None ] :
@ -755,9 +831,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
except ValueError :
except ValueError :
return None
return None
def _ extract_time_text( self , renderer , * path_lis t) :
def _ parse_time_text( self , tex t) :
""" @returns (timestamp, time_text) """
if not text :
text = self . _get_text ( renderer , * path_list ) or ' '
return
dt = self . extract_relative_time ( text )
dt = self . extract_relative_time ( text )
timestamp = None
timestamp = None
if isinstance ( dt , datetime . datetime ) :
if isinstance ( dt , datetime . datetime ) :
@ -770,9 +846,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
( r ' ([a-z]+ \ s* \ d { 1,2},? \ s*20 \ d {2} ) ' , r ' (?:.+|^)(?:live|premieres|ed|ing)(?: \ s*(?:on|for))? \ s*(.+ \ d) ' ) ,
( r ' ([a-z]+ \ s* \ d { 1,2},? \ s*20 \ d {2} ) ' , r ' (?:.+|^)(?:live|premieres|ed|ing)(?: \ s*(?:on|for))? \ s*(.+ \ d) ' ) ,
text . lower ( ) , ' time text ' , default = None ) ) )
text . lower ( ) , ' time text ' , default = None ) ) )
if text and timestamp is None :
if text and timestamp is None and self . _preferred_lang in ( None , ' en ' ) :
self . report_warning ( f " Cannot parse localized time text ' { text } ' " + bug_reports_message ( ) , only_once = True )
self . report_warning (
return timestamp , text
f ' Cannot parse localized time text " { text } " ' , only_once = True )
return timestamp
def _extract_response ( self , item_id , query , note = ' Downloading API JSON ' , headers = None ,
def _extract_response ( self , item_id , query , note = ' Downloading API JSON ' , headers = None ,
ytcfg = None , check_get_keys = None , ep = ' browse ' , fatal = True , api_hostname = None ,
ytcfg = None , check_get_keys = None , ep = ' browse ' , fatal = True , api_hostname = None ,
@ -848,7 +925,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
channel_id = traverse_obj (
channel_id = traverse_obj (
renderer , ( ' shortBylineText ' , ' runs ' , . . . , ' navigationEndpoint ' , ' browseEndpoint ' , ' browseId ' ) ,
renderer , ( ' shortBylineText ' , ' runs ' , . . . , ' navigationEndpoint ' , ' browseEndpoint ' , ' browseId ' ) ,
expected_type = str , get_all = False )
expected_type = str , get_all = False )
time stamp, time _text = self . _ extrac t_time _text( renderer , ' publishedTimeText ' )
time _text = self . _ g et_text( renderer , ' publishedTimeText ' ) or ' '
scheduled_timestamp = str_to_int ( traverse_obj ( renderer , ( ' upcomingEventData ' , ' startTime ' ) , get_all = False ) )
scheduled_timestamp = str_to_int ( traverse_obj ( renderer , ( ' upcomingEventData ' , ' startTime ' ) , get_all = False ) )
overlay_style = traverse_obj (
overlay_style = traverse_obj (
renderer , ( ' thumbnailOverlays ' , . . . , ' thumbnailOverlayTimeStatusRenderer ' , ' style ' ) ,
renderer , ( ' thumbnailOverlays ' , . . . , ' thumbnailOverlayTimeStatusRenderer ' , ' style ' ) ,
@ -874,15 +951,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
' uploader ' : uploader ,
' uploader ' : uploader ,
' channel_id ' : channel_id ,
' channel_id ' : channel_id ,
' thumbnails ' : thumbnails ,
' thumbnails ' : thumbnails ,
' upload_date ' : ( strftime_or_none ( timestamp , ' % Y % m %d ' )
' upload_date ' : ( strftime_or_none ( self . _parse_time_text ( time_text ) , ' % Y % m %d ' )
if self . _configuration_arg ( ' approximate_date ' , ie_key = ' youtubetab ' )
if self . _configuration_arg ( ' approximate_date ' , ie_key = ' youtubetab ' )
else None ) ,
else None ) ,
' live_status ' : ( ' is_upcoming ' if scheduled_timestamp is not None
' live_status ' : ( ' is_upcoming ' if scheduled_timestamp is not None
else ' was_live ' if ' streamed ' in time_text . lower ( )
else ' was_live ' if ' streamed ' in time_text . lower ( )
else ' is_live ' if overlay_style == ' LIVE ' or ' live now ' in badges
else ' is_live ' if overlay_style == ' LIVE ' or self . _has_badge ( badges , BadgeType . LIVE_NOW )
else None ) ,
else None ) ,
' release_timestamp ' : scheduled_timestamp ,
' release_timestamp ' : scheduled_timestamp ,
' availability ' : self . _availability ( needs_premium = ' premium ' in badges , needs_subscription = ' members only ' in badges )
' availability ' :
' public ' if self . _has_badge ( badges , BadgeType . AVAILABILITY_PUBLIC )
else self . _availability (
is_private = self . _has_badge ( badges , BadgeType . AVAILABILITY_PRIVATE ) or None ,
needs_premium = self . _has_badge ( badges , BadgeType . AVAILABILITY_PREMIUM ) or None ,
needs_subscription = self . _has_badge ( badges , BadgeType . AVAILABILITY_SUBSCRIPTION ) or None ,
is_unlisted = self . _has_badge ( badges , BadgeType . AVAILABILITY_UNLISTED ) or None )
}
}
@ -2306,6 +2389,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' tags ' : [ ] ,
' tags ' : [ ] ,
' uploader_url ' : ' http://www.youtube.com/user/nao20010128nao ' ,
' uploader_url ' : ' http://www.youtube.com/user/nao20010128nao ' ,
}
}
} , {
# Prefer primary title+description language metadata by default
# Do not prefer translated description if primary is empty
' url ' : ' https://www.youtube.com/watch?v=el3E4MbxRqQ ' ,
' info_dict ' : {
' id ' : ' el3E4MbxRqQ ' ,
' ext ' : ' mp4 ' ,
' title ' : ' dlp test video 2 - primary sv no desc ' ,
' description ' : ' ' ,
' channel ' : ' cole-dlp-test-acc ' ,
' tags ' : [ ] ,
' view_count ' : int ,
' channel_url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' like_count ' : int ,
' playable_in_embed ' : True ,
' availability ' : ' unlisted ' ,
' thumbnail ' : ' https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp ' ,
' age_limit ' : 0 ,
' duration ' : 5 ,
' uploader_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' uploader_url ' : ' http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' live_status ' : ' not_live ' ,
' upload_date ' : ' 20220908 ' ,
' categories ' : [ ' People & Blogs ' ] ,
' uploader ' : ' cole-dlp-test-acc ' ,
' channel_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
} ,
' params ' : { ' skip_download ' : True }
} , {
# Extractor argument: prefer translated title+description
' url ' : ' https://www.youtube.com/watch?v=gHKT4uU8Zng ' ,
' info_dict ' : {
' id ' : ' gHKT4uU8Zng ' ,
' ext ' : ' mp4 ' ,
' channel ' : ' cole-dlp-test-acc ' ,
' tags ' : [ ] ,
' duration ' : 5 ,
' live_status ' : ' not_live ' ,
' channel_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' upload_date ' : ' 20220728 ' ,
' uploader_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' view_count ' : int ,
' categories ' : [ ' People & Blogs ' ] ,
' thumbnail ' : ' https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp ' ,
' title ' : ' dlp test video title translated (fr) ' ,
' availability ' : ' public ' ,
' uploader ' : ' cole-dlp-test-acc ' ,
' age_limit ' : 0 ,
' description ' : ' dlp test video description translated (fr) ' ,
' playable_in_embed ' : True ,
' channel_url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' uploader_url ' : ' http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
} ,
' params ' : { ' skip_download ' : True , ' extractor_args ' : { ' youtube ' : { ' lang ' : [ ' fr ' ] } } } ,
' expected_warnings ' : [ r ' Preferring " fr " translated fields ' ] ,
} , {
} , {
' note ' : ' 6 channel audio ' ,
' note ' : ' 6 channel audio ' ,
' url ' : ' https://www.youtube.com/watch?v=zgdo7-RRjgo ' ,
' url ' : ' https://www.youtube.com/watch?v=zgdo7-RRjgo ' ,
@ -2907,8 +3045,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
text = self . _get_text ( comment_renderer , ' contentText ' )
text = self . _get_text ( comment_renderer , ' contentText ' )
# note: timestamp is an estimate calculated from the current time and time_text
# Timestamp is an estimate calculated from the current time and time_text
timestamp , time_text = self . _extract_time_text ( comment_renderer , ' publishedTimeText ' )
time_text = self . _get_text ( comment_renderer , ' publishedTimeText ' ) or ' '
timestamp = self . _parse_time_text ( time_text )
author = self . _get_text ( comment_renderer , ' authorText ' )
author = self . _get_text ( comment_renderer , ' authorText ' )
author_id = try_get ( comment_renderer ,
author_id = try_get ( comment_renderer ,
lambda x : x [ ' authorEndpoint ' ] [ ' browseEndpoint ' ] [ ' browseId ' ] , str )
lambda x : x [ ' authorEndpoint ' ] [ ' browseEndpoint ' ] [ ' browseId ' ] , str )
@ -3554,11 +3694,19 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
microformats = traverse_obj (
microformats = traverse_obj (
player_responses , ( . . . , ' microformat ' , ' playerMicroformatRenderer ' ) ,
player_responses , ( . . . , ' microformat ' , ' playerMicroformatRenderer ' ) ,
expected_type = dict , default = [ ] )
expected_type = dict , default = [ ] )
video_title = (
get_first ( video_details , ' title ' )
translated_title = self . _get_text ( microformats , ( . . . , ' title ' ) )
or self . _get_text ( microformats , ( . . . , ' title ' ) )
video_title = ( self . _preferred_lang and translated_title
or get_first ( video_details , ' title ' ) # primary
or translated_title
or search_meta ( [ ' og:title ' , ' twitter:title ' , ' title ' ] ) )
or search_meta ( [ ' og:title ' , ' twitter:title ' , ' title ' ] ) )
video_description = get_first ( video_details , ' shortDescription ' )
translated_description = self . _get_text ( microformats , ( . . . , ' description ' ) )
original_description = get_first ( video_details , ' shortDescription ' )
video_description = (
self . _preferred_lang and translated_description
# If original description is blank, it will be an empty string.
# Do not prefer translated description in this case.
or original_description if original_description is not None else translated_description )
multifeed_metadata_list = get_first (
multifeed_metadata_list = get_first (
player_responses ,
player_responses ,
@ -3988,7 +4136,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
and info . get ( ' live_status ' ) != ' is_upcoming '
and info . get ( ' live_status ' ) != ' is_upcoming '
and ' no-youtube-prefer-utc-upload-date ' not in self . get_param ( ' compat_opts ' , [ ] )
and ' no-youtube-prefer-utc-upload-date ' not in self . get_param ( ' compat_opts ' , [ ] )
) :
) :
upload_date = strftime_or_none ( self . _extract_time_text ( vpir , ' dateText ' ) [ 0 ] , ' % Y % m %d ' ) or upload_date
upload_date = strftime_or_none (
self . _parse_time_text ( self . _get_text ( vpir , ' dateText ' ) ) , ' % Y % m %d ' ) or upload_date
info [ ' upload_date ' ] = upload_date
info [ ' upload_date ' ] = upload_date
for to , frm in fallbacks . items ( ) :
for to , frm in fallbacks . items ( ) :
@ -4000,33 +4149,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if v :
if v :
info [ d_k ] = v
info [ d_k ] = v
is_private = get_first ( video_details , ' isPrivate ' , expected_type = bool )
badges = self . _extract_badges ( traverse_obj ( contents , ( . . . , ' videoPrimaryInfoRenderer ' ) , get_all = False ) )
is_unlisted = get_first ( microformats , ' isUnlisted ' , expected_type = bool )
is_membersonly = None
is_private = ( self . _has_badge ( badges , BadgeType . AVAILABILITY_PRIVATE )
is_premium = None
or get_first ( video_details , ' isPrivate ' , expected_type = bool ) )
if initial_data and is_private is not None :
is_membersonly = False
info [ ' availability ' ] = (
is_premium = False
' public ' if self . _has_badge ( badges , BadgeType . AVAILABILITY_PUBLIC )
contents = try_get ( initial_data , lambda x : x [ ' contents ' ] [ ' twoColumnWatchNextResults ' ] [ ' results ' ] [ ' results ' ] [ ' contents ' ] , list ) or [ ]
else self . _availability (
badge_labels = set ( )
for content in contents :
if not isinstance ( content , dict ) :
continue
badge_labels . update ( self . _extract_badges ( content . get ( ' videoPrimaryInfoRenderer ' ) ) )
for badge_label in badge_labels :
if badge_label . lower ( ) == ' members only ' :
is_membersonly = True
elif badge_label . lower ( ) == ' premium ' :
is_premium = True
elif badge_label . lower ( ) == ' unlisted ' :
is_unlisted = True
info [ ' availability ' ] = self . _availability (
is_private = is_private ,
is_private = is_private ,
needs_premium = is_premium ,
needs_premium = (
needs_subscription = is_membersonly ,
self . _has_badge ( badges , BadgeType . AVAILABILITY_PREMIUM )
or False if initial_data and is_private is not None else None ) ,
needs_subscription = (
self . _has_badge ( badges , BadgeType . AVAILABILITY_SUBSCRIPTION )
or False if initial_data and is_private is not None else None ) ,
needs_auth = info [ ' age_limit ' ] > = 18 ,
needs_auth = info [ ' age_limit ' ] > = 18 ,
is_unlisted = None if is_private is None else is_unlisted )
is_unlisted = None if is_private is None else (
self . _has_badge ( badges , BadgeType . AVAILABILITY_UNLISTED )
or get_first ( microformats , ' isUnlisted ' , expected_type = bool ) ) ) )
info [ ' __post_extractor ' ] = self . extract_comments ( master_ytcfg , video_id , contents , webpage )
info [ ' __post_extractor ' ] = self . extract_comments ( master_ytcfg , video_id , contents , webpage )
@ -4472,7 +4613,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
playlist_id = item_id
playlist_id = item_id
playlist_stats = traverse_obj ( primary_sidebar_renderer , ' stats ' )
playlist_stats = traverse_obj ( primary_sidebar_renderer , ' stats ' )
last_updated_unix , _ = self . _extract_time_text ( playlist_stats , 2 )
last_updated_unix = self . _parse_time_text ( self . _get_text ( playlist_stats , 2 ) )
if title is None :
if title is None :
title = self . _get_text ( data , ( ' header ' , ' hashtagHeaderRenderer ' , ' hashtag ' ) ) or playlist_id
title = self . _get_text ( data , ( ' header ' , ' hashtagHeaderRenderer ' , ' hashtag ' ) ) or playlist_id
title + = format_field ( selected_tab , ' title ' , ' - %s ' )
title + = format_field ( selected_tab , ' title ' , ' - %s ' )
@ -4566,31 +4707,37 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
Note : Unless YouTube tells us explicitly , we do not assume it is public
Note : Unless YouTube tells us explicitly , we do not assume it is public
@param data : response
@param data : response
"""
"""
is_private = is_unlisted = None
renderer = self . _extract_sidebar_info_renderer ( data , ' playlistSidebarPrimaryInfoRenderer ' ) or { }
renderer = self . _extract_sidebar_info_renderer ( data , ' playlistSidebarPrimaryInfoRenderer ' ) or { }
badge_labels = self . _extract_badges ( renderer )
player_header_privacy = traverse_obj (
data , ( ' header ' , ' playlistHeaderRenderer ' , ' privacy ' ) , expected_type = str )
badges = self . _extract_badges ( renderer )
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
# Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
privacy_dropdown_entries = try_get (
privacy_setting_icon = traverse_obj (
renderer , lambda x : x [ ' privacyForm ' ] [ ' dropdownFormFieldRenderer ' ] [ ' dropdown ' ] [ ' dropdownRenderer ' ] [ ' entries ' ] , list ) or [ ]
renderer , (
for renderer_dict in privacy_dropdown_entries :
' privacyForm ' , ' dropdownFormFieldRenderer ' , ' dropdown ' , ' dropdownRenderer ' , ' entries ' ,
is_selected = try_get (
lambda _ , v : v [ ' privacyDropdownItemRenderer ' ] [ ' isSelected ' ] , ' privacyDropdownItemRenderer ' , ' icon ' , ' iconType ' ) ,
renderer_dict , lambda x : x [ ' privacyDropdownItemRenderer ' ] [ ' isSelected ' ] , bool ) or False
get_all = False , expected_type = str )
if not is_selected :
continue
label = self . _get_text ( renderer_dict , ( ' privacyDropdownItemRenderer ' , ' label ' ) )
if label :
badge_labels . add ( label . lower ( ) )
break
for badge_label in badge_labels :
return (
if badge_label == ' unlisted ' :
' public ' if (
is_unlisted = True
self . _has_badge ( badges , BadgeType . AVAILABILITY_PUBLIC )
elif badge_label == ' private ' :
or player_header_privacy == ' PUBLIC '
is_private = True
or privacy_setting_icon == ' PRIVACY_PUBLIC ' )
elif badge_label == ' public ' :
else self . _availability (
is_unlisted = is_private = False
is_private = (
return self . _availability ( is_private , False , False , False , is_unlisted )
self . _has_badge ( badges , BadgeType . AVAILABILITY_PRIVATE )
or player_header_privacy == ' PRIVATE ' if player_header_privacy is not None
else privacy_setting_icon == ' PRIVACY_PRIVATE ' if privacy_setting_icon is not None else None ) ,
is_unlisted = (
self . _has_badge ( badges , BadgeType . AVAILABILITY_UNLISTED )
or player_header_privacy == ' UNLISTED ' if player_header_privacy is not None
else privacy_setting_icon == ' PRIVACY_UNLISTED ' if privacy_setting_icon is not None else None ) ,
needs_subscription = self . _has_badge ( badges , BadgeType . AVAILABILITY_SUBSCRIPTION ) or None ,
needs_premium = self . _has_badge ( badges , BadgeType . AVAILABILITY_PREMIUM ) or None ,
needs_auth = False ) )
@staticmethod
@staticmethod
def _extract_sidebar_info_renderer ( data , info_renderer , expected_type = dict ) :
def _extract_sidebar_info_renderer ( data , info_renderer , expected_type = dict ) :
@ -4866,6 +5013,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
' channel_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_count ' : 1 ,
' playlist_count ' : 1 ,
} , {
} , {
@ -4883,6 +5031,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
' channel_id ' : ' UCmlqkdCBesrv2Lak1mF_MxA ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_count ' : 0 ,
' playlist_count ' : 0 ,
} , {
} , {
@ -5029,6 +5178,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel_id ' : ' UCEPzS1rYsrkqzSLNp76nrcg ' ,
' channel_id ' : ' UCEPzS1rYsrkqzSLNp76nrcg ' ,
' channel_url ' : ' https://www.youtube.com/c/ChRiStIaAn008 ' ,
' channel_url ' : ' https://www.youtube.com/c/ChRiStIaAn008 ' ,
' channel ' : ' Christiaan008 ' ,
' channel ' : ' Christiaan008 ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_count ' : 96 ,
' playlist_count ' : 96 ,
} , {
} , {
@ -5047,6 +5197,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' view_count ' : int ,
' view_count ' : int ,
' description ' : ' ' ,
' description ' : ' ' ,
' channel_id ' : ' UCBABnxM4Ar9ten8Mdjj1j0Q ' ,
' channel_id ' : ' UCBABnxM4Ar9ten8Mdjj1j0Q ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_mincount ' : 1123 ,
' playlist_mincount ' : 1123 ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
@ -5070,6 +5221,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel ' : ' Interstellar Movie ' ,
' channel ' : ' Interstellar Movie ' ,
' description ' : ' ' ,
' description ' : ' ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_mincount ' : 21 ,
' playlist_mincount ' : 21 ,
} , {
} , {
@ -5088,6 +5240,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel_url ' : ' https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q ' ,
' channel_id ' : ' UCTYLiWFZy8xtPwxFwX9rV7Q ' ,
' channel_id ' : ' UCTYLiWFZy8xtPwxFwX9rV7Q ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_mincount ' : 200 ,
' playlist_mincount ' : 200 ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
@ -5107,6 +5260,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' uploader_url ' : ' https://www.youtube.com/c/blanktv ' ,
' uploader_url ' : ' https://www.youtube.com/c/blanktv ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' description ' : ' ' ,
' description ' : ' ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_mincount ' : 1000 ,
' playlist_mincount ' : 1000 ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
@ -5125,6 +5279,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel_id ' : ' UC9-y-6csu5WGm29I7JiwpnA ' ,
' channel_id ' : ' UC9-y-6csu5WGm29I7JiwpnA ' ,
' channel_url ' : ' https://www.youtube.com/user/Computerphile ' ,
' channel_url ' : ' https://www.youtube.com/user/Computerphile ' ,
' channel ' : ' Computerphile ' ,
' channel ' : ' Computerphile ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_mincount ' : 11 ,
' playlist_mincount ' : 11 ,
} , {
} , {
@ -5290,6 +5445,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel_id ' : ' UC_aEa8K-EOJ3D6gOs7HcyNg ' ,
' channel_id ' : ' UC_aEa8K-EOJ3D6gOs7HcyNg ' ,
' tags ' : [ ] ,
' tags ' : [ ] ,
' channel ' : ' NoCopyrightSounds ' ,
' channel ' : ' NoCopyrightSounds ' ,
' availability ' : ' public ' ,
} ,
} ,
' playlist_mincount ' : 166 ,
' playlist_mincount ' : 166 ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
@ -5310,6 +5466,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' modified_date ' : r ' re: \ d {8} ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' description ' : ' ' ,
' description ' : ' ' ,
' availability ' : ' public ' ,
} ,
} ,
' expected_warnings ' : [
' expected_warnings ' : [
' The URL does not have a videos tab ' ,
' The URL does not have a videos tab ' ,
@ -5410,6 +5567,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' channel ' : ' Royalty Free Music - Topic ' ,
' channel ' : ' Royalty Free Music - Topic ' ,
' view_count ' : int ,
' view_count ' : int ,
' uploader_url ' : ' https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw ' ,
' availability ' : ' public ' ,
} ,
} ,
' expected_warnings ' : [
' expected_warnings ' : [
' does not have a videos tab ' ,
' does not have a videos tab ' ,
@ -5443,6 +5601,45 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
' uploader_url ' : ' https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q ' ,
} ,
} ,
' playlist_mincount ' : 2
' playlist_mincount ' : 2
} , {
' note ' : ' translated tab name ' ,
' url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists ' ,
' info_dict ' : {
' id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' tags ' : [ ] ,
' uploader_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' description ' : ' ' ,
' title ' : ' cole-dlp-test-acc - 再生リスト ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' uploader ' : ' cole-dlp-test-acc ' ,
' channel_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' channel ' : ' cole-dlp-test-acc ' ,
} ,
' playlist_mincount ' : 1 ,
' params ' : { ' extractor_args ' : { ' youtube ' : { ' lang ' : [ ' ja ' ] } } } ,
' expected_warnings ' : [ ' Preferring " ja " ' ] ,
} , {
# XXX: this should really check flat playlist entries, but the test suite doesn't support that
' note ' : ' preferred lang set with playlist with translated video titles ' ,
' url ' : ' https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0 ' ,
' info_dict ' : {
' id ' : ' PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0 ' ,
' tags ' : [ ] ,
' view_count ' : int ,
' channel_url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' uploader ' : ' cole-dlp-test-acc ' ,
' uploader_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' channel ' : ' cole-dlp-test-acc ' ,
' channel_id ' : ' UCiu-3thuViMebBjw_5nWYrA ' ,
' description ' : ' test ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA ' ,
' title ' : ' dlp test playlist ' ,
' availability ' : ' public ' ,
} ,
' playlist_mincount ' : 1 ,
' params ' : { ' extractor_args ' : { ' youtube ' : { ' lang ' : [ ' ja ' ] } } } ,
' expected_warnings ' : [ ' Preferring " ja " ' ] ,
} ]
} ]
@classmethod
@classmethod
@ -5527,10 +5724,20 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
tabs = traverse_obj ( data , ( ' contents ' , ' twoColumnBrowseResultsRenderer ' , ' tabs ' ) , expected_type = list )
tabs = traverse_obj ( data , ( ' contents ' , ' twoColumnBrowseResultsRenderer ' , ' tabs ' ) , expected_type = list )
if tabs :
if tabs :
selected_tab = self . _extract_selected_tab ( tabs )
selected_tab = self . _extract_selected_tab ( tabs )
selected_tab_name = selected_tab . get ( ' title ' , ' ' ) . lower ( )
selected_tab_url = urljoin (
url , traverse_obj ( selected_tab , ( ' endpoint ' , ' commandMetadata ' , ' webCommandMetadata ' , ' url ' ) ) )
translated_tab_name = selected_tab . get ( ' title ' , ' ' ) . lower ( )
# Prefer tab name from tab url as it is always in en,
# but only when preferred lang is set as it may not extract reliably in all cases.
selected_tab_name = ( self . _preferred_lang in ( None , ' en ' ) and translated_tab_name
or selected_tab_url and get_mobj ( selected_tab_url ) [ ' tab ' ] [ 1 : ] # primary
or translated_tab_name )
if selected_tab_name == ' home ' :
if selected_tab_name == ' home ' :
selected_tab_name = ' featured '
selected_tab_name = ' featured '
requested_tab_name = mobj [ ' tab ' ] [ 1 : ]
requested_tab_name = mobj [ ' tab ' ] [ 1 : ]
if ' no-youtube-channel-redirect ' not in compat_opts :
if ' no-youtube-channel-redirect ' not in compat_opts :
if requested_tab_name == ' live ' : # Live tab should have redirected to the video
if requested_tab_name == ' live ' : # Live tab should have redirected to the video
raise UserNotLive ( video_id = mobj [ ' id ' ] )
raise UserNotLive ( video_id = mobj [ ' id ' ] )
@ -5642,6 +5849,7 @@ class YoutubePlaylistIE(InfoExtractor):
' channel ' : ' milan ' ,
' channel ' : ' milan ' ,
' channel_id ' : ' UCEI1-PVPcYXjB73Hfelbmaw ' ,
' channel_id ' : ' UCEI1-PVPcYXjB73Hfelbmaw ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw ' ,
' uploader_url ' : ' https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw ' ,
' availability ' : ' public ' ,
} ,
} ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
} , {
} , {
@ -5660,6 +5868,7 @@ class YoutubePlaylistIE(InfoExtractor):
' uploader_url ' : ' https://www.youtube.com/c/愛低音的國王 ' ,
' uploader_url ' : ' https://www.youtube.com/c/愛低音的國王 ' ,
' channel_id ' : ' UC21nz3_MesPLqtDqwdvnoxA ' ,
' channel_id ' : ' UC21nz3_MesPLqtDqwdvnoxA ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' modified_date ' : r ' re: \ d {8} ' ,
' availability ' : ' public ' ,
} ,
} ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
' expected_warnings ' : [ r ' [Uu]navailable videos (are|will be) hidden ' ] ,
} , {
} , {
@ -5848,7 +6057,7 @@ class YoutubeNotificationsIE(YoutubeTabBaseInfoExtractor):
title = self . _search_regex (
title = self . _search_regex (
rf ' { re . escape ( channel or " " ) } [^:]+: (.+) ' , notification_title ,
rf ' { re . escape ( channel or " " ) } [^:]+: (.+) ' , notification_title ,
' video title ' , default = None )
' video title ' , default = None )
upload_date = ( strftime_or_none ( self . _ extract _time_text( notification , ' sentTimeText ' ) [0 ] , ' % Y % m %d ' )
upload_date = ( strftime_or_none ( self . _ pars e_time_text( self . _get _text( notification , ' sentTimeText ' ) ) , ' % Y % m %d ' )
if self . _configuration_arg ( ' approximate_date ' , ie_key = YoutubeTabIE . ie_key ( ) )
if self . _configuration_arg ( ' approximate_date ' , ie_key = YoutubeTabIE . ie_key ( ) )
else None )
else None )
return {
return {