@ -69,6 +69,8 @@ from ..utils import (
)
)
STREAMING_DATA_CLIENT_NAME = ' __yt_dlp_client '
STREAMING_DATA_CLIENT_NAME = ' __yt_dlp_client '
STREAMING_DATA_PO_TOKEN = ' __yt_dlp_po_token '
# any clients starting with _ cannot be explicitly requested by the user
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
INNERTUBE_CLIENTS = {
' web ' : {
' web ' : {
@ -79,6 +81,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 1 ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 1 ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
} ,
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
' web_safari ' : {
' web_safari ' : {
@ -90,6 +93,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 1 ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 1 ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
} ,
' web_embedded ' : {
' web_embedded ' : {
' INNERTUBE_CONTEXT ' : {
' INNERTUBE_CONTEXT ' : {
@ -132,6 +136,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 3 ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 3 ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
} ,
' android_music ' : {
' android_music ' : {
' INNERTUBE_CONTEXT ' : {
' INNERTUBE_CONTEXT ' : {
@ -146,6 +151,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 21 ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 21 ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
} ,
' android_creator ' : {
' android_creator ' : {
' INNERTUBE_CONTEXT ' : {
' INNERTUBE_CONTEXT ' : {
@ -160,6 +166,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 14 ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 14 ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
} ,
# YouTube Kids videos aren't returned on this client for some reason
# YouTube Kids videos aren't returned on this client for some reason
' android_vr ' : {
' android_vr ' : {
@ -323,6 +330,7 @@ def build_innertube_clients():
for client , ytcfg in tuple ( INNERTUBE_CLIENTS . items ( ) ) :
for client , ytcfg in tuple ( INNERTUBE_CLIENTS . items ( ) ) :
ytcfg . setdefault ( ' INNERTUBE_HOST ' , ' www.youtube.com ' )
ytcfg . setdefault ( ' INNERTUBE_HOST ' , ' www.youtube.com ' )
ytcfg . setdefault ( ' REQUIRE_JS_PLAYER ' , True )
ytcfg . setdefault ( ' REQUIRE_JS_PLAYER ' , True )
ytcfg . setdefault ( ' REQUIRE_PO_TOKEN ' , False )
ytcfg . setdefault ( ' PLAYER_PARAMS ' , None )
ytcfg . setdefault ( ' PLAYER_PARAMS ' , None )
ytcfg [ ' INNERTUBE_CONTEXT ' ] [ ' client ' ] . setdefault ( ' hl ' , ' en ' )
ytcfg [ ' INNERTUBE_CONTEXT ' ] [ ' client ' ] . setdefault ( ' hl ' , ' en ' )
@ -688,31 +696,46 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r ' \ bID_TOKEN[ " \' ] \ s*: \ s*[ " \' ](.+?)[ " \' ] ' , webpage ,
r ' \ bID_TOKEN[ " \' ] \ s*: \ s*[ " \' ](.+?)[ " \' ] ' , webpage ,
' identity token ' , default = None , fatal = False )
' identity token ' , default = None , fatal = False )
@staticmethod
def _data_sync_id_to_delegated_session_id ( self , data_sync_id ) :
def _extract_account_syncid ( * args ) :
if not data_sync_id :
return
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
channel_syncid , _ , user_syncid = data_sync_id . partition ( ' || ' )
if user_syncid :
return channel_syncid
def _extract_account_syncid ( self , * args ) :
"""
"""
Extract syncId required to download private playlists of secondary channels
Extract current session ID required to download private playlists of secondary channels
@params response and / or ytcfg
@params response and / or ytcfg
"""
"""
for data in args :
# ytcfg includes channel_syncid if on secondary channel
# ytcfg includes channel_syncid if on secondary channel
if delegated_sid := traverse_obj ( args , ( . . . , ' DELEGATED_SESSION_ID ' , { str } , any ) ) :
delegated_sid = try_get ( data , lambda x : x [ ' DELEGATED_SESSION_ID ' ] , str )
return delegated_sid
if delegated_sid :
return delegated_sid
sync_ids = ( try_get (
data , ( lambda x : x [ ' responseContext ' ] [ ' mainAppWebResponseContext ' ] [ ' datasyncId ' ] ,
lambda x : x [ ' DATASYNC_ID ' ] ) , str ) or ' ' ) . split ( ' || ' )
if len ( sync_ids ) > = 2 and sync_ids [ 1 ] :
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
return sync_ids [ 0 ]
@staticmethod
data_sync_id = self . _extract_data_sync_id ( * args )
def _extract_visitor_data ( * args ) :
return self . _data_sync_id_to_delegated_session_id ( data_sync_id )
def _extract_data_sync_id(self, *args):
    """
    Extract the current account dataSyncId.

    The value has the form "DELEGATED_SESSION_ID||USER_SESSION_ID" or
    "USER_SESSION_ID||".

    A `data_sync_id` extractor argument, when set, takes precedence over
    anything found in the passed data.

    @params response and/or ytcfg
    @returns the dataSyncId string, or None if none was found
    """
    # User-supplied override: --extractor-args "youtube:data_sync_id=XXX"
    if data_sync_id := self._configuration_arg('data_sync_id', [None], ie_key=YoutubeIE, casesense=True)[0]:
        return data_sync_id

    # Otherwise take the first string found in any argument, under either the
    # ytcfg key or the API response path
    return traverse_obj(
        args, (..., ('DATASYNC_ID', ('responseContext', 'mainAppWebResponseContext', 'datasyncId')), {str}, any))
def _extract_visitor_data ( self , * args ) :
"""
"""
Extracts visitorData from an API response or ytcfg
Extracts visitorData from an API response or ytcfg
Appears to be used to track session state
Appears to be used to track session state
"""
"""
if visitor_data := self . _configuration_arg ( ' visitor_data ' , [ None ] , ie_key = YoutubeIE , casesense = True ) [ 0 ] :
return visitor_data
return get_first (
return get_first (
args , [ ( ' VISITOR_DATA ' , ( ' INNERTUBE_CONTEXT ' , ' client ' , ' visitorData ' ) , ( ' responseContext ' , ' visitorData ' ) ) ] ,
args , [ ( ' VISITOR_DATA ' , ( ' INNERTUBE_CONTEXT ' , ' client ' , ' visitorData ' ) , ( ' responseContext ' , ' visitorData ' ) ) ] ,
expected_type = str )
expected_type = str )
@ -1334,11 +1357,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' 401 ' : { ' ext ' : ' mp4 ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.12M.08 ' } ,
' 401 ' : { ' ext ' : ' mp4 ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.12M.08 ' } ,
}
}
_SUBTITLE_FORMATS = ( ' json3 ' , ' srv1 ' , ' srv2 ' , ' srv3 ' , ' ttml ' , ' vtt ' )
_SUBTITLE_FORMATS = ( ' json3 ' , ' srv1 ' , ' srv2 ' , ' srv3 ' , ' ttml ' , ' vtt ' )
_POTOKEN_EXPERIMENTS = ( ' 51217476 ' , ' 51217102 ' )
_BROKEN_CLIENTS = {
short_client_name ( client ) : client
for client in ( ' android ' , ' android_creator ' , ' android_music ' )
}
_DEFAULT_CLIENTS = ( ' ios ' , ' web_creator ' )
_DEFAULT_CLIENTS = ( ' ios ' , ' web_creator ' )
_GEO_BYPASS = False
_GEO_BYPASS = False
@ -3701,6 +3719,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
* * cls . _get_checkok_params ( ) ,
* * cls . _get_checkok_params ( ) ,
}
}
def _get_config_po_token(self, client):
    """
    Look up a user-configured PO Token for the given client.

    Reads the `po_token` extractor argument, whose entries are expected in
    the form "client+po_token". Entries without a "+" separator are
    reported once with a warning and skipped.

    @param client  name of the client to find a token for
    @returns the token of the first entry whose client part equals
             `client`, or None if there is no such entry
    """
    po_token_strs = self._configuration_arg('po_token', [], ie_key=YoutubeIE, casesense=True)
    for token_str in po_token_strs:
        # Split on the first "+" only; anything after it belongs to the token
        po_token_client, sep, po_token = token_str.partition('+')
        if not sep:
            self.report_warning(
                f'Invalid po_token configuration format. Expected "client+po_token", got "{token_str}"',
                only_once=True)
            continue
        if po_token_client == client:
            return po_token
    return None
def fetch_po_token(self, client='web', visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """
    Obtain a PO Token for `client`, preferring a user-configured token.

    The binding checks below only apply when `player_url` is truthy.

    @param client        client name the token is for
    @param visitor_data  Visitor Data the token is bound to when logged out
    @param data_sync_id  Data Sync ID the token is bound to when logged in
    @param player_url    player URL; when falsy, the binding checks are skipped
    @param kwargs        passed through unchanged to `_fetch_po_token`
    @returns the configured token if one exists for `client`, otherwise
             whatever `_fetch_po_token` returns; None when a required
             binding value is missing
    """
    # PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
    if not visitor_data and not self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Visitor Data. '
            f'You may need to pass Visitor Data with --extractor-args "youtube:visitor_data=XXX"')
        return None

    config_po_token = self._get_config_po_token(client)
    if config_po_token:
        # PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
        # if using first channel in an account then we don't need the data_sync_id anymore...
        # so only warn here and still hand the configured token back.
        if not data_sync_id and self.is_authenticated and player_url:
            self.report_warning(
                f'Got a PO Token for {client} client, but missing Data Sync ID for account. Formats may not work. '
                f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return config_po_token

    # Require PO Token if logged in for external fetching
    if not data_sync_id and self.is_authenticated and player_url:
        self.report_warning(
            f'Unable to fetch PO Token for {client} client: Missing required Data Sync ID for account. '
            f'You may need to pass a Data Sync ID with --extractor-args "youtube:data_sync_id=XXX"')
        return None

    return self._fetch_po_token(
        client=client,
        visitor_data=visitor_data,
        data_sync_id=data_sync_id,
        player_url=player_url,
        **kwargs,
    )
def _fetch_po_token(self, client, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
    """
    External PO Token fetch stub.

    Takes the same values `fetch_po_token` forwards to it. This default
    implementation does nothing and returns None, i.e. no token.
    """
    return None
@staticmethod
@staticmethod
def _is_agegated ( player_response ) :
def _is_agegated ( player_response ) :
if traverse_obj ( player_response , ( ' playabilityStatus ' , ' desktopLegacyAgeGateReason ' ) ) :
if traverse_obj ( player_response , ( ' playabilityStatus ' , ' desktopLegacyAgeGateReason ' ) ) :
@ -3717,13 +3783,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable ( player_response ) :
def _is_unplayable ( player_response ) :
return traverse_obj ( player_response , ( ' playabilityStatus ' , ' status ' ) ) == ' UNPLAYABLE '
return traverse_obj ( player_response , ( ' playabilityStatus ' , ' status ' ) ) == ' UNPLAYABLE '
def _extract_player_response ( self , client , video_id , master_ytcfg , player_ytcfg , player_url , initial_pr , smuggled_data ) :
def _extract_player_response ( self , client , video_id , master_ytcfg , player_ytcfg , player_url , initial_pr , visitor_data , data_sync_id , po_token ) :
session_index = self . _extract_session_index ( player_ytcfg , master_ytcfg )
syncid = self . _extract_account_syncid ( player_ytcfg , master_ytcfg , initial_pr )
sts = self . _extract_signature_timestamp ( video_id , player_url , master_ytcfg , fatal = False ) if player_url else None
headers = self . generate_api_headers (
headers = self . generate_api_headers (
ytcfg = player_ytcfg , account_syncid = syncid , session_index = session_index , default_client = client )
ytcfg = player_ytcfg ,
default_client = client ,
visitor_data = visitor_data ,
session_index = self . _extract_session_index ( master_ytcfg , player_ytcfg ) ,
account_syncid = (
self . _data_sync_id_to_delegated_session_id ( data_sync_id )
or self . _extract_account_syncid ( master_ytcfg , initial_pr , player_ytcfg )
) ,
)
yt_query = {
yt_query = {
' videoId ' : video_id ,
' videoId ' : video_id ,
@ -3734,6 +3804,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_params := self . _configuration_arg ( ' player_params ' , [ default_pp ] , casesense = True ) [ 0 ] :
if player_params := self . _configuration_arg ( ' player_params ' , [ default_pp ] , casesense = True ) [ 0 ] :
yt_query [ ' params ' ] = player_params
yt_query [ ' params ' ] = player_params
if po_token :
yt_query [ ' serviceIntegrityDimensions ' ] = { ' poToken ' : po_token }
sts = self . _extract_signature_timestamp ( video_id , player_url , master_ytcfg , fatal = False ) if player_url else None
yt_query . update ( self . _generate_player_context ( sts ) )
yt_query . update ( self . _generate_player_context ( sts ) )
return self . _extract_response (
return self . _extract_response (
item_id = video_id , ep = ' player ' , query = yt_query ,
item_id = video_id , ep = ' player ' , query = yt_query ,
@ -3744,7 +3818,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients ( self , url , smuggled_data ) :
def _get_requested_clients ( self , url , smuggled_data ) :
requested_clients = [ ]
requested_clients = [ ]
broken_clients = [ ]
excluded_clients = [ ]
excluded_clients = [ ]
allowed_clients = sorted (
allowed_clients = sorted (
( client for client in INNERTUBE_CLIENTS if client [ : 1 ] != ' _ ' ) ,
( client for client in INNERTUBE_CLIENTS if client [ : 1 ] != ' _ ' ) ,
@ -3758,12 +3831,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
excluded_clients . append ( client [ 1 : ] )
excluded_clients . append ( client [ 1 : ] )
elif client not in allowed_clients :
elif client not in allowed_clients :
self . report_warning ( f ' Skipping unsupported client " { client } " ' )
self . report_warning ( f ' Skipping unsupported client " { client } " ' )
elif client in self . _BROKEN_CLIENTS . values ( ) :
broken_clients . append ( client )
else :
else :
requested_clients . append ( client )
requested_clients . append ( client )
# Force deprioritization of _BROKEN_CLIENTS for format de-duplication
requested_clients . extend ( broken_clients )
if not requested_clients :
if not requested_clients :
requested_clients . extend ( self . _DEFAULT_CLIENTS )
requested_clients . extend ( self . _DEFAULT_CLIENTS )
for excluded_client in excluded_clients :
for excluded_client in excluded_clients :
@ -3788,19 +3857,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return pr_id
return pr_id
def _extract_player_responses ( self , clients , video_id , webpage , master_ytcfg , smuggled_data ) :
def _extract_player_responses ( self , clients , video_id , webpage , master_ytcfg , smuggled_data ) :
initial_pr = ignore_initial_response = None
initial_pr = None
if webpage :
if webpage :
if ' web ' in clients :
experiments = traverse_obj ( master_ytcfg , (
' WEB_PLAYER_CONTEXT_CONFIGS ' , . . . , ' serializedExperimentIds ' , { lambda x : x . split ( ' , ' ) } , . . . ) )
if all ( x in experiments for x in self . _POTOKEN_EXPERIMENTS ) :
self . report_warning (
' Webpage contains broken formats (poToken experiment detected). Ignoring initial player response ' )
ignore_initial_response = True
initial_pr = self . _search_json (
initial_pr = self . _search_json (
self . _YT_INITIAL_PLAYER_RESPONSE_RE , webpage , ' initial player response ' , video_id , fatal = False )
self . _YT_INITIAL_PLAYER_RESPONSE_RE , webpage , ' initial player response ' , video_id , fatal = False )
prs = [ ]
prs = [ ]
deprioritized_prs = [ ]
if initial_pr and not self . _invalid_player_response ( initial_pr , video_id ) :
if initial_pr and not self . _invalid_player_response ( initial_pr , video_id ) :
# Android player_response does not have microFormats which are needed for
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
# extraction of some data. So we return the initial_pr with formats
@ -3822,14 +3886,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return
return
tried_iframe_fallback = False
tried_iframe_fallback = False
player_url = None
player_url = visitor_data = data_sync_id = None
skipped_clients = { }
skipped_clients = { }
while clients :
while clients :
deprioritize_pr = False
client , base_client , variant = _split_innertube_client ( clients . pop ( ) )
client , base_client , variant = _split_innertube_client ( clients . pop ( ) )
player_ytcfg = { }
player_ytcfg = master_ytcfg if client == ' web ' else { }
if client == ' web ' :
if ' configs ' not in self . _configuration_arg ( ' player_skip ' ) and client != ' web ' :
player_ytcfg = self . _get_default_ytcfg ( ) if ignore_initial_response else master_ytcfg
elif ' configs ' not in self . _configuration_arg ( ' player_skip ' ) :
player_ytcfg = self . _download_ytcfg ( client , video_id ) or player_ytcfg
player_ytcfg = self . _download_ytcfg ( client , video_id ) or player_ytcfg
player_url = player_url or self . _extract_player_url ( master_ytcfg , player_ytcfg , webpage = webpage )
player_url = player_url or self . _extract_player_url ( master_ytcfg , player_ytcfg , webpage = webpage )
@ -3842,34 +3905,53 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_url = self . _download_player_url ( video_id )
player_url = self . _download_player_url ( video_id )
tried_iframe_fallback = True
tried_iframe_fallback = True
pr = initial_pr if client == ' web ' and not ignore_initial_response else None
visitor_data = visitor_data or self . _extract_visitor_data ( master_ytcfg , initial_pr , player_ytcfg )
for retry in self . RetryManager ( fatal = False ) :
data_sync_id = data_sync_id or self . _extract_data_sync_id ( master_ytcfg , initial_pr , player_ytcfg )
try :
po_token = self . fetch_po_token (
pr = pr or self . _extract_player_response (
client = client , visitor_data = visitor_data ,
client , video_id , player_ytcfg or master_ytcfg , player_ytcfg ,
data_sync_id = data_sync_id if self . is_authenticated else None ,
player_url if require_js_player else None , initial_pr , smuggled_data )
player_url = player_url if require_js_player else None ,
except ExtractorError as e :
)
self . report_warning ( e )
break
require_po_token = self . _get_default_ytcfg ( client ) . get ( ' REQUIRE_PO_TOKEN ' )
experiments = traverse_obj ( pr , (
if not po_token and require_po_token :
' responseContext ' , ' serviceTrackingParams ' , lambda _ , v : v [ ' service ' ] == ' GFEEDBACK ' ,
self . report_warning (
' params ' , lambda _ , v : v [ ' key ' ] == ' e ' , ' value ' , { lambda x : x . split ( ' , ' ) } , . . . ) )
f ' No PO Token provided for { client } client, '
if all ( x in experiments for x in self . _POTOKEN_EXPERIMENTS ) :
f ' which is required for working { client } formats. '
pr = None
f ' You can manually pass a PO Token for this client with '
retry . error = ExtractorError ( ' API returned broken formats (poToken experiment detected) ' , expected = True )
f ' --extractor-args " youtube:po_token= { client } +XXX " ' ,
if not pr :
only_once = True )
deprioritize_pr = True
pr = initial_pr if client == ' web ' else None
try :
pr = pr or self . _extract_player_response (
client , video_id ,
master_ytcfg = player_ytcfg or master_ytcfg ,
player_ytcfg = player_ytcfg ,
player_url = player_url ,
initial_pr = initial_pr ,
visitor_data = visitor_data ,
data_sync_id = data_sync_id ,
po_token = po_token )
except ExtractorError as e :
self . report_warning ( e )
continue
continue
if pr_id := self . _invalid_player_response ( pr , video_id ) :
if pr_id := self . _invalid_player_response ( pr , video_id ) :
skipped_clients [ client ] = pr_id
skipped_clients [ client ] = pr_id
elif pr :
elif pr :
# Save client name for introspection later
# Save client name for introspection later
name = short_client_name ( client )
sd = traverse_obj ( pr , ( ' streamingData ' , { dict } ) ) or { }
sd = traverse_obj ( pr , ( ' streamingData ' , { dict } ) ) or { }
sd [ STREAMING_DATA_CLIENT_NAME ] = name
sd [ STREAMING_DATA_CLIENT_NAME ] = client
sd [ STREAMING_DATA_PO_TOKEN ] = po_token
for f in traverse_obj ( sd , ( ( ' formats ' , ' adaptiveFormats ' ) , . . . , { dict } ) ) :
for f in traverse_obj ( sd , ( ( ' formats ' , ' adaptiveFormats ' ) , . . . , { dict } ) ) :
f [ STREAMING_DATA_CLIENT_NAME ] = name
f [ STREAMING_DATA_CLIENT_NAME ] = client
prs . append ( pr )
f [ STREAMING_DATA_PO_TOKEN ] = po_token
if deprioritize_pr :
deprioritized_prs . append ( pr )
else :
prs . append ( pr )
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
if self . _is_agegated ( pr ) and variant != ' tv_embedded ' :
if self . _is_agegated ( pr ) and variant != ' tv_embedded ' :
@ -3893,6 +3975,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# _producer, _testsuite, & _vr variants can also work around age-verification
# _producer, _testsuite, & _vr variants can also work around age-verification
append_client ( ' web_creator ' , ' mediaconnect ' )
append_client ( ' web_creator ' , ' mediaconnect ' )
prs . extend ( deprioritized_prs )
if skipped_clients :
if skipped_clients :
self . report_warning (
self . report_warning (
f ' Skipping player responses from { " / " . join ( skipped_clients ) } clients '
f ' Skipping player responses from { " / " . join ( skipped_clients ) } clients '
@ -4027,13 +4111,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f ' { video_id } : Some formats are possibly damaged. They will be deprioritized ' , only_once = True )
f ' { video_id } : Some formats are possibly damaged. They will be deprioritized ' , only_once = True )
client_name = fmt . get ( STREAMING_DATA_CLIENT_NAME )
client_name = fmt . get ( STREAMING_DATA_CLIENT_NAME )
# _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
po_token = fmt . get ( STREAMING_DATA_PO_TOKEN )
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
is_broken = client_name in self . _BROKEN_CLIENTS
if po_token :
fmt_url = update_url_query ( fmt_url , { ' pot ' : po_token } )
# Clients that require PO Token return videoplayback URLs that may return 403
is_broken = ( not po_token and self . _get_default_ytcfg ( client_name ) . get ( ' REQUIRE_PO_TOKEN ' ) )
if is_broken :
if is_broken :
self . report_warning (
self . report_warning (
f ' { video_id } : { self . _BROKEN_CLIENTS [ client_name ] } client formats are broken '
f ' { video_id } : { client_name } client formats require a PO Token which was not provided. '
' and may yield HTTP Error 403. They will be deprioritized ' , only_once = True )
' They will be deprioritized as they may yield HTTP Error 403 ' , only_once = True )
name = fmt . get ( ' qualityLabel ' ) or quality . replace ( ' audio_quality_ ' , ' ' ) or ' '
name = fmt . get ( ' qualityLabel ' ) or quality . replace ( ' audio_quality_ ' , ' ' ) or ' '
fps = int_or_none ( fmt . get ( ' fps ' ) ) or 0
fps = int_or_none ( fmt . get ( ' fps ' ) ) or 0
@ -4109,12 +4197,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif skip_bad_formats and live_status == ' is_live ' and needs_live_processing != ' is_live ' :
elif skip_bad_formats and live_status == ' is_live ' and needs_live_processing != ' is_live ' :
skip_manifests . add ( ' dash ' )
skip_manifests . add ( ' dash ' )
def process_manifest_format ( f , proto , client_name , itag ):
def process_manifest_format ( f , proto , client_name , itag , po_token ):
key = ( proto , f . get ( ' language ' ) )
key = ( proto , f . get ( ' language ' ) )
if not all_formats and key in itags [ itag ] :
if not all_formats and key in itags [ itag ] :
return False
return False
itags [ itag ] . add ( key )
itags [ itag ] . add ( key )
if f . get ( ' source_preference ' ) is None :
f [ ' source_preference ' ] = - 1
# Clients that require PO Token return videoplayback URLs that may return 403
# hls does not currently require PO Token
if ( not po_token and self . _get_default_ytcfg ( client_name ) . get ( ' REQUIRE_PO_TOKEN ' ) ) and proto != ' hls ' :
self . report_warning (
f ' { video_id } : { client_name } client { proto } formats require a PO Token which was not provided. '
' They will be deprioritized as they may yield HTTP Error 403 ' , only_once = True )
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' BROKEN ' , delim = ' ' )
f [ ' source_preference ' ] - = 20
if itag and all_formats :
if itag and all_formats :
f [ ' format_id ' ] = f ' { itag } - { proto } '
f [ ' format_id ' ] = f ' { itag } - { proto } '
elif any ( p != proto for p , _ in itags [ itag ] ) :
elif any ( p != proto for p , _ in itags [ itag ] ) :
@ -4126,9 +4226,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' (default) ' , delim = ' ' )
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' (default) ' , delim = ' ' )
f [ ' language_preference ' ] = PREFERRED_LANG_VALUE
f [ ' language_preference ' ] = PREFERRED_LANG_VALUE
if f . get ( ' source_preference ' ) is None :
f [ ' source_preference ' ] = - 1
if itag in ( ' 616 ' , ' 235 ' ) :
if itag in ( ' 616 ' , ' 235 ' ) :
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' Premium ' , delim = ' ' )
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' Premium ' , delim = ' ' )
f [ ' source_preference ' ] + = 100
f [ ' source_preference ' ] + = 100
@ -4149,23 +4246,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
subtitles = { }
subtitles = { }
for sd in streaming_data :
for sd in streaming_data :
client_name = sd . get ( STREAMING_DATA_CLIENT_NAME )
client_name = sd . get ( STREAMING_DATA_CLIENT_NAME )
po_token = sd . get ( STREAMING_DATA_PO_TOKEN )
hls_manifest_url = ' hls ' not in skip_manifests and sd . get ( ' hlsManifestUrl ' )
hls_manifest_url = ' hls ' not in skip_manifests and sd . get ( ' hlsManifestUrl ' )
if hls_manifest_url :
if hls_manifest_url :
if po_token :
hls_manifest_url = hls_manifest_url . rstrip ( ' / ' ) + f ' /pot/ { po_token } '
fmts , subs = self . _extract_m3u8_formats_and_subtitles (
fmts , subs = self . _extract_m3u8_formats_and_subtitles (
hls_manifest_url , video_id , ' mp4 ' , fatal = False , live = live_status == ' is_live ' )
hls_manifest_url , video_id , ' mp4 ' , fatal = False , live = live_status == ' is_live ' )
subtitles = self . _merge_subtitles ( subs , subtitles )
subtitles = self . _merge_subtitles ( subs , subtitles )
for f in fmts :
for f in fmts :
if process_manifest_format ( f , ' hls ' , client_name , self . _search_regex (
if process_manifest_format ( f , ' hls ' , client_name , self . _search_regex (
r ' /itag/( \ d+) ' , f [ ' url ' ] , ' itag ' , default = None ) ):
r ' /itag/( \ d+) ' , f [ ' url ' ] , ' itag ' , default = None ) , po_token ):
yield f
yield f
dash_manifest_url = ' dash ' not in skip_manifests and sd . get ( ' dashManifestUrl ' )
dash_manifest_url = ' dash ' not in skip_manifests and sd . get ( ' dashManifestUrl ' )
if dash_manifest_url :
if dash_manifest_url :
if po_token :
dash_manifest_url = dash_manifest_url . rstrip ( ' / ' ) + f ' /pot/ { po_token } '
formats , subs = self . _extract_mpd_formats_and_subtitles ( dash_manifest_url , video_id , fatal = False )
formats , subs = self . _extract_mpd_formats_and_subtitles ( dash_manifest_url , video_id , fatal = False )
subtitles = self . _merge_subtitles ( subs , subtitles ) # Prioritize HLS subs over DASH
subtitles = self . _merge_subtitles ( subs , subtitles ) # Prioritize HLS subs over DASH
for f in formats :
for f in formats :
if process_manifest_format ( f , ' dash ' , client_name , f [ ' format_id ' ] ):
if process_manifest_format ( f , ' dash ' , client_name , f [ ' format_id ' ] , po_token ):
f [ ' filesize ' ] = int_or_none ( self . _search_regex (
f [ ' filesize ' ] = int_or_none ( self . _search_regex (
r ' /clen/( \ d+) ' , f . get ( ' fragment_base_url ' ) or f [ ' url ' ] , ' file size ' , default = None ) )
r ' /clen/( \ d+) ' , f . get ( ' fragment_base_url ' ) or f [ ' url ' ] , ' file size ' , default = None ) )
if needs_live_processing :
if needs_live_processing :