@ -69,6 +69,8 @@ from ..utils import (
)
STREAMING_DATA_CLIENT_NAME = ' __yt_dlp_client '
STREAMING_DATA_PO_TOKEN = ' __yt_dlp_po_token '
# any clients starting with _ cannot be explicitly requested by the user
INNERTUBE_CLIENTS = {
' web ' : {
@ -79,6 +81,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 1 ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
# Safari UA returns pre-merged video+audio 144p/240p/360p/720p/1080p HLS formats
' web_safari ' : {
@ -90,6 +93,7 @@ INNERTUBE_CLIENTS = {
} ,
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 1 ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
' web_embedded ' : {
' INNERTUBE_CONTEXT ' : {
@ -132,6 +136,7 @@ INNERTUBE_CLIENTS = {
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 3 ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
' android_music ' : {
' INNERTUBE_CONTEXT ' : {
@ -146,6 +151,7 @@ INNERTUBE_CLIENTS = {
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 21 ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
' android_creator ' : {
' INNERTUBE_CONTEXT ' : {
@ -160,6 +166,7 @@ INNERTUBE_CLIENTS = {
} ,
' INNERTUBE_CONTEXT_CLIENT_NAME ' : 14 ,
' REQUIRE_JS_PLAYER ' : False ,
' REQUIRE_PO_TOKEN ' : True ,
} ,
# YouTube Kids videos aren't returned on this client for some reason
' android_vr ' : {
@ -323,6 +330,7 @@ def build_innertube_clients():
for client , ytcfg in tuple ( INNERTUBE_CLIENTS . items ( ) ) :
ytcfg . setdefault ( ' INNERTUBE_HOST ' , ' www.youtube.com ' )
ytcfg . setdefault ( ' REQUIRE_JS_PLAYER ' , True )
ytcfg . setdefault ( ' REQUIRE_PO_TOKEN ' , False )
ytcfg . setdefault ( ' PLAYER_PARAMS ' , None )
ytcfg [ ' INNERTUBE_CONTEXT ' ] [ ' client ' ] . setdefault ( ' hl ' , ' en ' )
@ -688,31 +696,46 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
r ' \ bID_TOKEN[ " \' ] \ s*: \ s*[ " \' ](.+?)[ " \' ] ' , webpage ,
' identity token ' , default = None , fatal = False )
@staticmethod
def _extract_account_syncid ( * args ) :
def _data_sync_id_to_delegated_session_id ( self , data_sync_id ) :
if not data_sync_id :
return
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
channel_syncid , _ , user_syncid = data_sync_id . partition ( ' || ' )
if user_syncid :
return channel_syncid
def _extract_account_syncid ( self , * args ) :
"""
Extract syncId required to download private playlists of secondary channels
Extract current session ID required to download private playlists of secondary channels
@params response and / or ytcfg
"""
for data in args :
# ytcfg includes channel_syncid if on secondary channel
delegated_sid = try_get ( data , lambda x : x [ ' DELEGATED_SESSION_ID ' ] , str )
if delegated_sid :
return delegated_sid
sync_ids = ( try_get (
data , ( lambda x : x [ ' responseContext ' ] [ ' mainAppWebResponseContext ' ] [ ' datasyncId ' ] ,
lambda x : x [ ' DATASYNC_ID ' ] ) , str ) or ' ' ) . split ( ' || ' )
if len ( sync_ids ) > = 2 and sync_ids [ 1 ] :
# datasyncid is of the form "channel_syncid||user_syncid" for secondary channel
# and just "user_syncid||" for primary channel. We only want the channel_syncid
return sync_ids [ 0 ]
# ytcfg includes channel_syncid if on secondary channel
if delegated_sid := traverse_obj ( args , ( . . . , ' DELEGATED_SESSION_ID ' , { str } , any ) ) :
return delegated_sid
@staticmethod
def _extract_visitor_data ( * args ) :
data_sync_id = self . _extract_data_sync_id ( * args )
return self . _data_sync_id_to_delegated_session_id ( data_sync_id )
def _extract_data_sync_id ( self , * args ) :
"""
Extract current account dataSyncId .
In the format DELEGATED_SESSION_ID | | USER_SESSION_ID or USER_SESSION_ID | |
@params response and / or ytcfg
"""
if data_sync_id := self . _configuration_arg ( ' data_sync_id ' , [ None ] , ie_key = YoutubeIE , casesense = True ) [ 0 ] :
return data_sync_id
return traverse_obj (
args , ( . . . , ( ' DATASYNC_ID ' , ( ' responseContext ' , ' mainAppWebResponseContext ' , ' datasyncId ' ) ) , { str } , any ) )
def _extract_visitor_data ( self , * args ) :
"""
Extracts visitorData from an API response or ytcfg
Appears to be used to track session state
"""
if visitor_data := self . _configuration_arg ( ' visitor_data ' , [ None ] , ie_key = YoutubeIE , casesense = True ) [ 0 ] :
return visitor_data
return get_first (
args , [ ( ' VISITOR_DATA ' , ( ' INNERTUBE_CONTEXT ' , ' client ' , ' visitorData ' ) , ( ' responseContext ' , ' visitorData ' ) ) ] ,
expected_type = str )
@ -1334,11 +1357,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
' 401 ' : { ' ext ' : ' mp4 ' , ' height ' : 2160 , ' format_note ' : ' DASH video ' , ' vcodec ' : ' av01.0.12M.08 ' } ,
}
_SUBTITLE_FORMATS = ( ' json3 ' , ' srv1 ' , ' srv2 ' , ' srv3 ' , ' ttml ' , ' vtt ' )
_POTOKEN_EXPERIMENTS = ( ' 51217476 ' , ' 51217102 ' )
_BROKEN_CLIENTS = {
short_client_name ( client ) : client
for client in ( ' android ' , ' android_creator ' , ' android_music ' )
}
_DEFAULT_CLIENTS = ( ' ios ' , ' web_creator ' )
_GEO_BYPASS = False
@ -3701,6 +3719,54 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
* * cls . _get_checkok_params ( ) ,
}
def _get_config_po_token ( self , client ) :
po_token_strs = self . _configuration_arg ( ' po_token ' , [ ] , ie_key = YoutubeIE , casesense = True )
for token_str in po_token_strs :
po_token_client , sep , po_token = token_str . partition ( ' + ' )
if not sep :
self . report_warning (
f ' Invalid po_token configuration format. Expected " client+po_token " , got " { token_str } " ' , only_once = True )
continue
if po_token_client == client :
return po_token
def fetch_po_token ( self , client = ' web ' , visitor_data = None , data_sync_id = None , player_url = None , * * kwargs ) :
# PO Token is bound to visitor_data / Visitor ID when logged out. Must have visitor_data for it to function.
if not visitor_data and not self . is_authenticated and player_url :
self . report_warning (
f ' Unable to fetch PO Token for { client } client: Missing required Visitor Data. '
f ' You may need to pass Visitor Data with --extractor-args " youtube:visitor_data=XXX " ' )
return
config_po_token = self . _get_config_po_token ( client )
if config_po_token :
# PO token is bound to data_sync_id / account Session ID when logged in. However, for the config po_token,
# if using first channel in an account then we don't need the data_sync_id anymore...
if not data_sync_id and self . is_authenticated and player_url :
self . report_warning (
f ' Got a PO Token for { client } client, but missing Data Sync ID for account. Formats may not work. '
f ' You may need to pass a Data Sync ID with --extractor-args " youtube:data_sync_id=XXX " ' )
return config_po_token
# Require PO Token if logged in for external fetching
if not data_sync_id and self . is_authenticated and player_url :
self . report_warning (
f ' Unable to fetch PO Token for { client } client: Missing required Data Sync ID for account. '
f ' You may need to pass a Data Sync ID with --extractor-args " youtube:data_sync_id=XXX " ' )
return
return self . _fetch_po_token (
client = client ,
visitor_data = visitor_data ,
data_sync_id = data_sync_id ,
player_url = player_url ,
* * kwargs ,
)
def _fetch_po_token ( self , client , visitor_data = None , data_sync_id = None , player_url = None , * * kwargs ) :
""" External PO Token fetch stub """
@staticmethod
def _is_agegated ( player_response ) :
if traverse_obj ( player_response , ( ' playabilityStatus ' , ' desktopLegacyAgeGateReason ' ) ) :
@ -3717,13 +3783,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _is_unplayable ( player_response ) :
return traverse_obj ( player_response , ( ' playabilityStatus ' , ' status ' ) ) == ' UNPLAYABLE '
def _extract_player_response ( self , client , video_id , master_ytcfg , player_ytcfg , player_url , initial_pr , smuggled_data ) :
session_index = self . _extract_session_index ( player_ytcfg , master_ytcfg )
syncid = self . _extract_account_syncid ( player_ytcfg , master_ytcfg , initial_pr )
sts = self . _extract_signature_timestamp ( video_id , player_url , master_ytcfg , fatal = False ) if player_url else None
def _extract_player_response ( self , client , video_id , master_ytcfg , player_ytcfg , player_url , initial_pr , visitor_data , data_sync_id , po_token ) :
headers = self . generate_api_headers (
ytcfg = player_ytcfg , account_syncid = syncid , session_index = session_index , default_client = client )
ytcfg = player_ytcfg ,
default_client = client ,
visitor_data = visitor_data ,
session_index = self . _extract_session_index ( master_ytcfg , player_ytcfg ) ,
account_syncid = (
self . _data_sync_id_to_delegated_session_id ( data_sync_id )
or self . _extract_account_syncid ( master_ytcfg , initial_pr , player_ytcfg )
) ,
)
yt_query = {
' videoId ' : video_id ,
@ -3734,6 +3804,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if player_params := self . _configuration_arg ( ' player_params ' , [ default_pp ] , casesense = True ) [ 0 ] :
yt_query [ ' params ' ] = player_params
if po_token :
yt_query [ ' serviceIntegrityDimensions ' ] = { ' poToken ' : po_token }
sts = self . _extract_signature_timestamp ( video_id , player_url , master_ytcfg , fatal = False ) if player_url else None
yt_query . update ( self . _generate_player_context ( sts ) )
return self . _extract_response (
item_id = video_id , ep = ' player ' , query = yt_query ,
@ -3744,7 +3818,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
def _get_requested_clients ( self , url , smuggled_data ) :
requested_clients = [ ]
broken_clients = [ ]
excluded_clients = [ ]
allowed_clients = sorted (
( client for client in INNERTUBE_CLIENTS if client [ : 1 ] != ' _ ' ) ,
@ -3758,12 +3831,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
excluded_clients . append ( client [ 1 : ] )
elif client not in allowed_clients :
self . report_warning ( f ' Skipping unsupported client " { client } " ' )
elif client in self . _BROKEN_CLIENTS . values ( ) :
broken_clients . append ( client )
else :
requested_clients . append ( client )
# Force deprioritization of _BROKEN_CLIENTS for format de-duplication
requested_clients . extend ( broken_clients )
if not requested_clients :
requested_clients . extend ( self . _DEFAULT_CLIENTS )
for excluded_client in excluded_clients :
@ -3788,19 +3857,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return pr_id
def _extract_player_responses ( self , clients , video_id , webpage , master_ytcfg , smuggled_data ) :
initial_pr = ignore_initial_response = None
initial_pr = None
if webpage :
if ' web ' in clients :
experiments = traverse_obj ( master_ytcfg , (
' WEB_PLAYER_CONTEXT_CONFIGS ' , . . . , ' serializedExperimentIds ' , { lambda x : x . split ( ' , ' ) } , . . . ) )
if all ( x in experiments for x in self . _POTOKEN_EXPERIMENTS ) :
self . report_warning (
' Webpage contains broken formats (poToken experiment detected). Ignoring initial player response ' )
ignore_initial_response = True
initial_pr = self . _search_json (
self . _YT_INITIAL_PLAYER_RESPONSE_RE , webpage , ' initial player response ' , video_id , fatal = False )
prs = [ ]
deprioritized_prs = [ ]
if initial_pr and not self . _invalid_player_response ( initial_pr , video_id ) :
# Android player_response does not have microFormats which are needed for
# extraction of some data. So we return the initial_pr with formats
@ -3822,14 +3886,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return
tried_iframe_fallback = False
player_url = None
player_url = visitor_data = data_sync_id = None
skipped_clients = { }
while clients :
deprioritize_pr = False
client , base_client , variant = _split_innertube_client ( clients . pop ( ) )
player_ytcfg = { }
if client == ' web ' :
player_ytcfg = self . _get_default_ytcfg ( ) if ignore_initial_response else master_ytcfg
elif ' configs ' not in self . _configuration_arg ( ' player_skip ' ) :
player_ytcfg = master_ytcfg if client == ' web ' else { }
if ' configs ' not in self . _configuration_arg ( ' player_skip ' ) and client != ' web ' :
player_ytcfg = self . _download_ytcfg ( client , video_id ) or player_ytcfg
player_url = player_url or self . _extract_player_url ( master_ytcfg , player_ytcfg , webpage = webpage )
@ -3842,34 +3905,53 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
player_url = self . _download_player_url ( video_id )
tried_iframe_fallback = True
pr = initial_pr if client == ' web ' and not ignore_initial_response else None
for retry in self . RetryManager ( fatal = False ) :
try :
pr = pr or self . _extract_player_response (
client , video_id , player_ytcfg or master_ytcfg , player_ytcfg ,
player_url if require_js_player else None , initial_pr , smuggled_data )
except ExtractorError as e :
self . report_warning ( e )
break
experiments = traverse_obj ( pr , (
' responseContext ' , ' serviceTrackingParams ' , lambda _ , v : v [ ' service ' ] == ' GFEEDBACK ' ,
' params ' , lambda _ , v : v [ ' key ' ] == ' e ' , ' value ' , { lambda x : x . split ( ' , ' ) } , . . . ) )
if all ( x in experiments for x in self . _POTOKEN_EXPERIMENTS ) :
pr = None
retry . error = ExtractorError ( ' API returned broken formats (poToken experiment detected) ' , expected = True )
if not pr :
visitor_data = visitor_data or self . _extract_visitor_data ( master_ytcfg , initial_pr , player_ytcfg )
data_sync_id = data_sync_id or self . _extract_data_sync_id ( master_ytcfg , initial_pr , player_ytcfg )
po_token = self . fetch_po_token (
client = client , visitor_data = visitor_data ,
data_sync_id = data_sync_id if self . is_authenticated else None ,
player_url = player_url if require_js_player else None ,
)
require_po_token = self . _get_default_ytcfg ( client ) . get ( ' REQUIRE_PO_TOKEN ' )
if not po_token and require_po_token :
self . report_warning (
f ' No PO Token provided for { client } client, '
f ' which is required for working { client } formats. '
f ' You can manually pass a PO Token for this client with '
f ' --extractor-args " youtube:po_token= { client } +XXX " ' ,
only_once = True )
deprioritize_pr = True
pr = initial_pr if client == ' web ' else None
try :
pr = pr or self . _extract_player_response (
client , video_id ,
master_ytcfg = player_ytcfg or master_ytcfg ,
player_ytcfg = player_ytcfg ,
player_url = player_url ,
initial_pr = initial_pr ,
visitor_data = visitor_data ,
data_sync_id = data_sync_id ,
po_token = po_token )
except ExtractorError as e :
self . report_warning ( e )
continue
if pr_id := self . _invalid_player_response ( pr , video_id ) :
skipped_clients [ client ] = pr_id
elif pr :
# Save client name for introspection later
name = short_client_name ( client )
sd = traverse_obj ( pr , ( ' streamingData ' , { dict } ) ) or { }
sd [ STREAMING_DATA_CLIENT_NAME ] = name
sd [ STREAMING_DATA_CLIENT_NAME ] = client
sd [ STREAMING_DATA_PO_TOKEN ] = po_token
for f in traverse_obj ( sd , ( ( ' formats ' , ' adaptiveFormats ' ) , . . . , { dict } ) ) :
f [ STREAMING_DATA_CLIENT_NAME ] = name
prs . append ( pr )
f [ STREAMING_DATA_CLIENT_NAME ] = client
f [ STREAMING_DATA_PO_TOKEN ] = po_token
if deprioritize_pr :
deprioritized_prs . append ( pr )
else :
prs . append ( pr )
# tv_embedded can work around age-gate and age-verification IF the video is embeddable
if self . _is_agegated ( pr ) and variant != ' tv_embedded ' :
@ -3893,6 +3975,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# _producer, _testsuite, & _vr variants can also work around age-verification
append_client ( ' web_creator ' , ' mediaconnect ' )
prs . extend ( deprioritized_prs )
if skipped_clients :
self . report_warning (
f ' Skipping player responses from { " / " . join ( skipped_clients ) } clients '
@ -4027,13 +4111,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f ' { video_id } : Some formats are possibly damaged. They will be deprioritized ' , only_once = True )
client_name = fmt . get ( STREAMING_DATA_CLIENT_NAME )
# _BROKEN_CLIENTS return videoplayback URLs that expire after 30 seconds
# Ref: https://github.com/yt-dlp/yt-dlp/issues/9554
is_broken = client_name in self . _BROKEN_CLIENTS
po_token = fmt . get ( STREAMING_DATA_PO_TOKEN )
if po_token :
fmt_url = update_url_query ( fmt_url , { ' pot ' : po_token } )
# Clients that require PO Token return videoplayback URLs that may return 403
is_broken = ( not po_token and self . _get_default_ytcfg ( client_name ) . get ( ' REQUIRE_PO_TOKEN ' ) )
if is_broken :
self . report_warning (
f ' { video_id } : { self . _BROKEN_CLIENTS [ client_name ] } client formats are broken '
' and may yield HTTP Error 403. They will be deprioritized ' , only_once = True )
f ' { video_id } : { client_name } client formats require a PO Token which was not provided. '
' They will be deprioritized as they may yield HTTP Error 403 ' , only_once = True )
name = fmt . get ( ' qualityLabel ' ) or quality . replace ( ' audio_quality_ ' , ' ' ) or ' '
fps = int_or_none ( fmt . get ( ' fps ' ) ) or 0
@ -4109,12 +4197,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif skip_bad_formats and live_status == ' is_live ' and needs_live_processing != ' is_live ' :
skip_manifests . add ( ' dash ' )
def process_manifest_format ( f , proto , client_name , itag ):
def process_manifest_format ( f , proto , client_name , itag , po_token ):
key = ( proto , f . get ( ' language ' ) )
if not all_formats and key in itags [ itag ] :
return False
itags [ itag ] . add ( key )
if f . get ( ' source_preference ' ) is None :
f [ ' source_preference ' ] = - 1
# Clients that require PO Token return videoplayback URLs that may return 403
# hls does not currently require PO Token
if ( not po_token and self . _get_default_ytcfg ( client_name ) . get ( ' REQUIRE_PO_TOKEN ' ) ) and proto != ' hls ' :
self . report_warning (
f ' { video_id } : { client_name } client { proto } formats require a PO Token which was not provided. '
' They will be deprioritized as they may yield HTTP Error 403 ' , only_once = True )
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' BROKEN ' , delim = ' ' )
f [ ' source_preference ' ] - = 20
if itag and all_formats :
f [ ' format_id ' ] = f ' { itag } - { proto } '
elif any ( p != proto for p , _ in itags [ itag ] ) :
@ -4126,9 +4226,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' (default) ' , delim = ' ' )
f [ ' language_preference ' ] = PREFERRED_LANG_VALUE
if f . get ( ' source_preference ' ) is None :
f [ ' source_preference ' ] = - 1
if itag in ( ' 616 ' , ' 235 ' ) :
f [ ' format_note ' ] = join_nonempty ( f . get ( ' format_note ' ) , ' Premium ' , delim = ' ' )
f [ ' source_preference ' ] + = 100
@ -4149,23 +4246,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
subtitles = { }
for sd in streaming_data :
client_name = sd . get ( STREAMING_DATA_CLIENT_NAME )
po_token = sd . get ( STREAMING_DATA_PO_TOKEN )
hls_manifest_url = ' hls ' not in skip_manifests and sd . get ( ' hlsManifestUrl ' )
if hls_manifest_url :
if po_token :
hls_manifest_url = hls_manifest_url . rstrip ( ' / ' ) + f ' /pot/ { po_token } '
fmts , subs = self . _extract_m3u8_formats_and_subtitles (
hls_manifest_url , video_id , ' mp4 ' , fatal = False , live = live_status == ' is_live ' )
subtitles = self . _merge_subtitles ( subs , subtitles )
for f in fmts :
if process_manifest_format ( f , ' hls ' , client_name , self . _search_regex (
r ' /itag/( \ d+) ' , f [ ' url ' ] , ' itag ' , default = None ) ):
r ' /itag/( \ d+) ' , f [ ' url ' ] , ' itag ' , default = None ) , po_token ):
yield f
dash_manifest_url = ' dash ' not in skip_manifests and sd . get ( ' dashManifestUrl ' )
if dash_manifest_url :
if po_token :
dash_manifest_url = dash_manifest_url . rstrip ( ' / ' ) + f ' /pot/ { po_token } '
formats , subs = self . _extract_mpd_formats_and_subtitles ( dash_manifest_url , video_id , fatal = False )
subtitles = self . _merge_subtitles ( subs , subtitles ) # Prioritize HLS subs over DASH
for f in formats :
if process_manifest_format ( f , ' dash ' , client_name , f [ ' format_id ' ] ):
if process_manifest_format ( f , ' dash ' , client_name , f [ ' format_id ' ] , po_token ):
f [ ' filesize ' ] = int_or_none ( self . _search_regex (
r ' /clen/( \ d+) ' , f . get ( ' fragment_base_url ' ) or f [ ' url ' ] , ' file size ' , default = None ) )
if needs_live_processing :