[ie/youtube] Determine wait time from player response (#14646)

Closes #14645
Authored by: WhatAmISupposedToPutHere, bashonly

Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com>
pull/15041/merge
WhatAmISupposedToPutHere 2 weeks ago committed by GitHub
parent 0c696239ef
commit 715af0c636
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1870,7 +1870,6 @@ The following extractors use this feature:
* `po_token`: Proof of Origin (PO) Token(s) to use. Comma seperated list of PO Tokens in the format `CLIENT.CONTEXT+PO_TOKEN`, e.g. `youtube:po_token=web.gvs+XXX,web.player=XXX,web_safari.gvs+YYY`. Context can be any of `gvs` (Google Video Server URLs), `player` (Innertube player request) or `subs` (Subtitles)
* `pot_trace`: Enable debug logging for PO Token fetching. Either `true` or `false` (default)
* `fetch_pot`: Policy to use for fetching a PO Token from providers. One of `always` (always try fetch a PO Token regardless if the client requires one for the given context), `never` (never fetch a PO Token), or `auto` (default; only fetch a PO Token if the client requires one for the given context)
* `playback_wait`: Duration (in seconds) to wait inbetween the extraction and download stages in order to ensure the formats are available. The default is `6` seconds
* `jsc_trace`: Enable debug logging for JS Challenge fetching. Either `true` or `false` (default)
#### youtube-ejs

@ -76,7 +76,7 @@ STREAMING_DATA_FETCH_GVS_PO_TOKEN = '__yt_dlp_fetch_gvs_po_token'
STREAMING_DATA_PLAYER_TOKEN_PROVIDED = '__yt_dlp_player_token_provided'
STREAMING_DATA_INNERTUBE_CONTEXT = '__yt_dlp_innertube_context'
STREAMING_DATA_IS_PREMIUM_SUBSCRIBER = '__yt_dlp_is_premium_subscriber'
STREAMING_DATA_FETCHED_TIMESTAMP = '__yt_dlp_fetched_timestamp'
STREAMING_DATA_AVAILABLE_AT_TIMESTAMP = '__yt_dlp_available_at_timestamp'
PO_TOKEN_GUIDE_URL = 'https://github.com/yt-dlp/yt-dlp/wiki/PO-Token-Guide'
@ -3032,7 +3032,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
elif pr:
# Save client details for introspection later
innertube_context = traverse_obj(player_ytcfg or self._get_default_ytcfg(client), 'INNERTUBE_CONTEXT')
fetched_timestamp = int(time.time())
sd = pr.setdefault('streamingData', {})
sd[STREAMING_DATA_CLIENT_NAME] = client
sd[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
@ -3040,7 +3039,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
sd[STREAMING_DATA_INNERTUBE_CONTEXT] = innertube_context
sd[STREAMING_DATA_FETCH_SUBS_PO_TOKEN] = fetch_subs_po_token_func
sd[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER] = is_premium_subscriber
sd[STREAMING_DATA_FETCHED_TIMESTAMP] = fetched_timestamp
sd[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP] = self._get_available_at_timestamp(pr, video_id, client)
for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})):
f[STREAMING_DATA_CLIENT_NAME] = client
f[STREAMING_DATA_FETCH_GVS_PO_TOKEN] = fetch_gvs_po_token_func
@ -3172,9 +3171,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# save pots per client to avoid fetching again
gvs_pots = {}
# For handling potential pre-playback required waiting period
playback_wait = int_or_none(self._configuration_arg('playback_wait', [None])[0], default=6)
def get_language_code_and_preference(fmt_stream):
audio_track = fmt_stream.get('audioTrack') or {}
display_name = audio_track.get('displayName') or ''
@ -3199,7 +3195,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
is_premium_subscriber = streaming_data[STREAMING_DATA_IS_PREMIUM_SUBSCRIBER]
player_token_provided = streaming_data[STREAMING_DATA_PLAYER_TOKEN_PROVIDED]
client_name = streaming_data.get(STREAMING_DATA_CLIENT_NAME)
available_at = streaming_data[STREAMING_DATA_FETCHED_TIMESTAMP] + playback_wait
available_at = streaming_data[STREAMING_DATA_AVAILABLE_AT_TIMESTAMP]
streaming_formats = traverse_obj(streaming_data, (('formats', 'adaptiveFormats'), ...))
def get_stream_id(fmt_stream):
@ -3653,6 +3649,36 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
}))
return webpage
def _get_available_at_timestamp(self, player_response, video_id, client):
now = time.time()
wait_seconds = 0
for renderer in traverse_obj(player_response, (
'adSlots', lambda _, v: v['adSlotRenderer']['adSlotMetadata']['triggerEvent'] == 'SLOT_TRIGGER_EVENT_BEFORE_CONTENT',
'adSlotRenderer', 'fulfillmentContent', 'fulfilledLayout', 'playerBytesAdLayoutRenderer', 'renderingContent', (
None,
('playerBytesSequentialLayoutRenderer', 'sequentialLayouts', ..., 'playerBytesAdLayoutRenderer', 'renderingContent'),
), 'instreamVideoAdRenderer', {dict},
)):
duration = traverse_obj(renderer, ('playerVars', {urllib.parse.parse_qs}, 'length_seconds', -1, {int_or_none}))
ad = 'an ad' if duration is None else f'a {duration}s ad'
skip_time = traverse_obj(renderer, ('skipOffsetMilliseconds', {float_or_none(scale=1000)}))
if skip_time is not None:
# YT allows skipping this ad; use the wait-until-skip time instead of full ad duration
skip_time = skip_time if skip_time % 1 else int(skip_time)
ad += f' skippable after {skip_time}s'
duration = skip_time
if duration is not None:
self.write_debug(f'{video_id}: Detected {ad} for {client}')
wait_seconds += duration
if wait_seconds:
return math.ceil(now) + wait_seconds
return int(now)
def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None):
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails'))
is_live = get_first(video_details, 'isLive')

Loading…
Cancel
Save