@ -28,7 +28,6 @@ from ..utils import (
clean_html ,
clean_html ,
datetime_from_str ,
datetime_from_str ,
dict_get ,
dict_get ,
error_to_compat_str ,
float_or_none ,
float_or_none ,
format_field ,
format_field ,
get_first ,
get_first ,
@ -45,7 +44,6 @@ from ..utils import (
parse_iso8601 ,
parse_iso8601 ,
parse_qs ,
parse_qs ,
qualities ,
qualities ,
remove_end ,
remove_start ,
remove_start ,
smuggle_url ,
smuggle_url ,
str_or_none ,
str_or_none ,
@ -763,27 +761,21 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_response ( self , item_id , query , note = ' Downloading API JSON ' , headers = None ,
def _extract_response ( self , item_id , query , note = ' Downloading API JSON ' , headers = None ,
ytcfg = None , check_get_keys = None , ep = ' browse ' , fatal = True , api_hostname = None ,
ytcfg = None , check_get_keys = None , ep = ' browse ' , fatal = True , api_hostname = None ,
default_client = ' web ' ) :
default_client = ' web ' ) :
response = None
for retry in self . RetryManager ( ) :
last_error = None
count = - 1
retries = self . get_param ( ' extractor_retries ' , 3 )
if check_get_keys is None :
check_get_keys = [ ]
while count < retries :
count + = 1
if last_error :
self . report_warning ( ' %s . Retrying ... ' % remove_end ( last_error , ' . ' ) )
try :
try :
response = self . _call_api (
response = self . _call_api (
ep = ep , fatal = True , headers = headers ,
ep = ep , fatal = True , headers = headers ,
video_id = item_id , query = query ,
video_id = item_id , query = query , note = note ,
context = self . _extract_context ( ytcfg , default_client ) ,
context = self . _extract_context ( ytcfg , default_client ) ,
api_key = self . _extract_api_key ( ytcfg , default_client ) ,
api_key = self . _extract_api_key ( ytcfg , default_client ) ,
api_hostname = api_hostname , default_client = default_client ,
api_hostname = api_hostname , default_client = default_client )
note = ' %s %s ' % ( note , ' (retry # %d ) ' % count if count else ' ' ) )
except ExtractorError as e :
except ExtractorError as e :
if isinstance ( e . cause , network_exceptions ) :
if not isinstance ( e . cause , network_exceptions ) :
if isinstance ( e . cause , urllib . error . HTTPError ) :
return self . _error_or_warning ( e , fatal = fatal )
elif not isinstance ( e . cause , urllib . error . HTTPError ) :
retry . error = e
continue
first_bytes = e . cause . read ( 512 )
first_bytes = e . cause . read ( 512 )
if not is_html ( first_bytes ) :
if not is_html ( first_bytes ) :
yt_error = try_get (
yt_error = try_get (
@ -793,43 +785,29 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
if yt_error :
if yt_error :
self . _report_alerts ( [ ( ' ERROR ' , yt_error ) ] , fatal = False )
self . _report_alerts ( [ ( ' ERROR ' , yt_error ) ] , fatal = False )
# Downloading page may result in intermittent 5xx HTTP error
# Downloading page may result in intermittent 5xx HTTP error
# Sometimes a 404 is also rec e ived. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# Sometimes a 404 is also rec ie ved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# We also want to catch all other network exceptions since errors in later pages can be troublesome
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
# See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
if not isinstance ( e . cause , urllib . error . HTTPError ) or e . cause . code not in ( 403 , 429 ) :
if e . cause . code not in ( 403 , 429 ) :
last_error = error_to_compat_str ( e . cause or e . msg )
retry . error = e
if count < retries :
continue
continue
if fatal :
return self . _error_or_warning ( e , fatal = fatal )
raise
else :
self . report_warning ( error_to_compat_str ( e ) )
return
else :
try :
try :
self . _extract_and_report_alerts ( response , only_once = True )
self . _extract_and_report_alerts ( response , only_once = True )
except ExtractorError as e :
except ExtractorError as e :
# YouTube servers may return errors we want to retry on in a 200 OK response
# YouTube servers may return errors we want to retry on in a 200 OK response
# See: https://github.com/yt-dlp/yt-dlp/issues/839
# See: https://github.com/yt-dlp/yt-dlp/issues/839
if ' unknown error ' in e . msg . lower ( ) :
if ' unknown error ' in e . msg . lower ( ) :
last_error = e . msg
retry . error = e
continue
continue
if fatal :
return self . _error_or_warning ( e , fatal = fatal )
raise
self . report_warning ( error_to_compat_str ( e ) )
return
if not check_get_keys or dict_get ( response , check_get_keys ) :
break
# Youtube sometimes sends incomplete data
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
last_error = ' Incomplete data received '
if not traverse_obj ( response , * variadic ( check_get_keys ) ) :
if count > = retries :
retry . error = ExtractorError ( ' Incomplete data received ' )
if fatal :
continue
raise ExtractorError ( last_error )
else :
self . report_warning ( last_error )
return
return response
return response
@staticmethod
@staticmethod
@ -4522,48 +4500,30 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
return ' webpage ' in self . _configuration_arg ( ' skip ' , ie_key = YoutubeTabIE . ie_key ( ) )
return ' webpage ' in self . _configuration_arg ( ' skip ' , ie_key = YoutubeTabIE . ie_key ( ) )
def _extract_webpage ( self , url , item_id , fatal = True ) :
def _extract_webpage ( self , url , item_id , fatal = True ) :
retries = self . get_param ( ' extractor_retries ' , 3 )
webpage , data = None , None
count = - 1
for retry in self . RetryManager ( fatal = fatal ) :
webpage = data = last_error = None
while count < retries :
count + = 1
# Sometimes youtube returns a webpage with incomplete ytInitialData
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if last_error :
self . report_warning ( ' %s . Retrying ... ' % last_error )
try :
try :
webpage = self . _download_webpage (
webpage = self . _download_webpage ( url , item_id , note = ' Downloading webpage ' )
url , item_id ,
note = ' Downloading webpage %s ' % ( ' (retry # %d ) ' % count if count else ' ' , ) )
data = self . extract_yt_initial_data ( item_id , webpage or ' ' , fatal = fatal ) or { }
data = self . extract_yt_initial_data ( item_id , webpage or ' ' , fatal = fatal ) or { }
except ExtractorError as e :
except ExtractorError as e :
if isinstance ( e . cause , network_exceptions ) :
if isinstance ( e . cause , network_exceptions ) :
if not isinstance ( e . cause , urllib . error . HTTPError ) or e . cause . code not in ( 403 , 429 ) :
if not isinstance ( e . cause , urllib . error . HTTPError ) or e . cause . code not in ( 403 , 429 ) :
last_error = error_to_compat_str ( e . cause or e . msg )
retry . error = e
if count < retries :
continue
continue
if fatal :
self . _error_or_warning ( e , fatal = fatal )
raise
self . report_warning ( error_to_compat_str ( e ) )
break
break
else :
try :
try :
self . _extract_and_report_alerts ( data )
self . _extract_and_report_alerts ( data )
except ExtractorError as e :
except ExtractorError as e :
if fatal :
self . _error_or_warning ( e , fatal = fatal )
raise
self . report_warning ( error_to_compat_str ( e ) )
break
if dict_get ( data , ( ' contents ' , ' currentVideoEndpoint ' , ' onResponseReceivedActions ' ) ) :
break
break
last_error = ' Incomplete yt initial data received '
# Sometimes youtube returns a webpage with incomplete ytInitialData
if count > = retries :
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if fatal :
if not traverse_obj ( data , ' contents ' , ' currentVideoEndpoint ' , ' onResponseReceivedActions ' ) :
raise ExtractorError ( last_error )
retry . error = ExtractorError ( ' Incomplete yt initial data received ' )
self . report_warning ( last_error )
continue
break
return webpage , data
return webpage , data