|
|
|
|
@ -4029,6 +4029,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
STREAMING_DATA_CLIENT_NAME: client_name,
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
def set_audio_lang_from_orig_subs_lang(lang_code):
    """Fill in a missing audio language on formats using *lang_code*.

    Iterates over ``formats`` (taken from an outer scope that is not part of
    this block) and, for every entry whose ``acodec`` is not ``'none'`` and
    whose ``language`` is absent or falsy, stores *lang_code* under the
    ``language`` key. Entries that already have a language are not touched.
    """
    for fmt in formats:
        # Skip entries marked as having no audio codec, and entries that
        # already carry a language value.
        if fmt.get('acodec') == 'none' or fmt.get('language'):
            continue
        fmt['language'] = lang_code
|
|
|
|
|
|
|
|
|
|
subtitles = {}
|
|
|
|
|
skipped_subs_clients = set()
|
|
|
|
|
|
|
|
|
|
@ -4088,7 +4093,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
|
|
|
|
|
orig_lang = qs.get('lang', [None])[-1]
|
|
|
|
|
lang_name = self._get_text(caption_track, 'name', max_runs=1)
|
|
|
|
|
if caption_track.get('kind') != 'asr':
|
|
|
|
|
is_manual_subs = caption_track.get('kind') != 'asr'
|
|
|
|
|
if is_manual_subs:
|
|
|
|
|
if not lang_code:
|
|
|
|
|
continue
|
|
|
|
|
process_language(
|
|
|
|
|
@ -4099,16 +4105,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
if not trans_code:
|
|
|
|
|
continue
|
|
|
|
|
orig_trans_code = trans_code
|
|
|
|
|
if caption_track.get('kind') != 'asr' and trans_code != 'und':
|
|
|
|
|
if is_manual_subs and trans_code != 'und':
|
|
|
|
|
if not get_translated_subs:
|
|
|
|
|
continue
|
|
|
|
|
trans_code += f'-{lang_code}'
|
|
|
|
|
trans_name += format_field(lang_name, None, ' from %s')
|
|
|
|
|
if lang_code == f'a-{orig_trans_code}':
|
|
|
|
|
# Set audio language based on original subtitles
|
|
|
|
|
for f in formats:
|
|
|
|
|
if f.get('acodec') != 'none' and not f.get('language'):
|
|
|
|
|
f['language'] = orig_trans_code
|
|
|
|
|
set_audio_lang_from_orig_subs_lang(orig_trans_code)
|
|
|
|
|
# Add an "-orig" label to the original language so that it can be distinguished.
|
|
|
|
|
# The subs are returned without "-orig" as well for compatibility
|
|
|
|
|
process_language(
|
|
|
|
|
@ -4119,6 +4123,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
automatic_captions, base_url, trans_code, trans_name, client_name,
|
|
|
|
|
pot_params if orig_lang == orig_trans_code else {'tlang': trans_code, **pot_params})
|
|
|
|
|
|
|
|
|
|
# Extract automatic captions when the language is not in 'translationLanguages'
|
|
|
|
|
# e.g. Cantonese [yue], see https://github.com/yt-dlp/yt-dlp/issues/14889
|
|
|
|
|
lang_code = remove_start(lang_code, 'a-')
|
|
|
|
|
if is_manual_subs or not lang_code or lang_code in automatic_captions:
|
|
|
|
|
continue
|
|
|
|
|
lang_name = remove_end(lang_name, ' (auto-generated)')
|
|
|
|
|
if caption_track.get('isTranslatable'):
|
|
|
|
|
# We can assume this is the original audio language
|
|
|
|
|
set_audio_lang_from_orig_subs_lang(lang_code)
|
|
|
|
|
process_language(
|
|
|
|
|
automatic_captions, base_url, f'{lang_code}-orig',
|
|
|
|
|
f'{lang_name} (Original)', client_name, pot_params)
|
|
|
|
|
process_language(
|
|
|
|
|
automatic_captions, base_url, lang_code, lang_name, client_name, pot_params)
|
|
|
|
|
|
|
|
|
|
# Avoid duplication if we've already got everything we need
|
|
|
|
|
need_subs_langs.difference_update(subtitles)
|
|
|
|
|
need_caps_langs.difference_update(automatic_captions)
|
|
|
|
|
|