[utils] Add `subs_list_to_dict()` traversal helper

Thx: yt-dlp/yt-dlp#10653, etc
pull/29686/merge
dirkf 1 month ago
parent a9b4649d92
commit 70b40dd1ef

@ -16,6 +16,7 @@ from youtube_dl.traversal import (
dict_get,
get_first,
require,
subs_list_to_dict,
T,
traverse_obj,
unpack,
@ -30,6 +31,7 @@ from youtube_dl.compat import (
compat_zip as zip,
)
from youtube_dl.utils import (
determine_ext,
ExtractorError,
int_or_none,
join_nonempty,
@ -495,6 +497,105 @@ class TestTraversalHelpers(_TestCase):
traverse_obj(_TEST_DATA, ('str', T(require('value')))), 'str',
'`require` should pass through non-`None` values')
def test_subs_list_to_dict(self):
    """Check `subs_list_to_dict()` when used as the last step of a traversal path"""

    def incomplete_subs():
        # new list on each call: a complete item, an item with neither
        # url nor content, a content-only item and an item with no name
        return [
            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
            {'name': 'de'},
            {'name': 'en', 'content': 'content'},
            {'url': 'https://example.com/subs/en'},
        ]

    def odd_typed_subs():
        # new list on each call: `name` and `ext` values that are not
        # strings, mixed with string ones
        return [
            {'name': 1, 'url': 'https://example.com/subs/de1'},
            {'name': {}, 'url': 'https://example.com/subs/de2'},
            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
        ]

    # plain grouping of the subtitle items under their language tag
    grouped = traverse_obj(
        [
            {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
            {'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
            {'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
        ],
        [Ellipsis, {
            'id': 'name',
            'url': 'url',
        }, all, T(subs_list_to_dict)])
    self.assertEqual(
        grouped,
        {
            'de': [{'url': 'https://example.com/subs/de.vtt'}],
            'en': [
                {'url': 'https://example.com/subs/en1.ass'},
                {'url': 'https://example.com/subs/en2.ass'},
            ],
        },
        'function should build subtitle dict from list of subtitles')

    # with no default language, items lacking an id or both url and
    # data are expected to be absent from the result
    filtered = traverse_obj(
        incomplete_subs(),
        [Ellipsis, {
            'id': 'name',
            'data': 'content',
            'url': 'url',
        }, all, T(subs_list_to_dict(lang=None))])
    self.assertEqual(
        filtered,
        {
            'de': [{'url': 'https://example.com/subs/de.ass'}],
            'en': [{'data': 'content'}],
        },
        'subs with mandatory items missing should be filtered')

    # a default `ext` is expected only where the item has none
    with_default_ext = traverse_obj(
        [
            {'url': 'https://example.com/subs/de.ass', 'name': 'de'},
            {'url': 'https://example.com/subs/en', 'name': 'en'},
        ],
        [Ellipsis, {
            'id': 'name',
            'ext': ['url', T(determine_ext(default_ext=None))],
            'url': 'url',
        }, all, T(subs_list_to_dict(ext='ext'))])
    self.assertEqual(
        with_default_ext,
        {
            'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
            'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
        },
        '`ext` should set default ext but leave existing value untouched')

    # `quality` comes from `prio` via `int`; the item with `prio` False
    # is expected first and `quality` itself is not in the result
    by_quality = traverse_obj(
        [
            {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
            {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
        ],
        [Ellipsis, {
            'id': 'name',
            'quality': ['prio', T(int)],
            'url': 'url',
        }, all, T(subs_list_to_dict(ext='ext'))])
    self.assertEqual(
        by_quality,
        {'en': [
            {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
            {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
        ]},
        '`quality` key should sort subtitle list accordingly')

    # same input as the filtering case, but now with a default language
    defaulted_lang = traverse_obj(
        incomplete_subs(),
        [Ellipsis, {
            'id': 'name',
            'url': 'url',
            'data': 'content',
        }, all, T(subs_list_to_dict(lang='en'))])
    self.assertEqual(
        defaulted_lang,
        {
            'de': [{'url': 'https://example.com/subs/de.ass'}],
            'en': [
                {'data': 'content'},
                {'url': 'https://example.com/subs/en'},
            ],
        },
        'optionally provided lang should be used if no id available')

    # non-string id: item expected to be dropped when there is no
    # default language; non-string ext: expected to be removed
    typed_no_default = traverse_obj(
        odd_typed_subs(),
        [Ellipsis, {
            'id': 'name',
            'url': 'url',
            'ext': 'ext',
        }, all, T(subs_list_to_dict(lang=None))])
    self.assertEqual(
        typed_no_default,
        {
            'de': [
                {'url': 'https://example.com/subs/de3'},
                {'url': 'https://example.com/subs/de4'},
            ],
        },
        'non str types should be ignored for id and ext')

    # non-string id with a default language: item expected under the
    # default language instead
    typed_with_default = traverse_obj(
        odd_typed_subs(),
        [Ellipsis, {
            'id': 'name',
            'url': 'url',
            'ext': 'ext',
        }, all, T(subs_list_to_dict(lang='de'))])
    self.assertEqual(
        typed_with_default,
        {
            'de': [
                {'url': 'https://example.com/subs/de1'},
                {'url': 'https://example.com/subs/de2'},
                {'url': 'https://example.com/subs/de3'},
                {'url': 'https://example.com/subs/de4'},
            ],
        },
        'non str types should be replaced by default id')
def test_unpack(self):
self.assertEqual(
unpack(lambda *x: ''.join(map(compat_str, x)))([1, 2, 3]), '123')

@ -6,6 +6,7 @@ from .utils import (
dict_get,
get_first,
require,
subs_list_to_dict,
T,
traverse_obj,
unpack,

@ -6599,6 +6599,51 @@ class require(ExtractorError):
return value
@partial_application
# typing: (subs: list[dict], /, *, lang='und', ext=None) -> dict[str, list[dict]]
def subs_list_to_dict(subs, lang='und', ext=None):
    """
    Collect the subtitle dicts from a traversal into a subtitle dict
    mapping each language tag to a list of subtitle dicts.
    The path should have an `all` immediately before this function.

    Arguments:
    `lang`     The default language tag for subtitle dicts whose `id`
               is missing or not a string (`und`: undefined); if
               falsy, such subtitle dicts are skipped
    `ext`      The default value for `ext` in subtitle dicts whose
               `ext` is missing or not a string; if falsy, such an
               `ext` is removed instead

    In each subtitle dict you can set the following additional items:
    `id`       The language tag to which the subtitle dict should be added
    `quality`  The sort order for each subtitle dict

    A subtitle dict is skipped if `url_or_none()` rejects its `url` and
    it has no truthy `data`. The subtitle dicts passed in are modified
    in place (`id` and `quality` are removed, `url` is replaced by the
    checked value or removed).
    """

    def take_quality(item):
        # consume `quality` so that it is not left in the result;
        # a missing or falsy value sorts as 0
        return item.pop('quality', 0) or 0

    by_lang = collections.defaultdict(list)

    for sub in subs:
        checked_url = url_or_none(sub.pop('url', None))
        if checked_url:
            sub['url'] = checked_url
        elif not sub.get('data'):
            # neither a usable url nor any data
            continue

        tag = sub.pop('id', None)
        if not isinstance(tag, compat_str):
            if not lang:
                continue
            tag = lang

        if not isinstance(sub.get('ext'), compat_str):
            if ext:
                sub['ext'] = ext
            else:
                sub.pop('ext', None)

        by_lang[tag].append(sub)

    # return a plain dict rather than the defaultdict
    by_lang = dict(by_lang)

    for lang_subs in by_lang.values():
        lang_subs.sort(key=take_quality)

    return by_lang
def unpack(func, **kwargs):
"""Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
@functools.wraps(func)

Loading…
Cancel
Save