From 4476d2c7649b622834d9f3b116c3ccd094061d22 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Mon, 8 Nov 2021 21:17:57 +0530 Subject: [PATCH] [outtmpl] Add alternate forms for `q` and `j` --- README.md | 2 +- test/test_YoutubeDL.py | 15 ++++++++++----- yt_dlp/YoutubeDL.py | 11 ++++++----- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index ea7194c30..c049b76e1 100644 --- a/README.md +++ b/README.md @@ -1049,7 +1049,7 @@ The field names themselves (the part inside the parenthesis) can also have some 1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. Eg: `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s` 1. **Alternatives**: Alternate fields can be specified seperated with a `,`. Eg: `%(release_date>%Y,upload_date>%Y|Unknown)s` 1. **Default**: A literal default value can be specified for when the field is empty using a `|` seperator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s` -1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son, a comma seperated **l**ist (alternate form flag `#` makes it new line `\n` seperated) and a string **q**uoted for the terminal, respectively +1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma-separated **l**ist (flag `#` for `\n` newline-separated) and a string **q**uoted for the terminal (flag `#` to split a list into different arguments), respectively 1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). 
The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC To summarize, the general syntax for a field is: diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 40c4169c8..5a0dabeb6 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -656,7 +656,7 @@ class TestYoutubeDL(unittest.TestCase): 'playlist_autonumber': 2, '_last_playlist_index': 100, 'n_entries': 10, - 'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}] + 'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}] } def test_prepare_outtmpl_and_filename(self): @@ -763,14 +763,15 @@ class TestYoutubeDL(unittest.TestCase): test('a%(width|)d', 'a', outtmpl_na_placeholder='none') FORMATS = self.outtmpl_info['formats'] - sanitize = lambda x: x.replace(':', ' -').replace('"', "'") + sanitize = lambda x: x.replace(':', ' -').replace('"', "'").replace('\n', ' ') # Custom type casting - test('%(formats.:.id)l', 'id1, id2, id3') - test('%(formats.:.id)#l', ('id1\nid2\nid3', 'id1 id2 id3')) + test('%(formats.:.id)l', 'id 1, id 2, id 3') + test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3')) test('%(ext)l', 'mp4') - test('%(formats.:.id) 15l', ' id1, id2, id3') + test('%(formats.:.id) 18l', ' id 1, id 2, id 3') test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS)))) + test('%(formats)#j', (json.dumps(FORMATS, indent=4), sanitize(json.dumps(FORMATS, indent=4)))) test('%(title5).3B', 'á') test('%(title5)U', 'áéí 𝐀') test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') @@ -778,8 +779,12 @@ class TestYoutubeDL(unittest.TestCase): test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A') if compat_os_name == 'nt': test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'")) + test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', "'id 1' 'id 2' 'id 3'")) + test('%(formats.0.id)#q', ('"id 1"', "'id 1'")) else: test('%(title4)q', ('\'foo "bar" 
test\'', "'foo 'bar' test'")) + test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") + test('%(formats.0.id)#q', "'id 1'") # Internal formatting test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 2bf527770..c95198a83 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1104,22 +1104,23 @@ class YoutubeDL(object): value = default if value is None else value + flags = outer_mobj.group('conversion') or '' str_fmt = f'{fmt[:-1]}s' if fmt[-1] == 'l': # list - delim = '\n' if '#' in (outer_mobj.group('conversion') or '') else ', ' + delim = '\n' if '#' in flags else ', ' value, fmt = delim.join(variadic(value)), str_fmt elif fmt[-1] == 'j': # json - value, fmt = json.dumps(value, default=_dumpjson_default), str_fmt + value, fmt = json.dumps(value, default=_dumpjson_default, indent=4 if '#' in flags else None), str_fmt elif fmt[-1] == 'q': # quoted - value, fmt = compat_shlex_quote(str(value)), str_fmt + value = map(str, variadic(value) if '#' in flags else [value]) + value, fmt = ' '.join(map(compat_shlex_quote, value)), str_fmt elif fmt[-1] == 'B': # bytes value = f'%{str_fmt}'.encode('utf-8') % str(value).encode('utf-8') value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'U': # unicode normalized - opts = outer_mobj.group('conversion') or '' value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD - 'NF%s%s' % ('K' if '+' in opts else '', 'D' if '#' in opts else 'C'), + 'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'c': if value: