diff --git a/README.md b/README.md index a9720bfb9..45b5541cc 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t * [Subtitle Options](#subtitle-options) * [Authentication Options](#authentication-options) * [Post-processing Options](#post-processing-options) - * [SponSkrub (SponsorBlock) Options](#sponskrub-sponsorblock-options) + * [SponsorBlock Options](#sponsorblock-options) * [Extractor Options](#extractor-options) * [CONFIGURATION](#configuration) * [Authentication with .netrc file](#authentication-with-netrc-file) @@ -62,7 +62,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES The major new features from the latest release of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) are: -* **[SponSkrub Integration](#sponskrub-sponsorblock-options)**: You can use [SponSkrub](https://github.com/yt-dlp/SponSkrub) to mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API +* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in youtube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API * **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will be now preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. 
This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples)) @@ -194,7 +194,6 @@ On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https: While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly recommended * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging seperate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. Licence [depends on the build](https://www.ffmpeg.org/legal.html) -* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the [sponskrub options](#sponskrub-sponsorblock-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * [**mutagen**](https://github.com/quodlibet/mutagen) - For embedding thumbnail in certain formats. Licenced under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING) * [**pycryptodome**](https://github.com/Legrandin/pycryptodome) - For decrypting various data. Licenced under [BSD2](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**websockets**](https://github.com/aaugustin/websockets) - For downloading over websocket. Licenced under [BSD3](https://github.com/aaugustin/websockets/blob/main/LICENSE) @@ -203,6 +202,7 @@ While all the other dependancies are optional, `ffmpeg` and `ffprobe` are highly * [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](http://rtmpdump.mplayerhq.hu) * [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rstp` streams. ffmpeg will be used as a fallback. Licenced under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. 
Licenced under [BSD3](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) +* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licenced under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md) * Any external downloader that you want to use with `--downloader` To use or redistribute the dependencies, you must agree to their respective licensing terms. @@ -744,24 +744,23 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t and the arguments separated by a colon ":" to give the argument to the specified postprocessor/executable. Supported PP are: - Merger, ExtractAudio, SplitChapters, + Merger, ModifyChapters, SplitChapters, + ExtractAudio, VideoRemuxer, VideoConvertor, Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, - VideoRemuxer, VideoConvertor, SponSkrub, FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. The supported executables are: AtomicParsley, - FFmpeg, FFprobe, and SponSkrub. You can - also specify "PP+EXE:ARGS" to give the - arguments to the specified executable only - when being used by the specified - postprocessor. Additionally, for - ffmpeg/ffprobe, "_i"/"_o" can be appended - to the prefix optionally followed by a - number to pass the argument before the - specified input/output file. Eg: --ppa - "Merger+ffmpeg_i1:-v quiet". You can use - this option multiple times to give + FFmpeg and FFprobe. You can also specify + "PP+EXE:ARGS" to give the arguments to the + specified executable only when being used + by the specified postprocessor. + Additionally, for ffmpeg/ffprobe, "_i"/"_o" + can be appended to the prefix optionally + followed by a number to pass the argument + before the specified input/output file. Eg: + --ppa "Merger+ffmpeg_i1:-v quiet". You can + use this option multiple times to give different arguments to different postprocessors.
(Alias: --ppa) -k, --keep-video Keep the intermediate video file on disk @@ -775,11 +774,15 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t --no-embed-subs Do not embed subtitles (default) --embed-thumbnail Embed thumbnail in the video as cover art --no-embed-thumbnail Do not embed thumbnail (default) - --embed-metadata Embed metadata including chapter markers - (if supported by the format) to the video - file (Alias: --add-metadata) - --no-embed-metadata Do not write metadata (default) + --embed-metadata Embed metadata to the video file. Also adds + chapters to file unless --no-add-chapters + is used (Alias: --add-metadata) + --no-embed-metadata Do not add metadata to file (default) (Alias: --no-add-metadata) + --embed-chapters Add chapter markers to the video file + (Alias: --add-chapters) + --no-embed-chapters Do not add chapter markers (default) + (Alias: --no-add-chapters) --parse-metadata FROM:TO Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details @@ -827,27 +830,51 @@ Then simply run `make`. You can also run `make yt-dlp` instead to compile only t files. See "OUTPUT TEMPLATE" for details --no-split-chapters Do not split video based on chapters (default) - -## SponSkrub (SponsorBlock) Options: -[SponSkrub](https://github.com/yt-dlp/SponSkrub) is a utility to - mark/remove sponsor segments from downloaded YouTube videos using + --remove-chapters REGEX Remove chapters whose title matches the + given regular expression. This option can + be used multiple times + --no-remove-chapters Do not remove any normal chapters from the + file (default) + --force-keyframes-at-cuts Force keyframes around the chapters before + removing/splitting them. 
Requires a + reencode and thus is very slow, but the + resulting video may have fewer artifacts + around the cuts + --no-force-keyframes-at-cuts Do not force keyframes around the chapters + when cutting/splitting (default) + +## SponsorBlock Options: +Make chapter entries for, or remove various segments (sponsor, + introductions, etc.) from downloaded YouTube videos using the [SponsorBlock API](https://sponsor.ajay.app) - --sponskrub Use sponskrub to mark sponsored sections. - This is enabled by default if the sponskrub - binary exists (Youtube only) - --no-sponskrub Do not use sponskrub - --sponskrub-cut Cut out the sponsor sections instead of - simply marking them - --no-sponskrub-cut Simply mark the sponsor sections, not cut - them out (default) - --sponskrub-force Run sponskrub even if the video was already - downloaded - --no-sponskrub-force Do not cut out the sponsor sections if the - video was already downloaded (default) - --sponskrub-location PATH Location of the sponskrub binary; either - the path to the binary or its containing - directory + --sponsorblock-mark CATS SponsorBlock categories to create chapters + for, separated by commas. Available + categories are all, sponsor, intro, outro, + selfpromo, interaction, preview, + music_offtopic. You can prefix the category + with a "-" to exempt it. See + https://wiki.sponsor.ajay.app/index.php/Segment_Categories + for description of the categories. Eg: + --sponsorblock-mark all,-preview + --sponsorblock-remove CATS SponsorBlock categories to be removed from + the video file, separated by commas. If a + category is present in both mark and + remove, remove takes precedence. The syntax + and available categories are the same as + for --sponsorblock-mark + --sponsorblock-chapter-title TEMPLATE + The title template for SponsorBlock + chapters created by --sponsorblock-mark.
+ The same syntax as the output template is + used, but the only available fields are + start_time, end_time, category, categories, + name, category_names. Defaults to + "[SponsorBlock]: %(category_names)l" + --no-sponsorblock Disable both --sponsorblock-mark and + --sponsorblock-remove + --sponsorblock-api URL SponsorBlock API location, defaults to + https://sponsor.ajay.app ## Extractor Options: --extractor-retries RETRIES Number of retries for known extractor @@ -1057,6 +1084,15 @@ Available only when used in `--print`: - `urls` (string): The URLs of all requested formats, one in each line - `filename` (string): Name of the video file. Note that the actual filename may be different due to post-processing. Use `--exec echo` to get the name after all postprocessing is complete + +Available only in `--sponsorblock-chapter-title`: + + - `start_time` (numeric): Start time of the chapter in seconds + - `end_time` (numeric): End time of the chapter in seconds + - `categories` (list): The SponsorBlock categories the chapter belongs to + - `category` (string): The smallest SponsorBlock category the chapter belongs to + - `category_names` (list): Friendly names of the categories + - `name` (string): Friendly name of the smallest category Each aforementioned sequence when referenced in an output template will be replaced by the actual value corresponding to the sequence name. Note that some of the sequences are not guaranteed to be present since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with placeholder value provided with `--output-na-placeholder` (`NA` by default). 
@@ -1501,6 +1537,18 @@ These are aliases that are no longer documented for various reasons --write-srt --write-subs --yes-overwrites --force-overwrites +#### Sponskrub Options +Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of `--sponsorblock` + + --sponskrub --sponsorblock-mark all + --no-sponskrub --no-sponsorblock + --sponskrub-cut --sponsorblock-remove all + --no-sponskrub-cut --sponsorblock-remove -all + --sponskrub-force Not applicable + --no-sponskrub-force Not applicable + --sponskrub-location Not applicable + --sponskrub-args Not applicable + #### No longer supported These options may no longer work as intended diff --git a/test/test_postprocessors.py b/test/test_postprocessors.py index b15cbd28c..7d1368769 100644 --- a/test/test_postprocessors.py +++ b/test/test_postprocessors.py @@ -6,6 +6,7 @@ from __future__ import unicode_literals import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp import YoutubeDL @@ -15,6 +16,7 @@ from yt_dlp.postprocessor import ( FFmpegThumbnailsConvertorPP, MetadataFromFieldPP, MetadataParserPP, + ModifyChaptersPP ) @@ -68,3 +70,461 @@ class TestExec(unittest.TestCase): self.assertEqual(pp.parse_cmd('echo', info), cmd) self.assertEqual(pp.parse_cmd('echo {}', info), cmd) self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd) + + +class TestModifyChaptersPP(unittest.TestCase): + def setUp(self): + self._pp = ModifyChaptersPP(YoutubeDL()) + + @staticmethod + def _sponsor_chapter(start, end, cat, remove=False): + c = {'start_time': start, 'end_time': end, '_categories': [(cat, start, end)]} + if remove: + c['remove'] = True + return c + + @staticmethod + def _chapter(start, end, title=None, remove=False): + c = {'start_time': start, 'end_time': end} + if title is not None: + c['title'] = title + if remove: + c['remove'] = True + return c + + def _chapters(self, ends, titles): + 
self.assertEqual(len(ends), len(titles)) + start = 0 + chapters = [] + for e, t in zip(ends, titles): + chapters.append(self._chapter(start, e, t)) + start = e + return chapters + + def _remove_marked_arrange_sponsors_test_impl( + self, chapters, expected_chapters, expected_removed): + actual_chapters, actual_removed = ( + self._pp._remove_marked_arrange_sponsors(chapters)) + for c in actual_removed: + c.pop('title', None) + c.pop('_categories', None) + actual_chapters = [{ + 'start_time': c['start_time'], + 'end_time': c['end_time'], + 'title': c['title'], + } for c in actual_chapters] + self.assertSequenceEqual(expected_chapters, actual_chapters) + self.assertSequenceEqual(expected_removed, actual_removed) + + def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self): + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(30, 40, 'preview'), + self._sponsor_chapter(50, 60, 'sponsor')] + expected = self._chapters( + [10, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap', + 'c', '[SponsorBlock]: Sponsor', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self): + chapters = self._chapters([120], ['c']) + [ + self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'), + self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')] + expected = self._chapters( + [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', 
+ '[SponsorBlock]: Sponsor', + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion', + 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Sponsor', + '[SponsorBlock]: Unpaid/Self Promotion']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithCuts(self): + cuts = [self._chapter(10, 20, remove=True), + self._sponsor_chapter(30, 40, 'sponsor', remove=True), + self._chapter(50, 60, remove=True)] + chapters = self._chapters([70], ['c']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([40], ['c']), cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(30, 40, 'selfpromo', remove=True), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters([10, 20, 40, 50, 60], + ['c', '[SponsorBlock]: Sponsor', 'c', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 40, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self): + cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True), + self._chapter(40, 50, remove=True)] + chapters = self._chapters([70], ['c']) + [self._sponsor_chapter(10, 60, 'sponsor')] + cuts + expected = self._chapters( + [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self): + cuts = [self._sponsor_chapter(20, 50, 'selpromo', remove=True)] + chapters = self._chapters([60], ['c']) + [ + self._sponsor_chapter(10, 20, 'intro'), + self._sponsor_chapter(30, 40, 'sponsor'), + self._sponsor_chapter(50, 60, 
'outro'), + ] + cuts + expected = self._chapters( + [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(20, 30, 'selfpromo'), + self._sponsor_chapter(30, 40, 'interaction')] + expected = self._chapters( + [10, 20, 30, 40, 70], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 20, 'sponsor'), + self._sponsor_chapter(20, 30, 'interaction', remove=True), + self._chapter(30, 40, remove=True), + self._sponsor_chapter(40, 50, 'selpromo', remove=True), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters([10, 20, 30, 40], + ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(20, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor'), + self._sponsor_chapter(20, 50, 'selfpromo'), + self._sponsor_chapter(40, 60, 'interaction')] + expected = self._chapters( + [10, 20, 30, 40, 50, 60, 70], + ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def 
test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor', remove=True), + self._sponsor_chapter(20, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 60, 'interaction', remove=True)] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self): + chapters = self._chapters([170], ['c']) + [ + self._sponsor_chapter(0, 30, 'intro'), + self._sponsor_chapter(20, 50, 'sponsor'), + self._sponsor_chapter(40, 60, 'selfpromo'), + self._sponsor_chapter(70, 90, 'sponsor'), + self._sponsor_chapter(80, 100, 'sponsor'), + self._sponsor_chapter(90, 110, 'sponsor'), + self._sponsor_chapter(120, 140, 'selfpromo'), + self._sponsor_chapter(130, 160, 'interaction'), + self._sponsor_chapter(150, 170, 'outro')] + expected = self._chapters( + [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170], + ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Intermission/Intro Animation, Sponsor', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c', + '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', + '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): + chapters = self._chapters([170], ['c']) + [ + self._chapter(0, 30, remove=True), + self._sponsor_chapter(20, 50, 'sponsor', remove=True), + self._chapter(40, 60, remove=True), + self._sponsor_chapter(70, 90, 'sponsor', remove=True), + self._chapter(80, 100, remove=True), + 
self._chapter(90, 110, remove=True), + self._sponsor_chapter(120, 140, 'sponsor', remove=True), + self._sponsor_chapter(130, 160, 'selfpromo', remove=True), + self._chapter(150, 170, remove=True)] + expected_cuts = [self._chapter(0, 60, remove=True), + self._chapter(70, 110, remove=True), + self._chapter(120, 170, remove=True)] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([20], ['c']), expected_cuts) + + def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self): + chapters = self._chapters([60], ['c']) + [ + self._sponsor_chapter(10, 60, 'sponsor'), + self._sponsor_chapter(10, 40, 'intro'), + self._sponsor_chapter(30, 50, 'interaction'), + self._sponsor_chapter(30, 50, 'selfpromo', remove=True), + self._sponsor_chapter(40, 50, 'interaction'), + self._sponsor_chapter(50, 60, 'outro')] + expected = self._chapters( + [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 30, 'sponsor'), + self._sponsor_chapter(20, 50, 'interaction'), + self._sponsor_chapter(30, 50, 'selpromo', remove=True), + self._sponsor_chapter(40, 60, 'sponsor'), + self._sponsor_chapter(50, 60, 'interaction')] + expected = self._chapters( + [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): + chapters = self._chapters([70], ['c']) + [ + self._sponsor_chapter(10, 60, 'sponsor'), + self._sponsor_chapter(20, 60, 'interaction'), + self._sponsor_chapter(30, 50, 
'selfpromo', remove=True)] + expected = self._chapters( + [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', + '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) + self._remove_marked_arrange_sponsors_test_impl( + chapters, expected, [self._chapter(30, 50, remove=True)]) + + def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self): + chapters = self._chapters([200], ['c']) + [ + self._sponsor_chapter(10, 40, 'sponsor'), + self._sponsor_chapter(10, 30, 'intro'), + self._chapter(20, 30, remove=True), + self._sponsor_chapter(30, 40, 'selfpromo'), + self._sponsor_chapter(50, 70, 'sponsor'), + self._sponsor_chapter(60, 80, 'interaction'), + self._chapter(70, 80, remove=True), + self._sponsor_chapter(70, 90, 'sponsor'), + self._sponsor_chapter(80, 100, 'interaction'), + self._sponsor_chapter(120, 170, 'selfpromo'), + self._sponsor_chapter(130, 180, 'outro'), + self._chapter(140, 150, remove=True), + self._chapter(150, 160, remove=True)] + expected = self._chapters( + [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160], + ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', + 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder', + '[SponsorBlock]: Interaction Reminder', 'c', '[SponsorBlock]: Unpaid/Self Promotion', + '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits', '[SponsorBlock]: Endcards/Credits', 'c']) + expected_cuts = [self._chapter(20, 30, remove=True), + self._chapter(70, 80, remove=True), + self._chapter(140, 160, remove=True)] + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts) + + def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self): + chapters = (self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + + [self._sponsor_chapter(10, 90, 'sponsor')]) + expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5']) + 
self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self): + cuts = [self._chapter(10, 90, remove=True)] + chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts + expected = self._chapters([10, 20], ['c1', 'c5']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self): + chapters = (self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(20, 30, 'sponsor'), + self._sponsor_chapter(50, 70, 'selfpromo')]) + expected = self._chapters([10, 20, 30, 40, 50, 70, 80], + ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3', + '[SponsorBlock]: Unpaid/Self Promotion', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self): + cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)] + chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self): + chapters = (self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(10, 30, 'music_offtopic')]) + expected = self._chapters( + [10, 30, 40, 50, 60], + ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self): + cuts = [self._chapter(10, 30, remove=True)] + chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + 
self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self): + chapters = (self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(20, 30, 'sponsor')]) + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self): + cuts = [self._chapter(20, 30, remove=True)] + chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self): + chapters = (self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(20, 30, 'sponsor')]) + expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self): + cuts = [self._chapter(20, 30, remove=True)] + chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self): + chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(10, 30, 'sponsor')]) + expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self): + cuts = [self._chapter(10, 30, remove=True)] + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts + expected 
= self._chapters([10, 20], ['c1', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self): + chapters = (self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')]) + expected = self._chapters( + [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self): + cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)] + chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self): + chapters = (self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')]) + expected = self._chapters( + [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self): + cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)] + chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + cuts + expected = self._chapters([10], ['c2']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) + + def test_remove_marked_arrange_sponsors_EverythingSponsored(self): + chapters = (self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + + [self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')]) + expected = self._chapters([20, 
40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) + self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) + + def test_remove_marked_arrange_sponsors_EverythingCut(self): + cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)] + chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, [], [self._chapter(0, 40, remove=True)]) + + def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self): + chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4']) + self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) + + def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self): + chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'), + self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'), + self._sponsor_chapter(0.4, 0.5, 'outro')] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), []) + + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self): + cuts = [self._chapter(1.5, 2.5, remove=True)] + chapters = self._chapters([2, 3, 3.5], ['c1', 'c2', 'c3']) + cuts + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts) + + def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): + chapters = self._chapters([1, 3, 4], ['c1', 'c2', 'c3']) + [ + self._sponsor_chapter(1.5, 2.5, 'sponsor')] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1.5, 3, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) + + def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): + chapters = self._chapters([2, 3, 5], ['c1', 'c2', 'c3']) + [ + self._sponsor_chapter(1, 3, 'sponsor'), + self._sponsor_chapter(2.5, 4, 
'selfpromo') + ] + self._remove_marked_arrange_sponsors_test_impl( + chapters, self._chapters([1, 3, 4, 5], [ + 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) + + def test_make_concat_opts_CommonCase(self): + sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +outpoint 1.000000 +file 'file:test' +inpoint 2.000000 +outpoint 10.000000 +file 'file:test' +inpoint 20.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 30) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self): + sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +inpoint 1.000000 +outpoint 10.000000 +file 'file:test' +inpoint 20.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 30) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self): + sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] + expected = '''ffconcat version 1.0 +file 'file:test' +outpoint 1.000000 +file 'file:test' +inpoint 2.000000 +outpoint 10.000000 +''' + opts = self._pp._make_concat_opts(sponsor_chapters, 20) + self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) + + def test_quote_for_concat_RunsOfQuotes(self): + self.assertEqual( + r"'special '\'' '\'\''characters'\'\'\''galore'", + self._pp._quote_for_ffmpeg("special ' ''characters'''galore")) + + def test_quote_for_concat_QuotesAtStart(self): + self.assertEqual( + r"\'\'\''special '\'' characters '\'' galore'", + self._pp._quote_for_ffmpeg("'''special ' characters ' galore")) + + def test_quote_for_concat_QuotesAtEnd(self): + self.assertEqual( + r"'special '\'' characters '\'' galore'\'\'\'", + 
self._pp._quote_for_ffmpeg("special ' characters ' galore'''")) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 58e8ea5d9..91b2bcb85 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -13,7 +13,6 @@ import random import re import sys - from .options import ( parseOpts, ) @@ -307,6 +306,7 @@ def _real_main(argv=None): opts.forceprint = opts.forceprint or [] for tmpl in opts.forceprint or []: validate_outtmpl(tmpl, 'print template') + validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title') if opts.extractaudio and not opts.keepvideo and opts.format is None: opts.format = 'bestaudio/best' @@ -353,16 +353,35 @@ def _real_main(argv=None): if opts.getcomments and not printing_json: opts.writeinfojson = True + if opts.no_sponsorblock: + opts.sponsorblock_mark = set() + opts.sponsorblock_remove = set() + sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove + + if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: + opts.addchapters = True + opts.remove_chapters = opts.remove_chapters or [] + def report_conflict(arg1, arg2): warnings.append('%s is ignored since %s was given' % (arg2, arg1)) - if opts.remuxvideo and opts.recodevideo: - report_conflict('--recode-video', '--remux-video') - opts.remuxvideo = False + if (opts.remove_chapters or sponsorblock_query) and opts.sponskrub is not False: + if opts.sponskrub: + if opts.remove_chapters: + report_conflict('--remove-chapters', '--sponskrub') + if opts.sponsorblock_mark: + report_conflict('--sponsorblock-mark', '--sponskrub') + if opts.sponsorblock_remove: + report_conflict('--sponsorblock-remove', '--sponskrub') + opts.sponskrub = False if opts.sponskrub_cut and opts.split_chapters and opts.sponskrub is not False: report_conflict('--split-chapter', '--sponskrub-cut') opts.sponskrub_cut = False + if opts.remuxvideo and opts.recodevideo: + report_conflict('--recode-video', '--remux-video') + opts.remuxvideo = False + if 
opts.allow_unplayable_formats: if opts.extractaudio: report_conflict('--allow-unplayable-formats', '--extract-audio') @@ -388,12 +407,26 @@ def _real_main(argv=None): if opts.fixup and opts.fixup.lower() not in ('never', 'ignore'): report_conflict('--allow-unplayable-formats', '--fixup') opts.fixup = 'never' + if opts.remove_chapters: + report_conflict('--allow-unplayable-formats', '--remove-chapters') + opts.remove_chapters = [] + if opts.sponsorblock_remove: + report_conflict('--allow-unplayable-formats', '--sponsorblock-remove') + opts.sponsorblock_remove = set() if opts.sponskrub: report_conflict('--allow-unplayable-formats', '--sponskrub') opts.sponskrub = False # PostProcessors postprocessors = [] + if sponsorblock_query: + postprocessors.append({ + 'key': 'SponsorBlock', + 'categories': sponsorblock_query, + 'api': opts.sponsorblock_api, + # Run this immediately after extraction is complete + 'when': 'pre_process' + }) if opts.parse_metadata: postprocessors.append({ 'key': 'MetadataParser', @@ -439,16 +472,7 @@ def _real_main(argv=None): 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, }) - # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and - # FFmpegExtractAudioPP as containers before conversion may not support - # metadata (3gp, webm, etc.) - # And this post-processor should be placed before other metadata - # manipulating post-processors (FFmpegEmbedSubtitle) to prevent loss of - # extra metadata. By default ffmpeg preserves metadata applicable for both - # source and target containers. From this point the container won't change, - # so metadata can be added here. - if opts.addmetadata: - postprocessors.append({'key': 'FFmpegMetadata'}) + # If ModifyChapters is going to remove chapters, subtitles must already be in the container. 
if opts.embedsubtitles: already_have_subtitle = opts.writesubtitles and 'no-keep-subs' not in compat_opts postprocessors.append({ @@ -462,6 +486,33 @@ def _real_main(argv=None): # this was the old behaviour if only --all-sub was given. if opts.allsubtitles and not opts.writeautomaticsub: opts.writesubtitles = True + # ModifyChapters must run before FFmpegMetadataPP + remove_chapters_patterns = [] + for regex in opts.remove_chapters: + try: + remove_chapters_patterns.append(re.compile(regex)) + except re.error as err: + parser.error(f'invalid --remove-chapters regex {regex!r} - {err}') + if opts.remove_chapters or sponsorblock_query: + postprocessors.append({ + 'key': 'ModifyChapters', + 'remove_chapters_patterns': remove_chapters_patterns, + 'remove_sponsor_segments': opts.sponsorblock_remove, + 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, + 'force_keyframes': opts.force_keyframes_at_cuts + }) + # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and + # FFmpegExtractAudioPP as containers before conversion may not support + # metadata (3gp, webm, etc.) + # By default ffmpeg preserves metadata applicable for both + # source and target containers. From this point the container won't change, + # so metadata can be added here. 
+ if opts.addmetadata or opts.addchapters: + postprocessors.append({ + 'key': 'FFmpegMetadata', + 'add_chapters': opts.addchapters, + 'add_metadata': opts.addmetadata, + }) # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 @@ -485,7 +536,10 @@ def _real_main(argv=None): if not already_have_thumbnail: opts.writethumbnail = True if opts.split_chapters: - postprocessors.append({'key': 'FFmpegSplitChapters'}) + postprocessors.append({ + 'key': 'FFmpegSplitChapters', + 'force_keyframes': opts.force_keyframes_at_cuts, + }) # XAttrMetadataPP should be run after post-processors that may change file contents if opts.xattrs: postprocessors.append({'key': 'XAttrMetadata'}) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 0f8ce8ce8..483cce8d8 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -28,7 +28,9 @@ from .postprocessor import ( FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP, FFmpegVideoRemuxerPP, + SponsorBlockPP, ) +from .postprocessor.modify_chapters import DEFAULT_SPONSORBLOCK_CHAPTER_TITLE def _hide_login_info(opts): @@ -1218,10 +1220,10 @@ def parseOpts(overrideArguments=None): 'Give these arguments to the postprocessors. ' 'Specify the postprocessor/executable name and the arguments separated by a colon ":" ' 'to give the argument to the specified postprocessor/executable. Supported PP are: ' - 'Merger, ExtractAudio, SplitChapters, Metadata, EmbedSubtitle, EmbedThumbnail, ' - 'SubtitlesConvertor, ThumbnailsConvertor, VideoRemuxer, VideoConvertor, ' - 'SponSkrub, FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. ' - 'The supported executables are: AtomicParsley, FFmpeg, FFprobe, and SponSkrub. 
' + 'Merger, ModifyChapters, SplitChapters, ExtractAudio, VideoRemuxer, VideoConvertor, ' + 'Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, ' + 'FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. ' + 'The supported executables are: AtomicParsley, FFmpeg and FFprobe. ' 'You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable ' 'only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, ' '"_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument ' @@ -1263,11 +1265,19 @@ def parseOpts(overrideArguments=None): postproc.add_option( '--embed-metadata', '--add-metadata', action='store_true', dest='addmetadata', default=False, - help='Embed metadata including chapter markers (if supported by the format) to the video file (Alias: --add-metadata)') + help='Embed metadata to the video file. Also adds chapters to file unless --no-add-chapters is used (Alias: --add-metadata)') postproc.add_option( '--no-embed-metadata', '--no-add-metadata', action='store_false', dest='addmetadata', - help='Do not write metadata (default) (Alias: --no-add-metadata)') + help='Do not add metadata to file (default) (Alias: --no-add-metadata)') + postproc.add_option( + '--embed-chapters', '--add-chapters', + action='store_true', dest='addchapters', default=None, + help='Add chapter markers to the video file (Alias: --add-chapters)') + postproc.add_option( + '--no-embed-chapters', '--no-add-chapters', + action='store_false', dest='addchapters', + help='Do not add chapter markers (default) (Alias: --no-add-chapters)') postproc.add_option( '--metadata-from-title', metavar='FORMAT', dest='metafromtitle', @@ -1354,41 +1364,90 @@ def parseOpts(overrideArguments=None): '--no-split-chapters', '--no-split-tracks', dest='split_chapters', action='store_false', help='Do not split video based on chapters (default)') + postproc.add_option( + '--remove-chapters', + 
metavar='REGEX', dest='remove_chapters', action='append', + help='Remove chapters whose title matches the given regular expression. This option can be used multiple times') + postproc.add_option( + '--no-remove-chapters', dest='remove_chapters', action='store_const', const=None, + help='Do not remove any chapters from the file (default)') + postproc.add_option( + '--force-keyframes-at-cuts', + action='store_true', dest='force_keyframes_at_cuts', default=False, + help=( + 'Force keyframes around the chapters before removing/splitting them. ' + 'Requires a reencode and thus is very slow, but the resulting video ' + 'may have fewer artifacts around the cuts')) + postproc.add_option( + '--no-force-keyframes-at-cuts', + action='store_false', dest='force_keyframes_at_cuts', + help='Do not force keyframes around the chapters when cutting/splitting (default)') - sponskrub = optparse.OptionGroup(parser, 'SponSkrub (SponsorBlock) Options', description=( - 'SponSkrub (https://github.com/yt-dlp/SponSkrub) is a utility to mark/remove sponsor segments ' - 'from downloaded YouTube videos using SponsorBlock API (https://sponsor.ajay.app)')) - sponskrub.add_option( + sponsorblock = optparse.OptionGroup(parser, 'SponsorBlock Options', description=( + 'Make chapter entries for, or remove various segments (sponsor, introductions, etc.) ' + 'from downloaded YouTube videos using the SponsorBlock API (https://sponsor.ajay.app)')) + sponsorblock.add_option( + '--sponsorblock-mark', metavar='CATS', + dest='sponsorblock_mark', default=set(), action='callback', type='str', + callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()}, + help=( + 'SponsorBlock categories to create chapters for, separated by commas. ' + 'Available categories are all, %s. You can prefix the category with a "-" to exempt it. ' + 'See https://wiki.sponsor.ajay.app/index.php/Segment_Categories for description of the categories. 
' + 'Eg: --sponsorblock-query all,-preview' % ', '.join(SponsorBlockPP.CATEGORIES.keys()))) + sponsorblock.add_option( + '--sponsorblock-remove', metavar='CATS', + dest='sponsorblock_remove', default=set(), action='callback', type='str', + callback=_set_from_options_callback, callback_kwargs={'allowed_values': SponsorBlockPP.CATEGORIES.keys()}, + help=( + 'SponsorBlock categories to be removed from the video file, separated by commas. ' + 'If a category is present in both mark and remove, remove takes precedence. ' + 'The syntax and available categories are the same as for --sponsorblock-mark')) + sponsorblock.add_option( + '--sponsorblock-chapter-title', metavar='TEMPLATE', + default=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, dest='sponsorblock_chapter_title', + help=( + 'The title template for SponsorBlock chapters created by --sponsorblock-mark. ' + 'The same syntax as the output template is used, but the only available fields are ' + 'start_time, end_time, category, categories, name, category_names. Defaults to "%default"')) + sponsorblock.add_option( + '--no-sponsorblock', default=False, + action='store_true', dest='no_sponsorblock', + help='Disable both --sponsorblock-mark and --sponsorblock-remove') + sponsorblock.add_option( + '--sponsorblock-api', metavar='URL', + default='https://sponsor.ajay.app', dest='sponsorblock_api', + help='SponsorBlock API location, defaults to %default') + + sponsorblock.add_option( '--sponskrub', action='store_true', dest='sponskrub', default=None, - help=( - 'Use sponskrub to mark sponsored sections. 
' - 'This is enabled by default if the sponskrub binary exists (Youtube only)')) - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--no-sponskrub', action='store_false', dest='sponskrub', - help='Do not use sponskrub') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-cut', default=False, action='store_true', dest='sponskrub_cut', - help='Cut out the sponsor sections instead of simply marking them') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--no-sponskrub-cut', action='store_false', dest='sponskrub_cut', - help='Simply mark the sponsor sections, not cut them out (default)') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-force', default=False, action='store_true', dest='sponskrub_force', - help='Run sponskrub even if the video was already downloaded') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--no-sponskrub-force', action='store_true', dest='sponskrub_force', - help='Do not cut out the sponsor sections if the video was already downloaded (default)') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-location', metavar='PATH', dest='sponskrub_path', default='', - help='Location of the sponskrub binary; either the path to the binary or its containing directory') - sponskrub.add_option( + help=optparse.SUPPRESS_HELP) + sponsorblock.add_option( '--sponskrub-args', dest='sponskrub_args', metavar='ARGS', help=optparse.SUPPRESS_HELP) @@ -1457,7 +1516,7 @@ def parseOpts(overrideArguments=None): parser.add_option_group(subtitles) parser.add_option_group(authentication) parser.add_option_group(postproc) - parser.add_option_group(sponskrub) + parser.add_option_group(sponsorblock) parser.add_option_group(extractor) if overrideArguments is not None: diff --git a/yt_dlp/postprocessor/__init__.py b/yt_dlp/postprocessor/__init__.py 
index 31c2d7c68..adbcd3755 100644 --- a/yt_dlp/postprocessor/__init__.py +++ b/yt_dlp/postprocessor/__init__.py @@ -26,7 +26,9 @@ from .metadataparser import ( MetadataParserPP, ) from .movefilesafterdownload import MoveFilesAfterDownloadPP +from .sponsorblock import SponsorBlockPP from .sponskrub import SponSkrubPP +from .modify_chapters import ModifyChaptersPP def get_postprocessor(key): @@ -56,6 +58,8 @@ __all__ = [ 'MetadataFromFieldPP', 'MetadataFromTitlePP', 'MoveFilesAfterDownloadPP', + 'SponsorBlockPP', 'SponSkrubPP', + 'ModifyChaptersPP', 'XAttrMetadataPP', ] diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 7537d5db4..806334645 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -8,22 +8,22 @@ import time import re import json - from .common import AudioConversionError, PostProcessor from ..compat import compat_str, compat_numeric_types from ..utils import ( + dfxp2srt, encodeArgument, encodeFilename, get_exe_version, is_outdated_version, + ISO639Utils, + orderedSet, PostProcessingError, prepend_extension, - shell_quote, - dfxp2srt, - ISO639Utils, process_communicate_or_kill, replace_extension, + shell_quote, traverse_obj, variadic, ) @@ -281,7 +281,8 @@ class FFmpegPostProcessor(PostProcessor): def run_ffmpeg(self, path, out_path, opts, **kwargs): return self.run_ffmpeg_multiple_files([path], out_path, opts, **kwargs) - def _ffmpeg_filename_argument(self, fn): + @staticmethod + def _ffmpeg_filename_argument(fn): # Always use 'file:' because the filename may contain ':' (ffmpeg # interprets that as a protocol) or can start with '-' (-- is broken in # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details) @@ -290,6 +291,62 @@ class FFmpegPostProcessor(PostProcessor): return fn return 'file:' + fn if fn != '-' else fn + @staticmethod + def _quote_for_ffmpeg(string): + # See https://ffmpeg.org/ffmpeg-utils.html#toc-Quoting-and-escaping + # A sequence of '' produces '\'''\''; + # 
final replace removes the empty '' between \' \'. + string = string.replace("'", r"'\''").replace("'''", "'") + # Handle potential ' at string boundaries. + string = string[1:] if string[0] == "'" else "'" + string + return string[:-1] if string[-1] == "'" else string + "'" + + def force_keyframes(self, filename, timestamps): + timestamps = orderedSet(timestamps) + if timestamps[0] == 0: + timestamps = timestamps[1:] + keyframe_file = prepend_extension(filename, 'keyframes.temp') + self.to_screen(f'Re-encoding "{filename}" with appropriate keyframes') + self.run_ffmpeg(filename, keyframe_file, ['-force_key_frames', ','.join( + f'{t:.6f}' for t in timestamps)]) + return keyframe_file + + def concat_files(self, in_files, out_file, concat_opts=None): + """ + Use concat demuxer to concatenate multiple files having identical streams. + + Only inpoint, outpoint, and duration concat options are supported. + See https://ffmpeg.org/ffmpeg-formats.html#concat-1 for details + """ + concat_file = f'{out_file}.concat' + self.write_debug(f'Writing concat spec to {concat_file}') + with open(concat_file, 'wt', encoding='utf-8') as f: + f.writelines(self._concat_spec(in_files, concat_opts)) + + out_flags = ['-c', 'copy'] + if out_file.rpartition('.')[-1] in ('mp4', 'mov'): + # For some reason, '-c copy' is not enough to copy subtitles + out_flags.extend(['-c:s', 'mov_text', '-movflags', '+faststart']) + + try: + self.real_run_ffmpeg( + [(concat_file, ['-hide_banner', '-nostdin', '-f', 'concat', '-safe', '0'])], + [(out_file, out_flags)]) + finally: + os.remove(concat_file) + + @classmethod + def _concat_spec(cls, in_files, concat_opts=None): + if concat_opts is None: + concat_opts = [{}] * len(in_files) + yield 'ffconcat version 1.0\n' + for file, opts in zip(in_files, concat_opts): + yield f'file {cls._quote_for_ffmpeg(cls._ffmpeg_filename_argument(file))}\n' + # Iterate explicitly to yield the following directives in order, ignoring the rest. 
+ for directive in 'inpoint', 'outpoint', 'duration': + if directive in opts: + yield f'{directive} {opts[directive]}\n' + class FFmpegExtractAudioPP(FFmpegPostProcessor): COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma') @@ -531,6 +588,11 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): class FFmpegMetadataPP(FFmpegPostProcessor): + def __init__(self, downloader, add_metadata=True, add_chapters=True): + FFmpegPostProcessor.__init__(self, downloader) + self._add_metadata = add_metadata + self._add_chapters = add_chapters + @staticmethod def _options(target_ext): yield from ('-map', '0', '-dn') @@ -541,6 +603,46 @@ class FFmpegMetadataPP(FFmpegPostProcessor): @PostProcessor._restrict_to(images=False) def run(self, info): + filename, metadata_filename = info['filepath'], None + options = [] + if self._add_chapters and info.get('chapters'): + metadata_filename = replace_extension(filename, 'meta') + options.extend(self._get_chapter_opts(info['chapters'], metadata_filename)) + if self._add_metadata: + options.extend(self._get_metadata_opts(info)) + + if not options: + self.to_screen('There isn\'t any metadata to add') + return [], info + + temp_filename = prepend_extension(filename, 'temp') + self.to_screen('Adding metadata to "%s"' % filename) + self.run_ffmpeg_multiple_files( + (filename, metadata_filename), temp_filename, + itertools.chain(self._options(info['ext']), *options)) + if metadata_filename: + os.remove(metadata_filename) + os.replace(temp_filename, filename) + return [], info + + @staticmethod + def _get_chapter_opts(chapters, metadata_filename): + with io.open(metadata_filename, 'wt', encoding='utf-8') as f: + def ffmpeg_escape(text): + return re.sub(r'([\\=;#\n])', r'\\\1', text) + + metadata_file_content = ';FFMETADATA1\n' + for chapter in chapters: + metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' + metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) + metadata_file_content += 
'END=%d\n' % (chapter['end_time'] * 1000) + chapter_title = chapter.get('title') + if chapter_title: + metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) + f.write(metadata_file_content) + yield ('-map_metadata', '1') + + def _get_metadata_opts(self, info): metadata = {} def add(meta_list, info_list=None): @@ -577,61 +679,27 @@ class FFmpegMetadataPP(FFmpegPostProcessor): for key in filter(lambda k: k.startswith(prefix), info.keys()): add(key[len(prefix):], key) - filename, metadata_filename = info['filepath'], None - options = [('-metadata', f'{name}={value}') for name, value in metadata.items()] + for name, value in metadata.items(): + yield ('-metadata', f'{name}={value}') stream_idx = 0 for fmt in info.get('requested_formats') or []: stream_count = 2 if 'none' not in (fmt.get('vcodec'), fmt.get('acodec')) else 1 if fmt.get('language'): lang = ISO639Utils.short2long(fmt['language']) or fmt['language'] - options.extend(('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) - for i in range(stream_count)) + for i in range(stream_count): + yield ('-metadata:s:%d' % (stream_idx + i), 'language=%s' % lang) stream_idx += stream_count - chapters = info.get('chapters', []) - if chapters: - metadata_filename = replace_extension(filename, 'meta') - with io.open(metadata_filename, 'wt', encoding='utf-8') as f: - def ffmpeg_escape(text): - return re.sub(r'([\\=;#\n])', r'\\\1', text) - - metadata_file_content = ';FFMETADATA1\n' - for chapter in chapters: - metadata_file_content += '[CHAPTER]\nTIMEBASE=1/1000\n' - metadata_file_content += 'START=%d\n' % (chapter['start_time'] * 1000) - metadata_file_content += 'END=%d\n' % (chapter['end_time'] * 1000) - chapter_title = chapter.get('title') - if chapter_title: - metadata_file_content += 'title=%s\n' % ffmpeg_escape(chapter_title) - f.write(metadata_file_content) - options.append(('-map_metadata', '1')) - if ('no-attach-info-json' not in self.get_param('compat_opts', []) and '__infojson_filename' in info 
and info['ext'] in ('mkv', 'mka')): - old_stream, new_stream = self.get_stream_number(filename, ('tags', 'mimetype'), 'application/json') + old_stream, new_stream = self.get_stream_number(info['filepath'], ('tags', 'mimetype'), 'application/json') if old_stream is not None: - options.append(('-map', '-0:%d' % old_stream)) + yield ('-map', '-0:%d' % old_stream) new_stream -= 1 - options.append(( - '-attach', info['__infojson_filename'], - '-metadata:s:%d' % new_stream, 'mimetype=application/json' - )) - - if not options: - self.to_screen('There isn\'t any metadata to add') - return [], info - - temp_filename = prepend_extension(filename, 'temp') - self.to_screen('Adding metadata to "%s"' % filename) - self.run_ffmpeg_multiple_files( - (filename, metadata_filename), temp_filename, - itertools.chain(self._options(info['ext']), *options)) - if chapters: - os.remove(metadata_filename) - os.replace(temp_filename, filename) - return [], info + yield ('-attach', info['__infojson_filename'], + '-metadata:s:%d' % new_stream, 'mimetype=application/json') class FFmpegMergerPP(FFmpegPostProcessor): @@ -808,6 +876,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): class FFmpegSplitChaptersPP(FFmpegPostProcessor): + def __init__(self, downloader, force_keyframes=False): + FFmpegPostProcessor.__init__(self, downloader) + self._force_keyframes = force_keyframes def _prepare_filename(self, number, chapter, info): info = info.copy() @@ -835,13 +906,18 @@ class FFmpegSplitChaptersPP(FFmpegPostProcessor): def run(self, info): chapters = info.get('chapters') or [] if not chapters: - self.report_warning('Chapter information is unavailable') + self.to_screen('Chapter information is unavailable') return [], info + in_file = info['filepath'] + if self._force_keyframes and len(chapters) > 1: + in_file = self.force_keyframes(in_file, (c['start_time'] for c in chapters)) self.to_screen('Splitting video by chapters; %d chapters found' % len(chapters)) for idx, chapter in 
enumerate(chapters): destination, opts = self._ffmpeg_args_for_chapter(idx + 1, chapter, info) - self.real_run_ffmpeg([(info['filepath'], opts)], [(destination, ['-c', 'copy'])]) + self.real_run_ffmpeg([(in_file, opts)], [(destination, ['-c', 'copy'])]) + if in_file != info['filepath']: + os.remove(in_file) return [], info diff --git a/yt_dlp/postprocessor/modify_chapters.py b/yt_dlp/postprocessor/modify_chapters.py new file mode 100644 index 000000000..3d6493b68 --- /dev/null +++ b/yt_dlp/postprocessor/modify_chapters.py @@ -0,0 +1,333 @@ +import copy +import heapq +import os + +from .common import PostProcessor +from .ffmpeg import ( + FFmpegPostProcessor, + FFmpegSubtitlesConvertorPP +) +from .sponsorblock import SponsorBlockPP +from ..utils import ( + float_or_none, + orderedSet, + PostProcessingError, + prepend_extension, + traverse_obj, +) + + +_TINY_SPONSOR_OVERLAP_DURATION = 1 +DEFAULT_SPONSORBLOCK_CHAPTER_TITLE = '[SponsorBlock]: %(category_names)l' + + +class ModifyChaptersPP(FFmpegPostProcessor): + def __init__(self, downloader, remove_chapters_patterns=None, remove_sponsor_segments=None, + sponsorblock_chapter_title=DEFAULT_SPONSORBLOCK_CHAPTER_TITLE, force_keyframes=False): + FFmpegPostProcessor.__init__(self, downloader) + self._remove_chapters_patterns = set(remove_chapters_patterns or []) + self._remove_sponsor_segments = set(remove_sponsor_segments or []) + self._sponsorblock_chapter_title = sponsorblock_chapter_title + self._force_keyframes = force_keyframes + + @PostProcessor._restrict_to(images=False) + def run(self, info): + chapters, sponsor_chapters = self._mark_chapters_to_remove( + info.get('chapters') or [], info.get('sponsorblock_chapters') or []) + if not chapters and not sponsor_chapters: + return [], info + + real_duration = self._get_real_video_duration(info['filepath']) + if not chapters: + chapters = [{'start_time': 0, 'end_time': real_duration, 'title': info['title']}] + + info['chapters'], cuts = 
self._remove_marked_arrange_sponsors(chapters + sponsor_chapters) + if not cuts: + return [], info + + if abs(real_duration - info['duration']) > 1: + if abs(real_duration - info['chapters'][-1]['end_time']) < 1: + self.to_screen(f'Skipping {self.pp_key()} since the video appears to be already cut') + return [], info + if not info.get('__real_download'): + raise PostProcessingError('Cannot cut video since the real and expected durations mismatch. ' + 'Different chapters may have already been removed') + return [], info + else: + self.write_debug('Expected and actual durations mismatch') + + concat_opts = self._make_concat_opts(cuts, real_duration) + + def remove_chapters(file, is_sub): + return file, self.remove_chapters(file, cuts, concat_opts, self._force_keyframes and not is_sub) + + in_out_files = [remove_chapters(info['filepath'], False)] + in_out_files.extend(remove_chapters(in_file, True) for in_file in self._get_supported_subs(info)) + + # Renaming should only happen after all files are processed + files_to_remove = [] + for in_file, out_file in in_out_files: + uncut_file = prepend_extension(in_file, 'uncut') + os.replace(in_file, uncut_file) + os.replace(out_file, in_file) + files_to_remove.append(uncut_file) + + return files_to_remove, info + + def _mark_chapters_to_remove(self, chapters, sponsor_chapters): + if self._remove_chapters_patterns: + warn_no_chapter_to_remove = True + if not chapters: + self.to_screen('Chapter information is unavailable') + warn_no_chapter_to_remove = False + for c in chapters: + if any(regex.search(c['title']) for regex in self._remove_chapters_patterns): + c['remove'] = True + warn_no_chapter_to_remove = False + if warn_no_chapter_to_remove: + self.to_screen('There are no chapters matching the regex') + + if self._remove_sponsor_segments: + warn_no_chapter_to_remove = True + if not sponsor_chapters: + self.to_screen('SponsorBlock information is unavailable') + warn_no_chapter_to_remove = False + for c in sponsor_chapters: + 
if c['category'] in self._remove_sponsor_segments: + c['remove'] = True + warn_no_chapter_to_remove = False + if warn_no_chapter_to_remove: + self.to_screen('There are no matching SponsorBlock chapters') + + return chapters, sponsor_chapters + + def _get_real_video_duration(self, filename): + duration = float_or_none( + traverse_obj(self.get_metadata_object(filename), ('format', 'duration'))) + if duration is None: + raise PostProcessingError('ffprobe returned empty duration') + return duration + + def _get_supported_subs(self, info): + for sub in (info.get('requested_subtitles') or {}).values(): + sub_file = sub.get('filepath') + # The file might have been removed by --embed-subs + if not sub_file or not os.path.exists(sub_file): + continue + ext = sub['ext'] + if ext not in FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS: + self.report_warning(f'Cannot remove chapters from external {ext} subtitles; "{sub_file}" is now out of sync') + continue + # TODO: create __real_download for subs? + yield sub_file + + def _remove_marked_arrange_sponsors(self, chapters): + # Store cuts separately, since adjacent and overlapping cuts must be merged. + cuts = [] + + def append_cut(c): + assert 'remove' in c + last_to_cut = cuts[-1] if cuts else None + if last_to_cut and last_to_cut['end_time'] >= c['start_time']: + last_to_cut['end_time'] = max(last_to_cut['end_time'], c['end_time']) + else: + cuts.append(c) + return len(cuts) - 1 + + def excess_duration(c): + # Cuts that are completely within the chapter reduce chapters' duration. + # Since cuts can overlap, excess duration may be less than the sum of cuts' durations. + # To avoid that, chapter stores the index to the first cut within the chapter, + # instead of storing excess duration. append_cut ensures that subsequent cuts (if any) + # will be merged with previous ones (if necessary). 
+ cut_idx, excess = c.pop('cut_idx', len(cuts)), 0 + while cut_idx < len(cuts): + cut = cuts[cut_idx] + if cut['start_time'] >= c['end_time']: + break + if cut['end_time'] > c['start_time']: + excess += min(cut['end_time'], c['end_time']) + excess -= max(cut['start_time'], c['start_time']) + cut_idx += 1 + return excess + + new_chapters = [] + + def chapter_length(c): + return c['end_time'] - c['start_time'] + + def original_uncut_chapter(c): + return '_was_cut' not in c and '_categories' not in c + + def append_chapter(c): + assert 'remove' not in c + length = chapter_length(c) - excess_duration(c) + # Chapter is completely covered by cuts or sponsors. + if length <= 0: + return + start = new_chapters[-1]['end_time'] if new_chapters else 0 + c.update(start_time=start, end_time=start + length) + # Append without checking for tininess to prevent having + # a completely empty chapter list. + if not new_chapters: + new_chapters.append(c) + return + old_c = new_chapters[-1] + # Merge with the previous if the chapter is tiny. + # Only tiny chapters resulting from a cut can be skipped. + # Chapters that were already tiny in the original list will be preserved. + if not original_uncut_chapter(c) and length < _TINY_SPONSOR_OVERLAP_DURATION: + old_c['end_time'] = c['end_time'] + # Previous tiny chapter was appended for the sake of preventing an empty chapter list. + # Replace it with the current one. + elif not original_uncut_chapter(old_c) and chapter_length(old_c) < _TINY_SPONSOR_OVERLAP_DURATION: + c['start_time'] = old_c['start_time'] + new_chapters[-1] = c + else: + new_chapters.append(c) + + # Turn into a priority queue, index is a tie breaker. + # Plain stack sorted by start_time is not enough: after splitting the chapter, + # the part returned to the stack is not guaranteed to have start_time + # less than or equal to the that of the stack's head. 
+ chapters = [(c['start_time'], i, c) for i, c in enumerate(chapters)] + heapq.heapify(chapters) + + _, cur_i, cur_chapter = heapq.heappop(chapters) + while chapters: + _, i, c = heapq.heappop(chapters) + # Non-overlapping chapters or cuts can be appended directly. However, + # adjacent non-overlapping cuts must be merged, which is handled by append_cut. + if cur_chapter['end_time'] <= c['start_time']: + (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + cur_i, cur_chapter = i, c + continue + + # Eight possibilities for overlapping chapters: (cut, cut), (cut, sponsor), + # (cut, normal), (sponsor, cut), (normal, cut), (sponsor, sponsor), + # (sponsor, normal), and (normal, sponsor). There is no (normal, normal): + # normal chapters are assumed not to overlap. + if 'remove' in cur_chapter: + # (cut, cut): adjust end_time. + if 'remove' in c: + cur_chapter['end_time'] = max(cur_chapter['end_time'], c['end_time']) + # (cut, sponsor/normal): chop the beginning of the later chapter + # (if it's not completely hidden by the cut). Push to the priority queue + # to restore sorting by start_time: with beginning chopped, c may actually + # start later than the remaining chapters from the queue. + elif cur_chapter['end_time'] < c['end_time']: + c['start_time'] = cur_chapter['end_time'] + c['_was_cut'] = True + heapq.heappush(chapters, (c['start_time'], i, c)) + # (sponsor/normal, cut). + elif 'remove' in c: + cur_chapter['_was_cut'] = True + # Chop the end of the current chapter if the cut is not contained within it. + # Chopping the end doesn't break start_time sorting, no PQ push is necessary. + if cur_chapter['end_time'] <= c['end_time']: + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + continue + # Current chapter contains the cut within it. If the current chapter is + # a sponsor chapter, check whether the categories before and after the cut differ. 
+ if '_categories' in cur_chapter: + after_c = dict(cur_chapter, start_time=c['end_time'], _categories=[]) + cur_cats = [] + for cat_start_end in cur_chapter['_categories']: + if cat_start_end[1] < c['start_time']: + cur_cats.append(cat_start_end) + if cat_start_end[2] > c['end_time']: + after_c['_categories'].append(cat_start_end) + cur_chapter['_categories'] = cur_cats + if cur_chapter['_categories'] != after_c['_categories']: + # Categories before and after the cut differ: push the after part to PQ. + heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c)) + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + continue + # Either sponsor categories before and after the cut are the same or + # we're dealing with a normal chapter. Just register an outstanding cut: + # subsequent append_chapter will reduce the duration. + cur_chapter.setdefault('cut_idx', append_cut(c)) + # (sponsor, normal): if a normal chapter is not completely overlapped, + # chop the beginning of it and push it to PQ. + elif '_categories' in cur_chapter and '_categories' not in c: + if cur_chapter['end_time'] < c['end_time']: + c['start_time'] = cur_chapter['end_time'] + c['_was_cut'] = True + heapq.heappush(chapters, (c['start_time'], i, c)) + # (normal, sponsor) and (sponsor, sponsor) + else: + assert '_categories' in c + cur_chapter['_was_cut'] = True + c['_was_cut'] = True + # Push the part after the sponsor to PQ. + if cur_chapter['end_time'] > c['end_time']: + # deepcopy to make categories in after_c and cur_chapter/c refer to different lists. + after_c = dict(copy.deepcopy(cur_chapter), start_time=c['end_time']) + heapq.heappush(chapters, (after_c['start_time'], cur_i, after_c)) + # Push the part after the overlap to PQ. 
+ elif c['end_time'] > cur_chapter['end_time']: + after_cur = dict(copy.deepcopy(c), start_time=cur_chapter['end_time']) + heapq.heappush(chapters, (after_cur['start_time'], cur_i, after_cur)) + c['end_time'] = cur_chapter['end_time'] + # (sponsor, sponsor): merge categories in the overlap. + if '_categories' in cur_chapter: + c['_categories'] = cur_chapter['_categories'] + c['_categories'] + # Inherit the cuts that the current chapter has accumulated within it. + if 'cut_idx' in cur_chapter: + c['cut_idx'] = cur_chapter['cut_idx'] + cur_chapter['end_time'] = c['start_time'] + append_chapter(cur_chapter) + cur_i, cur_chapter = i, c + (append_chapter if 'remove' not in cur_chapter else append_cut)(cur_chapter) + + i = -1 + for c in new_chapters.copy(): + i += 1 + c.pop('_was_cut', None) + cats = c.pop('_categories', None) + if cats: + category = min(cats, key=lambda c: c[2] - c[1])[0] + cats = orderedSet(x[0] for x in cats) + c.update({ + 'category': category, + 'categories': cats, + 'name': SponsorBlockPP.CATEGORIES[category], + 'category_names': [SponsorBlockPP.CATEGORIES[c] for c in cats] + }) + outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(self._sponsorblock_chapter_title, c) + c['title'] = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict + if i > 0 and c['title'] == new_chapters[i - 1]['title']: + new_chapters[i - 1]['end_time'] = c['end_time'] + new_chapters.pop(i) + i -= 1 + + return new_chapters, cuts + + def remove_chapters(self, filename, ranges_to_cut, concat_opts, force_keyframes=False): + in_file = filename + out_file = prepend_extension(in_file, 'temp') + if force_keyframes: + in_file = self.force_keyframes(in_file, (t for r in ranges_to_cut for t in r)) + self.to_screen(f'Removing chapters from {filename}') + self.concat_files([in_file] * len(concat_opts), out_file, concat_opts) + if in_file != filename: + os.remove(in_file) + return out_file + + @staticmethod + def _make_concat_opts(chapters_to_remove, duration): + opts = [{}] + for s in 
import json
import re
from hashlib import sha256

from .ffmpeg import FFmpegPostProcessor
from ..compat import compat_urllib_parse_urlencode, compat_HTTPError
from ..utils import PostProcessingError, sanitized_Request


class SponsorBlockPP(FFmpegPostProcessor):
    """Post-processor that stores SponsorBlock segments in the info dict.

    The segments of the requested categories are fetched from the SponsorBlock
    API and saved as chapter dicts under ``info['sponsorblock_chapters']``.
    """

    # Extractor key -> service name sent to the API.
    EXTRACTORS = {
        'Youtube': 'YouTube',
    }
    # Category id -> title used for the resulting chapter.
    CATEGORIES = {
        'sponsor': 'Sponsor',
        'intro': 'Intermission/Intro Animation',
        'outro': 'Endcards/Credits',
        'selfpromo': 'Unpaid/Self Promotion',
        'interaction': 'Interaction Reminder',
        'preview': 'Preview/Recap',
        'music_offtopic': 'Non-Music Section'
    }

    def __init__(self, downloader, categories=None, api='https://sponsor.ajay.app'):
        """Set up the post-processor.

        Args:
            downloader: passed through to ``FFmpegPostProcessor``.
            categories: category ids to request; all of ``CATEGORIES`` when empty.
            api: base URL of the API; ``https://`` is prepended when no
                ``http(s)://`` scheme is given.
        """
        FFmpegPostProcessor.__init__(self, downloader)
        self._categories = tuple(categories or self.CATEGORIES.keys())
        self._API_URL = api if re.match('^https?://', api) else 'https://' + api

    def run(self, info):
        """Fetch the segments for ``info`` and store them as chapters.

        Returns ``([], info)``; for extractors not listed in ``EXTRACTORS``
        the info dict is returned untouched.
        """
        extractor = info['extractor_key']
        if extractor not in self.EXTRACTORS:
            self.to_screen(f'SponsorBlock is not supported for {extractor}')
            return [], info

        # The duration is optional: _get_sponsor_chapters skips its
        # duration-based checks when it is missing.
        info['sponsorblock_chapters'] = self._get_sponsor_chapters(info, info.get('duration'))
        return [], info

    def _get_sponsor_chapters(self, info, duration):
        """Return the segments for ``info`` converted to chapter dicts.

        Segment boundaries within one second of the start or end of the video
        are snapped to ``0``/``duration``. Segments reported for a video whose
        duration differs from ``duration`` by more than a second are dropped,
        and a warning is printed in that case.
        """
        segments = self._get_sponsor_segments(info['id'], self.EXTRACTORS[info['extractor_key']])

        def duration_filter(s):
            start_end = s['segment']
            # Ignore milliseconds difference at the start.
            if start_end[0] <= 1:
                start_end[0] = 0
            # Ignore milliseconds difference at the end.
            # Never allow the segment to exceed the video.
            if duration and duration - start_end[1] <= 1:
                start_end[1] = duration
            # SponsorBlock duration may be absent or it may deviate from the real one.
            sb_duration = s.get('videoDuration')
            return not sb_duration or not duration or abs(duration - sb_duration) <= 1

        duration_match = [s for s in segments if duration_filter(s)]
        if len(duration_match) != len(segments):
            self.report_warning('Some SponsorBlock segments are from a video of different duration, maybe from an old version of this video')

        def to_chapter(s):
            (start, end), cat = s['segment'], s['category']
            return {
                'start_time': start,
                'end_time': end,
                'category': cat,
                'title': self.CATEGORIES[cat],
                '_categories': [(cat, start, end)]
            }

        sponsor_chapters = [to_chapter(s) for s in duration_match]
        if not sponsor_chapters:
            self.to_screen('No segments were found in the SponsorBlock database')
        else:
            self.to_screen(f'Found {len(sponsor_chapters)} segments in the SponsorBlock database')
        return sponsor_chapters

    def _get_sponsor_segments(self, video_id, service):
        """Return the API's segment list for ``video_id`` (``[]`` if none)."""
        video_hash = sha256(video_id.encode('ascii')).hexdigest()
        # SponsorBlock API recommends using first 4 hash characters.
        url = f'{self._API_URL}/api/skipSegments/{video_hash[:4]}?' + compat_urllib_parse_urlencode({
            'service': service,
            'categories': json.dumps(self._categories),
        })
        # The response covers every video sharing the hash prefix.
        for d in self._get_json(url):
            if d['videoID'] == video_id:
                return d['segments']
        return []

    def _get_json(self, url):
        """Request ``url`` and return the decoded JSON body.

        An HTTP 404 response is treated as "no data" and yields ``[]``.

        Raises:
            PostProcessingError: on any other HTTP error.
        """
        self.write_debug(f'SponsorBlock query: {url}')
        try:
            rsp = self._downloader.urlopen(sanitized_Request(url))
        except compat_HTTPError as e:
            if e.code == 404:
                return []
            raise PostProcessingError(f'Error communicating with SponsorBlock API - {e}') from e

        return json.loads(rsp.read().decode(rsp.info().get_param('charset') or 'utf-8'))