From a9e7f54670cad336ccb5e21fccfb87ea1e27df51 Mon Sep 17 00:00:00 2001
From: pukkandan
Date: Sun, 15 Nov 2020 05:58:41 +0530
Subject: [PATCH] Sponskrub integration

---
 README.md                              |  13 ++++
 youtube_dlc/YoutubeDL.py               |   7 ++-
 youtube_dlc/__init__.py                |  11 ++++
 youtube_dlc/downloader/common.py       |   4 +-
 youtube_dlc/options.py                 |  25 ++++++++
 youtube_dlc/postprocessor/__init__.py  |   2 +
 youtube_dlc/postprocessor/sponskrub.py | 109 ++++++++++++++++++++++++++
 7 files changed, 167 insertions(+), 4 deletions(-)
 create mode 100644 youtube_dlc/postprocessor/sponskrub.py

diff --git a/README.md b/README.md
index 0681869c7..20d801555 100644
--- a/README.md
+++ b/README.md
@@ -523,6 +523,19 @@ I will add some memorable short links to the binaries so you can download them e
     --convert-subs FORMAT            Convert the subtitles to other format
                                      (currently supported: srt|ass|vtt|lrc)
 
+## SponSkrub Options (SponsorBlock)
+    --sponskrub                      Use sponskrub to mark sponsored sections
+                                     with the data available in SponsorBlock API
+                                     (Youtube only)
+    --sponskrub-cut                  Cut out the sponsor sections instead of
+                                     simply marking them
+    --sponskrub-force                Run sponskrub even if the video was
+                                     already downloaded. Use with caution
+    --sponskrub-location PATH        Location of the sponskrub binary;
+                                     either the path to the binary or its
+                                     containing directory
+    --sponskrub-args ARGS            Give these arguments to sponskrub
+
 ## Extractor Options:
     --ignore-dynamic-mpd             Do not process dynamic DASH manifests
 
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
index cbfb03c7b..2cc02e46f 100644
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -2110,13 +2110,16 @@ class YoutubeDL(object):
                             if not ensure_dir_exists(fname):
                                 return
                             downloaded.append(fname)
-                            partial_success = dl(fname, new_info)
+                            partial_success, real_download = dl(fname, new_info)
                             success = success and partial_success
                         info_dict['__postprocessors'] = postprocessors
                         info_dict['__files_to_merge'] = downloaded
+                        # Even if there were no downloads, it is being merged only now
+                        info_dict['__real_download'] = True
                 else:
                     # Just a single file
-                    success = dl(filename, info_dict)
+                    success, real_download = dl(filename, info_dict)
+                    info_dict['__real_download'] = real_download
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                 return
diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py
index 72dd40a56..dd8925d68 100644
--- a/youtube_dlc/__init__.py
+++ b/youtube_dlc/__init__.py
@@ -310,6 +310,17 @@ def _real_main(argv=None):
     # contents
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
+    # This should be below all ffmpeg PP because it may cut parts out from the video
+    # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
+    if opts.sponskrub is not False:
+        postprocessors.append({
+            'key': 'SponSkrub',
+            'path': opts.sponskrub_path,
+            'args': opts.sponskrub_args,
+            'cut': opts.sponskrub_cut,
+            'force': opts.sponskrub_force,
+            'ignoreerror': opts.sponskrub is None,
+        })
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
     # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
     if opts.exec_cmd:
diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py
index 7d303be1c..a0acb6556 100644
--- a/youtube_dlc/downloader/common.py
+++ b/youtube_dlc/downloader/common.py
@@ -351,7 +351,7 @@ class FileDownloader(object):
                 'status': 'finished',
                 'total_bytes': os.path.getsize(encodeFilename(filename)),
             })
-            return True
+            return True, False
 
         if subtitle is False:
             min_sleep_interval = self.params.get('sleep_interval')
@@ -372,7 +372,7 @@ class FileDownloader(object):
                     '[download] Sleeping %s seconds...' % (
                         sleep_interval_sub))
                 time.sleep(sleep_interval_sub)
-        return self.real_download(filename, info_dict)
+        return self.real_download(filename, info_dict), True
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
index f2878e468..093b71a21 100644
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -946,6 +946,31 @@ def parseOpts(overrideArguments=None):
         metavar='FORMAT', dest='convertsubtitles', default=None,
         help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
 
+    extractor = optparse.OptionGroup(parser, 'SponSkrub Options (SponsorBlock)')
+    extractor.add_option(
+        '--sponskrub',
+        action='store_true', dest='sponskrub', default=None,
+        help='Use sponskrub to mark sponsored sections with the data available in SponsorBlock API (Youtube only)')
+    extractor.add_option(
+        '--no-sponskrub',
+        action='store_false', dest='sponskrub',
+        help=optparse.SUPPRESS_HELP)
+    extractor.add_option(
+        '--sponskrub-cut', default=False,
+        action='store_true', dest='sponskrub_cut',
+        help='Cut out the sponsor sections instead of simply marking them')
+    extractor.add_option(
+        '--sponskrub-force', default=False,
+        action='store_true', dest='sponskrub_force',
+        help='Run sponskrub even if the video was already downloaded')
+    extractor.add_option(
+        '--sponskrub-location', metavar='PATH',
+        dest='sponskrub_path', default='',
+        help='Location of the sponskrub binary; either the path to the binary or its containing directory.')
+    extractor.add_option(
+        '--sponskrub-args', dest='sponskrub_args', metavar='ARGS',
+        help='Give these arguments to sponskrub')
+
     extractor = optparse.OptionGroup(parser, 'Extractor Options')
     extractor.add_option(
         '--allow-dynamic-mpd',
diff --git a/youtube_dlc/postprocessor/__init__.py b/youtube_dlc/postprocessor/__init__.py
index 2c4702823..e160909a7 100644
--- a/youtube_dlc/postprocessor/__init__.py
+++ b/youtube_dlc/postprocessor/__init__.py
@@ -17,6 +17,7 @@ from .ffmpeg import (
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
 from .metadatafromtitle import MetadataFromTitlePP
+from .sponskrub import SponSkrubPP
 
 
 def get_postprocessor(key):
@@ -38,5 +39,6 @@ __all__ = [
     'FFmpegVideoConvertorPP',
     'FFmpegVideoRemuxerPP',
     'MetadataFromTitlePP',
+    'SponSkrubPP',
     'XAttrMetadataPP',
 ]
diff --git a/youtube_dlc/postprocessor/sponskrub.py b/youtube_dlc/postprocessor/sponskrub.py
new file mode 100644
index 000000000..8ef612050
--- /dev/null
+++ b/youtube_dlc/postprocessor/sponskrub.py
@@ -0,0 +1,109 @@
+from __future__ import unicode_literals
+import os
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_split
+from ..utils import (
+    check_executable,
+    encodeArgument,
+    shell_quote,
+    PostProcessingError,
+)
+
+
+class SponSkrubPP(PostProcessor):
+    """Run the external sponskrub binary on downloaded YouTube videos."""
+
+    _temp_ext = 'spons'
+    _def_args = []
+    _exe_name = 'sponskrub'
+
+    def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False):
+        """Locate sponskrub and build its argument list.
+
+        path: the sponskrub binary or the directory containing it
+        args: extra command line arguments, given as a single string
+        ignoreerror: do not raise if the binary cannot be found
+        cut: remove the sponsor sections instead of passing -chapter
+        force: allow cutting a file that was not downloaded by this run
+        """
+        PostProcessor.__init__(self, downloader)
+        self.force = force
+        self.cutout = cut
+        self.args = ['-chapter'] if not cut else []
+        self.args += self._def_args if args is None else compat_shlex_split(args)
+        self.path = self.get_exe(path)
+
+        if not ignoreerror and self.path is None:
+            if path:
+                raise PostProcessingError('sponskrub not found in "%s"' % path)
+            else:
+                # The option is called --sponskrub-location; its dest is sponskrub_path
+                raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-location.')
+
+    def get_exe(self, path=''):
+        """Return the sponskrub path accepted by check_executable, or None.
+
+        path is tried as the binary itself, then joined with the executable name.
+        """
+        if not path or not check_executable(path, ['-h']):
+            path = os.path.join(path, self._exe_name)
+            if not check_executable(path, ['-h']):
+                return None
+        return path
+
+    def run(self, information):
+        """Mark or cut the sponsor sections of information['filepath'] in place.
+
+        Nothing is done if sponskrub was not found, the extractor is not
+        YouTube, or a cut is requested for a file that was not freshly
+        downloaded (unless forced). Raises PostProcessingError if sponskrub
+        exits with a code other than 0 or 3.
+        """
+        if self.path is None:
+            return [], information
+
+        if information['extractor_key'].lower() != 'youtube':
+            self._downloader.to_screen('[sponskrub] Skipping sponskrub since it is not a YouTube video')
+            return [], information
+        if self.cutout and not self.force and not information.get('__real_download', False):
+            self._downloader.to_screen(
+                '[sponskrub] Skipping sponskrub since the video was already downloaded. '
+                'Use --sponskrub-force to run sponskrub anyway')
+            return [], information
+
+        self._downloader.to_screen('[sponskrub] Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
+        if self.cutout:
+            self._downloader.to_screen('WARNING: Cutting out sponsor segments will cause the subtitles to go out of sync.')
+            if not information.get('__real_download', False):
+                self._downloader.to_screen('WARNING: If sponskrub is run multiple times, unintended parts of the video could be cut out.')
+
+        # sponskrub writes to a temporary file that replaces the original on success
+        filename = information['filepath']
+        temp_filename = filename + '.' + self._temp_ext + os.path.splitext(filename)[1]
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
+
+        cmd = [self.path]
+        if self.args:
+            cmd += self.args
+        cmd += ['--', information['id'], filename, temp_filename]
+        cmd = [encodeArgument(i) for i in cmd]
+
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen('[debug] sponskrub command line: %s' % shell_quote(cmd))
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+        stdout, stderr = p.communicate()
+
+        if p.returncode == 0:
+            os.remove(filename)
+            os.rename(temp_filename, filename)
+            self._downloader.to_screen('[sponskrub] Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
+        elif p.returncode != 3:  # error code 3 means there was no info about the video
+            stderr = stderr.decode('utf-8', 'replace')
+            msg = stderr.strip().split('\n')[-1]
+            raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s!' % p.returncode)
+        else:
+            self._downloader.to_screen('[sponskrub] No segments in the SponsorBlock database')
+        return [], information