From a9e7f54670cad336ccb5e21fccfb87ea1e27df51 Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan@gmail.com>
Date: Sun, 15 Nov 2020 05:58:41 +0530
Subject: [PATCH] Sponskrub integration

---
 README.md                              | 13 ++++
 youtube_dlc/YoutubeDL.py               |  7 ++-
 youtube_dlc/__init__.py                | 11 ++++
 youtube_dlc/downloader/common.py       |  4 +-
 youtube_dlc/options.py                 | 25 ++++++++
 youtube_dlc/postprocessor/__init__.py  |  2 +
 youtube_dlc/postprocessor/sponskrub.py | 86 ++++++++++++++++++++++++++
 7 files changed, 144 insertions(+), 4 deletions(-)
 create mode 100644 youtube_dlc/postprocessor/sponskrub.py

diff --git a/README.md b/README.md
index 0681869c7..20d801555 100644
--- a/README.md
+++ b/README.md
@@ -523,6 +523,19 @@ I will add some memorable short links to the binaries so you can download them e
     --convert-subs FORMAT            Convert the subtitles to other format
                                      (currently supported: srt|ass|vtt|lrc)
 
+## SponSkrub Options (SponsorBlock)
+    --sponskrub                      Use sponskrub to mark sponsored sections
+                                     with the data available in SponsorBlock API
+                                     (Youtube only)
+    --sponskrub-cut                  Cut out the sponsor sections instead of
+                                     simply marking them
+    --sponskrub-force                Run sponskrub even if the video was
+                                     already downloaded. Use with caution
+    --sponskrub-location PATH        Location of the sponskrub binary;
+                                     either the path to the binary or its
+                                     containing directory
+    --sponskrub-args                 Give these arguments to sponskrub
+
 ## Extractor Options:
     --ignore-dynamic-mpd             Do not process dynamic DASH manifests
 
diff --git a/youtube_dlc/YoutubeDL.py b/youtube_dlc/YoutubeDL.py
index cbfb03c7b..2cc02e46f 100644
--- a/youtube_dlc/YoutubeDL.py
+++ b/youtube_dlc/YoutubeDL.py
@@ -2110,13 +2110,16 @@ class YoutubeDL(object):
                             if not ensure_dir_exists(fname):
                                 return
                             downloaded.append(fname)
-                            partial_success = dl(fname, new_info)
+                            partial_success, real_download = dl(fname, new_info)
                             success = success and partial_success
                         info_dict['__postprocessors'] = postprocessors
                         info_dict['__files_to_merge'] = downloaded
+                        # Even if there were no downloads, it is being merged only now
+                        info_dict['__real_download'] = True
                 else:
                     # Just a single file
-                    success = dl(filename, info_dict)
+                    success, real_download = dl(filename, info_dict)
+                    info_dict['__real_download'] = real_download
             except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                 self.report_error('unable to download video data: %s' % error_to_compat_str(err))
                 return
diff --git a/youtube_dlc/__init__.py b/youtube_dlc/__init__.py
index 72dd40a56..dd8925d68 100644
--- a/youtube_dlc/__init__.py
+++ b/youtube_dlc/__init__.py
@@ -310,6 +310,17 @@ def _real_main(argv=None):
     # contents
     if opts.xattrs:
         postprocessors.append({'key': 'XAttrMetadata'})
+    # This should be below all ffmpeg PP because it may cut parts out from the video
+    # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found
+    if opts.sponskrub is not False:
+        postprocessors.append({
+            'key': 'SponSkrub',
+            'path': opts.sponskrub_path,
+            'args': opts.sponskrub_args,
+            'cut': opts.sponskrub_cut,
+            'force': opts.sponskrub_force,
+            'ignoreerror': opts.sponskrub is None,
+        })
     # Please keep ExecAfterDownload towards the bottom as it allows the user to modify the final file in any way.
     # So if the user is able to remove the file before your postprocessor runs it might cause a few problems.
     if opts.exec_cmd:
diff --git a/youtube_dlc/downloader/common.py b/youtube_dlc/downloader/common.py
index 7d303be1c..a0acb6556 100644
--- a/youtube_dlc/downloader/common.py
+++ b/youtube_dlc/downloader/common.py
@@ -351,7 +351,7 @@ class FileDownloader(object):
                     'status': 'finished',
                     'total_bytes': os.path.getsize(encodeFilename(filename)),
                 })
-                return True
+                return True, False
 
         if subtitle is False:
             min_sleep_interval = self.params.get('sleep_interval')
@@ -372,7 +372,7 @@ class FileDownloader(object):
                     '[download] Sleeping %s seconds...' % (
                         sleep_interval_sub))
                 time.sleep(sleep_interval_sub)
-        return self.real_download(filename, info_dict)
+        return self.real_download(filename, info_dict), True  # (success, real_download); NOTE(review): a tuple is always truthy - confirm every caller of download() unpacks it instead of testing it directly
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
diff --git a/youtube_dlc/options.py b/youtube_dlc/options.py
index f2878e468..093b71a21 100644
--- a/youtube_dlc/options.py
+++ b/youtube_dlc/options.py
@@ -946,6 +946,33 @@ def parseOpts(overrideArguments=None):
         metavar='FORMAT', dest='convertsubtitles', default=None,
         help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
 
+    # Use a dedicated variable: 'extractor' is rebound to the next group right below
+    # NOTE(review): also needs parser.add_option_group(sponskrub) where the other groups are registered (outside this hunk), else --help omits these options
+    sponskrub = optparse.OptionGroup(parser, 'SponSkrub Options (SponsorBlock)')
+    sponskrub.add_option(
+        '--sponskrub',
+        action='store_true', dest='sponskrub', default=None,
+        help='Use sponskrub to mark sponsored sections with the data available in SponsorBlock API (Youtube only)')
+    sponskrub.add_option(
+        '--no-sponskrub',
+        action='store_false', dest='sponskrub',
+        help=optparse.SUPPRESS_HELP)
+    sponskrub.add_option(
+        '--sponskrub-cut', default=False,
+        action='store_true', dest='sponskrub_cut',
+        help='Cut out the sponsor sections instead of simply marking them')
+    sponskrub.add_option(
+        '--sponskrub-force', default=False,
+        action='store_true', dest='sponskrub_force',
+        help='Run sponskrub even if the video was already downloaded. Use with caution')
+    sponskrub.add_option(
+        '--sponskrub-location', metavar='PATH',
+        dest='sponskrub_path', default='',
+        help='Location of the sponskrub binary; either the path to the binary or its containing directory')
+    sponskrub.add_option(
+        '--sponskrub-args', dest='sponskrub_args',
+        help='Give these arguments to sponskrub')
+
     extractor = optparse.OptionGroup(parser, 'Extractor Options')
     extractor.add_option(
         '--allow-dynamic-mpd',
diff --git a/youtube_dlc/postprocessor/__init__.py b/youtube_dlc/postprocessor/__init__.py
index 2c4702823..e160909a7 100644
--- a/youtube_dlc/postprocessor/__init__.py
+++ b/youtube_dlc/postprocessor/__init__.py
@@ -17,6 +17,7 @@ from .ffmpeg import (
 from .xattrpp import XAttrMetadataPP
 from .execafterdownload import ExecAfterDownloadPP
 from .metadatafromtitle import MetadataFromTitlePP
+from .sponskrub import SponSkrubPP
 
 
 def get_postprocessor(key):
@@ -38,5 +39,6 @@ __all__ = [
     'FFmpegVideoConvertorPP',
     'FFmpegVideoRemuxerPP',
     'MetadataFromTitlePP',
+    'SponSkrubPP',
     'XAttrMetadataPP',
 ]
diff --git a/youtube_dlc/postprocessor/sponskrub.py b/youtube_dlc/postprocessor/sponskrub.py
new file mode 100644
index 000000000..8ef612050
--- /dev/null
+++ b/youtube_dlc/postprocessor/sponskrub.py
@@ -0,0 +1,124 @@
+from __future__ import unicode_literals
+import os
+import subprocess
+
+from .common import PostProcessor
+from ..compat import compat_shlex_split
+from ..utils import (
+    check_executable,
+    encodeArgument,
+    shell_quote,
+    PostProcessingError,
+)
+
+
+class SponSkrubPP(PostProcessor):
+    """Run the external sponskrub tool on a downloaded YouTube video.
+
+    Sponsor sections are marked by default (sponskrub is invoked with
+    '-chapter'); with cut=True that flag is omitted so that sponskrub cuts
+    the sections out instead.
+    """
+
+    _temp_ext = 'spons'
+    _def_args = []
+    _exe_name = 'sponskrub'
+
+    def __init__(self, downloader, path='', args=None, ignoreerror=False, cut=False, force=False):
+        """Locate the sponskrub executable and build its argument list.
+
+        path        -- the executable itself or the directory containing it
+        args        -- extra arguments as one shell-style string, or None
+        ignoreerror -- if true, a missing executable is tolerated and run()
+                       becomes a no-op instead of raising here
+        cut         -- cut sponsor sections out instead of marking them
+        force       -- cut even if the file was not freshly downloaded
+
+        Raises PostProcessingError when the executable cannot be found and
+        ignoreerror is false.
+        """
+        PostProcessor.__init__(self, downloader)
+        self.force = force
+        self.cutout = cut
+        self.args = ['-chapter'] if not cut else []
+        self.args += self._def_args if args is None else compat_shlex_split(args)
+        self.path = self.get_exe(path)
+
+        if not ignoreerror and self.path is None:
+            if path:
+                raise PostProcessingError('sponskrub not found in "%s"' % path)
+            raise PostProcessingError('sponskrub not found. Please install or provide the path using --sponskrub-location.')
+
+    def get_exe(self, path=''):
+        """Return a usable sponskrub path derived from path, or None.
+
+        path is first tried as the executable itself and then as the
+        directory containing it; with an empty path only the bare
+        executable name is tried.
+        """
+        path = path or ''  # tolerate None so os.path.join below cannot fail
+        if not path or not check_executable(path, ['-h']):
+            path = os.path.join(path, self._exe_name)
+            if not check_executable(path, ['-h']):
+                return None
+        return path
+
+    def run(self, information):
+        """Mark or cut the sponsor sections of information['filepath'].
+
+        Always returns ([], information). Raises PostProcessingError when
+        sponskrub exits with a code other than 0 or 3, or reports success
+        without having written its output file.
+        """
+        if self.path is None:
+            return [], information
+
+        if (information.get('extractor_key') or '').lower() != 'youtube':
+            self._downloader.to_screen('[sponskrub] Skipping sponskrub since it is not a YouTube video')
+            return [], information
+        real_download = information.get('__real_download', False)
+        if self.cutout and not self.force and not real_download:
+            self._downloader.to_screen(
+                '[sponskrub] Skipping sponskrub since the video was already downloaded. '
+                'Use --sponskrub-force to run sponskrub anyway')
+            return [], information
+
+        self._downloader.to_screen('[sponskrub] Trying to %s sponsor sections' % ('remove' if self.cutout else 'mark'))
+        if self.cutout:
+            self._downloader.to_screen('WARNING: Cutting out sponsor segments will cause the subtitles to go out of sync.')
+            if not real_download:
+                self._downloader.to_screen('WARNING: If sponskrub is run multiple times, unintended parts of the video could be cut out.')
+
+        filename = information['filepath']
+        # Keep the original extension last, e.g. video.mp4 -> video.mp4.spons.mp4
+        temp_filename = filename + '.' + self._temp_ext + os.path.splitext(filename)[1]
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
+
+        # Positional arguments after '--': video id, input file, output file
+        cmd = [self.path] + self.args + ['--', information['id'], filename, temp_filename]
+        cmd = [encodeArgument(i) for i in cmd]
+
+        if self._downloader.params.get('verbose', False):
+            self._downloader.to_screen('[debug] sponskrub command line: %s' % shell_quote(cmd))
+        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
+        _, stderr = p.communicate()
+
+        if p.returncode == 0:
+            # Never delete the original before we know there is a replacement
+            if not os.path.exists(temp_filename):
+                raise PostProcessingError('sponskrub did not create the output file')
+            os.remove(filename)
+            os.rename(temp_filename, filename)
+            self._downloader.to_screen('[sponskrub] Sponsor sections have been %s' % ('removed' if self.cutout else 'marked'))
+            return [], information
+
+        # Failure or nothing to do: do not leave a partial output file behind
+        if os.path.exists(temp_filename):
+            os.remove(temp_filename)
+        if p.returncode != 3:  # error code 3 means there was no info about the video
+            stderr = stderr.decode('utf-8', 'replace')
+            msg = stderr.strip().split('\n')[-1]
+            raise PostProcessingError(msg if msg else 'sponskrub failed with error code %s!' % p.returncode)
+        self._downloader.to_screen('[sponskrub] No segments in the SponsorBlock database')
+        return [], information