From 35faefee5ddb67c447c3206199cc06124600e84d Mon Sep 17 00:00:00 2001
From: pukkandan <pukkandan.ytdlp@gmail.com>
Date: Mon, 6 Jun 2022 21:49:57 +0530
Subject: [PATCH] [ExtractAudio, cleanup] Refactor

---
 README.md                      |  13 ++--
 yt_dlp/__init__.py             |  12 ++--
 yt_dlp/options.py              |  16 ++---
 yt_dlp/postprocessor/common.py |   2 +-
 yt_dlp/postprocessor/ffmpeg.py | 106 ++++++++++++---------------------
 5 files changed, 57 insertions(+), 92 deletions(-)

diff --git a/README.md b/README.md
index 86f172a64..5347f2789 100644
--- a/README.md
+++ b/README.md
@@ -871,23 +871,22 @@ You can also fork the project on github and run your fork's [build workflow](.gi
 ## Post-Processing Options:
     -x, --extract-audio             Convert video files to audio-only files
                                     (requires ffmpeg and ffprobe)
-    --audio-format FORMAT           Specify audio format to convert the audio to
-                                    when -x is used. Currently supported formats
-                                    are: best (default) or one of aac, flac,
-                                    mp3, m4a, opus, vorbis, wav, alac
+    --audio-format FORMAT           Format to convert the audio to when -x is
+                                    used. (currently supported: best (default),
+                                    mp3, aac, m4a, opus, vorbis, flac, alac, wav)
     --audio-quality QUALITY         Specify ffmpeg audio quality to use when
                                     converting the audio with -x. Insert a value
                                     between 0 (best) and 10 (worst) for VBR or a
                                     specific bitrate like 128K (default 5)
     --remux-video FORMAT            Remux the video into another container if
                                     necessary (currently supported: mp4, mkv,
-                                    flv, webm, mov, avi, mka, ogg, aac, flac,
-                                    mp3, m4a, opus, vorbis, wav, alac). If
+                                    flv, webm, mov, avi, mka, ogg, mp3, aac,
+                                    m4a, opus, vorbis, flac, alac, wav). If
                                     target container does not support the
                                     video/audio codec, remuxing will fail. You
                                     can specify multiple rules; Eg.
                                     "aac>m4a/mov>mp4/mkv" will remux aac to m4a,
-                                    mov to mp4 and anything else to mkv.
+                                    mov to mp4 and anything else to mkv
     --recode-video FORMAT           Re-encode the video into another format if
                                     necessary. The syntax and supported formats
                                     are the same as --remux-video
diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py
index d1b78303e..10b31028b 100644
--- a/yt_dlp/__init__.py
+++ b/yt_dlp/__init__.py
@@ -215,13 +215,9 @@ def validate_options(opts):
     # Postprocessor formats
     validate_in('audio format', opts.audioformat, ['best'] + list(FFmpegExtractAudioPP.SUPPORTED_EXTS))
     validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
-    for name, value, pp in (
-        ('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP),
-        ('recode video format', opts.recodevideo, FFmpegVideoConvertorPP),
-        ('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP),
-    ):
-        if value is not None:
-            validate_regex(name, value.replace(' ', ''), pp.FORMAT_RE)
+    validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
+    validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
+    validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
     if opts.audioquality:
         opts.audioquality = opts.audioquality.strip('k').strip('K')
         # int_or_none prevents inf, nan
@@ -653,7 +649,7 @@ def parse_options(argv=None):
     final_ext = (
         opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS
         else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS
-        else opts.audioformat if (opts.extractaudio and opts.audioformat != 'best')
+        else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS)
         else None)
 
     return parser, opts, urls, {
diff --git a/yt_dlp/options.py b/yt_dlp/options.py
index b326e885f..97d8c61a9 100644
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -1423,20 +1423,22 @@ def create_parser():
     postproc.add_option(
         '--audio-format', metavar='FORMAT', dest='audioformat', default='best',
         help=(
-            'Specify audio format to convert the audio to when -x is used. Currently supported formats are: '
-            'best (default) or one of %s' % ', '.join(FFmpegExtractAudioPP.SUPPORTED_EXTS)))
+            'Format to convert the audio to when -x is used. '
+            f'(currently supported: best (default), {", ".join(FFmpegExtractAudioPP.SUPPORTED_EXTS)})'))
     postproc.add_option(
         '--audio-quality', metavar='QUALITY',
         dest='audioquality', default='5',
-        help='Specify ffmpeg audio quality to use when converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)')
+        help=(
+            'Specify ffmpeg audio quality to use when converting the audio with -x. '
+            'Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default %default)'))
     postproc.add_option(
         '--remux-video',
         metavar='FORMAT', dest='remuxvideo', default=None,
         help=(
-            'Remux the video into another container if necessary (currently supported: %s). '
-            'If target container does not support the video/audio codec, remuxing will fail. '
-            'You can specify multiple rules; Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 '
-            'and anything else to mkv.' % ', '.join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)))
+            'Remux the video into another container if necessary '
+            f'(currently supported: {", ".join(FFmpegVideoRemuxerPP.SUPPORTED_EXTS)}). '
+            'If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; '
+            'Eg. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv'))
     postproc.add_option(
         '--recode-video',
         metavar='FORMAT', dest='recodevideo', default=None,
diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py
index 9f22b378d..7c3be0d1e 100644
--- a/yt_dlp/postprocessor/common.py
+++ b/yt_dlp/postprocessor/common.py
@@ -216,5 +216,5 @@ class PostProcessor(metaclass=PostProcessorMetaClass):
                 raise PostProcessingError(f'Unable to communicate with {self.PP_NAME} API: {e}')
 
 
-class AudioConversionError(PostProcessingError):
+class AudioConversionError(PostProcessingError):  # Deprecated; FFmpegExtractAudioPP now raises PostProcessingError directly
     pass
diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py
index 3777703eb..e38b493c2 100644
--- a/yt_dlp/postprocessor/ffmpeg.py
+++ b/yt_dlp/postprocessor/ffmpeg.py
@@ -6,7 +6,7 @@ import re
 import subprocess
 import time
 
-from .common import AudioConversionError, PostProcessor
+from .common import PostProcessor
 from ..compat import functools, imghdr
 from ..utils import (
     ISO639Utils,
@@ -45,19 +45,20 @@ EXT_TO_OUT_FORMATS = {
     'vtt': 'webvtt',
 }
 ACODECS = {
-    'mp3': 'libmp3lame',
-    'aac': 'aac',
-    'flac': 'flac',
-    'm4a': 'aac',
-    'opus': 'libopus',
-    'vorbis': 'libvorbis',
-    'wav': None,
-    'alac': None,
+    # format name: (output file extension, ffmpeg audio encoder or None, extra ffmpeg options)
+    'mp3': ('mp3', 'libmp3lame', ()),
+    'aac': ('m4a', 'aac', ('-f', 'adts')),
+    'm4a': ('m4a', 'aac', ('-bsf:a', 'aac_adtstoasc')),
+    'opus': ('opus', 'libopus', ()),
+    'vorbis': ('ogg', 'libvorbis', ()),
+    'flac': ('flac', 'flac', ()),
+    'alac': ('m4a', None, ('-acodec', 'alac')),
+    'wav': ('wav', None, ('-f', 'wav')),
 }
 
 
 def create_mapping_re(supported):
-    return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\w+>)?(?:%s)' % '|'.join(supported)))
+    return re.compile(r'{0}(?:/{0})*$'.format(r'(?:\s*\w+\s*>)?\s*(?:%s)\s*' % '|'.join(supported)))
 
 
 def resolve_mapping(source, mapping):
@@ -424,7 +425,7 @@ class FFmpegPostProcessor(PostProcessor):
 
 class FFmpegExtractAudioPP(FFmpegPostProcessor):
     COMMON_AUDIO_EXTS = ('wav', 'flac', 'm4a', 'aiff', 'mp3', 'ogg', 'mka', 'opus', 'wma')
-    SUPPORTED_EXTS = ('aac', 'flac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav', 'alac')
+    SUPPORTED_EXTS = tuple(ACODECS.keys())
 
     def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
         FFmpegPostProcessor.__init__(self, downloader)
@@ -463,71 +464,45 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         try:
             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
         except FFmpegPostProcessorError as err:
-            raise AudioConversionError(err.msg)
+            raise PostProcessingError(f'audio conversion failed: {err.msg}')
 
     @PostProcessor._restrict_to(images=False)
     def run(self, information):
         orig_path = path = information['filepath']
-        orig_ext = information['ext']
-
-        if self._preferredcodec == 'best' and orig_ext in self.COMMON_AUDIO_EXTS:
-            self.to_screen('Skipping audio extraction since the file is already in a common audio format')
+        target_format = self._preferredcodec
+        if target_format == 'best' and information['ext'] in self.COMMON_AUDIO_EXTS:
+            self.to_screen(f'Not converting audio {orig_path}; the file is already in a common audio format')
             return [], information
 
         filecodec = self.get_audio_codec(path)
         if filecodec is None:
             raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')
 
-        more_opts = []
-        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
-            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
-                # Lossless, but in another container
-                acodec = 'copy'
-                extension = 'm4a'
-                more_opts = ['-bsf:a', 'aac_adtstoasc']
-            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
-                # Lossless if possible
-                acodec = 'copy'
-                extension = filecodec
-                if filecodec == 'aac':
-                    more_opts = ['-f', 'adts']
-                if filecodec == 'vorbis':
-                    extension = 'ogg'
-            elif filecodec == 'alac':
-                acodec = None
-                extension = 'm4a'
-                more_opts += ['-acodec', 'alac']
-            else:
-                # MP3 otherwise.
-                acodec = 'libmp3lame'
-                extension = 'mp3'
-                more_opts = self._quality_args(acodec)
+        if filecodec == 'aac' and target_format in ('m4a', 'best'):
+            # Lossless, but in another container
+            extension, _, more_opts, acodec = *ACODECS['m4a'], 'copy'
+        elif target_format == 'best' or target_format == filecodec:
+            # Lossless if possible
+            try:
+                extension, _, more_opts, acodec = *ACODECS[filecodec], 'copy'
+            except KeyError:
+                extension, acodec, more_opts = ACODECS['mp3']
         else:
             # We convert the audio (lossy if codec is lossy)
-            acodec = ACODECS[self._preferredcodec]
+            extension, acodec, more_opts = ACODECS[target_format]
             if acodec == 'aac' and self._features.get('fdk'):
-                acodec = 'libfdk_aac'
-            extension = self._preferredcodec
+                acodec, more_opts = 'libfdk_aac', []
+
+        more_opts = list(more_opts)
+        if acodec != 'copy':
             more_opts = self._quality_args(acodec)
-            if self._preferredcodec == 'aac':
-                more_opts += ['-f', 'adts']
-            elif self._preferredcodec == 'm4a':
-                more_opts += ['-bsf:a', 'aac_adtstoasc']
-            elif self._preferredcodec == 'vorbis':
-                extension = 'ogg'
-            elif self._preferredcodec == 'wav':
-                extension = 'wav'
-                more_opts += ['-f', 'wav']
-            elif self._preferredcodec == 'alac':
-                extension = 'm4a'
-                more_opts += ['-acodec', 'alac']
-
-        prefix, sep, ext = path.rpartition('.')  # not os.path.splitext, since the latter does not work on unicode in all setups
-        temp_path = new_path = prefix + sep + extension
+
+        # not os.path.splitext, since the latter does not work on unicode in all setups
+        temp_path = new_path = f'{path.rpartition(".")[0]}.{extension}'
 
         if new_path == path:
             if acodec == 'copy':
-                self.to_screen(f'File is already in target format {self._preferredcodec}, skipping')
+                self.to_screen(f'Not converting audio {orig_path}; file is already in target format {target_format}')
                 return [], information
             orig_path = prepend_extension(path, 'orig')
             temp_path = prepend_extension(path, 'temp')
@@ -536,14 +511,8 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
             self.to_screen('Post-process file %s exists, skipping' % new_path)
             return [], information
 
-        try:
-            self.to_screen(f'Destination: {new_path}')
-            self.run_ffmpeg(path, temp_path, acodec, more_opts)
-        except AudioConversionError as e:
-            raise PostProcessingError(
-                'audio conversion failed: ' + e.msg)
-        except Exception:
-            raise PostProcessingError('error running ' + self.basename)
+        self.to_screen(f'Destination: {new_path}')
+        self.run_ffmpeg(path, temp_path, acodec, more_opts)
 
         os.replace(path, orig_path)
         os.replace(temp_path, new_path)
@@ -553,8 +522,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor):
         # Try to update the date time for extracted audio file.
         if information.get('filetime') is not None:
             self.try_utime(
-                new_path, time.time(), information['filetime'],
-                errnote='Cannot update utime of audio file')
+                new_path, time.time(), information['filetime'], errnote='Cannot update utime of audio file')
 
         return [orig_path], information