From fe7866d0ed6bfa3904ce12b049a3424fdc0ea1fa Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 24 Aug 2022 05:42:16 +0530 Subject: [PATCH] Add option `--use-extractors` Deprecates `--force-generic-extractor` Closes #3234, Closes #2044 Related: #4307, #1791 --- README.md | 9 ++++++++- yt_dlp/YoutubeDL.py | 41 +++++++++++++++++++++++--------------- yt_dlp/__init__.py | 1 + yt_dlp/extractor/common.py | 13 ++++++++++++ yt_dlp/options.py | 12 ++++++++++- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 7cfeec4f1..aab20c079 100644 --- a/README.md +++ b/README.md @@ -375,7 +375,13 @@ You can also fork the project on github and run your fork's [build workflow](.gi --list-extractors List all supported extractors and exit --extractor-descriptions Output descriptions of all supported extractors and exit - --force-generic-extractor Force extraction to use the generic extractor + --use-extractors, --ies NAMES Extractor names to use separated by commas. + You can also use regexes, "all", "default" + and "end" (end URL matching); e.g. --ies + "holodex.*,end,youtube". Prefix the name + with a "-" to exclude it, e.g. --ies + default,-generic. Use --list-extractors for + a list of available extractor names --default-search PREFIX Use this prefix for unqualified URLs. E.g. "gvsearch2:python" downloads two videos from google videos for the search term "python". @@ -2058,6 +2064,7 @@ While these options are redundant, they are still expected to be used due to the #### Not recommended While these options still work, their use is not recommended since there are other alternatives to achieve the same + --force-generic-extractor --ies generic,default --exec-before-download CMD --exec "before_dl:CMD" --no-exec-before-download --no-exec --all-formats -f all diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 872e0bdc3..a3d562042 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -29,6 +29,7 @@ from .cookies import load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor +from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text from .postprocessor import _PLUGIN_CLASSES as plugin_postprocessors @@ -237,7 +238,7 @@ class YoutubeDL: Default is 'only_download' for CLI, but False for API skip_playlist_after_errors: Number of allowed failures until the rest of the playlist is skipped - force_generic_extractor: Force downloader to use the generic extractor + allowed_extractors: List of regexes to match against extractor names that are allowed overwrites: Overwrite all video and metadata files if True, overwrite only non-video files if None and don't overwrite any file if False @@ -477,6 +478,8 @@ class YoutubeDL: The following options are deprecated and may be removed in the future: + force_generic_extractor: Force downloader to use the generic extractor + - Use allowed_extractors = ['generic', 'default'] playliststart: - Use playlist_items Playlist item to start at. playlistend: - Use playlist_items @@ -758,13 +761,6 @@ class YoutubeDL: self._ies_instances[ie_key] = ie ie.set_downloader(self) - def _get_info_extractor_class(self, ie_key): - ie = self._ies.get(ie_key) - if ie is None: - ie = get_info_extractor(ie_key) - self.add_info_extractor(ie) - return ie - def get_info_extractor(self, ie_key): """ Get an instance of an IE with name ie_key, it will try to get one from @@ -781,8 +777,19 @@ class YoutubeDL: """ Add the InfoExtractors returned by gen_extractors to the end of the list """ - for ie in gen_extractor_classes(): - self.add_info_extractor(ie) + all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()} + all_ies['end'] = UnsupportedURLIE() + try: + ie_names = orderedSet_from_options( + self.params.get('allowed_extractors', ['default']), { + 'all': list(all_ies), + 'default': [name for name, ie in all_ies.items() if ie._ENABLED], + }, use_regex=True) + except re.error as e: + raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}') + for name in ie_names: + self.add_info_extractor(all_ies[name]) + self.write_debug(f'Loaded {len(ie_names)} extractors') def add_post_processor(self, pp, when='post_process'): """Add a PostProcessor object to the end of the chain.""" @@ -1413,11 +1420,11 @@ class YoutubeDL: ie_key = 'Generic' if ie_key: - ies = {ie_key: self._get_info_extractor_class(ie_key)} + ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {} else: ies = self._ies - for ie_key, ie in ies.items(): + for key, ie in ies.items(): if not ie.suitable(url): continue @@ -1426,14 +1433,16 @@ class YoutubeDL: 'and will probably not work.') temp_id = ie.get_temp_id(url) - if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}): - self.to_screen(f'[{ie_key}] {temp_id}: has already been recorded in the archive') + if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}): + self.to_screen(f'[{key}] {temp_id}: has already been recorded in the archive') if self.params.get('break_on_existing', False): raise ExistingVideoReached() break - return self.__extract_info(url, self.get_info_extractor(ie_key), download, extra_info, process) + return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) else: - self.report_error('no suitable InfoExtractor for URL %s' % url) + extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default']) + self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}', + tb=False if extractors_restricted else None) def _handle_extraction_exceptions(func): @functools.wraps(func) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index 317dd2623..e9234e6f4 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -766,6 +766,7 @@ def parse_options(argv=None): 'windowsfilenames': opts.windowsfilenames, 'ignoreerrors': opts.ignoreerrors, 'force_generic_extractor': opts.force_generic_extractor, + 'allowed_extractors': opts.allowed_extractors or ['default'], 'ratelimit': opts.ratelimit, 'throttledratelimit': opts.throttledratelimit, 'overwrites': opts.overwrites, diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index a534703e5..6337a13a4 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -480,6 +480,9 @@ class InfoExtractor: will be used by geo restriction bypass mechanism similarly to _GEO_COUNTRIES. + The _ENABLED attribute should be set to False for IEs that + are disabled by default and must be explicitly enabled. + The _WORKING attribute should be set to False for broken IEs in order to warn the users and skip the tests. """ @@ -491,6 +494,7 @@ class InfoExtractor: _GEO_COUNTRIES = None _GEO_IP_BLOCKS = None _WORKING = True + _ENABLED = True _NETRC_MACHINE = None IE_DESC = None SEARCH_KEY = None @@ -3941,3 +3945,12 @@ class SearchInfoExtractor(InfoExtractor): @classproperty def SEARCH_KEY(cls): return cls._SEARCH_KEY + + +class UnsupportedURLIE(InfoExtractor): + _VALID_URL = '.*' + _ENABLED = False + IE_DESC = False + + def _real_extract(self, url): + raise UnsupportedError(url) diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 0cddb7fd5..bee531d1b 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -353,10 +353,20 @@ def create_parser(): '--extractor-descriptions', action='store_true', dest='list_extractor_descriptions', default=False, help='Output descriptions of all supported extractors and exit') + general.add_option( + '--use-extractors', '--ies', + action='callback', dest='allowed_extractors', metavar='NAMES', type='str', + default=[], callback=_list_from_options_callback, + help=( + 'Extractor names to use separated by commas. ' + 'You can also use regexes, "all", "default" and "end" (end URL matching); ' + 'e.g. --ies "holodex.*,end,youtube". ' + 'Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. ' + 'Use --list-extractors for a list of available extractor names')) general.add_option( '--force-generic-extractor', action='store_true', dest='force_generic_extractor', default=False, - help='Force extraction to use the generic extractor') + help=optparse.SUPPRESS_HELP) general.add_option( '--default-search', dest='default_search', metavar='PREFIX',