From 8f18aca8717bb0dd49054555af8d386e5eda3a88 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 15 Aug 2021 13:42:23 +0530 Subject: [PATCH] Let `--match-filter` reject entries early Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views` --- README.md | 4 ++++ test/test_utils.py | 12 +++++++++--- yt_dlp/YoutubeDL.py | 21 ++++++++++++--------- yt_dlp/options.py | 8 ++++---- yt_dlp/utils.py | 19 +++++++++++-------- 5 files changed, 40 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 7877a4a27..bd7d31c13 100644 --- a/README.md +++ b/README.md @@ -1439,6 +1439,10 @@ While these options are redundant, they are still expected to be used due to the -e, --get-title --print title -g, --get-url --print urls -j, --dump-json --print "%()j" + --match-title REGEX --match-filter "title ~= (?i)REGEX" + --reject-title REGEX --match-filter "title !~= (?i)REGEX" + --min-views COUNT --match-filter "view_count >=? COUNT" + --max-views COUNT --match-filter "view_count <=? COUNT" #### Not recommended diff --git a/test/test_utils.py b/test/test_utils.py index aef59e491..dedc598f7 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1285,9 +1285,15 @@ ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'})) # Example from docs - self.assertTrue( - r'!is_live & like_count>?100 & description~=\'(?i)\bcats \& dogs\b\'', - {'description': 'Raining Cats & Dogs'}) + self.assertTrue(match_str( + r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'", + {'description': 'Raining Cats & Dogs'})) + + # Incomplete + self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True)) + self.assertTrue(match_str('x', {'id': 'foo'}, True)) + self.assertTrue(match_str('!x', {'id': 'foo'}, True)) + self.assertFalse(match_str('x', {'id': 'foo'}, False)) def test_parse_dfxp_time_expr(self): self.assertEqual(parse_dfxp_time_expr(None), None) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index d12131acd..eef3f8b4c 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -1117,12 +1117,15 @@ class YoutubeDL(object): if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): return 'Skipping "%s" because it is age restricted' % video_title - if not incomplete: - match_filter = self.params.get('match_filter') - if match_filter is not None: - ret = match_filter(info_dict) - if ret is not None: - return ret + match_filter = self.params.get('match_filter') + if match_filter is not None: + try: + ret = match_filter(info_dict, incomplete=incomplete) + except TypeError: + # For backward compatibility + ret = None if incomplete else match_filter(info_dict) + if ret is not None: + return ret return None if self.in_download_archive(info_dict): @@ -2873,13 +2876,13 @@ class YoutubeDL(object): except UnavailableVideoError: self.report_error('unable to download video') except MaxDownloadsReached: - self.to_screen('[info] Maximum number of downloaded files reached') + self.to_screen('[info] Maximum number of downloads reached') raise except ExistingVideoReached: - self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing') + self.to_screen('[info] Encountered a video that is already in the archive, stopping due to --break-on-existing') raise except RejectedVideoReached: - self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject') + self.to_screen('[info] Encountered a video that did not match filter, stopping due to --break-on-reject') raise else: if self.params.get('dump_single_json', False): diff --git a/yt_dlp/options.py b/yt_dlp/options.py index 1499991a1..ef821eb11 100644 --- a/yt_dlp/options.py +++ b/yt_dlp/options.py @@ -356,11 +356,11 @@ def parseOpts(overrideArguments=None): selection.add_option( '--match-title', dest='matchtitle', metavar='REGEX', - help='Download only matching titles (regex or caseless sub-string)') + help=optparse.SUPPRESS_HELP) selection.add_option( '--reject-title', dest='rejecttitle', metavar='REGEX', - help='Skip download for matching titles (regex or caseless sub-string)') + help=optparse.SUPPRESS_HELP) selection.add_option( '--max-downloads', dest='max_downloads', metavar='NUMBER', type=int, default=None, @@ -395,11 +395,11 @@ def parseOpts(overrideArguments=None): selection.add_option( '--min-views', metavar='COUNT', dest='min_views', default=None, type=int, - help='Do not download any videos with less than COUNT views') + help=optparse.SUPPRESS_HELP) selection.add_option( '--max-views', metavar='COUNT', dest='max_views', default=None, type=int, - help='Do not download any videos with more than COUNT views') + help=optparse.SUPPRESS_HELP) selection.add_option( '--match-filter', metavar='FILTER', dest='match_filter', default=None, diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index 0e8392fdf..6276ac726 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -4657,7 +4657,7 @@ def render_table(header_row, data, delim=False, extraGap=0, hideEmpty=False): return '\n'.join(format_str % tuple(row) for row in table) -def _match_one(filter_part, dct): +def _match_one(filter_part, dct, incomplete): # TODO: Generalize code with YoutubeDL._build_format_filter STRING_OPERATORS = { '*=': operator.contains, @@ -4718,7 +4718,7 @@ def _match_one(filter_part, dct): 'Invalid integer value %r in filter part %r' % ( m.group('intval'), filter_part)) if actual_value is None: - return m.group('none_inclusive') + return incomplete or m.group('none_inclusive') return op(actual_value, comparison_value) UNARY_OPERATORS = { @@ -4733,22 +4733,25 @@ def _match_one(filter_part, dct): if m: op = UNARY_OPERATORS[m.group('op')] actual_value = dct.get(m.group('key')) + if incomplete and actual_value is None: + return True return op(actual_value) raise ValueError('Invalid filter part %r' % filter_part) -def match_str(filter_str, dct): - """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false """ - +def match_str(filter_str, dct, incomplete=False): + """ Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false + When incomplete, all conditions passes on missing fields + """ return all( - _match_one(filter_part.replace(r'\&', '&'), dct) + _match_one(filter_part.replace(r'\&', '&'), dct, incomplete) for filter_part in re.split(r'(?