diff --git a/test/helper.py b/test/helper.py index f19e1a34f..e918d8c46 100644 --- a/test/helper.py +++ b/test/helper.py @@ -92,6 +92,13 @@ def gettestcases(include_onlymatching=False): yield from ie.get_testcases(include_onlymatching) +def getwebpagetestcases(): + for ie in yt_dlp.extractor.gen_extractors(): + for tc in ie.get_webpage_testcases(): + tc.setdefault('add_ie', []).append('Generic') + yield tc + + md5 = lambda s: hashlib.md5(s.encode()).hexdigest() diff --git a/test/test_download.py b/test/test_download.py index c9f5e735c..787013c34 100755 --- a/test/test_download.py +++ b/test/test_download.py @@ -8,6 +8,7 @@ import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +import collections import hashlib import http.client import json @@ -20,6 +21,7 @@ from test.helper import ( expect_warnings, get_params, gettestcases, + getwebpagetestcases, is_download_test, report_warning, try_rm, @@ -32,6 +34,7 @@ from yt_dlp.utils import ( ExtractorError, UnavailableVideoError, format_bytes, + join_nonempty, ) RETRIES = 3 @@ -57,7 +60,9 @@ def _file_md5(fn): return hashlib.md5(f.read()).hexdigest() -defs = gettestcases() +normal_test_cases = gettestcases() +webpage_test_cases = getwebpagetestcases() +tests_counter = collections.defaultdict(collections.Counter) @is_download_test @@ -72,24 +77,13 @@ class TestDownload(unittest.TestCase): def __str__(self): """Identify each test with the `add_ie` attribute, if available.""" + cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie + return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:' - def strclass(cls): - """From 2.7's unittest; 2.6 had _strclass so we can't import it.""" - return f'{cls.__module__}.{cls.__name__}' - - add_ie = getattr(self, self._testMethodName).add_ie - return '%s (%s)%s:' % (self._testMethodName, - strclass(self.__class__), - ' [%s]' % add_ie if add_ie else '') - - def setUp(self): - self.defs = defs # Dynamically generate tests - def generator(test_case, tname): - def test_template(self): if self.COMPLETED_TESTS.get(tname): return @@ -255,39 +249,43 @@ def generator(test_case, tname): # And add them to TestDownload -tests_counter = {} -for test_case in defs: - name = test_case['name'] - i = tests_counter.get(name, 0) - tests_counter[name] = i + 1 - tname = f'test_{name}_{i}' if i else f'test_{name}' - test_method = generator(test_case, tname) - test_method.__name__ = str(tname) - ie_list = test_case.get('add_ie') - test_method.add_ie = ie_list and ','.join(ie_list) - setattr(TestDownload, test_method.__name__, test_method) - del test_method +def inject_tests(test_cases, label=''): + for test_case in test_cases: + name = test_case['name'] + tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_') + tests_counter[name][label] += 1 + test_method = generator(test_case, tname) + test_method.__name__ = tname + test_method.add_ie = ','.join(test_case.get('add_ie', [])) + setattr(TestDownload, test_method.__name__, test_method) -def batch_generator(name, num_tests): +inject_tests(normal_test_cases) + +# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction +inject_tests(webpage_test_cases, 'webpage') + + +def batch_generator(name): def test_template(self): - for i in range(num_tests): - test_name = f'test_{name}_{i}' if i else f'test_{name}' - try: - getattr(self, test_name)() - except unittest.SkipTest: - print(f'Skipped {test_name}') + for label, num_tests in tests_counter[name].items(): + for i in range(num_tests): + test_name = join_nonempty('test', name, label, i, delim='_') + try: + getattr(self, test_name)() + except unittest.SkipTest: + print(f'Skipped {test_name}') return test_template -for name, num_tests in tests_counter.items(): - test_method = batch_generator(name, num_tests) +for name in tests_counter: + test_method = batch_generator(name) test_method.__name__ = f'test_{name}_all' test_method.add_ie = '' setattr(TestDownload, test_method.__name__, test_method) - del test_method +del test_method if __name__ == '__main__': diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index b8347fe4c..317aa270e 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -3665,11 +3665,18 @@ class InfoExtractor: t['name'] = cls.ie_key() yield t + @classmethod + def get_webpage_testcases(cls): + tests = getattr(cls, '_WEBPAGE_TESTS', []) + for t in tests: + t['name'] = cls.ie_key() + return tests + @classproperty def age_limit(cls): """Get age limit from the testcases""" return max(traverse_obj( - tuple(cls.get_testcases(include_onlymatching=False)), + (*cls.get_testcases(include_onlymatching=False), *cls.get_webpage_testcases()), (..., (('playlist', 0), None), 'info_dict', 'age_limit')) or [0]) @classmethod @@ -3844,7 +3851,10 @@ class InfoExtractor: def extract_from_webpage(cls, ydl, url, webpage): ie = (cls if isinstance(cls._extract_from_webpage, types.MethodType) else ydl.get_info_extractor(cls.ie_key())) - yield from ie._extract_from_webpage(url, webpage) or [] + for info in ie._extract_from_webpage(url, webpage) or []: + # url = None since we do not want to set (webpage/original)_url + ydl.add_default_extra_info(info, ie, None) + yield info @classmethod def _extract_from_webpage(cls, url, webpage): diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d6a6166a0..0dc9ae0da 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -933,21 +933,6 @@ class GenericIE(InfoExtractor): 'skip_download': True, } }, - # YouTube embed - { - 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/', - 'md5': '516718101ec834f74318df76259fb3cc', - 'info_dict': { - 'id': 'msN87y-iEx0', - 'ext': 'webm', - 'title': 'Feynman: Mirrors FUN TO IMAGINE 6', - 'upload_date': '20080526', - 'description': 'md5:0ffc78ea3f01b2e2c247d5f8d1d3c18d', - 'uploader': 'Christopher Sykes', - 'uploader_id': 'ChristopherJSykes', - }, - 'add_ie': ['Youtube'], - }, # Camtasia studio { 'url': 'http://www.ll.mit.edu/workshops/education/videocourses/antennas/lecture1/video/', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index fb23afbad..4dc8e79ac 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2266,6 +2266,42 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } ] + _WEBPAGE_TESTS = [ + # YouTube embed + { + 'url': 'http://www.improbable.com/2017/04/03/untrained-modern-youths-and-ancient-masters-in-selfie-portraits/', + 'md5': '873c81d308b979f0e23ee7e620b312a3', + 'info_dict': { + 'id': 'msN87y-iEx0', + 'ext': 'mp4', + 'title': 'Feynman: Mirrors FUN TO IMAGINE 6', + 'upload_date': '20080526', + 'description': 'md5:873c81d308b979f0e23ee7e620b312a3', + 'uploader': 'Christopher Sykes', + 'uploader_id': 'ChristopherJSykes', + 'age_limit': 0, + 'tags': ['feynman', 'mirror', 'science', 'physics', 'imagination', 'fun', 'cool', 'puzzle'], + 'channel_id': 'UCCeo--lls1vna5YJABWAcVA', + 'playable_in_embed': True, + 'thumbnail': 'https://i.ytimg.com/vi/msN87y-iEx0/hqdefault.jpg', + 'like_count': int, + 'comment_count': int, + 'channel': 'Christopher Sykes', + 'live_status': 'not_live', + 'channel_url': 'https://www.youtube.com/channel/UCCeo--lls1vna5YJABWAcVA', + 'availability': 'public', + 'duration': 195, + 'view_count': int, + 'categories': ['Science & Technology'], + 'channel_follower_count': int, + 'uploader_url': 'http://www.youtube.com/user/ChristopherJSykes', + }, + 'params': { + 'skip_download': True, + } + }, + ] + @classmethod def suitable(cls, url): from ..utils import parse_qs