From 5ca095cbcde3e32642a4fe5b2d69e8e3c785a021 Mon Sep 17 00:00:00 2001 From: bashonly <88596187+bashonly@users.noreply.github.com> Date: Sat, 23 Sep 2023 15:00:31 -0500 Subject: [PATCH] [cleanup] Misc (#8182) Closes #7796, Closes #8028 Authored by: barsnick, sqrtNOT, gamer191, coletdjnz, Grub4K, bashonly --- CONTRIBUTING.md | 8 ++++---- README.md | 2 +- devscripts/make_changelog.py | 2 +- test/test_YoutubeDL.py | 1 - test/test_networking_utils.py | 6 +++--- yt_dlp/YoutubeDL.py | 6 +++--- yt_dlp/compat/urllib/__init__.py | 2 +- yt_dlp/extractor/abc.py | 1 - yt_dlp/extractor/ign.py | 4 ---- yt_dlp/extractor/nebula.py | 1 - yt_dlp/extractor/peekvids.py | 1 - yt_dlp/extractor/radiofrance.py | 2 +- yt_dlp/extractor/rcs.py | 6 +++--- yt_dlp/extractor/rokfin.py | 1 - yt_dlp/extractor/s4c.py | 2 -- yt_dlp/extractor/sovietscloset.py | 1 - yt_dlp/extractor/youtube.py | 2 +- yt_dlp/networking/__init__.py | 2 +- yt_dlp/networking/_urllib.py | 2 +- yt_dlp/networking/exceptions.py | 4 ++-- 20 files changed, 22 insertions(+), 34 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a8587fe92..90e7faf7c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -217,7 +217,7 @@ After you have ensured this site is distributing its content legally, you can fo 1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. 1. Run `python test/test_download.py TestDownload.test_YourExtractor` (note that `YourExtractor` doesn't end with `IE`). This *should fail* at first, but you can continually re-run it until you're done. If you decide to add more than one test, the tests will then be named `TestDownload.test_YourExtractor`, `TestDownload.test_YourExtractor_1`, `TestDownload.test_YourExtractor_2`, etc. Note that tests with `only_matching` key in test's dict are not counted in. You can also run all the tests in one go with `TestDownload.test_YourExtractor_all` 1. 
Make sure you have atleast one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running. -1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L91-L426). Add tests and code for as many as you want. +1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want. 1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions) and check the code with [flake8](https://flake8.pycqa.org/en/latest/index.html#quickstart): $ flake8 yt_dlp/extractor/yourextractor.py @@ -251,7 +251,7 @@ Extractors are very fragile by nature since they depend on the layout of the sou ### Mandatory and optional metafields -For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L91-L426) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: +For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. 
Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp: - `id` (media identifier) - `title` (media title) @@ -696,7 +696,7 @@ formats = [ ### Use convenience conversion and parsing functions -Wrap all extracted numeric data into safe functions from [`yt_dlp/utils.py`](yt_dlp/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. +Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well. Use `url_or_none` for safe URL processing. @@ -704,7 +704,7 @@ Use `traverse_obj` and `try_call` (superseeds `dict_get` and `try_get`) for safe Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction. -Explore [`yt_dlp/utils.py`](yt_dlp/utils.py) for more useful convenience functions. +Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions. #### Examples diff --git a/README.md b/README.md index d94d8ea82..d9b11952d 100644 --- a/README.md +++ b/README.md @@ -1800,7 +1800,7 @@ The following extractors use this feature: #### youtube * `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. 
See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes * `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively -* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. +* `player_client`: Clients to extract video data from. The main clients are `web`, `android` and `ios` with variants `_music`, `_embedded`, `_embedscreen`, `_creator` (e.g. `web_embedded`); and `mweb`, `mweb_embedscreen` and `tv_embedded` (agegate bypass) with no variants. By default, `ios,android,web` is used, but `tv_embedded` and `creator` variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` urls. You can use `all` to use all the clients, and `default` for the default clients. * `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details * `player_params`: YouTube player parameters to use for player requests. 
Will overwrite any default ones set by yt-dlp. * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side) diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index ac68dcd19..9ff65db14 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -260,7 +260,7 @@ class CommitRange: AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE) MESSAGE_RE = re.compile(r''' (?:\[(?P<prefix>[^\]]+)\]\ )? - (?:(?P<sub_details>`?[^:`]+`?): )? + (?:(?P<sub_details>`?[\w.-]+`?): )? (?P<message>.+?) (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))? ''', re.VERBOSE | re.DOTALL) diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 3cfb61fb2..916ee48b9 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -631,7 +631,6 @@ class TestYoutubeDL(unittest.TestCase): self.assertEqual(test_dict['playlist'], 'funny videos') outtmpl_info = { - 'id': '1234', 'id': '1234', 'ext': 'mp4', 'width': None, diff --git a/test/test_networking_utils.py b/test/test_networking_utils.py index dbf656090..419aae1e4 100644 --- a/test/test_networking_utils.py +++ b/test/test_networking_utils.py @@ -269,14 +269,14 @@ class TestNetworkingExceptions: assert not response.closed def test_incomplete_read_error(self): - error = IncompleteRead(b'test', 3, cause='test') + error = IncompleteRead(4, 3, cause='test') assert isinstance(error, IncompleteRead) assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>' assert str(error) == error.msg == '4 bytes read, 3 more expected' - assert error.partial == b'test' + assert error.partial == 4 assert error.expected == 3 assert error.cause == 'test' - error = IncompleteRead(b'aaa') + error = IncompleteRead(3) assert repr(error) == '<IncompleteRead: 3 bytes read>' assert str(error) == '3 bytes read' diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 1feed3052..39aaf2c2e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -239,9 +239,9 @@ class YoutubeDL: 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output 
paths. The allowed keys are 'home' - 'temp' and the keys of OUTTMPL_TYPES (in utils.py) + 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py) outtmpl: Dictionary of templates for output names. Allowed keys - are 'default' and the keys of OUTTMPL_TYPES (in utils.py). + are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py). For compatibility with youtube-dl, a single string can also be used outtmpl_na_placeholder: Placeholder for unavailable meta fields. restrictfilenames: Do not allow "&" and spaces in file names @@ -422,7 +422,7 @@ class YoutubeDL: asked whether to download the video. - Raise utils.DownloadCancelled(msg) to abort remaining downloads when a video is rejected. - match_filter_func in utils.py is one example for this. + match_filter_func in utils/_utils.py is one example for this. color: A Dictionary with output stream names as keys and their respective color policy as values. Can also just be a single color policy, diff --git a/yt_dlp/compat/urllib/__init__.py b/yt_dlp/compat/urllib/__init__.py index b27cc6133..9084b3c2b 100644 --- a/yt_dlp/compat/urllib/__init__.py +++ b/yt_dlp/compat/urllib/__init__.py @@ -1,7 +1,7 @@ # flake8: noqa: F405 from urllib import * # noqa: F403 -del request +del request # noqa: F821 from . import request # noqa: F401 from ..compat_utils import passthrough_module diff --git a/yt_dlp/extractor/abc.py b/yt_dlp/extractor/abc.py index f56133eb3..d2cf5f7c5 100644 --- a/yt_dlp/extractor/abc.py +++ b/yt_dlp/extractor/abc.py @@ -180,7 +180,6 @@ class ABCIViewIE(InfoExtractor): _VALID_URL = r'https?://iview\.abc\.net\.au/(?:[^/]+/)*video/(?P<id>[^/?#]+)' _GEO_COUNTRIES = ['AU'] - # ABC iview programs are normally available for 14 days only. 
_TESTS = [{ 'url': 'https://iview.abc.net.au/show/gruen/series/11/video/LE1927H001S00', 'md5': '67715ce3c78426b11ba167d875ac6abf', diff --git a/yt_dlp/extractor/ign.py b/yt_dlp/extractor/ign.py index 64875f8ce..1c4f105e9 100644 --- a/yt_dlp/extractor/ign.py +++ b/yt_dlp/extractor/ign.py @@ -197,10 +197,6 @@ class IGNVideoIE(IGNBaseIE): 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', 'duration': 298, 'tags': 'count:13', - 'display_id': '112203', - 'thumbnail': 'https://sm.ign.com/ign_me/video/h/how-hitman/how-hitman-aims-to-be-different-than-every-other-s_8z14.jpg', - 'duration': 298, - 'tags': 'count:13', }, 'expected_warnings': ['HTTP Error 400: Bad Request'], }, { diff --git a/yt_dlp/extractor/nebula.py b/yt_dlp/extractor/nebula.py index 4f3e691b7..8fba2bcf7 100644 --- a/yt_dlp/extractor/nebula.py +++ b/yt_dlp/extractor/nebula.py @@ -127,7 +127,6 @@ class NebulaIE(NebulaBaseIE): 'channel_id': 'lindsayellis', 'uploader': 'Lindsay Ellis', 'uploader_id': 'lindsayellis', - 'timestamp': 1533009600, 'uploader_url': 'https://nebula.tv/lindsayellis', 'series': 'Lindsay Ellis', 'display_id': 'that-time-disney-remade-beauty-and-the-beast', diff --git a/yt_dlp/extractor/peekvids.py b/yt_dlp/extractor/peekvids.py index d1fc058b9..41f591b09 100644 --- a/yt_dlp/extractor/peekvids.py +++ b/yt_dlp/extractor/peekvids.py @@ -146,7 +146,6 @@ class PlayVidsIE(PeekVidsBaseIE): 'uploader': 'Brazzers', 'age_limit': 18, 'view_count': int, - 'age_limit': 18, 'categories': list, 'tags': list, }, diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index 35f4b91dd..ec1b97631 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -82,7 +82,7 @@ class RadioFranceBaseIE(InfoExtractor): def _extract_data_from_webpage(self, webpage, display_id, key): return traverse_obj(self._search_json( r'\bconst\s+data\s*=', webpage, key, display_id, - 
contains_pattern=r'(\[\{.*?\}\]);', transform_source=js_to_json), + contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json), (..., 'data', key, {dict}), get_all=False) or {} diff --git a/yt_dlp/extractor/rcs.py b/yt_dlp/extractor/rcs.py index 028d3d90b..b865f63fb 100644 --- a/yt_dlp/extractor/rcs.py +++ b/yt_dlp/extractor/rcs.py @@ -239,10 +239,10 @@ class RCSEmbedsIE(RCSBaseIE): } }, { 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', - 'match_only': True + 'only_matching': True }, { 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', - 'match_only': True + 'only_matching': True }] _WEBPAGE_TESTS = [{ 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/', @@ -325,7 +325,7 @@ class RCSIE(RCSBaseIE): } }, { 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', - 'match_only': True + 'only_matching': True }] diff --git a/yt_dlp/extractor/rokfin.py b/yt_dlp/extractor/rokfin.py index 4a4d40bef..cad76f0c9 100644 --- a/yt_dlp/extractor/rokfin.py +++ b/yt_dlp/extractor/rokfin.py @@ -40,7 +40,6 @@ class RokfinIE(InfoExtractor): 'channel': 'Jimmy Dore', 'channel_id': 65429, 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', - 'duration': 213.0, 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, diff --git a/yt_dlp/extractor/s4c.py b/yt_dlp/extractor/s4c.py index 990ea2b44..67eff723b 100644 --- a/yt_dlp/extractor/s4c.py +++ b/yt_dlp/extractor/s4c.py @@ -78,7 +78,6 @@ class S4CSeriesIE(InfoExtractor): 'info_dict': { 'id': '864982911', 'title': 'Iaith ar Daith', - 'description': 'md5:e878ebf660dce89bd2ef521d7ce06397' }, }, { 'url': 'https://www.s4c.cymru/clic/series/866852587', @@ -86,7 +85,6 @@ class S4CSeriesIE(InfoExtractor): 'info_dict': { 'id': '866852587', 'title': 'FFIT Cymru', - 'description': 
'md5:abcb3c129cb68dbb6cd304fd33b07e96' }, }] diff --git a/yt_dlp/extractor/sovietscloset.py b/yt_dlp/extractor/sovietscloset.py index 453016ccb..493eea2a6 100644 --- a/yt_dlp/extractor/sovietscloset.py +++ b/yt_dlp/extractor/sovietscloset.py @@ -76,7 +76,6 @@ class SovietsClosetIE(SovietsClosetBaseIE): 'title': 'Arma 3 - Zeus Games #5', 'uploader': 'SovietWomble', 'thumbnail': r're:^https?://.*\.b-cdn\.net/c0e5e76f-3a93-40b4-bf01-12343c2eec5d/thumbnail\.jpg$', - 'uploader': 'SovietWomble', 'creator': 'SovietWomble', 'release_timestamp': 1461157200, 'release_date': '20160420', diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 023d8fd8c..a39d17cf1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -902,7 +902,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' """ - # XXX: this could be moved to a general function in utils.py + # XXX: this could be moved to a general function in utils/_utils.py # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. 
# See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py index 5e8876484..5b1599a6d 100644 --- a/yt_dlp/networking/__init__.py +++ b/yt_dlp/networking/__init__.py @@ -1,4 +1,4 @@ -# flake8: noqa: 401 +# flake8: noqa: F401 from .common import ( HEADRequest, PUTRequest, diff --git a/yt_dlp/networking/_urllib.py b/yt_dlp/networking/_urllib.py index c327f7744..9e2bf33e4 100644 --- a/yt_dlp/networking/_urllib.py +++ b/yt_dlp/networking/_urllib.py @@ -337,7 +337,7 @@ def handle_sslerror(e: ssl.SSLError): def handle_response_read_exceptions(e): if isinstance(e, http.client.IncompleteRead): - raise IncompleteRead(partial=e.partial, cause=e, expected=e.expected) from e + raise IncompleteRead(partial=len(e.partial), cause=e, expected=e.expected) from e elif isinstance(e, ssl.SSLError): handle_sslerror(e) elif isinstance(e, (OSError, EOFError, http.client.HTTPException, *CONTENT_DECODE_ERRORS)): diff --git a/yt_dlp/networking/exceptions.py b/yt_dlp/networking/exceptions.py index 465b18ba9..f58dc246e 100644 --- a/yt_dlp/networking/exceptions.py +++ b/yt_dlp/networking/exceptions.py @@ -75,10 +75,10 @@ class HTTPError(RequestError): class IncompleteRead(TransportError): - def __init__(self, partial, expected=None, **kwargs): + def __init__(self, partial: int, expected: int = None, **kwargs): self.partial = partial self.expected = expected - msg = f'{len(partial)} bytes read' + msg = f'{partial} bytes read' if expected is not None: msg += f', {expected} more expected'