From ed6bec168dd6af955f4ec0165356ac76b944c537 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 9 Nov 2022 15:48:25 +0530 Subject: [PATCH] [extractor/doodstream] Remove extractor It was added in youtube-dlc, likely without sufficient scrutiny Closes #3808, Closes #5251, Closes #5403 --- yt_dlp/extractor/_extractors.py | 3 +- yt_dlp/extractor/doodstream.py | 77 --------------------------------- yt_dlp/extractor/unsupported.py | 60 ++++++++++++++++++++----- 3 files changed, 51 insertions(+), 89 deletions(-) delete mode 100644 yt_dlp/extractor/doodstream.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0ca8b3e06..053ef44ae 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -477,7 +477,6 @@ from .digitalconcerthall import DigitalConcertHallIE from .discovery import DiscoveryIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE -from .doodstream import DoodStreamIE from .dropbox import DropboxIE from .dropout import ( DropoutSeasonIE, @@ -2023,7 +2022,7 @@ from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE from .unscripted import UnscriptedNewsVideoIE -from .unsupported import KnownDRMIE +from .unsupported import KnownDRMIE, KnownPiracyIE from .uol import UOLIE from .uplynk import ( UplynkIE, diff --git a/yt_dlp/extractor/doodstream.py b/yt_dlp/extractor/doodstream.py deleted file mode 100644 index b41da32e5..000000000 --- a/yt_dlp/extractor/doodstream.py +++ /dev/null @@ -1,77 +0,0 @@ -import string -import random -import time - -from .common import InfoExtractor - - -class DoodStreamIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?dood\.(?:to|watch|so|pm|wf)/[ed]/(?P<id>[a-z0-9]+)' - _TESTS = [{ - 'url': 'http://dood.to/e/5s1wmbdacezb', - 'md5': '4568b83b31e13242b3f1ff96c55f0595', - 'info_dict': { - 'id': '5s1wmbdacezb', - 'ext': 'mp4', - 'title': 'Kat Wonders - Monthly May 2020', - 'description': 'Kat Wonders - Monthly May 2020 | 
DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', - } - }, { - 'url': 'http://dood.watch/d/5s1wmbdacezb', - 'md5': '4568b83b31e13242b3f1ff96c55f0595', - 'info_dict': { - 'id': '5s1wmbdacezb', - 'ext': 'mp4', - 'title': 'Kat Wonders - Monthly May 2020', - 'description': 'Kat Wonders - Monthly May 2020 | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/flyus84qgl2fsk4g.jpg', - } - }, { - 'url': 'https://dood.to/d/jzrxn12t2s7n', - 'md5': '3207e199426eca7c2aa23c2872e6728a', - 'info_dict': { - 'id': 'jzrxn12t2s7n', - 'ext': 'mp4', - 'title': 'Stacy Cruz Cute ALLWAYSWELL', - 'description': 'Stacy Cruz Cute ALLWAYSWELL | DoodStream.com', - 'thumbnail': 'https://img.doodcdn.com/snaps/8edqd5nppkac3x8u.jpg', - } - }, { - 'url': 'https://dood.so/d/jzrxn12t2s7n', - 'only_matching': True - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - url = f'https://dood.to/e/{video_id}' - webpage = self._download_webpage(url, video_id) - - title = self._html_search_meta( - ('og:title', 'twitter:title'), webpage, default=None) or self._html_extract_title(webpage) - thumb = self._html_search_meta(['og:image', 'twitter:image'], webpage, default=None) - token = self._html_search_regex(r'[?&]token=([a-z0-9]+)[&\']', webpage, 'token') - description = self._html_search_meta( - ['og:description', 'description', 'twitter:description'], webpage, default=None) - - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/66.0', - 'referer': url - } - - pass_md5 = self._html_search_regex(r'(/pass_md5.*?)\'', webpage, 'pass_md5') - final_url = ''.join(( - self._download_webpage(f'https://dood.to{pass_md5}', video_id, headers=headers), - *(random.choice(string.ascii_letters + string.digits) for _ in range(10)), - f'?token={token}&expiry={int(time.time() * 1000)}', - )) - - return { - 'id': video_id, - 'title': title, - 'url': final_url, - 'http_headers': headers, - 'ext': 'mp4', - 
'description': description, - 'thumbnail': thumb, - } diff --git a/yt_dlp/extractor/unsupported.py b/yt_dlp/extractor/unsupported.py index 87ad87ca2..e40666ec0 100644 --- a/yt_dlp/extractor/unsupported.py +++ b/yt_dlp/extractor/unsupported.py @@ -1,11 +1,32 @@ from .common import InfoExtractor -from ..utils import classproperty, ExtractorError +from ..utils import ExtractorError, classproperty, remove_start -class KnownDRMIE(InfoExtractor): +class UnsupportedInfoExtractor(InfoExtractor): IE_DESC = False - IE_NAME = 'unsupported:drm' - UNSUPPORTED_URLS = ( + URLS = () # Redefine in subclasses + + @classproperty + def IE_NAME(cls): + return remove_start(super().IE_NAME, 'Known') + + @classproperty + def _VALID_URL(cls): + return rf'https?://(?:www\.)?(?:{"|".join(cls.URLS)})' + + +LF = '\n ' + + +class KnownDRMIE(UnsupportedInfoExtractor): + """Sites that are known to use DRM for all their videos + + Add to this list only if: + * You are reasonably certain that the site uses DRM for ALL their videos + * Multiple users have asked about this site on github/reddit/discord + """ + + URLS = ( r'play\.hbomax\.com', r'channel(?:4|5)\.com', r'peacocktv\.com', @@ -82,12 +103,31 @@ class KnownDRMIE(InfoExtractor): 'only_matching': True, }] - @classproperty - def _VALID_URL(cls): - return rf'https?://(?:www\.)?(?:{"|".join(cls.UNSUPPORTED_URLS)})' + def _real_extract(self, url): + raise ExtractorError( + f'The requested site is known to use DRM protection. 
' + f'It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported.{LF}' + f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, ' + 'unless you have evidence that the video is not DRM protected', expected=True) + + +class KnownPiracyIE(UnsupportedInfoExtractor): + """Sites that have been deemed to be piracy + + In order for this to not end up being a catalog of piracy sites, + only sites that were once supported should be added to this list + """ + + URLS = ( + r'dood\.(?:to|watch|so|pm|wf|ru)', + ) + + _TESTS = [{ + 'url': 'http://dood.to/e/5s1wmbdacezb', + 'only_matching': True, + }] def _real_extract(self, url): raise ExtractorError( - f'The requested site is known to use DRM protection. It will {self._downloader._format_err("NOT", self._downloader.Styles.EMPHASIS)} be supported by yt-dlp. ' - f'Please {self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open an issue, unless you have evidence that it is not DRM protected.', - expected=True) + f'This website is no longer supported since it has been determined to be primarily used for piracy.{LF}' + f'{self._downloader._format_err("DO NOT", self._downloader.Styles.ERROR)} open issues for it', expected=True)