From cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f Mon Sep 17 00:00:00 2001
From: zhgwn <130610452+zhgwn@users.noreply.github.com>
Date: Tue, 18 Apr 2023 04:18:29 +0200
Subject: [PATCH] [extractor/pornez] Support new URL formats (#6792)

Closes #6791, Closes #6298
Authored by: zhgwn
---
 yt_dlp/extractor/pornez.py | 64 ++++++++++++++++++++++++--------------
 1 file changed, 41 insertions(+), 23 deletions(-)

diff --git a/yt_dlp/extractor/pornez.py b/yt_dlp/extractor/pornez.py
index 3a22cb821..bc45f865e 100644
--- a/yt_dlp/extractor/pornez.py
+++ b/yt_dlp/extractor/pornez.py
@@ -1,42 +1,60 @@
 from .common import InfoExtractor
-from ..utils import int_or_none, urljoin
+from ..utils import (
+    clean_html,
+    int_or_none,
+    get_element_by_class,
+    urljoin,
+)
 
 
 class PornezIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?pornez\.net/video(?P<id>[0-9]+)/'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?pornez\.net/(?:video(?P<id>\w+)|watch)/'
+    _TESTS = [{
         'url': 'https://pornez.net/video344819/mistresst-funny_penis_names-wmv/',
-        'md5': '2e19a0a1cff3a5dbea0ef1b9e80bcbbc',
         'info_dict': {
             'id': '344819',
             'ext': 'mp4',
-            'title': r'mistresst funny_penis_names wmv',
+            'title': 'mistresst funny_penis_names wmv',
             'thumbnail': r're:^https?://.*\.jpg$',
             'age_limit': 18,
-        }
-    }
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://pornez.net/watch/leana+lovings+stiff+for+stepdaughter/',
+        'info_dict': {
+            'id': '156161',
+            'ext': 'mp4',
+            'title': 'Watch leana lovings stiff for stepdaughter porn video.',
+            'age_limit': 18,
+        },
+        'params': {'skip_download': 'm3u8'},
+    }, {
+        'url': 'https://pornez.net/videovzs27fj/tutor4k-e14-blue-wave-1080p-nbq-tutor4k-e14-blue-wave/',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(url, video_id)
-        iframe_src = self._html_search_regex(
-            r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe', fatal=True)
-        iframe_src = urljoin('https://pornez.net', iframe_src)
-        title = self._html_search_meta(['name', 'twitter:title', 'og:title'], webpage, 'title', default=None)
-        if title is None:
-            title = self._search_regex(r'<h1>(.*?)</h1>', webpage, 'title', fatal=True)
-        thumbnail = self._html_search_meta(['thumbnailUrl'], webpage, 'title', default=None)
-        webpage = self._download_webpage(iframe_src, video_id)
-        entries = self._parse_html5_media_entries(iframe_src, webpage, video_id)[0]
-        for format in entries['formats']:
-            height = self._search_regex(r'_(\d+)\.m3u8', format['url'], 'height')
-            format['format_id'] = '%sp' % height
-            format['height'] = int_or_none(height)
+        if not video_id:
+            video_id = self._search_regex(
+                r'<link[^>]+\bhref=["\']https?://pornez.net/\?p=(\w+)["\']', webpage, 'id')
+
+        iframe_src = self._html_search_regex(r'<iframe[^>]+src="([^"]+)"', webpage, 'iframe')
+        iframe = self._download_webpage(urljoin('https://pornez.net', iframe_src), video_id)
+
+        entries = self._parse_html5_media_entries(iframe_src, iframe, video_id)[0]
+        for fmt in entries['formats']:
+            height = self._search_regex(r'_(\d+)\.m3u8', fmt['url'], 'height')
+            fmt['format_id'] = '%sp' % height
+            fmt['height'] = int_or_none(height)
 
         entries.update({
             'id': video_id,
-            'title': title,
-            'thumbnail': thumbnail,
-            'age_limit': 18
+            'title': (clean_html(get_element_by_class('video-title', webpage))
+                      or self._html_search_meta(
+                          ['twitter:title', 'og:title', 'description'], webpage, 'title', default=None)),
+            'thumbnail': self._html_search_meta(['thumbnailUrl'], webpage, 'thumb', default=None),
+            'age_limit': 18,
         })
         return entries