From 271808b6b2bd75ec9bdf943a55dbc4737bfa6f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 22 Jan 2017 03:43:27 +0700 Subject: [PATCH] [pornflip] Improve and extract dash formats (closes #11795) --- youtube_dl/extractor/pornflip.py | 77 +++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/youtube_dl/extractor/pornflip.py b/youtube_dl/extractor/pornflip.py index b6077f7cb..a4a5d390e 100644 --- a/youtube_dl/extractor/pornflip.py +++ b/youtube_dl/extractor/pornflip.py @@ -4,56 +4,89 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..compat import ( compat_parse_qs, + compat_str, ) from ..utils import ( int_or_none, try_get, - RegexNotFoundError, + unified_timestamp, ) class PornFlipIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?pornflip\.com/v/(?P[0-9A-Za-z]{11})' - _TEST = { + _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:v|embed)/(?P[0-9A-Za-z]{11})' + _TESTS = [{ 'url': 'https://www.pornflip.com/v/wz7DfNhMmep', 'md5': '98c46639849145ae1fd77af532a9278c', 'info_dict': { 'id': 'wz7DfNhMmep', 'ext': 'mp4', 'title': '2 Amateurs swallow make his dream cumshots true', - 'uploader': 'figifoto', 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 112, + 'timestamp': 1481655502, + 'upload_date': '20161213', + 'uploader_id': '106786', + 'uploader': 'figifoto', + 'view_count': int, 'age_limit': 18, } - } + }, { + 'url': 'https://www.pornflip.com/embed/wz7DfNhMmep', + 'only_matching': True, + }] def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - uploader = self._html_search_regex( - r'\s+]+flashvars=(["\'])(?P(?:(?!\1).)+)\1', + webpage, 'flashvars', group='flashvars')) + title = flashvars['video_vars[title]'][0] - thumbnail = try_get(flashvars, lambda x: x['video_vars[big_thumb]'][0]) - formats = [] - for k, v in flashvars.items(): - height = self._search_regex(r'video_vars\[video_urls\]\[(\d+).+?\]', k, 'height', default=None) - if height: - url = v[0] - formats.append({ - 'height': int_or_none(height), - 'url': url - }) + def flashvar(kind): + return try_get( + flashvars, lambda x: x['video_vars[%s]' % kind][0], compat_str) + + formats = [] + for key, value in flashvars.items(): + if not (value and isinstance(value, list)): + continue + format_url = value[0] + if key == 'video_vars[hds_manifest]': + formats.extend(self._extract_mpd_formats( + format_url, video_id, mpd_id='dash', fatal=False)) + continue + height = self._search_regex( + r'video_vars\[video_urls\]\[(\d+)', key, 'height', default=None) + if not height: + continue + formats.append({ + 'url': format_url, + 'format_id': 'http-%s' % height, + 'height': int_or_none(height), + }) self._sort_formats(formats) + uploader = self._html_search_regex( + (r']+class="name"[^>]*>\s*]+>\s*(?P[^<]+)', + r']+content=(["\'])[^>]*\buploaded by (?P.+?)\1'), + webpage, 'uploader', fatal=False, group='uploader') + return { 'id': video_id, 'formats': formats, 'title': title, + 'thumbnail': flashvar('big_thumb'), + 'duration': int_or_none(flashvar('duration')), + 'timestamp': unified_timestamp(self._html_search_meta( + 'uploadDate', webpage, 'timestamp')), + 'uploader_id': flashvar('author_id'), 'uploader': uploader, - 'thumbnail': thumbnail, + 'view_count': int_or_none(flashvar('views')), 'age_limit': 18, }