From 37e40d693b508c6be4a8013cfb7fe7e5b4934042 Mon Sep 17 00:00:00 2001 From: zenerdi0de <83358565+zenerdi0de@users.noreply.github.com> Date: Thu, 23 Jun 2022 07:31:34 +0530 Subject: [PATCH] [extractor/tennistv] Rewrite extractor (#2324) Closes #2177 Authored by: zenerdi0de, pukkandan --- yt_dlp/extractor/tennistv.py | 179 ++++++++++++++++++++++------------- 1 file changed, 113 insertions(+), 66 deletions(-) diff --git a/yt_dlp/extractor/tennistv.py b/yt_dlp/extractor/tennistv.py index 80acaf190..3bd7ce3c4 100644 --- a/yt_dlp/extractor/tennistv.py +++ b/yt_dlp/extractor/tennistv.py @@ -1,16 +1,17 @@ -import json +import urllib.parse from .common import InfoExtractor - from ..utils import ( ExtractorError, + random_uuidv4, unified_timestamp, + urlencode_postdata, ) class TennisTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?tennistv\.com/videos/(?P[-a-z0-9]+)' - _TEST = { + _TESTS = [{ 'url': 'https://www.tennistv.com/videos/indian-wells-2018-verdasco-fritz', 'info_dict': { 'id': 'indian-wells-2018-verdasco-fritz', @@ -25,86 +26,132 @@ class TennisTVIE(InfoExtractor): 'skip_download': True, }, 'skip': 'Requires email and password of a subscribed account', - } + }, { + 'url': 'https://www.tennistv.com/videos/2650480/best-matches-of-2022-part-5', + 'info_dict': { + 'id': '2650480', + 'ext': 'mp4', + 'title': 'Best Matches of 2022 - Part 5', + 'description': 'md5:36dec3bfae7ed74bd79e48045b17264c', + 'thumbnail': 'https://open.http.mp.streamamg.com/p/3001482/sp/300148200/thumbnail/entry_id/0_myef18pd/version/100001/height/1920', + }, + 'params': {'skip_download': 'm3u8'}, + 'skip': 'Requires email and password of a subscribed account', + }] _NETRC_MACHINE = 'tennistv' - _session_token = None - - def _perform_login(self, username, password): - login_form = { - 'Email': username, - 'Password': password, - } - login_json = json.dumps(login_form).encode('utf-8') - headers = { - 'content-type': 'application/json', - 'Referer': 'https://www.tennistv.com/login', - 'Origin': 'https://www.tennistv.com', - } - - login_result = self._download_json( - 'https://www.tennistv.com/api/users/v1/login', None, - note='Logging in', - errnote='Login failed (wrong password?)', - headers=headers, - data=login_json) + access_token, refresh_token = None, None + _PARTNER_ID = 3001482 + _FORMAT_URL = 'https://open.http.mp.streamamg.com/p/{partner}/sp/{partner}00/playManifest/entryId/{entry}/format/applehttp/protocol/https/a.m3u8?ks={session}' + _AUTH_BASE_URL = 'https://sso.tennistv.com/auth/realms/TennisTV/protocol/openid-connect' + _HEADERS = { + 'origin': 'https://www.tennistv.com', + 'referer': 'https://www.tennistv.com/', + 'content-Type': 'application/x-www-form-urlencoded' + } - if login_result['error']['errorCode']: - raise ExtractorError('Login failed, %s said: %r' % (self.IE_NAME, login_result['error']['errorMessage'])) + def _perform_login(self, username, password): + login_page = self._download_webpage( + f'{self._AUTH_BASE_URL}/auth', None, 'Downloading login page', + query={ + 'client_id': 'tennis-tv-web', + 'redirect_uri': 'https://tennistv.com', + 'response_mode': 'fragment', + 'response_type': 'code', + 'scope': 'openid' + }) + + post_url = self._html_search_regex(r'action=["\']([^"\']+?)["\']\s+method=["\']post["\']', login_page, 'login POST url') + temp_page = self._download_webpage( + post_url, None, 'Sending login data', 'Unable to send login data', + headers=self._HEADERS, data=urlencode_postdata({ + 'username': username, + 'password': password, + 'submitAction': 'Log In' + })) + if 'Your username or password was incorrect' in temp_page: + raise ExtractorError('Your username or password was incorrect', expected=True) + + handle = self._request_webpage( + f'{self._AUTH_BASE_URL}/auth', None, 'Logging in', headers=self._HEADERS, + query={ + 'client_id': 'tennis-tv-web', + 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html', + 'state': random_uuidv4(), + 'response_mode': 'fragment', + 'response_type': 'code', + 'scope': 'openid', + 'nonce': random_uuidv4(), + 'prompt': 'none' + }) + + self.get_token(None, { + 'code': urllib.parse.parse_qs(handle.geturl())['code'][-1], + 'grant_type': 'authorization_code', + 'client_id': 'tennis-tv-web', + 'redirect_uri': 'https://www.tennistv.com/resources/v1.1.10/html/silent-check-sso.html' + }) + + def get_token(self, video_id, payload): + res = self._download_json( + f'{self._AUTH_BASE_URL}/token', video_id, 'Fetching tokens', + 'Unable to fetch tokens', headers=self._HEADERS, data=urlencode_postdata(payload)) + + self.access_token = res.get('access_token') or self.access_token + self.refresh_token = res.get('refresh_token') or self.refresh_token - if login_result['entitlement'] != 'SUBSCRIBED': - self.report_warning('%s may not be subscribed to %s.' % (username, self.IE_NAME)) + def _real_initialize(self): + if self.access_token and self.refresh_token: + return - self._session_token = login_result['sessionToken'] + cookies = self._get_cookies('https://www.tennistv.com/') + if not cookies.get('access_token') or not cookies.get('refresh_token'): + self.raise_login_required() + self.access_token, self.refresh_token = cookies['access_token'].value, cookies['refresh_token'].value - def _real_initialize(self): - if not self._session_token: - raise self.raise_login_required('Login info is needed for this website', method='password') + def _download_session_json(self, video_id, entryid,): + return self._download_json( + f'https://atppayments.streamamg.com/api/v1/session/ksession/?lang=en&apijwttoken={self.access_token}&entryId={entryid}', + video_id, 'Downloading ksession token', 'Failed to download ksession token', headers=self._HEADERS) def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - internal_id = self._search_regex(r'video=([\w-]+)', webpage, 'internal video id') + entryid = self._search_regex(r'data-entry-id=["\']([^"\']+)', webpage, 'entryID') + session_json = self._download_session_json(video_id, entryid) - headers = { - 'Origin': 'https://www.tennistv.com', - 'authorization': 'ATP %s' % self._session_token, - 'content-type': 'application/json', - 'Referer': url, - } - check_data = { - 'videoID': internal_id, - 'VideoUrlType': 'HLS', - } - check_json = json.dumps(check_data).encode('utf-8') - check_result = self._download_json( - 'https://www.tennistv.com/api/users/v1/entitlementchecknondiva', - video_id, note='Checking video authorization', headers=headers, data=check_json) - formats = self._extract_m3u8_formats(check_result['contentUrl'], video_id, ext='mp4') - self._sort_formats(formats) + k_session = session_json.get('KSession') + if k_session is None: + self.get_token(video_id, { + 'grant_type': 'refresh_token', + 'refresh_token': self.refresh_token, + 'client_id': 'tennis-tv-web' + }) + k_session = self._download_session_json(video_id, entryid).get('KSession') + if k_session is None: + raise ExtractorError('Failed to get KSession, possibly a premium video', expected=True) - vdata = self._download_json( - 'https://www.tennistv.com/api/en/v2/none/common/video/%s' % video_id, - video_id, headers=headers) + if session_json.get('ErrorMessage'): + self.report_warning(session_json['ErrorMessage']) - timestamp = unified_timestamp(vdata['timestamp']) - thumbnail = vdata['video']['thumbnailUrl'] - description = vdata['displayText']['description'] - title = vdata['video']['title'] + formats, subtitles = self._extract_m3u8_formats_and_subtitles( + self._FORMAT_URL.format(partner=self._PARTNER_ID, entry=entryid, session=k_session), video_id) - series = vdata['tour'] - venue = vdata['displayText']['venue'] - round_str = vdata['seo']['round'] + self._sort_formats(formats) return { 'id': video_id, - 'title': title, - 'description': description, + 'title': self._html_extract_title(webpage) or self._og_search_title(webpage), + 'description': self._html_search_regex( + (r'', *self._og_regexes('description')), + webpage, 'description', fatal=False), + 'thumbnail': f'https://open.http.mp.streamamg.com/p/{self._PARTNER_ID}/sp/{self._PARTNER_ID}00/thumbnail/entry_id/{entryid}/version/100001/height/1920', + 'timestamp': unified_timestamp(self._html_search_regex( + r'', webpage, 'upload time')), + 'series': self._html_search_regex(r'data-series\s*?=\s*?"(.*?)"', webpage, 'series', fatal=False) or None, + 'season': self._html_search_regex(r'data-tournament-city\s*?=\s*?"(.*?)"', webpage, 'season', fatal=False) or None, + 'episode': self._html_search_regex(r'data-round\s*?=\s*?"(.*?)"', webpage, 'round', fatal=False) or None, 'formats': formats, - 'thumbnail': thumbnail, - 'timestamp': timestamp, - 'series': series, - 'season': venue, - 'episode': round_str, + 'subtitles': subtitles, }