From a9d069f5b8540f15caaf696bc39ce6a969f8b11c Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 29 Nov 2022 07:50:58 +0530 Subject: [PATCH] [extractor/amazonminitv] Cleanup 48652590ec401f4e747a5e51552cdcac20744aa1 --- yt_dlp/extractor/amazonminitv.py | 162 +++++++++++++------------------ 1 file changed, 65 insertions(+), 97 deletions(-) diff --git a/yt_dlp/extractor/amazonminitv.py b/yt_dlp/extractor/amazonminitv.py index 793fac2e4..730996853 100644 --- a/yt_dlp/extractor/amazonminitv.py +++ b/yt_dlp/extractor/amazonminitv.py @@ -4,16 +4,43 @@ from .common import InfoExtractor from ..utils import ExtractorError, int_or_none, traverse_obj, try_get -class AmazonMiniTVIE(InfoExtractor): +class AmazonMiniTVBaseIE(InfoExtractor): + def _real_initialize(self): + self._download_webpage( + 'https://www.amazon.in/minitv', None, + note='Fetching guest session cookies') + AmazonMiniTVBaseIE.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value + + def _call_api(self, asin, data=None, note=None): + device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'} + if data: + data['variables'].update({ + 'contentType': 'VOD', + 'sessionIdToken': self.session_id, + **device, + }) + + resp = self._download_json( + f'https://www.amazon.in/minitv/api/web/{"graphql" if data else "prs"}', + asin, note=note, headers={'Content-Type': 'application/json'}, + data=json.dumps(data).encode() if data else None, + query=None if data else { + 'deviceType': 'A1WMMUXPCUJL4N', + 'contentId': asin, + **device, + }) + + if resp.get('errors'): + raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') + elif not data: + return resp + return resp['data'][data['operationName']] + + +class AmazonMiniTVIE(AmazonMiniTVBaseIE): _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P[a-f0-9-]+)' - _HEADERS = { - 'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36', - } - _CLIENT_ID = 'ATVIN' - _DEVICE_LOCALE = 'en_GB' _TESTS = [{ 'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', - 'md5': '0045a5ea38dddd4de5a5fcec7274b476', 'info_dict': { 'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840', 'ext': 'mp4', @@ -24,11 +51,7 @@ class AmazonMiniTVIE(InfoExtractor): 'release_timestamp': 1644710400, 'release_date': '20220213', 'duration': 846, - 'chapters': [{ - 'start_time': 815.0, - 'end_time': 846, - 'title': 'End Credits', - }], + 'chapters': 'count:2', 'series': 'Couple Goals', 'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0', 'season': 'Season 3', @@ -40,7 +63,6 @@ class AmazonMiniTVIE(InfoExtractor): }, }, { 'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv', - 'md5': '9a977bffd5d99c4dd2a32b360aee1863', 'info_dict': { 'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab', 'ext': 'mp4', @@ -63,6 +85,7 @@ class AmazonMiniTVIE(InfoExtractor): 'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab', 'only_matching': True, }] + _GRAPHQL_QUERY_CONTENT = ''' query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) { content( @@ -107,68 +130,13 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, } }''' - def _call_api(self, asin, data=None, note=None): - query = {} - headers = self._HEADERS.copy() - if data: - name = 'graphql' - data['variables'].update({ - 'clientId': self._CLIENT_ID, - 'contentType': 'VOD', - 'deviceLocale': self._DEVICE_LOCALE, - 'sessionIdToken': self.session_id, - }) - headers.update({'Content-Type': 'application/json'}) - else: - name = 'prs' - query.update({ - 'clientId': self._CLIENT_ID, - 'deviceType': 'A1WMMUXPCUJL4N', - 'contentId': asin, - 'deviceLocale': self._DEVICE_LOCALE, - }) - - resp = self._download_json( - f'https://www.amazon.in/minitv/api/web/{name}', - asin, query=query, data=json.dumps(data).encode() if data else None, - headers=headers, note=note) - - if 'errors' in resp: - raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}') - - if data: - resp = resp['data'][data['operationName']] - return resp - - def _real_initialize(self): - # Download webpage to get the required guest session cookies - self._download_webpage( - 'https://www.amazon.in/minitv', - None, - headers=self._HEADERS, - note='Downloading webpage') - - self.session_id = self._get_cookies('https://www.amazon.in')['session-id'].value - def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' - - title_info = self._call_api( - asin, data={ - 'operationName': 'content', - 'variables': { - 'contentId': asin, - }, - 'query': self._GRAPHQL_QUERY_CONTENT, - }, - note='Downloading title info') - prs = self._call_api(asin, note='Downloading playback info') - formats = [] - subtitles = {} + formats, subtitles = [], {} for type_, asset in prs['playbackAssets'].items(): - if not isinstance(asset, dict): + if not traverse_obj(asset, 'manifestUrl'): continue if type_ == 'hls': m3u8_fmts, m3u8_subs = self._extract_m3u8_formats_and_subtitles( @@ -181,14 +149,16 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, asset['manifestUrl'], asin, mpd_id=type_, fatal=False) formats.extend(mpd_fmts) subtitles = self._merge_subtitles(subtitles, mpd_subs) + else: + self.report_warning(f'Unknown asset type: {type_}') - duration = traverse_obj(title_info, ('description', 'contentLengthInSeconds')) + title_info = self._call_api( + asin, note='Downloading title info', data={ + 'operationName': 'content', + 'variables': {'contentId': asin}, + 'query': self._GRAPHQL_QUERY_CONTENT, + }) credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000) - chapters = [{ - 'start_time': credits_time, - 'end_time': duration + credits_time, # FIXME: I suppose this is correct - 'title': 'End Credits', - }] if credits_time and duration else [] is_episode = title_info.get('vodType') == 'EPISODE' return { @@ -203,8 +173,11 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, } for type_, url in (title_info.get('images') or {}).items()], 'description': traverse_obj(title_info, ('description', 'synopsis')), 'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)), - 'duration': duration, - 'chapters': chapters, + 'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')), + 'chapters': [{ + 'start_time': credits_time, + 'title': 'End Credits', + }] if credits_time else [], 'series': title_info.get('seriesName'), 'series_id': title_info.get('seriesId'), 'season_number': title_info.get('seasonNumber'), @@ -215,7 +188,7 @@ query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, } -class AmazonMiniTVSeasonIE(AmazonMiniTVIE): +class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:season' _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P[a-f0-9-]+)' IE_DESC = 'Amazon MiniTV Series, "minitv:season:" prefix' @@ -229,6 +202,7 @@ class AmazonMiniTVSeasonIE(AmazonMiniTVIE): 'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0', 'only_matching': True, }] + _GRAPHQL_QUERY = ''' query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) { getEpisodes( @@ -258,25 +232,22 @@ query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonI def _entries(self, asin): season_info = self._call_api( - asin, - data={ + asin, note='Downloading season info', data={ 'operationName': 'getEpisodes', - 'variables': { - 'episodeOrSeasonId': asin, - }, + 'variables': {'episodeOrSeasonId': asin}, 'query': self._GRAPHQL_QUERY, - }, - note='Downloading season info') + }) for episode in season_info['episodes']: - yield self.url_result(f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) + yield self.url_result( + f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId']) def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' - return self.playlist_result(self._entries(asin), playlist_id=asin) + return self.playlist_result(self._entries(asin), asin) -class AmazonMiniTVSeriesIE(AmazonMiniTVIE): +class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE): IE_NAME = 'amazonminitv:series' _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P[a-f0-9-]+)' _TESTS = [{ @@ -289,6 +260,7 @@ class AmazonMiniTVSeriesIE(AmazonMiniTVIE): 'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0', 'only_matching': True, }] + _GRAPHQL_QUERY = ''' query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) { getSeasons( @@ -304,19 +276,15 @@ query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeas def _entries(self, asin): season_info = self._call_api( - asin, - data={ + asin, note='Downloading series info', data={ 'operationName': 'getSeasons', - 'variables': { - 'episodeOrSeasonOrSeriesId': asin, - }, + 'variables': {'episodeOrSeasonOrSeriesId': asin}, 'query': self._GRAPHQL_QUERY, - }, - note='Downloading series info') + }) for season in season_info['seasons']: yield self.url_result(f'amazonminitv:season:{season["seasonId"]}', AmazonMiniTVSeasonIE, season['seasonId']) def _real_extract(self, url): asin = f'amzn1.dv.gti.{self._match_id(url)}' - return self.playlist_result(self._entries(asin), playlist_id=asin) + return self.playlist_result(self._entries(asin), asin)