From 5c5fae6d2fd94f3845ec187a1ea29cb17ed83440 Mon Sep 17 00:00:00 2001
From: 2ShedsJackson <50157910+2ShedsJackson@users.noreply.github.com>
Date: Sat, 20 Mar 2021 03:41:11 -0700
Subject: [PATCH] [amcnetworks] Fix extractor (#179)

* Prefer use of manifest based on `releasePid` since the one based on `videoPid` may have Fairplay
* Additional thumbnail images were added
* Don't add `season_number` and `series` to `title`
* `series` is now set to `None` rather than "_" when empty
* fix bug with age limit

Authored by: 2ShedsJackson
---
 yt_dlp/extractor/amcnetworks.py | 67 ++++++++++++++++++++++++---------
 yt_dlp/utils.py                 |  1 +
 2 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/yt_dlp/extractor/amcnetworks.py b/yt_dlp/extractor/amcnetworks.py
index b8027bbca..1f16d3a33 100644
--- a/yt_dlp/extractor/amcnetworks.py
+++ b/yt_dlp/extractor/amcnetworks.py
@@ -65,15 +65,35 @@ class AMCNetworksIE(ThePlatformIE):
     def _real_extract(self, url):
         site, display_id = re.match(self._VALID_URL, url).groups()
         requestor_id = self._REQUESTOR_ID_MAP[site]
-        properties = self._download_json(
-            'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s' % (requestor_id.lower(), display_id),
-            display_id)['data']['properties']
+        page_data = self._download_json(
+            'https://content-delivery-gw.svc.ds.amcn.com/api/v2/content/amcn/%s/url/%s'
+            % (requestor_id.lower(), display_id), display_id)['data']
+        properties = page_data.get('properties') or {}
         query = {
             'mbr': 'true',
             'manifest': 'm3u',
         }
-        tp_path = 'M_UwQC/media/' + properties['videoPid']
-        media_url = 'https://link.theplatform.com/s/' + tp_path
+
+        video_player_count = 0
+        try:
+            for v in page_data['children']:
+                if v.get('type') == 'video-player':
+                    releasePid = v['properties']['currentVideo']['meta']['releasePid']
+                    tp_path = 'M_UwQC/' + releasePid
+                    media_url = 'https://link.theplatform.com/s/' + tp_path
+                    video_player_count += 1
+        except KeyError:
+            pass
+        if video_player_count > 1:
+            self.report_warning(
+                'The JSON data has %d video players. Only one will be extracted' % video_player_count)
+
+        # Fall back to videoPid if releasePid not found.
+        # TODO: Fall back to videoPid if releasePid manifest uses DRM.
+        if not video_player_count:
+            tp_path = 'M_UwQC/media/' + properties['videoPid']
+            media_url = 'https://link.theplatform.com/s/' + tp_path
+
         theplatform_metadata = self._download_theplatform_metadata(tp_path, display_id)
         info = self._parse_theplatform_metadata(theplatform_metadata)
         video_id = theplatform_metadata['pid']
@@ -90,30 +110,41 @@ class AMCNetworksIE(ThePlatformIE):
         formats, subtitles = self._extract_theplatform_smil(
             media_url, video_id)
         self._sort_formats(formats)
+
+        thumbnails = []
+        thumbnail_urls = [properties.get('imageDesktop')]
+        if 'thumbnail' in info:
+            thumbnail_urls.append(info.pop('thumbnail'))
+        for thumbnail_url in thumbnail_urls:
+            if not thumbnail_url:
+                continue
+            mobj = re.search(r'(\d+)x(\d+)', thumbnail_url)
+            thumbnails.append({
+                'url': thumbnail_url,
+                'width': int(mobj.group(1)) if mobj else None,
+                'height': int(mobj.group(2)) if mobj else None,
+            })
+
         info.update({
+            'age_limit': parse_age_limit(rating),
+            'formats': formats,
             'id': video_id,
             'subtitles': subtitles,
-            'formats': formats,
-            'age_limit': parse_age_limit(parse_age_limit(rating)),
+            'thumbnails': thumbnails,
         })
         ns_keys = theplatform_metadata.get('$xmlns', {}).keys()
         if ns_keys:
             ns = list(ns_keys)[0]
-            series = theplatform_metadata.get(ns + '$show')
-            season_number = int_or_none(
-                theplatform_metadata.get(ns + '$season'))
-            episode = theplatform_metadata.get(ns + '$episodeTitle')
+            episode = theplatform_metadata.get(ns + '$episodeTitle') or None
             episode_number = int_or_none(
                 theplatform_metadata.get(ns + '$episode'))
-            if season_number:
-                title = 'Season %d - %s' % (season_number, title)
-            if series:
-                title = '%s - %s' % (series, title)
+            season_number = int_or_none(
+                theplatform_metadata.get(ns + '$season'))
+            series = theplatform_metadata.get(ns + '$show') or None
             info.update({
-                'title': title,
-                'series': series,
-                'season_number': season_number,
                 'episode': episode,
                 'episode_number': episode_number,
+                'season_number': season_number,
+                'series': series,
             })
         return info
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 27d649565..eb194589e 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -4106,6 +4106,7 @@ def parse_age_limit(s):
     m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
     if m:
         return int(m.group('age'))
+    s = s.upper()
     if s in US_RATINGS:
         return US_RATINGS[s]
     m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)