yt-dlp/yt_dlp/extractor/amp.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
    mimetype2ext,
    parse_iso8601,
    strip_jsonp,
    unified_timestamp,
    url_or_none,
)


class AMPIE(InfoExtractor):  # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
    # parse Akamai Adaptive Media Player feed
    def _extract_feed_info(self, url):
        """Download an Akamai AMP feed and turn its item into an info dict.

        The response body is passed through ``strip_jsonp`` before being
        parsed as JSON. The item is read from ``feed['channel']['item']``.

        @param url  URL of the AMP feed.
        @returns    A dict with the keys ``id``, ``title``, ``description``,
                    ``thumbnails``, ``timestamp``, ``duration``, ``subtitles``
                    and ``formats``. ``formats`` may be empty when the feed
                    lists no usable media content.
        @raises ExtractorError  If the feed contains no item; the feed's own
                                ``error`` message is included when present.
        @raises KeyError        If the item has no ``guid``.
        """
        feed = self._download_json(
            url, None, 'Downloading Akamai AMP feed',
            'Unable to download Akamai AMP feed', transform_source=strip_jsonp)
        # `or {}` (rather than a .get() default) also covers an explicit null "channel"
        item = (feed.get('channel') or {}).get('item')
        if not item:
            error = feed.get('error')
            if error:
                raise ExtractorError('%s said: %s' % (self.IE_NAME, error))
            # Without this branch a feed lacking both keys would surface as a bare KeyError
            raise ExtractorError('Akamai AMP feed contains neither an item nor an error message')

        video_id = item['guid']

        def get_media_node(name, default=None):
            # Look for "media-<name>" inside the media group first, then on the
            # item itself, and finally fall back to the un-prefixed key.
            media_name = 'media-%s' % name
            media_group = item.get('media-group') or item
            return media_group.get(media_name) or item.get(media_name) or item.get(name, default)

        def get_media_list(name):
            # A node may be a single dict or a list of dicts; a missing/empty
            # node yields an empty list so that callers can always iterate.
            node = get_media_node(name)
            if not node:
                return []
            return [node] if isinstance(node, dict) else node

        def get_attributes(node):
            # Tolerate both a missing and an explicit null "@attributes"
            return node.get('@attributes') or {}

        thumbnails = []
        for thumbnail_data in get_media_list('thumbnail'):
            thumbnail = get_attributes(thumbnail_data)
            thumbnail_url = url_or_none(thumbnail.get('url'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'url': self._proto_relative_url(thumbnail_url, 'http:'),
                'width': int_or_none(thumbnail.get('width')),
                'height': int_or_none(thumbnail.get('height')),
            })

        subtitles = {}
        for subtitle_data in get_media_list('subTitle'):
            subtitle = get_attributes(subtitle_data)
            subtitle_href = url_or_none(subtitle.get('href'))
            if not subtitle_href:
                continue
            # Entries without a language are filed under 'en'
            subtitles.setdefault(subtitle.get('lang') or 'en', []).append({
                'url': subtitle_href,
                'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),
            })

        formats = []
        media_content = get_media_list('content')
        for media_data in media_content:
            media = get_attributes(media_data)
            media_url = url_or_none(media.get('url'))
            if not media_url:
                continue
            # Prefer the declared MIME type; fall back to the URL's extension
            ext = mimetype2ext(media.get('type')) or determine_ext(media_url)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',
                    video_id, f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                media_category = media_data.get('media-category') or {}
                formats.append({
                    'format_id': get_attributes(media_category).get('label'),
                    'url': media_url,
                    'tbr': int_or_none(media.get('bitrate')),
                    'filesize': int_or_none(media.get('fileSize')),
                    'ext': ext,
                })

        # NOTE(review): the second positional argument to unified_timestamp is
        # kept exactly as before - confirm that ' ' is what is intended here.
        timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))

        # Duration is taken from the first content entry only; guard against
        # a feed that lists no content at all.
        first_media = get_attributes(media_content[0]) if media_content else {}

        return {
            'id': video_id,
            'title': get_media_node('title'),
            'description': get_media_node('description'),
            'thumbnails': thumbnails,
            'timestamp': timestamp,
            'duration': int_or_none(first_media.get('duration')),
            'subtitles': subtitles,
            'formats': formats,
        }
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`from .common import InfoExtractor`
			`from ..utils import (`
[amp] extract error message(closes #12795) 8 years ago			`ExtractorError,`
[misc] Add `hatch`, `ruff`, `pre-commit` and improve dev docs (#7409) Authored by: bashonly, seproDev, Grub4K Co-authored-by: bashonly <88596187+bashonly@users.noreply.github.com> Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com> 6 months ago			`determine_ext,`
Improve URL extraction 6 years ago			`int_or_none,`
			`mimetype2ext,`
			`parse_iso8601,`
[extractor/foxnews] Fix extractors (#7222) Closes #6050 Authored by: bashonly 1 year ago			`strip_jsonp,`
Update to ytdl-2021.02.04.1 except youtube 4 years ago			`unified_timestamp,`
Improve URL extraction 6 years ago			`url_or_none,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`)`


[cleanup] Misc Closes #5541 2 years ago			`class AMPIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`# parse Akamai Adaptive Media Player feed`
			`def _extract_feed_info(self, url):`
[amp] extract error message(closes #12795) 8 years ago			`feed = self._download_json(`
[bleacherreport] fix style issues and simplify 9 years ago			`url, None, 'Downloading Akamai AMP feed',`
[extractor/foxnews] Fix extractors (#7222) Closes #6050 Authored by: bashonly 1 year ago			`'Unable to download Akamai AMP feed', transform_source=strip_jsonp)`
[amp] extract error message(closes #12795) 8 years ago			`item = feed.get('channel', {}).get('item')`
			`if not item:`
			`raise ExtractorError('%s said: %s' % (self.IE_NAME, feed['error']))`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago
			`video_id = item['guid']`
[bleacherreport] fix style issues and simplify 9 years ago
			`def get_media_node(name, default=None):`
			`media_name = 'media-%s' % name`
			`media_group = item.get('media-group') or item`
			`return media_group.get(media_name) or item.get(media_name) or item.get(name, default)`

[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`thumbnails = []`
[bleacherreport] fix style issues and simplify 9 years ago			`media_thumbnail = get_media_node('thumbnail')`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`if media_thumbnail:`
			`if isinstance(media_thumbnail, dict):`
			`media_thumbnail = [media_thumbnail]`
			`for thumbnail_data in media_thumbnail:`
[amp] imporove thumbnail and subtitle extraction 8 years ago			`thumbnail = thumbnail_data.get('@attributes', {})`
Improve URL extraction 6 years ago			`thumbnail_url = url_or_none(thumbnail.get('url'))`
[amp] imporove thumbnail and subtitle extraction 8 years ago			`if not thumbnail_url:`
			`continue`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`thumbnails.append({`
[amp] imporove thumbnail and subtitle extraction 8 years ago			`'url': self._proto_relative_url(thumbnail_url, 'http:'),`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`'width': int_or_none(thumbnail.get('width')),`
			`'height': int_or_none(thumbnail.get('height')),`
			`})`

			`subtitles = {}`
[bleacherreport] fix style issues and simplify 9 years ago			`media_subtitle = get_media_node('subTitle')`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`if media_subtitle:`
			`if isinstance(media_subtitle, dict):`
			`media_subtitle = [media_subtitle]`
			`for subtitle_data in media_subtitle:`
[amp] imporove thumbnail and subtitle extraction 8 years ago			`subtitle = subtitle_data.get('@attributes', {})`
Improve URL extraction 6 years ago			`subtitle_href = url_or_none(subtitle.get('href'))`
[amp] imporove thumbnail and subtitle extraction 8 years ago			`if not subtitle_href:`
			`continue`
			`subtitles.setdefault(subtitle.get('lang') or 'en', []).append({`
			`'url': subtitle_href,`
			`'ext': mimetype2ext(subtitle.get('type')) or determine_ext(subtitle_href),`
			`})`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago
			`formats = []`
[bleacherreport] fix style issues and simplify 9 years ago			`media_content = get_media_node('content')`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`if isinstance(media_content, dict):`
			`media_content = [media_content]`
			`for media_data in media_content:`
use mimetype2ext to determine manifest ext in multiple extractors 8 years ago			`media = media_data.get('@attributes', {})`
Improve URL extraction 6 years ago			`media_url = url_or_none(media.get('url'))`
use mimetype2ext to determine manifest ext in multiple extractors 8 years ago			`if not media_url:`
			`continue`
[amp] Fix a typo 8 years ago			`ext = mimetype2ext(media.get('type')) or determine_ext(media_url)`
use mimetype2ext to determine manifest ext in multiple extractors 8 years ago			`if ext == 'f4m':`
Simplify formats accumulation for f4m/m3u8/smil formats Now all _extract_*_formats routines return a list 9 years ago			`formats.extend(self._extract_f4m_formats(`
use mimetype2ext to determine manifest ext in multiple extractors 8 years ago			`media_url + '?hdcore=3.4.0&plugin=aasp-3.4.0.132.124',`
Simplify formats accumulation for f4m/m3u8/smil formats Now all _extract_*_formats routines return a list 9 years ago			`video_id, f4m_id='hds', fatal=False))`
use mimetype2ext to determine manifest ext in multiple extractors 8 years ago			`elif ext == 'm3u8':`
[extractor/foxnews] Fix extractors (#7222) Closes #6050 Authored by: bashonly 1 year ago			`fmts, subs = self._extract_m3u8_formats_and_subtitles(`
			`media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)`
			`formats.extend(fmts)`
			`self._merge_subtitles(subs, target=subtitles)`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`else:`
			`formats.append({`
[abcnews] Added a new extractor (closes #3992) Related: #6108, #8664, #9459 9 years ago			`'format_id': media_data.get('media-category', {}).get('@attributes', {}).get('label'),`
Improve URL extraction 6 years ago			`'url': media_url,`
[bleacherreport] fix style issues and simplify 9 years ago			`'tbr': int_or_none(media.get('bitrate')),`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`'filesize': int_or_none(media.get('fileSize')),`
use mimetype2ext to determine manifest ext in multiple extractors 8 years ago			`'ext': ext,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`})`

Update to ytdl-2021.02.04.1 except youtube 4 years ago			`timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))`
[amp] Fix upload timestamp extraction (Closes #9007) 9 years ago
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`return {`
			`'id': video_id,`
[bleacherreport] fix style issues and simplify 9 years ago			`'title': get_media_node('title'),`
			`'description': get_media_node('description'),`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`'thumbnails': thumbnails,`
[amp] Fix upload timestamp extraction (Closes #9007) 9 years ago			`'timestamp': timestamp,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`'duration': int_or_none(media_content[0].get('@attributes', {}).get('duration')),`
[amp] Add missing subtitles to info dict 9 years ago			`'subtitles': subtitles,`
[amp] Add generic extractor for Akamai AMP feeds and use it in dramafever and foxnews extractors 9 years ago			`'formats': formats,`
			`}`