# yt-dlp/youtube_dl/extractor/internetvideoarchive.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_urlparse,
    compat_urllib_parse_urlencode,
)
from ..utils import (
    xpath_with_ns,
)


class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net flash player URLs.

    Extraction is a two-step process: the flash configuration XML is
    downloaded first, then the playlist URL it references is rewritten
    (see ``_clean_query``) and fetched as an MRSS feed that lists the
    available formats and the video metadata.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        'info_dict': {
            'id': '452693',
            'ext': 'mp4',
            'title': 'SKYFALL',
            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            'duration': 152,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL for the raw query string."""
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Return a re-encoded query string tuned for the playlist request.

        Only the 'publishedid' and 'customerid' parameters of ``query`` are
        kept (first value of each); 'playerid' and 'videokbrate' are then
        forced to fixed values.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        # A very high requested bitrate is used to get the best quality
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse_urlencode(cleaned_dic)

    @staticmethod
    def _int_or_none(value):
        """Return ``int(value)``, or None if it is missing or not an integer."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Return the info dict (id, title, formats, ...) for ``url``."""
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration = self._download_xml(url, video_id,
                                                'Downloading flash configuration')
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests)
        file_url = re.sub(r'(?<=\?)(.+)$',
                          lambda m: self._clean_query(m.group()),
                          file_url)
        info = self._download_xml(file_url, video_id,
                                  'Downloading video info')
        item = info.find('channel/item')

        def _bp(p):
            # Build the path through xpath_with_ns with the prefix ->
            # namespace URI mapping used by the feed.
            return xpath_with_ns(
                p,
                {
                    'media': 'http://search.yahoo.com/mrss/',
                    'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
                }
            )

        formats = []
        # The duration is carried by the media:content elements; remember it
        # while iterating instead of relying on the loop variable afterwards,
        # which is unbound when the feed lists no content at all.
        duration = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            content_duration = self._int_or_none(attr.get('duration'))
            if content_duration is not None:
                duration = content_duration
            f_url = attr.get('url')
            if not f_url:
                # Nothing to download for this entry
                continue
            width = self._int_or_none(attr.get('width'))
            bitrate = self._int_or_none(attr.get('bitrate'))
            fmt = {
                'url': f_url,
                'width': width,
                'tbr': bitrate,
            }
            if width is not None and bitrate is not None:
                fmt['format_id'] = '%d-%dk' % (width, bitrate)
            formats.append(fmt)

        self._sort_formats(formats)

        # Thumbnail and description are optional metadata: report None
        # rather than failing the whole extraction when they are absent.
        thumbnail_el = item.find(_bp('media:thumbnail'))
        thumbnail = thumbnail_el.get('url') if thumbnail_el is not None else None
        description_el = item.find('description')
        description = description_el.text if description_el is not None else None

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
        }
[internetvideoarchive] Modernize 10 years ago			`from __future__ import unicode_literals`

Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`import re`

			`from .common import InfoExtractor`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 10 years ago			`from ..compat import (`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`compat_urlparse,`
[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 8 years ago			`compat_urllib_parse_urlencode,`
Fix imports and general cleanup · Import from compat what comes from compat. Yes, some names are available in utils too, but that's an implementation detail. · Use _match_id consistently whenever possible · Fix some outdated tests · Use consistent valid URL (always match the whole protocol, no ^ at start required) · Use modern test definitions 10 years ago			`)`
			`from ..utils import (`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`xpath_with_ns,`
			`)`


			`class InternetVideoArchiveIE(InfoExtractor):`
			`_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'`

			`_TEST = {`
[internetvideoarchive] Modernize 10 years ago			`'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',`
			`'info_dict': {`
			`'id': '452693',`
			`'ext': 'mp4',`
			`'title': 'SKYFALL',`
			`'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',`
[internetvideoarchive] Update test's duration field 10 years ago			`'duration': 152,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`},`
			`}`

			`@staticmethod`
			`def _build_url(query):`
			`return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query`

Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago			`@staticmethod`
			`def _clean_query(query):`
			`NEEDED_ARGS = ['publishedid', 'customerid']`
			`query_dic = compat_urlparse.parse_qs(query)`
PEP8 applied 10 years ago			`cleaned_dic = dict((k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)`
Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago			`# Other player ids return m3u8 urls`
			`cleaned_dic['playerid'] = '247'`
			`cleaned_dic['videokbrate'] = '100000'`
[compat] Add compat_urllib_parse_urlencode and eliminate encode_dict encode_dict functionality has been improved and moved directly into compat_urllib_parse_urlencode All occurrences of compat_urllib_parse.urlencode throughout the codebase have been replaced by compat_urllib_parse_urlencode Closes #8974 8 years ago			`return compat_urllib_parse_urlencode(cleaned_dic)`
Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`def _real_extract(self, url):`
			`query = compat_urlparse.urlparse(url).query`
			`query_dic = compat_urlparse.parse_qs(query)`
			`video_id = query_dic['publishedid'][0]`
			`url = self._build_url(query)`

Use the new '_download_xml' helper in more extractors 11 years ago			`flashconfiguration = self._download_xml(url, video_id,`
PEP8: applied even more rules 10 years ago			`'Downloading flash configuration')`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`file_url = flashconfiguration.find('file').text`
			`file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')`
Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago			`# Replace some of the parameters in the query to get the best quality`
			`# and http links (no m3u8 manifests)`
			`file_url = re.sub(r'(?<=\?)(.+)$',`
PEP8: applied even more rules 10 years ago			`lambda m: self._clean_query(m.group()),`
			`file_url)`
Use the new '_download_xml' helper in more extractors 11 years ago			`info = self._download_xml(file_url, video_id,`
PEP8: applied even more rules 10 years ago			`'Downloading video info')`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`item = info.find('channel/item')`

			`def _bp(p):`
PEP8: applied even more rules 10 years ago			`return xpath_with_ns(`
			`p,`
			`{`
			`'media': 'http://search.yahoo.com/mrss/',`
			`'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',`
			`}`
			`)`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`formats = []`
			`for content in item.findall(_bp('media:group/media:content')):`
			`attr = content.attrib`
			`f_url = attr['url']`
[internetvideoarchive] Use centralized format sorting 11 years ago			`width = int(attr['width'])`
			`bitrate = int(attr['bitrate'])`
			`format_id = '%d-%dk' % (width, bitrate)`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`formats.append({`
[internetvideoarchive] Use centralized format sorting 11 years ago			`'format_id': format_id,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`'url': f_url,`
[internetvideoarchive] Use centralized format sorting 11 years ago			`'width': width,`
			`'tbr': bitrate,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`})`
[internetvideoarchive] Use centralized format sorting 11 years ago
			`self._sort_formats(formats)`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago
Fix the duration field for the VideoDetective and InternetVideoArchive tests Also remove the use of the old format system and the comment 11 years ago			`return {`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`'id': video_id,`
			`'title': item.find('title').text,`
			`'formats': formats,`
			`'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],`
			`'description': item.find('description').text,`
			`'duration': int(attr['duration']),`
			`}`