yt-dlp/youtube_dl/extractor/internetvideoarchive.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    compat_urlparse,
    compat_urllib_parse,
    xpath_with_ns,
)


class InternetVideoArchiveIE(InfoExtractor):
    """Extractor for video.internetvideoarchive.net flash player URLs.

    Matches player URLs whose query string carries a ``publishedid``
    argument, which is used as the video id.
    """

    _VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'

    _TEST = {
        'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',
        'info_dict': {
            'id': '452693',
            'ext': 'mp4',
            'title': 'SKYFALL',
            'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',
            'duration': 149,
        },
    }

    @staticmethod
    def _build_url(query):
        """Return the flash configuration URL for the given query string.

        ``query`` is appended verbatim after the ``?``; it must already be
        URL-encoded.
        """
        return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query

    @staticmethod
    def _clean_query(query):
        """Return a re-encoded query string restricted to the needed arguments.

        Only ``publishedid`` and ``customerid`` are kept from ``query`` (the
        first value of each); ``playerid`` and ``videokbrate`` are then set
        to fixed values.
        """
        NEEDED_ARGS = ['publishedid', 'customerid']
        query_dic = compat_urlparse.parse_qs(query)
        cleaned_dic = dict(
            (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
        # Other player ids return m3u8 urls
        cleaned_dic['playerid'] = '247'
        cleaned_dic['videokbrate'] = '100000'
        return compat_urllib_parse.urlencode(cleaned_dic)

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by ``url``.

        Downloads the flash configuration XML, derives the MRSS playlist URL
        from its ``file`` element, downloads that playlist and builds one
        format per ``media:content`` entry of the first ``channel/item``.

        Returns a dict with ``id``, ``title``, ``formats``, ``thumbnail``,
        ``description`` and ``duration`` (``None`` when no ``media:content``
        entry carries a numeric ``duration`` attribute).
        """
        query = compat_urlparse.urlparse(url).query
        query_dic = compat_urlparse.parse_qs(query)
        video_id = query_dic['publishedid'][0]
        url = self._build_url(query)

        flashconfiguration = self._download_xml(
            url, video_id, 'Downloading flash configuration')
        file_url = flashconfiguration.find('file').text
        file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')
        # Replace some of the parameters in the query to get the best quality
        # and http links (no m3u8 manifests)
        file_url = re.sub(
            r'(?<=\?)(.+)$',
            lambda m: self._clean_query(m.group()),
            file_url)
        info = self._download_xml(
            file_url, video_id, 'Downloading video info')
        item = info.find('channel/item')

        def _bp(p):
            # Expand the "media:"/"jwplayer:" prefixes used in the paths below.
            return xpath_with_ns(p, {
                'media': 'http://search.yahoo.com/mrss/',
                'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats',
            })

        formats = []
        # Tracked explicitly instead of reading the loop variable after the
        # loop: that raised NameError when there was no media:content entry
        # and KeyError/ValueError when the last entry lacked a duration.
        duration = None
        for content in item.findall(_bp('media:group/media:content')):
            attr = content.attrib
            width = int(attr['width'])
            bitrate = int(attr['bitrate'])
            formats.append({
                'format_id': '%d-%dk' % (width, bitrate),
                'url': attr['url'],
                'width': width,
                'tbr': bitrate,
            })
            # Keep the most recent valid value so the result is unchanged
            # whenever every entry carries a duration.
            try:
                duration = int(attr['duration'])
            except (KeyError, ValueError):
                pass

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': item.find('title').text,
            'formats': formats,
            'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],
            'description': item.find('description').text,
            'duration': duration,
        }
[internetvideoarchive] Modernize 10 years ago			`from __future__ import unicode_literals`

Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`import re`

			`from .common import InfoExtractor`
			`from ..utils import (`
			`compat_urlparse,`
Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago			`compat_urllib_parse,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`xpath_with_ns,`
			`)`


			`class InternetVideoArchiveIE(InfoExtractor):`
			`_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'`

			`_TEST = {`
[internetvideoarchive] Modernize 10 years ago			`'url': 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247',`
			`'info_dict': {`
			`'id': '452693',`
			`'ext': 'mp4',`
			`'title': 'SKYFALL',`
			`'description': 'In SKYFALL, Bond\'s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.',`
[internetvideoarchive] Fix test case 10 years ago			`'duration': 149,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`},`
			`}`

			`@staticmethod`
			`def _build_url(query):`
			`return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query`

Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago			`@staticmethod`
			`def _clean_query(query):`
			`NEEDED_ARGS = ['publishedid', 'customerid']`
			`query_dic = compat_urlparse.parse_qs(query)`
			`cleaned_dic = dict((k,v[0]) for (k,v) in query_dic.items() if k in NEEDED_ARGS)`
			`# Other player ids return m3u8 urls`
			`cleaned_dic['playerid'] = '247'`
			`cleaned_dic['videokbrate'] = '100000'`
			`return compat_urllib_parse.urlencode(cleaned_dic)`

Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`def _real_extract(self, url):`
			`query = compat_urlparse.urlparse(url).query`
			`query_dic = compat_urlparse.parse_qs(query)`
			`video_id = query_dic['publishedid'][0]`
			`url = self._build_url(query)`

Use the new '_download_xml' helper in more extractors 11 years ago			`flashconfiguration = self._download_xml(url, video_id,`
[internetvideoarchive] Modernize 10 years ago			`'Downloading flash configuration')`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`file_url = flashconfiguration.find('file').text`
			`file_url = file_url.replace('/playlist.aspx', '/mrssplaylist.aspx')`
Add an extractor for rottentomatoes.com and improve InternetVideoArchiveIE to get the best quality 11 years ago			`# Replace some of the parameters in the query to get the best quality`
			`# and http links (no m3u8 manifests)`
			`file_url = re.sub(r'(?<=\?)(.+)$',`
			`lambda m: self._clean_query(m.group()),`
			`file_url)`
Use the new '_download_xml' helper in more extractors 11 years ago			`info = self._download_xml(file_url, video_id,`
[internetvideoarchive] Modernize 10 years ago			`'Downloading video info')`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`item = info.find('channel/item')`

			`def _bp(p):`
			`return xpath_with_ns(p,`
			`{'media': 'http://search.yahoo.com/mrss/',`
			`'jwplayer': 'http://developer.longtailvideo.com/trac/wiki/FlashFormats'})`
			`formats = []`
			`for content in item.findall(_bp('media:group/media:content')):`
			`attr = content.attrib`
			`f_url = attr['url']`
[internetvideoarchive] Use centralized format sorting 11 years ago			`width = int(attr['width'])`
			`bitrate = int(attr['bitrate'])`
			`format_id = '%d-%dk' % (width, bitrate)`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`formats.append({`
[internetvideoarchive] Use centralized format sorting 11 years ago			`'format_id': format_id,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`'url': f_url,`
[internetvideoarchive] Use centralized format sorting 11 years ago			`'width': width,`
			`'tbr': bitrate,`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`})`
[internetvideoarchive] Use centralized format sorting 11 years ago
			`self._sort_formats(formats)`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago
Fix the duration field for the VideoDetective and InternetVideoArchive tests Also remove the use of the old format system and the comment 11 years ago			`return {`
Add an extractor for internetvideoarchive.com videos It's used by videodetective.com 11 years ago			`'id': video_id,`
			`'title': item.find('title').text,`
			`'formats': formats,`
			`'thumbnail': item.find(_bp('media:thumbnail')).attrib['url'],`
			`'description': item.find('description').text,`
			`'duration': int(attr['duration']),`
			`}`