[corus] Add new extractor(closes #12060)(#9164)

8 years ago · c7d6f614f3
parent 08a00eef79
commit c7d6f614f3
4 changed files with 77 additions and 50 deletions
--- a/youtube_dl/extractor/corus.py
+++ b/youtube_dl/extractor/corus.py
@@ -0,0 +1,72 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import re
 from .theplatform import ThePlatformFeedIE
 from ..utils import int_or_none
 class CorusIE(ThePlatformFeedIE):
    """Extractor for globaltv.com, etcanada.com, hgtv.ca, foodnetwork.ca and slice.ca video pages."""
    # Captures the site domain (used to choose a feed) and the numeric video id.
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>(?:globaltv|etcanada)\.com|(?:hgtv|foodnetwork|slice)\.ca)/(?:video/|(?:[^/]+/)+(?:videos/[a-z0-9-]+-|video\.html\?.*?\bv=))(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.hgtv.ca/shows/bryan-inc/videos/movie-night-popcorn-with-bryan-870923331648/',
        'md5': '05dcbca777bf1e58c2acbb57168ad3a6',
        'info_dict': {
            'id': '870923331648',
            'ext': 'mp4',
            'title': 'Movie Night Popcorn with Bryan',
            'description': 'Bryan whips up homemade popcorn, the old fashion way for Jojo and Lincoln.',
            'uploader': 'SHWM-NEW',
            'upload_date': '20170206',
            'timestamp': 1486392197,
        },
    }, {
        'url': 'http://www.foodnetwork.ca/shows/chopped/video/episode/chocolate-obsession/video.html?v=872683587753',
        'only_matching': True,
    }, {
        'url': 'http://etcanada.com/video/873675331955/meet-the-survivor-game-changers-castaways-part-2/',
        'only_matching': True,
    }]
    # Feed and account identifiers, keyed by the first label of the matched
    # domain (e.g. 'hgtv' for 'hgtv.ca').
    _TP_FEEDS = {
        'globaltv': {
            'feed_id': 'ChQqrem0lNUp',
            'account_id': 2269680845,
        },
        'etcanada': {
            'feed_id': 'ChQqrem0lNUp',
            'account_id': 2269680845,
        },
        'hgtv': {
            'feed_id': 'L0BMHXi2no43',
            'account_id': 2414428465,
        },
        'foodnetwork': {
            'feed_id': 'ukK8o58zbRmJ',
            'account_id': 2414429569,
        },
        'slice': {
            'feed_id': '5tUJLgV2YNJ5',
            'account_id': 2414427935,
        },
    }
    def _real_extract(self, url):
        """Look up the feed for the URL's site and delegate to _extract_feed_info."""
        mobj = re.match(self._VALID_URL, url)
        site_domain = mobj.group('domain')
        video_id = mobj.group('id')
        # 'hgtv.ca' -> 'hgtv', 'globaltv.com' -> 'globaltv'
        feed_info = self._TP_FEEDS[site_domain.partition('.')[0]]
        def corus_fields(entry):
            # Extra metadata read from the feed entry's 'pl1$' fields.
            return {
                'episode_number': int_or_none(entry.get('pl1$episode')),
                'season_number': int_or_none(entry.get('pl1$season')),
                'series': entry.get('pl1$show'),
            }
        # Each listed asset type gets its own {'manifest': 'm3u'} query dict.
        m3u_asset_types = dict(
            (asset_type, {'manifest': 'm3u'})
            for asset_type in ('HLS', 'DesktopHLS Default', 'MP4 MBR'))
        return self._extract_feed_info(
            'dtjsEC', feed_info['feed_id'], 'byId=' + video_id, video_id,
            corus_fields, m3u_asset_types, feed_info['account_id'])
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -202,6 +202,7 @@ from .commonprotocols import (
    RtmpIE,
 )
 from .condenast import CondeNastIE
 from .corus import CorusIE
 from .cracked import CrackedIE
 from .crackle import CrackleIE
 from .criterion import CriterionIE
@@ -381,10 +382,7 @@ from .heise import HeiseIE
 from .hellporno import HellPornoIE
 from .helsinki import HelsinkiIE
 from .hentaistigma import HentaiStigmaIE
-from .hgtv import (
-    HGTVIE,
-    HGTVComShowIE,
-)
+from .hgtv import HGTVComShowIE
 from .historicfilms import HistoricFilmsIE
 from .hitbox import HitboxIE, HitboxLiveIE
 from .hitrecord import HitRecordIE
--- a/youtube_dl/extractor/hgtv.py
+++ b/youtube_dl/extractor/hgtv.py
@@ -2,50 +2,6 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    int_or_none,
    js_to_json,
    smuggle_url,
 )
 class HGTVIE(InfoExtractor):
    """Extractor for hgtv.ca ``.../video/<id>/video.html`` pages.

    The page itself is only used to find the media pid; the actual media
    extraction is delegated to the ThePlatform extractor through a
    ``url_transparent`` result.
    """
    # The dot in "video.html" is escaped so that it matches only a literal
    # '.', not any character.
    _VALID_URL = r'https?://(?:www\.)?hgtv\.ca/[^/]+/video/(?P<id>[^/]+)/video\.html'
    _TEST = {
        'url': 'http://www.hgtv.ca/homefree/video/overnight-success/video.html?v=738081859718&p=1&s=da#video',
        'md5': '',
        'info_dict': {
            'id': 'aFH__I_5FBOX',
            'ext': 'mp4',
            'title': 'Overnight Success',
            'description': 'After weeks of hard work, high stakes, breakdowns and pep talks, the final 2 contestants compete to win the ultimate dream.',
            'uploader': 'SHWM-NEW',
            'timestamp': 1470320034,
            'upload_date': '20160804',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }
    def _real_extract(self, url):
        """Return a url_transparent result pointing at the ThePlatform media link.

        Downloads the page, parses the JavaScript ``embed_vars`` object found
        in it, and builds the link.theplatform.com URL from its ``pid``.
        Raises KeyError if ``embed_vars`` has no ``pid`` entry.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # embed_vars is a JS object literal, so it is converted with
        # js_to_json before being parsed as JSON.
        embed_vars = self._parse_json(self._search_regex(
            r'(?s)embed_vars\s*=\s*({.*?});',
            webpage, 'embed vars'), display_id, js_to_json)
        return {
            '_type': 'url_transparent',
            'url': smuggle_url(
                'http://link.theplatform.com/s/dtjsEC/%s?mbr=true&manifest=m3u' % embed_vars['pid'], {
                    'force_smil_url': True
                }),
            # Optional metadata; absent keys yield None.
            'series': embed_vars.get('show'),
            'season_number': int_or_none(embed_vars.get('season')),
            'episode_number': int_or_none(embed_vars.get('episode')),
            'ie_key': 'ThePlatform',
        }
 class HGTVComShowIE(InfoExtractor):
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -306,9 +306,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
        },
    }]
-    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
        real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
        entry = self._download_json(real_url, video_id)['entries'][0]
        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
        formats = []
        subtitles = {}
@@ -333,7 +334,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
                if asset_type in asset_types_query:
                    query.update(asset_types_query[asset_type])
                cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
-                    smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+                    main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
                formats.extend(cur_formats)
                subtitles = self._merge_subtitles(subtitles, cur_subtitles)