# coding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . compat import compat_str
from . . utils import (
js_to_json ,
smuggle_url ,
try_get ,
)
class CBCIE(InfoExtractor):
    """Extractor for regular cbc.ca pages that embed CBC player media.

    URLs under ``/player/`` are excluded by ``_VALID_URL`` and anything
    ``CBCPlayerIE`` accepts is rejected by ``suitable``.  This extractor only
    resolves media ids from the page and hands them over to ``CBCPlayerIE``
    through ``cbcplayer:<media id>`` URLs.
    """

    # The last path component is captured as the display id.
    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
    _TESTS = [{
        # with mediaId
        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
        'md5': '97e24d09672fc4cf56256d6faa6c25bc',
        'info_dict': {
            'id': '2682904050',
            'ext': 'mp4',
            'title': 'Don Cherry – All-Stars',
            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
            'timestamp': 1454463000,
            'upload_date': '20160203',
            'uploader': 'CBCC-NEW',
        },
        'skip': 'Geo-restricted to Canada',
    }, {
        # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
        'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
        'md5': '162adfa070274b144f4fdc3c3b8207db',
        'info_dict': {
            'id': '2414435309',
            'ext': 'mp4',
            'title': '22 Minutes Update: What Not To Wear Quebec',
            'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
            'upload_date': '20131025',
            'uploader': 'CBCC-NEW',
            'timestamp': 1382717907,
        },
    }, {
        # with clipId, feed only available via tpfeed.cbc.ca
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
        'md5': '0274a90b51a9b4971fe005c63f592f12',
        'info_dict': {
            'id': '2487345465',
            'ext': 'mp4',
            'title': 'Robin Williams freestyles on 90 Minutes Live',
            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
            'upload_date': '19780210',
            'uploader': 'CBCC-NEW',
            'timestamp': 255977160,
        },
    }, {
        # multiple iframes
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
        'playlist': [{
            'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
            'info_dict': {
                'id': '2680832926',
                'ext': 'mp4',
                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
                'upload_date': '20160201',
                'timestamp': 1454342820,
                'uploader': 'CBCC-NEW',
            },
        }, {
            'md5': '415a0e3f586113894174dfb31aa5bb1a',
            'info_dict': {
                'id': '2658915080',
                'ext': 'mp4',
                'title': 'Fly like an eagle!',
                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
                'upload_date': '20150315',
                'timestamp': 1426443984,
                'uploader': 'CBCC-NEW',
            },
        }],
        'skip': 'Geo-restricted to Canada',
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for any URL CBCPlayerIE accepts, else the default match result."""
        return False if CBCPlayerIE.suitable(url) else super(CBCIE, cls).suitable(url)

    def _real_extract(self, url):
        """Resolve the media referenced by a cbc.ca page.

        When the page contains a ``CBC.APP.Caffeine.initInstance({...});``
        call, a single ``CBCPlayer`` URL result is returned.  Otherwise a
        playlist is built from every ``<iframe>`` whose ``src`` carries a
        numeric ``mediaId`` query value (the playlist may be empty).

        Raises KeyError if the player init object has neither ``mediaId``
        nor ``clipId``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        # default=None: a page without the player init call is not an error,
        # it is handled through the iframe fallback below.
        player_init = self._search_regex(
            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
            default=None)

        if not player_init:
            entries = [
                self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
                for media_id in re.findall(
                    r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
            return self.playlist_result(entries)

        # The init argument is a JavaScript object literal, hence js_to_json.
        player_info = self._parse_json(player_init, display_id, js_to_json)
        media_id = player_info.get('mediaId')
        if not media_id:
            # Only a clip id is known: map it to a media id via the feeds.
            clip_id = player_info['clipId']
            # First attempt is non-fatal so the second feed can still be tried.
            feed = self._download_json(
                'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id,
                clip_id, fatal=False)
            if feed:
                # try_get is passed compat_str as the expected type of the guid.
                media_id = try_get(feed, lambda x: x['entries'][0]['guid'], compat_str)
            if not media_id:
                # Fallback feed: the media id is the last path segment of the
                # first entry's id.
                media_id = self._download_json(
                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                    clip_id)['entries'][0]['id'].split('/')[-1]
        return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
class CBCPlayerIE(InfoExtractor):
    """Extractor for CBC player media addressed by a numeric media id.

    Accepts the internal ``cbcplayer:<id>`` scheme as well as
    ``cbc.ca/player/play/<id>`` and
    ``cbc.ca/i/caffeine/syndicate/?mediaId=<id>`` URLs, and delegates the
    actual extraction to the ``ThePlatform`` extractor.
    """

    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'md5': '64d25f841ddf4ddb28a235338af32e2c',
        'info_dict': {
            'id': '2683190193',
            'ext': 'mp4',
            'title': 'Gerry Runs a Sweat Shop',
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
            'timestamp': 1455071400,
            'upload_date': '20160210',
            'uploader': 'CBCC-NEW',
        },
        'skip': 'Geo-restricted to Canada',
    }, {
        # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
        'url': 'http://www.cbc.ca/player/play/2657631896',
        'md5': 'e5e708c34ae6fca156aafe17c43e8b75',
        'info_dict': {
            'id': '2657631896',
            'ext': 'mp3',
            'title': 'CBC Montreal is organizing its first ever community hackathon!',
            'description': 'The modern technology we tend to depend on so heavily, is never without it\'s share of hiccups and headaches. Next weekend - CBC Montreal will be getting members of the public for its first Hackathon.',
            'timestamp': 1425704400,
            'upload_date': '20150307',
            'uploader': 'CBCC-NEW',
        },
    }, {
        # available only when we add `formats=MPEG4,FLV,MP3` to theplatform url
        'url': 'http://www.cbc.ca/player/play/2164402062',
        'md5': '17a61eb813539abea40618d6323a7f82',
        'info_dict': {
            'id': '2164402062',
            'ext': 'flv',
            'title': 'Cancer survivor four times over',
            'description': 'Tim Mayer has beaten three different forms of cancer four times in five years.',
            'timestamp': 1320410746,
            'upload_date': '20111104',
            'uploader': 'CBCC-NEW',
        },
    }]

    def _real_extract(self, url):
        """Return a ``url_transparent`` result pointing at ThePlatform.

        The media id matched from ``url`` is inserted into a
        link.theplatform.com URL, which is smuggled together with
        ``force_smil_url=True`` for the ``ThePlatform`` extractor.
        """
        video_id = self._match_id(url)
        # Some media (see the last test case) is only available when
        # `formats=MPEG4,FLV,MP3` is part of the ThePlatform URL.
        theplatform_url = (
            'http://link.theplatform.com/s/ExhSPC/media/guid/2655402169/%s?mbr=true&formats=MPEG4,FLV,MP3'
            % video_id)
        return {
            '_type': 'url_transparent',
            'ie_key': 'ThePlatform',
            'url': smuggle_url(theplatform_url, {
                'force_smil_url': True
            }),
            'id': video_id,
        }