@ -20,12 +20,17 @@ from ..utils import (
ExtractorError ,
find_xpath_attr ,
fix_xml_ampersands ,
float_or_none ,
js_to_json ,
int_or_none ,
parse_iso8601 ,
unescapeHTML ,
unsmuggle_url ,
)
class BrightcoveIE ( InfoExtractor ) :
class BrightcoveLegacyIE ( InfoExtractor ) :
IE_NAME = ' brightcove:legacy '
_VALID_URL = r ' (?:https?://.*brightcove \ .com/(services|viewer).*? \ ?|brightcove:)(?P<query>.*) '
_FEDERATED_URL_TEMPLATE = ' http://c.brightcove.com/services/viewer/htmlFederated? %s '
@ -346,3 +351,152 @@ class BrightcoveIE(InfoExtractor):
if ' url ' not in info and not info . get ( ' formats ' ) :
raise ExtractorError ( ' Unable to extract video url for %s ' % info [ ' id ' ] )
return info
class BrightcoveNewIE(InfoExtractor):
    """Extractor for ``players.brightcove.net`` player pages.

    Handles URLs of the form
    ``http(s)://players.brightcove.net/<account_id>/<player_id>_<embed>/index.html?videoId=<video_id>``
    and offers ``_extract_urls`` to discover such players embedded in an
    arbitrary web page.
    """
    IE_NAME = 'brightcove:new'
    _VALID_URL = r'https?://players\.brightcove\.net/(?P<account_id>\d+)/(?P<player_id>[^/]+)_(?P<embed>[^/]+)/index\.html\?.*videoId=(?P<video_id>\d+)'
    _TEST = {
        'url': 'http://players.brightcove.net/929656772001/e41d32dc-ec74-459e-a845-6c69f7b724ea_default/index.html?videoId=4463358922001',
        'md5': 'c8100925723840d4b0d243f7025703be',
        'info_dict': {
            'id': '4463358922001',
            'ext': 'mp4',
            'title': 'Meet the man behind Popcorn Time',
            'description': 'md5:eac376a4fe366edc70279bfb681aea16',
            'timestamp': 1441391203,
            'upload_date': '20150904',
            'duration': 165.768,
            'uploader_id': '929656772001',
        }
    }

    @staticmethod
    def _extract_urls(webpage):
        """Return a list of Brightcove player URLs embedded in *webpage*.

        Two embed styles are recognised: ``<iframe>`` embeds, whose ``src``
        is returned (protocol-relative sources get an ``http:`` scheme), and
        in-page ``<video data-video-id=...>`` + ``index.min.js`` embeds, for
        which an equivalent ``index.html?videoId=...`` URL is synthesised.
        """
        # Reference:
        # 1. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideoiniframe
        # 2. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/publish-video.html#setvideousingjavascript
        # 3. http://docs.brightcove.com/en/video-cloud/brightcove-player/guides/embed-in-page.html

        entries = []

        # Look for iframe embeds [1]
        # The scheme is optional here, just like in the <script> pattern
        # below, so that protocol-relative embeds are found as well.
        for _, iframe_url in re.findall(
                r'<iframe[^>]+src=(["\'])((?:https?:)?//players\.brightcove\.net/\d+/[^/]+/index\.html.+?)\1', webpage):
            if iframe_url.startswith('//'):
                # _VALID_URL requires an explicit scheme
                iframe_url = 'http:' + iframe_url
            entries.append(iframe_url)

        # Look for embed_in_page embeds [2]
        # According to examples from [3] it's unclear whether video id may be optional
        # and what to do when it is
        for video_id, account_id, player_id, embed in re.findall(
                r'''(?sx)
                    <video[^>]+
                        data-video-id=["\'](\d+)["\'][^>]*>.*?
                    </video>.*?
                    <script[^>]+
                        src=["\'](?:https?:)?//players\.brightcove\.net/
                        (\d+)/([\da-f-]+)_([^/]+)/index\.min\.js
                ''', webpage):
            entries.append(
                'http://players.brightcove.net/%s/%s_%s/index.html?videoId=%s'
                % (account_id, player_id, embed, video_id))

        return entries

    @staticmethod
    def _build_format_id(kind, tbr, height):
        """Build a format id such as ``http-1200k-720p``.

        *kind* is the prefix; the ``-<tbr>k`` and ``-<height>p`` suffixes are
        appended only when the respective value is truthy.
        """
        format_id = kind
        if tbr:
            format_id += '-%dk' % int(tbr)
        if height:
            format_id += '-%dp' % height
        return format_id

    def _real_extract(self, url):
        """Extract metadata and formats for the video addressed by *url*.

        Downloads the player's ``index.min.js`` to obtain the policy key,
        queries the playback API with it and converts the returned sources
        into a formats list. Returns the info dict for the video.
        """
        account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups()

        webpage = self._download_webpage(
            'http://players.brightcove.net/%s/%s_%s/index.min.js'
            % (account_id, player_id, embed), video_id)

        # Preferred source of the policy key: the object passed to catalog(...)
        policy_key = None
        catalog = self._search_regex(
            r'catalog\(({.+?})\);', webpage, 'catalog', default=None)
        if catalog:
            catalog = self._parse_json(
                js_to_json(catalog), video_id, fatal=False)
            if catalog:
                policy_key = catalog.get('policyKey')

        # Fallback: any policyKey: '...' assignment in the script. No default
        # is passed for this second lookup, unlike the catalog one above.
        if not policy_key:
            policy_key = self._search_regex(
                r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1',
                webpage, 'policy key', group='pk')

        # The policy key is sent to the playback API inside the Accept header
        req = compat_urllib_request.Request(
            'https://edge.api.brightcove.com/playback/v1/accounts/%s/videos/%s'
            % (account_id, video_id),
            headers={'Accept': 'application/json;pk=%s' % policy_key})
        json_data = self._download_json(req, video_id)

        title = json_data['name']

        formats = []
        # "or []" also covers a "sources" key that is present but null
        for source in json_data.get('sources') or []:
            source_type = source.get('type')
            src = source.get('src')
            if source_type == 'application/x-mpegURL':
                if not src:
                    continue
                m3u8_formats = self._extract_m3u8_formats(
                    src, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False)
                if m3u8_formats:
                    formats.extend(m3u8_formats)
            else:
                streaming_src = source.get('streaming_src')
                stream_name, app_name = source.get('stream_name'), source.get('app_name')
                # Skip sources with neither a URL nor a complete RTMP
                # (app_name + stream_name) pair.
                if not src and not streaming_src and (not stream_name or not app_name):
                    continue
                # NOTE(review): the second argument is presumably a scale
                # divisor (raw value / 1000) -- confirm against float_or_none
                tbr = float_or_none(source.get('avg_bitrate'), 1000)
                height = int_or_none(source.get('height'))
                container = source.get('container')
                f = {
                    'tbr': tbr,
                    'width': int_or_none(source.get('width')),
                    'height': height,
                    'filesize': int_or_none(source.get('size')),
                    'container': container,
                    'vcodec': source.get('codec'),
                }
                # A source without "container" must not abort the whole
                # extraction; leave "ext" unset in that case (presumably it
                # is then derived downstream from the URL -- confirm).
                if container:
                    f['ext'] = container.lower()

                if src or streaming_src:
                    f.update({
                        'url': src or streaming_src,
                        'format_id': self._build_format_id(
                            'http' if src else 'http-streaming', tbr, height),
                        # Plain "src" gets the higher preference value
                        'preference': 2 if src else 1,
                    })
                else:
                    f.update({
                        'url': app_name,
                        'play_path': stream_name,
                        'format_id': self._build_format_id('rtmp', tbr, height),
                    })
                formats.append(f)
        self._sort_formats(formats)

        description = json_data.get('description')
        thumbnail = json_data.get('thumbnail')
        timestamp = parse_iso8601(json_data.get('published_at'))
        # NOTE(review): same presumed /1000 scaling as for tbr above
        duration = float_or_none(json_data.get('duration'), 1000)
        tags = json_data.get('tags', [])

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'timestamp': timestamp,
            'uploader_id': account_id,
            'formats': formats,
            'tags': tags,
        }