@ -12,6 +12,7 @@ from ..utils import (
determine_ext ,
float_or_none ,
int_or_none ,
orderedSet ,
parse_age_limit ,
parse_duration ,
url_or_none ,
@ -66,135 +67,179 @@ class CrackleIE(InfoExtractor):
} ,
}
def _download_json(self, url, *args, **kwargs):
    """Download JSON from the crackle API, adding a signed Authorization header.

    All positional and keyword arguments are forwarded unchanged to
    ``InfoExtractor._download_json``; only ``headers`` is filled in here.
    Caller-supplied ``headers`` (keyword form) are kept, but the ``Accept``
    and ``Authorization`` entries computed below always take precedence.
    """
    # Authorization generation algorithm is reverse engineered from:
    # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
    # The timestamp is UTC with minute resolution and is part of both the
    # signed message and the header value, so it must be computed only once.
    timestamp = time.strftime('%Y%m%d%H%M', time.gmtime())
    signature = hmac.new(
        b'IGSLUQCBDFHEOIFM',
        '|'.join([url, timestamp]).encode(),
        hashlib.sha1).hexdigest().upper()
    # Copy so that a dict passed in by the caller is never mutated.
    headers = dict(kwargs.pop('headers', None) or {})
    headers.update({
        'Accept': 'application/json',
        # '117' and '1' are fixed trailing fields of the header value
        # (presumably taken from the same player script - TODO confirm).
        'Authorization': '|'.join([signature, timestamp, '117', '1']),
    })
    return InfoExtractor._download_json(self, url, *args, headers=headers, **kwargs)
def _real_extract(self, url):
    """Extract metadata, formats, subtitles and thumbnails for a crackle video.

    The media-details API is queried once per candidate country until a
    response containing a ``MediaURLs`` list is obtained. Candidates are, in
    order: the ``geo_bypass_country`` parameter (if set), a hard-coded list
    of known countries, and finally the country reported by the crackle
    geo-location endpoint.

    Raises ExtractorError when the API reports a non-zero status code, when
    no usable media description could be fetched, or when every format is
    DRM protected (the last two unless ``ignore_no_formats_error`` is set).
    """
    video_id = self._match_id(url)

    geo_bypass_country = self._downloader.params.get('geo_bypass_country', None)
    # The trailing '' is a placeholder slot: when the loop reaches it, the
    # country is looked up through the geo-location endpoint instead.
    countries = orderedSet((
        geo_bypass_country, 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI', ''))
    num_countries, num = len(countries) - 1, 0

    media = {}
    for num, country in enumerate(countries):
        if num == 1:  # start hard-coded list
            self.report_warning('%s. Trying with a list of known countries' % (
                'Unable to obtain video formats from %s API' % geo_bypass_country if geo_bypass_country
                else 'No country code was given using --geo-bypass-country'))
        elif num == num_countries:  # end of list
            geo_info = self._download_json(
                'https://web-api-us.crackle.com/Service.svc/geo/country',
                video_id, fatal=False, note='Downloading geo-location information from crackle API',
                errnote='Unable to fetch geo-location information from crackle') or {}
            country = geo_info.get('CountryCode')
            if country is None:
                continue
            self.to_screen('%s identified country as %s' % (self.IE_NAME, country))
            if country in countries:
                self.to_screen('Downloading from %s API was already attempted. Skipping...' % country)
                continue

        # No --geo-bypass-country given: the first slot is None, skip it.
        if country is None:
            continue
        try:
            media = self._download_json(
                'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id, country),
                video_id, note='Downloading media JSON from %s API' % country,
                errnote='Unable to download media JSON')
        except ExtractorError as e:
            # 401 means geo restriction, trying next country
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                continue
            raise

        # NOTE(review): assumes every successful response carries a
        # 'status' object - confirm against the API.
        status = media.get('status')
        if status.get('messageCode') != '0':
            raise ExtractorError(
                '%s said: %s %s - %s' % (
                    self.IE_NAME, status.get('messageCodeDescription'), status.get('messageCode'), status.get('message')),
                expected=True)

        # Found video formats
        if isinstance(media.get('MediaURLs'), list):
            break

    ignore_no_formats = self._downloader.params.get('ignore_no_formats_error')
    allow_unplayable_formats = self._downloader.params.get('allow_unplayable_formats')

    if not media or (not media.get('MediaURLs') and not ignore_no_formats):
        raise ExtractorError(
            'Unable to access the crackle API. Try passing your country code '
            'to --geo-bypass-country. If it still does not work and the '
            'video is available in your country')
    title = media['Title']

    formats, subtitles = [], {}
    has_drm = False
    for e in media.get('MediaURLs') or []:
        if e.get('UseDRM'):
            has_drm = True
            if not allow_unplayable_formats:
                continue
        format_url = url_or_none(e.get('Path'))
        if not format_url:
            continue
        ext = determine_ext(format_url)
        if ext == 'm3u8':
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        elif ext == 'mpd':
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                format_url, video_id, mpd_id='dash', fatal=False)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        elif format_url.endswith('.ism/Manifest'):
            fmts, subs = self._extract_ism_formats_and_subtitles(
                format_url, video_id, ism_id='mss', fatal=False)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)
        else:
            # Plain file: only entries whose 'Type' is listed in
            # _MEDIA_FILE_SLOTS are kept; the slot supplies width/height.
            mfs_path = e.get('Type')
            mfs_info = self._MEDIA_FILE_SLOTS.get(mfs_path)
            if not mfs_info:
                continue
            formats.append({
                'url': format_url,
                'format_id': 'http-' + mfs_path.split('.')[0],
                'width': mfs_info['width'],
                'height': mfs_info['height'],
            })
    if not formats and has_drm and not ignore_no_formats:
        raise ExtractorError('The video is DRM protected', expected=True)
    self._sort_formats(formats)

    description = media.get('Description')
    duration = int_or_none(media.get(
        'DurationInSeconds')) or parse_duration(media.get('Duration'))
    view_count = int_or_none(media.get('CountViews'))
    average_rating = float_or_none(media.get('UserRating'))
    age_limit = parse_age_limit(media.get('Rating'))
    genre = media.get('Genre')
    release_year = int_or_none(media.get('ReleaseYear'))
    creator = media.get('Directors')
    artist = media.get('Cast')

    if media.get('MediaTypeDisplayValue') == 'Full Episode':
        series = media.get('ShowName')
        episode = title
        season_number = int_or_none(media.get('Season'))
        episode_number = int_or_none(media.get('Episode'))
    else:
        series = episode = season_number = episode_number = None

    # Closed-caption files are added on top of any subtitles already
    # collected from the manifests above, so 'subtitles' is not reset here.
    cc_files = media.get('ClosedCaptionFiles')
    if isinstance(cc_files, list):
        for cc_file in cc_files:
            if not isinstance(cc_file, dict):
                continue
            cc_url = url_or_none(cc_file.get('Path'))
            if not cc_url:
                continue
            lang = cc_file.get('Locale') or 'en'
            subtitles.setdefault(lang, []).append({'url': cc_url})

    thumbnails = []
    images = media.get('Images')
    # The loop below needs key/value pairs, so the guard must accept a
    # mapping; a list has no .items().
    if isinstance(images, dict):
        for image_key, image_url in images.items():
            mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
            if not mobj:
                continue
            thumbnails.append({
                'url': image_url,
                'width': int(mobj.group(1)),
                'height': int(mobj.group(2)),
            })

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'duration': duration,
        'view_count': view_count,
        'average_rating': average_rating,
        'age_limit': age_limit,
        'genre': genre,
        'creator': creator,
        'artist': artist,
        'release_year': release_year,
        'series': series,
        'episode': episode,
        'season_number': season_number,
        'episode_number': episode_number,
        'thumbnails': thumbnails,
        'subtitles': subtitles,
        'formats': formats,
    }