@ -1,68 +1,66 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . utils import (
ExtractorError ,
int_or_none ,
qualities ,
parse_duration ,
)
class NDRBaseIE(InfoExtractor):
    """Shared extraction logic for extractors backed by the ``-ppjson.json`` feed.

    Subclasses only provide ``_VALID_URL`` (with an ``id`` group) and tests.
    """

    def _real_extract(self, url):
        """Return the info dict for ``url`` built from the player JSON feed.

        The media id is obtained with ``self._match_id(url)`` and the feed is
        downloaded from ``http://www.ndr.de/<id>-ppjson.json``.

        Raises:
            ExtractorError: if the feed's ``objectType`` is neither ``video``
                nor ``audio``, or if no format could be collected.
        """
        video_id = self._match_id(url)

        # Rank of each quality label found in the feed; a higher value is
        # preferred. Used for both formats and thumbnails.
        preferences = {'xl': 4, 'l': 3, 'm': 2, 's': 1, 'xs': 0}

        json_data = self._download_json(
            'http://www.ndr.de/%s-ppjson.json' % video_id,
            video_id, 'Downloading page')

        # Fall back to empty dicts so that a missing/null section ends in the
        # ExtractorError below instead of an AttributeError on None.
        playlist = json_data.get('playlist') or {}
        object_type = (json_data.get('config') or {}).get('objectType')

        # Every playlist entry except 'config' describes one media source.
        sources = [entry for key, entry in playlist.items() if key != 'config']

        formats = []
        if object_type == 'video':
            for entry in sources:
                src = entry['src']
                if '.f4m' in src:
                    # Manifest URL: delegate to the f4m helper.
                    formats.extend(self._extract_f4m_formats(src, video_id))
                elif '.m3u8' in src:
                    # Manifest URL: delegate to the m3u8 helper.
                    formats.extend(self._extract_m3u8_formats(src, video_id))
                else:
                    # Direct media URL labelled with a quality key.
                    quality = entry.get('quality')
                    formats.append({
                        'url': src,
                        'format_id': quality,
                        'preference': preferences.get(quality),
                    })
        elif object_type == 'audio':
            for entry in sources:
                formats.append({
                    'url': entry['src'],
                    'format_id': 'mp3',
                })
        else:
            raise ExtractorError('No media links available for %s' % video_id)

        if not formats:
            raise ExtractorError('No media links available for %s' % video_id)

        self._sort_formats(formats)

        # 'config' inside the playlist carries the metadata (title, duration,
        # poster images). The title is mandatory, hence the plain indexing.
        config = playlist.get('config') or {}
        title = config['title']
        duration = int_or_none(config.get('duration'))

        posters = (config.get('poster') or {}).values()
        thumbnails = [{
            'id': poster.get('quality'),
            'url': poster.get('src'),
            'preference': preferences.get(poster.get('quality')),
        } for poster in posters]

        return {
            'id': video_id,
            'title': title,
            'thumbnails': thumbnails,
            'duration': duration,
            'formats': formats,
        }
@ -71,7 +69,7 @@ class NDRBaseIE(InfoExtractor):
class NDRIE ( NDRBaseIE ) :
IE_NAME = ' ndr '
IE_DESC = ' NDR.de - Mediathek '
_VALID_URL = r ' https?://www \ .ndr \ .de/.+? (?P<id>\ d +)\ .html '
_VALID_URL = r ' https?://www \ .ndr \ .de/.+? ,(?P<id>\ w +)\ .html '
_TESTS = [
{
@ -79,10 +77,9 @@ class NDRIE(NDRBaseIE):
' md5 ' : ' 5bc5f5b92c82c0f8b26cddca34f8bb2c ' ,
' note ' : ' Video file ' ,
' info_dict ' : {
' id ' : ' 25866' ,
' id ' : ' nordmagazin 25866' ,
' ext ' : ' mp4 ' ,
' title ' : ' Kartoffeltage in der Lewitz ' ,
' description ' : ' md5:48c4c04dde604c8a9971b3d4e3b9eaa8 ' ,
' duration ' : 166 ,
} ,
' skip ' : ' 404 Not found ' ,
@ -91,22 +88,20 @@ class NDRIE(NDRBaseIE):
' url ' : ' http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html ' ,
' md5 ' : ' dadc003c55ae12a5d2f6bd436cd73f59 ' ,
' info_dict ' : {
' id ' : ' 988' ,
' id ' : ' hafengeburtstag 988' ,
' ext ' : ' mp4 ' ,
' title ' : ' Party, Pötte und Parade ' ,
' description ' : ' Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt. ' ,
' duration ' : 3498 ,
} ,
} ,
{
' url ' : ' http://www.ndr.de/info/ audio51535.html' ,
' url ' : ' http://www.ndr.de/info/ La-Valette-entgeht-der-Hinrichtung, audio51535.html' ,
' md5 ' : ' bb3cd38e24fbcc866d13b50ca59307b8 ' ,
' note ' : ' Audio file ' ,
' info_dict ' : {
' id ' : ' 51535' ,
' id ' : ' audio 51535' ,
' ext ' : ' mp3 ' ,
' title ' : ' La Valette entgeht der Hinrichtung ' ,
' description ' : ' md5:22f9541913a40fe50091d5cdd7c9f536 ' ,
' duration ' : 884 ,
}
}
@ -115,7 +110,7 @@ class NDRIE(NDRBaseIE):
class NJoyIE ( NDRBaseIE ) :
IE_NAME = ' N-JOY '
_VALID_URL = r ' https?://www \ .n-joy \ .de/.+? (?P<id>\ d +)\ .html '
_VALID_URL = r ' https?://www \ .n-joy \ .de/.+? ,(?P<id>\ w +)\ .html '
_TEST = {
' url ' : ' http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html ' ,
@ -124,7 +119,6 @@ class NJoyIE(NDRBaseIE):
' id ' : ' 2480 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Benaissa beim NDR Comedy Contest ' ,
' description ' : ' Von seinem sehr " behaarten " Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen. ' ,
' duration ' : 654 ,
}
}