@ -1,77 +1,92 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from . common import InfoExtractor
from . . utils import (
ExtractorError ,
int_or_none ,
qualities ,
parse_duration ,
)
# Ranking helper for the quality labels used by playlist entries and posters.
# NOTE(review): presumably ordered from lowest ('xs') to highest ('xl') -
# confirm against utils.qualities.
preference = qualities(['xs', 's', 'm', 'l', 'xl'])


class NDRBaseIE(InfoExtractor):
    """Common logic for extractors that read an NDR ``-ppjson.json`` playlist."""

    def extract_video_info(self, playlist, video_id):
        """Build the result dict for ``video_id`` from a ppjson ``playlist``.

        ``playlist`` is a mapping holding a ``'config'`` entry (read here for
        ``streamType``, ``title``, ``duration`` and ``poster``) next to one
        entry per media variant, each carrying ``'src'`` and, for progressive
        video, ``'quality'``.

        Returns a dict with ``id``, ``title``, ``thumbnails``, ``duration``
        and ``formats``.

        Raises ExtractorError when ``streamType`` is neither ``'httpVideo'``
        nor ``'httpAudio'`` (including when ``'config'`` is missing).
        """
        # A missing/empty config falls through to the ExtractorError below
        # instead of failing with AttributeError on None.
        config = playlist.get('config') or {}
        stream_type = config.get('streamType')
        media_entries = [f for key, f in playlist.items() if key != 'config']

        formats = []
        if stream_type == 'httpVideo':
            for f in media_entries:
                src = f['src']
                if '.f4m' in src:
                    formats.extend(self._extract_f4m_formats(src, video_id))
                elif '.m3u8' in src:
                    formats.extend(self._extract_m3u8_formats(src, video_id, fatal=False))
                else:
                    quality = f.get('quality')
                    formats.append({
                        'url': src,
                        'format_id': quality,
                        'preference': preference(quality),
                    })
        elif stream_type == 'httpAudio':
            for f in media_entries:
                formats.append({
                    'url': f['src'],
                    'format_id': 'mp3',
                    'vcodec': 'none',
                })
        else:
            raise ExtractorError('No media links available for %s' % video_id)

        self._sort_formats(formats)

        # 'poster' may be absent; treat that as "no thumbnails".
        posters = config.get('poster') or {}
        thumbnails = [{
            'id': thumbnail.get('quality'),
            'url': thumbnail.get('src'),
            'preference': preference(thumbnail.get('quality')),
        } for thumbnail in posters.values()]

        return {
            'id': video_id,
            'title': config['title'],
            'thumbnails': thumbnails,
            'duration': int_or_none(config.get('duration')),
            'formats': formats,
        }

    def _real_extract(self, url):
        """Extract from the page's ppjson playlist, else delegate to its embed.

        Raises ExtractorError when neither the JSON playlist nor an embed URL
        can be found.
        """
        video_id = self._match_id(url)

        json_data = self._download_json(
            'http://www.ndr.de/%s-ppjson.json' % video_id, video_id, fatal=False)
        if json_data:
            return self.extract_video_info(json_data['playlist'], video_id)

        # No playlist for this id: look for an embedded player on the page,
        # first as an iframe, then as the 'embedURL' meta tag.
        webpage = self._download_webpage(url, video_id)
        embed_url = self._html_search_regex(
            r'<iframe[^>]+id="pp_\w+"[^>]+src="(/.*)"',
            webpage, 'embed url', None, False)
        if not embed_url:
            embed_url = self._html_search_meta('embedURL', webpage, fatal=False)
        if not embed_url:
            raise ExtractorError('No media links available for %s' % video_id)

        # Site-relative embed paths are resolved against www.ndr.de.
        if embed_url.startswith('/'):
            embed_url = 'http://www.ndr.de%s' % embed_url
        return self.url_result(embed_url, 'NDREmbed')
class NDRIE ( NDRBaseIE ) :
IE_NAME = ' ndr '
IE_DESC = ' NDR.de - Mediathek '
_VALID_URL = r ' https?://www \ .ndr \ .de/.+?(?P<id> \ d+) \ .html '
_VALID_URL = r ' https?://www \ .ndr \ .de/.+? ,(?P<id>\ w +)\ .html '
_TESTS = [
{
@ -79,10 +94,9 @@ class NDRIE(NDRBaseIE):
' md5 ' : ' 5bc5f5b92c82c0f8b26cddca34f8bb2c ' ,
' note ' : ' Video file ' ,
' info_dict ' : {
' id ' : ' 25866' ,
' id ' : ' nordmagazin 25866' ,
' ext ' : ' mp4 ' ,
' title ' : ' Kartoffeltage in der Lewitz ' ,
' description ' : ' md5:48c4c04dde604c8a9971b3d4e3b9eaa8 ' ,
' duration ' : 166 ,
} ,
' skip ' : ' 404 Not found ' ,
@ -91,22 +105,20 @@ class NDRIE(NDRBaseIE):
' url ' : ' http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html ' ,
' md5 ' : ' dadc003c55ae12a5d2f6bd436cd73f59 ' ,
' info_dict ' : {
' id ' : ' 988' ,
' id ' : ' hafengeburtstag 988' ,
' ext ' : ' mp4 ' ,
' title ' : ' Party, Pötte und Parade ' ,
' description ' : ' Hunderttausende feiern zwischen Speicherstadt und St. Pauli den 826. Hafengeburtstag. Die NDR Sondersendung zeigt die schönsten und spektakulärsten Bilder vom Auftakt. ' ,
' duration ' : 3498 ,
} ,
} ,
{
' url ' : ' http://www.ndr.de/info/ audio51535.html' ,
' url ' : ' http://www.ndr.de/info/ La-Valette-entgeht-der-Hinrichtung, audio51535.html' ,
' md5 ' : ' bb3cd38e24fbcc866d13b50ca59307b8 ' ,
' note ' : ' Audio file ' ,
' info_dict ' : {
' id ' : ' 51535' ,
' id ' : ' audio 51535' ,
' ext ' : ' mp3 ' ,
' title ' : ' La Valette entgeht der Hinrichtung ' ,
' description ' : ' md5:22f9541913a40fe50091d5cdd7c9f536 ' ,
' duration ' : 884 ,
}
}
@ -115,16 +127,57 @@ class NDRIE(NDRBaseIE):
class NJoyIE(NDRBaseIE):
    """Extractor for N-JOY (www.n-joy.de) article pages.

    The video id is the word that follows the last comma of the page name,
    e.g. ``comedycontest2480`` in ``...,comedycontest2480.html``.
    """
    IE_NAME = 'N-JOY'
    _VALID_URL = r'https?://www\.n-joy\.de/.+?,(?P<id>\w+)\.html'

    _TEST = {
        'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
        'info_dict': {
            'id': 'comedycontest2480',
            'ext': 'mp4',
            'title': 'Benaissa beim NDR Comedy Contest',
            'description': 'Von seinem sehr "behaarten" Leben lässt sich Benaissa trotz aller Schwierigkeiten nicht unterkriegen.',
            'duration': 654,
        }
    }
class NDREmbedBaseIE(NDRBaseIE):
    """Base for embed-player extractors: the ppjson playlist is mandatory."""

    def _real_extract(self, url):
        """Extract from the ppjson playlist of the id matched in ``url``.

        Unlike the parent implementation there is no web-page fallback:
        raises ExtractorError when the playlist JSON cannot be obtained.
        """
        video_id = self._match_id(url)

        # fatal=False: a failed download is reported by the explicit
        # ExtractorError below rather than by the download helper.
        json_data = self._download_json(
            'http://www.ndr.de/%s-ppjson.json' % video_id, video_id, fatal=False)
        if not json_data:
            raise ExtractorError('No media links available for %s' % video_id)

        return self.extract_video_info(json_data['playlist'], video_id)
class NDREmbedIE(NDREmbedBaseIE):
    """Extractor for www.ndr.de embedded player pages.

    Matches ``<id>-player.html`` and ``<id>-externalPlayer.html`` below at
    least one path segment; ``<id>`` is lowercase letters and digits.
    """
    IE_NAME = 'ndr:embed'
    _VALID_URL = r'https?://www\.ndr\.de/(?:[^/]+/)+(?P<id>[a-z0-9]+)-(?:player|externalPlayer)\.html'

    _TEST = {
        'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
        'info_dict': {
            'id': 'ndraktuell28488',
            'ext': 'mp4',
            'title': 'Norddeutschland begrüßt Flüchtlinge',
            'duration': 132,
        }
    }
class NJoyEmbedIE(NDREmbedBaseIE):
    """Extractor for www.n-joy.de embedded player pages.

    Matches ``<id>-player`` / ``<id>-externalPlayer`` pages, optionally
    followed by a ``_...`` suffix before ``.html`` (as in the test URL
    ``portraet374-player_image-..._theme-n-joy.html``).
    """
    IE_NAME = 'N-JOY:embed'
    # The optional (?:_[^/]+)? group accepts player URLs that carry extra
    # "_image-..._theme-..." parameters; plain "-player.html" still matches.
    _VALID_URL = r'https?://www\.n-joy\.de/(?:[^/]+/)+(?P<id>[a-z0-9]+)-(?:player|externalPlayer)(?:_[^/]+)?\.html'

    _TEST = {
        'url': 'http://www.n-joy.de/entertainment/film/portraet374-player_image-832d9b79-fa8a-4026-92e2-e0fd99deb2f9_theme-n-joy.html',
        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
        'info_dict': {
            'id': 'portraet374',
            'ext': 'mp4',
            'title': 'Viviane Andereggen - "Schuld um Schuld"',
            'duration': 129,
        }
    }