@ -1,11 +1,15 @@
import re
from . common import InfoExtractor
from . . compat import compat_urllib_parse_urlparse
from . . utils import (
determine_ext ,
ExtractorError ,
int_or_none ,
parse_duration ,
merge_dicts ,
parse_iso8601 ,
qualities ,
try_get ,
unified_strdate ,
urljoin ,
)
@ -14,120 +18,139 @@ class NDRBaseIE(InfoExtractor):
def _real_extract(self, url):
    """Download the page behind *url* and delegate to ``_extract_embed``.

    Args:
        url: Page URL; it is matched with ``self._match_valid_url``.

    Returns:
        Whatever ``self._extract_embed(webpage, display_id, url)`` returns.

    Raises:
        StopIteration: if the URL match has no non-empty capture group
            (``next()`` is called without a default).
    """
    mobj = self._match_valid_url(url)
    # The display id is the first non-empty capture group of the URL match.
    display_id = next(group for group in mobj.groups() if group)
    webpage = self._download_webpage(url, display_id)
    # Hand over the page URL as third argument: the ``_extract_embed``
    # implementations taking ``url`` need it to rebuild the embed location.
    # A second, unreachable ``return`` and the ``id`` local (which shadowed
    # the builtin) have been dropped.
    return self._extract_embed(webpage, display_id, url)
# Extractor for ndr.de article pages whose URL ends in ",<alphanumeric token>.html".
class NDRIE ( NDRBaseIE ) :
IE_NAME = ' ndr '
IE_DESC = ' NDR.de - Norddeutscher Rundfunk '
# NOTE(review): _VALID_URL is assigned twice in a row, so only the second
# assignment takes effect. The first pattern names two groups (display_id
# before the comma, id after it) and allows the optional "www." / "daserste."
# host prefixes; the second allows any chain of subdomains and names a single
# group, id, for the slug before the comma.
_VALID_URL = r ' https?://(?: www\ .)?(?:daserste \ .)?ndr \ .de/(?:[^/]+/)*(?P<display_id>[^/?#]+),(?P<id>[ \ da-z]+) \ .html '
_VALID_URL = r ' https?://(?: \ w+ \ .)*ndr \ .de/(?:[^/]+/)*(?P<id>[^/?#]+),[ \ da-z]+ \ .html '
# Test cases for this extractor: each entry gives a URL plus either the
# expected info_dict fields or an only_matching flag.
# NOTE(review): several dicts below repeat a key (for example upload_date,
# url, id) and some brackets do not pair up; this looks like two revisions
# of the list interleaved - confirm which entries are intended before relying
# on them.
_TESTS = [ {
# httpVideo, same content id
' url ' : ' http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html ' ,
' md5 ' : ' 6515bc255dc5c5f8c85bbc38e035a659 ' ,
' info_dict ' : {
' id ' : ' hafengeburtstag988 ' ,
' display_id ' : ' Party-Poette-und-Parade ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Party, Pötte und Parade ' ,
' thumbnail ' : ' https://www.ndr.de/fernsehen/hafengeburtstag990_v-contentxl.jpg ' ,
' description ' : ' md5:ad14f9d2f91d3040b6930c697e5f6b4c ' ,
' series' : None ,
' channel' : ' NDR Fernsehen ' ,
' upload_date ' : ' 201505 08 ' ,
' uploader' : ' ndrtv ' ,
' timestamp' : 1431255671 ,
' upload_date ' : ' 201505 1 0' ,
' duration ' : 3498 ,
} ,
} , {
' url ' : ' https://www.ndr.de/sport/fussball/Rostocks-Matchwinner-Froede-Ein-Hansa-Debuet-wie-im-Maerchen,hansa10312.html ' ,
' only_matching ' : True
} , {
' url ' : ' https://www.ndr.de/nachrichten/niedersachsen/kommunalwahl_niedersachsen_2021/Grosse-Parteien-zufrieden-mit-Ergebnissen-der-Kommunalwahl,kommunalwahl1296.html ' ,
' info_dict ' : {
' id ' : ' kommunalwahl1296 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Die Spitzenrunde: Die Wahl aus Sicht der Landespolitik ' ,
' thumbnail ' : ' https://www.ndr.de/fernsehen/screenshot1194912_v-contentxl.jpg ' ,
' description ' : ' md5:5c6e2ad744cef499135735a1036d7aa7 ' ,
' series ' : ' Hallo Niedersachsen ' ,
' channel ' : ' NDR Fernsehen ' ,
' upload_date ' : ' 20210913 ' ,
' duration ' : 438 ,
' params ' : {
' skip_download ' : True ,
} ,
' expected_warnings ' : [ ' Unable to download f4m manifest ' ] ,
} , {
' url ' : ' https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html ' ,
# httpVideo, different content id
' url ' : ' http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html ' ,
' md5 ' : ' 1043ff203eab307f0c51702ec49e9a71 ' ,
' info_dict ' : {
' id ' : ' sendung1091858 ' ,
' id ' : ' osna272 ' ,
' display_id ' : ' 40-Osnabrueck-spielt-sich-in-einen-Rausch ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Extra 3 vom 11.11.2020 mit Christian Ehring ' ,
' thumbnail ' : ' https://www.ndr.de/fernsehen/screenshot983938_v-contentxl.jpg ' ,
' description ' : ' md5:700f6de264010585012a72f97b0ac0c9 ' ,
' series ' : ' extra 3 ' ,
' channel ' : ' NDR Fernsehen ' ,
' upload_date ' : ' 20201111 ' ,
' duration ' : 1749 ,
}
' title ' : ' Osnabrück - Wehen Wiesbaden: Die Highlights ' ,
' description ' : ' md5:32e9b800b3d2d4008103752682d5dc01 ' ,
' uploader ' : ' ndrtv ' ,
' timestamp ' : 1442059200 ,
' upload_date ' : ' 20150912 ' ,
' duration ' : 510 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' No longer available ' ,
} , {
# httpAudio, same content id
' url ' : ' http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html ' ,
' md5 ' : ' bb3cd38e24fbcc866d13b50ca59307b8 ' ,
' info_dict ' : {
' id ' : ' audio51535 ' ,
' display_id ' : ' La-Valette-entgeht-der-Hinrichtung ' ,
' ext ' : ' mp3 ' ,
' title ' : ' La Valette entgeht der Hinrichtung ' ,
' thumbnail ' : ' https://www.ndr.de/mediathek/mediathekbild140_v-podcast.jpg ' ,
' description ' : ' md5:22f9541913a40fe50091d5cdd7c9f536 ' ,
' upload_date ' : ' 20140729 ' ,
' duration ' : 884.0 ,
' uploader ' : ' ndrinfo ' ,
' timestamp ' : 1631711863 ,
' upload_date ' : ' 20210915 ' ,
' duration ' : 884 ,
} ,
' params ' : {
' skip_download ' : True ,
} ,
} , {
# with subtitles
' url ' : ' https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html ' ,
' info_dict ' : {
' id ' : ' extra18674 ' ,
' display_id ' : ' extra-3-Satiremagazin-mit-Christian-Ehring ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Extra 3 vom 11.11.2020 mit Christian Ehring ' ,
' description ' : ' md5:700f6de264010585012a72f97b0ac0c9 ' ,
' uploader ' : ' ndrtv ' ,
' upload_date ' : ' 20201207 ' ,
' timestamp ' : 1614349457 ,
' duration ' : 1749 ,
' subtitles ' : {
' de ' : [ {
' ext ' : ' ttml ' ,
' url ' : r ' re:^https://www \ .ndr \ .de.+ ' ,
} ] ,
} ,
} ,
' expected_warnings ' : [ ' unable to extract json url ' ] ,
' params ' : {
' skip_download ' : True ,
} ,
' expected_warnings ' : [ ' Unable to download f4m manifest ' ] ,
} , {
' url ' : ' https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html ' ,
' only_matching ' : True ,
} ]
def _extract_embed(self, webpage, display_id, id):
    """Build an info dict for an NDR page from the JSON its player points to.

    Two page layouts are handled:

    * pages embedding an ``<iframe>`` whose ``src`` contains
      ``_theme-ndrde``: the matching ``ardjson_image`` JSON document is
      downloaded and its ``auto`` quality streams are read as m3u8 formats;
    * otherwise the page must expose an ``apiUrl = '...'`` assignment; the
      JSON behind it describes a single audio file.

    Args:
        webpage: HTML of the page.
        display_id: Not used by this implementation.
        id: Identifier used for download messages and returned as ``'id'``
            (the name shadows the builtin but is part of the signature).

    Returns:
        dict with ``id``, ``title``, ``thumbnail``, ``description``,
        ``upload_date``, ``duration`` and ``formats`` (plus ``series``,
        ``channel`` and ``subtitles`` for the iframe layout).

    NOTE(review): the same class body defines another ``_extract_embed``
    further down, which would replace this one - confirm which is intended.
    """
    base_url = 'https://www.ndr.de'

    def absolute(path):
        # Missing JSON fields must yield None instead of a TypeError from
        # concatenating str and None.
        return base_url + path if path else None

    json_url = self._search_regex(
        r'<iframe[^>]+src=\"([^\"]+)_theme-ndrde[^\.]*\.html\"', webpage,
        'json url', fatal=False)
    if json_url:
        # The download is requested with fatal=False, so the result is not
        # guaranteed to be a dict; fall back to an empty one.
        data_json = self._download_json(
            base_url + json_url.replace('ardplayer_image', 'ardjson_image') + '.json',
            id, fatal=False) or {}
        info_json = data_json.get('_info') or {}
        # try_get yields None when the nested path is missing.
        media_json = try_get(
            data_json, lambda x: x['_mediaArray'][0]['_mediaStreamArray']) or []

        formats = []
        for media in media_json:
            stream_url = media.get('_stream')
            # Only the adaptive ("auto") entry is read; skip entries that
            # carry no stream URL rather than raising KeyError.
            if media.get('_quality') == 'auto' and stream_url:
                formats.extend(self._extract_m3u8_formats(stream_url, id))

        subtitles = {}
        sub_url = data_json.get('_subtitleUrl')
        if sub_url:
            subtitles.setdefault('de', []).append({
                'url': base_url + sub_url,
            })
        self._sort_formats(formats)
        return {
            'id': id,
            'title': info_json.get('clipTitle'),
            'thumbnail': absolute(data_json.get('_previewImage')),
            'description': info_json.get('clipDescription'),
            'series': info_json.get('seriesTitle') or None,
            'channel': info_json.get('channelTitle'),
            'upload_date': unified_strdate(info_json.get('clipDate')),
            'duration': data_json.get('_duration'),
            'formats': formats,
            'subtitles': subtitles,
        }

    # No player iframe: the page has to provide the API URL (this lookup
    # keeps the helper's default failure handling, as before).
    json_url = base_url + self._search_regex(
        r'apiUrl\s?=\s?\'([^\']+)\'', webpage, 'json url').replace(
        '_belongsToPodcast-', '')
    data_json = self._download_json(json_url, id, fatal=False) or {}
    media_url = try_get(
        data_json,
        (lambda x: x['audio'][0]['url'], lambda x: x['files'][0]['url']))
    return {
        'id': id,
        'title': data_json.get('title'),
        'thumbnail': absolute(data_json.get('poster')),
        'description': data_json.get('summary'),
        'upload_date': unified_strdate(data_json.get('publicationDate')),
        'duration': parse_duration(data_json.get('duration')),
        # Do not emit a format entry without a URL.
        'formats': [{
            'url': media_url,
            'vcodec': 'none',
            'ext': 'mp3',
        }] if media_url else [],
    }
def _extract_embed(self, webpage, display_id, url):
    """Locate the embedded player of an NDR page and defer to it.

    The embed location is looked up, in order, in the ``embedURL`` meta
    tag, in an ``embedUrl: "..."`` script property and in a
    ``var sophoraID = "..."`` assignment. When only a bare id of the form
    ``<letters><digits>`` is found, the page is searched for a reference
    to ``<path prefix of url><id>`` to build an ``ndr:<id>`` URL; failing
    that, the id is inserted into the ``/info/<id>-player.html`` template.

    Args:
        webpage: HTML of the page.
        display_id: Slug of the page; also returned as ``'display_id'``.
        url: URL the page was downloaded from.

    Returns:
        A ``url_transparent`` result pointing at the embed URL, merged
        (via ``merge_dicts``) with whatever JSON-LD metadata the page has.

    Raises:
        ExtractorError: if no embed URL could be determined.
    """
    embed_url = (
        self._html_search_meta(
            'embedURL', webpage, 'embed URL',
            default=None)
        or self._search_regex(
            r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'embed URL', group='url', default=None)
        or self._search_regex(
            r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
            'embed URL', group='url', default=''))
    # some more work needed if we only found sophoraID
    if re.match(r'^[a-z]+\d+$', embed_url):
        # get the initial part of the url path, eg /panorama/archiv/2022/
        parsed_url = compat_urllib_parse_urlparse(url)
        # display_id and path come from the URL and may contain regex
        # metacharacters, so they are escaped before being interpolated.
        path = self._search_regex(
            r'(.+/)%s' % re.escape(display_id), parsed_url.path or '',
            'embed URL', default='')
        # find tell-tale image with the actual ID
        ndr_id = self._search_regex(
            r'%s([a-z]+\d+)(?!\.)\b' % re.escape(path), webpage,
            'embed URL', default=None)
        # or try to use special knowledge!
        NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
        embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
    if not embed_url:
        raise ExtractorError('Unable to extract embedUrl')

    description = self._search_regex(
        r'<p[^>]+itemprop="description">([^<]+)</p>',
        webpage, 'description', default=None) or self._og_search_description(webpage)
    timestamp = parse_iso8601(
        self._search_regex(
            (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
             r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
            webpage, 'upload date', group='cont', default=None))
    info = self._search_json_ld(webpage, display_id, default={})
    return merge_dicts({
        '_type': 'url_transparent',
        'url': embed_url,
        'display_id': display_id,
        'description': description,
        'timestamp': timestamp,
    }, info)
class NJoyIE ( NDRBaseIE ) :
@ -151,19 +174,19 @@ class NJoyIE(NDRBaseIE):
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' No longer available ' ,
} , {
# httpVideo, different content id
' url ' : ' http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html ' ,
' md5 ' : ' 417660fffa90e6df2fda19f1b40a64d8 ' ,
' info_dict ' : {
' id ' : ' dockville882 ' ,
' id ' : ' livestream283 ' ,
' display_id ' : ' Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn- ' ,
' ext ' : ' mp 4 ' ,
' title ' : ' "Ich hab noch nie " mit Felix Jaehn' ,
' description ' : ' md5: 85dd312d53be1b99e1f998a16452a2f3 ' ,
' ext ' : ' mp 3 ' ,
' title ' : ' Das frueheste DJ Set des Nordens live mit Felix Jaehn' ,
' description ' : ' md5: 681698f527b8601e511e7b79edde7d2c ' ,
' uploader ' : ' njoy ' ,
' upload_date ' : ' 20150822 ' ,
' duration ' : 211 ,
' upload_date ' : ' 20210830 ' ,
} ,
' params ' : {
' skip_download ' : True ,
@ -173,18 +196,25 @@ class NJoyIE(NDRBaseIE):
' only_matching ' : True ,
} ]
def _extract_embed(self, webpage, display_id, url=None):
    """Find the NDR media id on an N-JOY page and defer to the embed extractor.

    Args:
        webpage: HTML of the page.
        display_id: Slug of the page; returned as ``'display_id'`` and,
            with dashes turned into spaces, as the ``'title'``.
        url: Unused; accepted (with a default) so callers may pass either
            two or three positional arguments.

    Returns:
        A ``url_transparent`` result for ``ndr:<id>`` with ``ie_key``
        ``NDREmbedBase``.

    Raises:
        ExtractorError: if no media id is present in the page.
    """
    # find tell-tale URL with the actual ID, or fall back to the player
    # iframe whose element id is "pp_<id>"
    video_id = self._search_regex(
        (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
         r'<iframe[^>]+id="pp_([\da-z]+)"', ),
        webpage, 'NDR id', default=None)
    if not video_id:
        # Without this guard the result URL would be the literal 'ndr:None'.
        raise ExtractorError('Unable to extract NDR id')

    description = (
        self._html_search_meta('description', webpage)
        or self._search_regex(
            r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
            webpage, 'description', fatal=False))
    return {
        '_type': 'url_transparent',
        'ie_key': 'NDREmbedBase',
        'url': 'ndr:%s' % video_id,
        'display_id': display_id,
        'description': description,
        'title': display_id.replace('-', ' ').strip(),
    }
@ -287,7 +317,7 @@ class NDREmbedBaseIE(InfoExtractor):
class NDREmbedIE ( NDREmbedBaseIE ) :
IE_NAME = ' ndr:embed '
_VALID_URL = r ' https?://(?: www\ .)?(?:daserste \ .)? ndr\ .de/(?:[^/]+/)*(?P<id>[ \ da-z]+)-(?: player|externalPlayer)\ .html '
_VALID_URL = r ' https?://(?: \ w+ \ .)* ndr\ .de/(?:[^/]+/)*(?P<id>[ \ da-z]+)-(?: (?:ard)? player|externalPlayer)\ .html '
_TESTS = [ {
' url ' : ' http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html ' ,
' md5 ' : ' 8b9306142fe65bbdefb5ce24edb6b0a9 ' ,
@ -300,6 +330,7 @@ class NDREmbedIE(NDREmbedBaseIE):
' upload_date ' : ' 20150907 ' ,
' duration ' : 132 ,
} ,
' skip ' : ' No longer available ' ,
} , {
' url ' : ' http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html ' ,
' md5 ' : ' 002085c44bae38802d94ae5802a36e78 ' ,
@ -315,6 +346,7 @@ class NDREmbedIE(NDREmbedBaseIE):
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' No longer available ' ,
} , {
' url ' : ' http://www.ndr.de/info/audio51535-player.html ' ,
' md5 ' : ' bb3cd38e24fbcc866d13b50ca59307b8 ' ,
@ -324,7 +356,7 @@ class NDREmbedIE(NDREmbedBaseIE):
' title ' : ' La Valette entgeht der Hinrichtung ' ,
' is_live ' : False ,
' uploader ' : ' ndrinfo ' ,
' upload_date ' : ' 20 14 072 9' ,
' upload_date ' : ' 20 2 10915 ' ,
' duration ' : 884 ,
} ,
' params ' : {
@ -345,15 +377,17 @@ class NDREmbedIE(NDREmbedBaseIE):
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' No longer available ' ,
} , {
# httpVideoLive
' url ' : ' http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html ' ,
' info_dict ' : {
' id ' : ' livestream217 ' ,
' ext ' : ' flv ' ,
' ext ' : ' mp4 ' ,
' title ' : r ' re:^NDR Fernsehen Niedersachsen \ d {4} - \ d {2} - \ d {2} \ d {2} : \ d {2} $ ' ,
' is_live ' : True ,
' upload_date ' : ' 20150910 ' ,
' upload_date ' : ' 20210409 ' ,
' uploader ' : ' ndrtv ' ,
} ,
' params ' : {
' skip_download ' : True ,
@ -391,9 +425,10 @@ class NJoyEmbedIE(NDREmbedBaseIE):
' ext ' : ' mp4 ' ,
' title ' : ' Zehn Jahre Reeperbahn Festival - die Doku ' ,
' is_live ' : False ,
' upload_date ' : ' 20 150807 ' ,
' upload_date ' : ' 20 200826 ' ,
' duration ' : 1011 ,
} ,
' expected_warnings ' : [ ' Unable to download f4m manifest ' ] ,
} , {
# httpAudio
' url ' : ' http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html ' ,
@ -410,6 +445,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
' params ' : {
' skip_download ' : True ,
} ,
' skip ' : ' No longer available ' ,
} , {
# httpAudioLive, no explicit ext
' url ' : ' http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html ' ,
@ -419,7 +455,7 @@ class NJoyEmbedIE(NDREmbedBaseIE):
' title ' : r ' re:^N-JOY Weltweit \ d {4} - \ d {2} - \ d {2} \ d {2} : \ d {2} $ ' ,
' is_live ' : True ,
' uploader ' : ' njoy ' ,
' upload_date ' : ' 20 15081 0' ,
' upload_date ' : ' 20 21083 0' ,
} ,
' params ' : {
' skip_download ' : True ,