@ -2,12 +2,12 @@ from __future__ import unicode_literals
import xml . etree . ElementTree
import xml . etree . ElementTree
from . subtitles import Subtitles InfoExtractor
from . common import InfoExtractor
from . . utils import ExtractorError
from . . utils import ExtractorError
from . . compat import compat_HTTPError
from . . compat import compat_HTTPError
class BBCCoUkIE ( Subtitles InfoExtractor) :
class BBCCoUkIE ( InfoExtractor) :
IE_NAME = ' bbc.co.uk '
IE_NAME = ' bbc.co.uk '
IE_DESC = ' BBC iPlayer '
IE_DESC = ' BBC iPlayer '
_VALID_URL = r ' https?://(?:www \ .)?bbc \ .co \ .uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[ \ da-z] {8} ) '
_VALID_URL = r ' https?://(?:www \ .)?bbc \ .co \ .uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[ \ da-z] {8} ) '
@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
formats . extend ( conn_formats )
formats . extend ( conn_formats )
return formats
return formats
def _get_subtitles(self, media, programme_id):
    """Build the subtitles mapping for a captions ``media`` element.

    For every connection returned by ``self._extract_connections(media)``
    the captions XML document is downloaded and its ``<p>`` cues are
    rendered into an SRT-style text block.

    Returns a dict mapping the document language (``xml:lang``, defaulting
    to ``'en'``) to a list of two subtitle entries: the original document
    referenced by URL (``ext`` ``ttml``) and the converted text carried
    inline as ``data`` (``ext`` ``srt``).  When several connections report
    the same language, the last one wins.
    """
    ttaf_ns = '{http://www.w3.org/2006/10/ttaf1}'
    xml_lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'

    def _extract_text(p):
        # Prefer text placed directly inside <p>.
        stripped_text = (p.text or '').strip()
        if stripped_text:
            return stripped_text
        # Otherwise fall back to the child <span> elements.  A <span> may
        # have no leading text at all (ElementTree then reports
        # ``span.text`` as None), so guard before stripping and drop
        # spans that contribute nothing.
        span_texts = (
            (span.text or '').strip()
            for span in p.findall(ttaf_ns + 'span'))
        return ' '.join(text for text in span_texts if text)

    subtitles = {}
    for connection in self._extract_connections(media):
        href = connection.get('href')
        captions = self._download_xml(
            href, programme_id, 'Downloading captions')
        lang = captions.get(xml_lang_attr, 'en')
        ps = captions.findall('./{0}body/{0}div/{0}p'.format(ttaf_ns))
        # NOTE(review): cue counters start at 0 here, as in the original
        # code; SRT files conventionally start at 1 -- confirm whether
        # consumers care before changing.
        srt = ''.join(
            '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (
                pos, p.get('begin'), p.get('end'), _extract_text(p))
            for pos, p in enumerate(ps))
        subtitles[lang] = [
            {
                'url': href,
                'ext': 'ttml',
            },
            {
                'data': srt,
                'ext': 'srt',
            },
        ]
    return subtitles
def _download_media_selector ( self , programme_id ) :
def _download_media_selector ( self , programme_id ) :
@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
elif kind == ' video ' :
elif kind == ' video ' :
formats . extend ( self . _extract_video ( media , programme_id ) )
formats . extend ( self . _extract_video ( media , programme_id ) )
elif kind == ' captions ' :
elif kind == ' captions ' :
subtitles = self . _extract_caption s( media , programme_id )
subtitles = self . extract_subtitle s( media , programme_id )
return formats , subtitles
return formats , subtitles
@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
else :
else :
programme_id , title , description , duration , formats , subtitles = self . _download_playlist ( group_id )
programme_id , title , description , duration , formats , subtitles = self . _download_playlist ( group_id )
if self . _downloader . params . get ( ' listsubtitles ' , False ) :
self . _list_available_subtitles ( programme_id , subtitles )
return
self . _sort_formats ( formats )
self . _sort_formats ( formats )
return {
return {