[telemb] Extract all formats and modernize

pull/8/head
Sergey M․ 10 years ago
parent 8fb7ff25c5
commit adf2c0989d

@ -345,7 +345,7 @@ from .teachingchannel import TeachingChannelIE
from .teamcoco import TeamcocoIE
from .techtalks import TechTalksIE
from .ted import TEDIE
from .telemb import TelembIE
from .telemb import TeleMBIE
from .tenplay import TenPlayIE
from .testurl import TestURLIE
from .tf1 import TF1IE

@ -1,40 +1,77 @@
import re
# -*- coding: utf-8 -*-
# needed for the title french ê! coding utf-8- -*-
# based on the vine.co and lots of help from https://filippo.io/add-support-for-a-new-video-site-to-youtube-dl/
from .common import InfoExtractor
# coding: utf-8
from __future__ import unicode_literals
import re
class TelembIE(InfoExtractor):
from .common import InfoExtractor
from ..utils import remove_start
_VALID_URL = r'https?://www\.telemb\.be/(?P<id>.*)'
_TEST = {
u'url': u'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html',
u'file': u'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html.mp4',
u'md5': u'f45ea69878516ba039835794e0f8f783',
u'info_dict': {
u"title": u'TéléMB : Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages'
class TeleMBIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?telemb\.be/(?P<display_id>.+?)_d_(?P<id>\d+)\.html'
_TESTS = [
{
'url': 'http://www.telemb.be/mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-_d_13466.html',
'md5': 'f45ea69878516ba039835794e0f8f783',
'info_dict': {
'id': '13466',
'display_id': 'mons-cook-with-danielle-des-cours-de-cuisine-en-anglais-',
'ext': 'mp4',
'title': 'Mons - Cook with Danielle : des cours de cuisine en anglais ! - Les reportages',
'description': 'md5:bc5225f47b17c309761c856ad4776265',
'thumbnail': 're:^http://.*\.(?:jpg|png)$',
}
},
{
'url': 'http://telemb.be/les-reportages-havre-incendie-mortel_d_13514.html',
'md5': '6e9682736e5ccd4eab7f21e855350733',
'info_dict': {
'id': '13514',
'display_id': 'les-reportages-havre-incendie-mortel',
'ext': 'mp4',
'title': 'Havré - Incendie mortel - Les reportages',
'description': 'md5:5e54cb449acb029c2b7734e2d946bd4a',
'thumbnail': 're:^http://.*\.(?:jpg|png)$',
}
},
]
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id = mobj.group('id')
webpage_url = 'http://www.telemb.be/' + video_id
webpage = self._download_webpage(webpage_url, video_id)
display_id = mobj.group('display_id')
webpage = self._download_webpage(url, display_id)
self.report_extraction(video_id)
formats = []
for video_url in re.findall(r'file\s*:\s*"([^"]+)"', webpage):
fmt = {
'url': video_url,
'format_id': video_url.split(':')[0]
}
rtmp = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url)
if rtmp:
fmt.update({
'play_path': rtmp.group('playpath'),
'app': rtmp.group('app'),
'player_url': 'http://p.jwpcdn.com/6/10/jwplayer.flash.swf',
'page_url': 'http://www.telemb.be',
'preference': -1,
})
formats.append(fmt)
self._sort_formats(formats)
video_url = self._html_search_regex(r'"(http://wowza\.imust\.org/srv/vod/.*\.mp4)"',
webpage, u'video URL')
title = remove_start(self._og_search_title(webpage), 'TéléMB : ')
description = self._html_search_regex(
r'<meta property="og:description" content="(.+?)" />',
webpage, 'description', fatal=False)
thumbnail = self._og_search_thumbnail(webpage)
return [{
return {
'id': video_id,
'url': video_url,
'ext': 'mp4',
'title': self._og_search_title(webpage),
'thumbnail': self._og_search_thumbnail(webpage),
}]
'display_id': display_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'formats': formats,
}

Loading…
Cancel
Save