From 94c4abce7fd55d076e9d9529f92696fe25ef2d17 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Thu, 6 Feb 2014 21:16:41 +0700 Subject: [PATCH 01/12] [nfb] Add support for nfb.ca (Closes #2069) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/nfb.py | 76 ++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) create mode 100644 youtube_dl/extractor/nfb.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 4e0501ec3..7a97c3279 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -144,6 +144,7 @@ from .nba import NBAIE from .nbc import NBCNewsIE from .ndtv import NDTVIE from .newgrounds import NewgroundsIE +from .nfb import NFBIE from .nhl import NHLIE, NHLVideocenterIE from .niconico import NiconicoIE from .ninegag import NineGagIE diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py new file mode 100644 index 000000000..722bd8c2e --- /dev/null +++ b/youtube_dl/extractor/nfb.py @@ -0,0 +1,76 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + compat_urllib_request, + compat_urllib_parse, +) + + +class NFBIE(InfoExtractor): + IE_NAME = 'nfb' + IE_DESC = 'National Film Board of Canada' + _VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P[\da-z_-]+)' + + _TEST = { + 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', + 'info_dict': { + 'id': 'qallunaat_why_white_people_are_funny', + 'ext': 'mp4', + 'title': 'Qallunaat! Why White People Are Funny ', + 'description': 'md5:836d8aff55e087d04d9f6df554d4e038', + 'duration': 3128, + 'uploader': 'Mark Sandiford', + 'uploader_id': 'mark-sandiford', + }, + 'params': { + # rtmp download + 'skip_download': True, + } + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, 'Downloading film page') + + uploader_id = self._html_search_regex(r'([^<]+)', + page, 'director name', fatal=False) + + request = compat_urllib_request.Request('https://www.nfb.ca/film/%s/player_config' % video_id, + compat_urllib_parse.urlencode({'getConfig': 'true'})) + request.add_header('Content-Type', 'application/x-www-form-urlencoded') + request.add_header('X-NFB-Referer', 'http://www.nfb.ca/medias/flash/NFBVideoPlayer.swf') + + config = self._download_xml(request, video_id, 'Downloading player config XML') + + thumbnail = config.find("./player/stream/media[@type='posterImage']/assets/asset[@quality='high']/default/url").text + video = config.find("./player/stream/media[@type='video']") + duration = int(video.get('duration')) + title = video.find('title').text + description = video.find('description').text + + # It seems assets always go from lower to better quality, so no need to sort + formats = [{ + 'url': x.find('default/streamerURI').text + '/', + 'play_path': x.find('default/url').text, + 'rtmp_live': False, + 'ext': 'mp4', + 'format_id': x.get('quality'), + } for x in video.findall('assets/asset')] + + return { + 'id': video_id, + 'title': title, + 'description': description, + 'thumbnail': thumbnail, + 'duration': duration, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'formats': formats, + } \ No newline at end of file From 95c29381eb8994370ee3924427ecc344ec891f63 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Thu, 6 Feb 2014 21:26:12 +0700 Subject: [PATCH 02/12] [mooshare] Fix bogus video page URL --- youtube_dl/extractor/mooshare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/mooshare.py b/youtube_dl/extractor/mooshare.py index 909d21a99..f1875add5 100644 --- a/youtube_dl/extractor/mooshare.py +++ b/youtube_dl/extractor/mooshare.py @@ -61,7 +61,7 @@ class MooshareIE(InfoExtractor): } request = compat_urllib_request.Request( - 'http://mooshare.biz/8dqtk4bjbp8g', compat_urllib_parse.urlencode(download_form)) + 'http://mooshare.biz/%s' % video_id, compat_urllib_parse.urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') self.to_screen('%s: Waiting for timeout' % video_id) @@ -111,4 +111,4 @@ class MooshareIE(InfoExtractor): 'thumbnail': thumbnail, 'duration': duration, 'formats': formats, - } + } \ No newline at end of file From 0bf35c5cf501ceda21e0b7c047f10c5ce9eea172 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Thu, 6 Feb 2014 21:41:31 +0700 Subject: [PATCH 03/12] [nfb] Add support for onf.ca URLs --- youtube_dl/extractor/nfb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/nfb.py b/youtube_dl/extractor/nfb.py index 722bd8c2e..09de724f9 100644 --- a/youtube_dl/extractor/nfb.py +++ b/youtube_dl/extractor/nfb.py @@ -12,7 +12,7 @@ from ..utils import ( class NFBIE(InfoExtractor): IE_NAME = 'nfb' IE_DESC = 'National Film Board of Canada' - _VALID_URL = r'https?://(?:www\.)?nfb\.ca/film/(?P[\da-z_-]+)' + _VALID_URL = r'https?://(?:www\.)?(nfb|onf)\.ca/film/(?P[\da-z_-]+)' _TEST = { 'url': 'https://www.nfb.ca/film/qallunaat_why_white_people_are_funny', @@ -35,7 +35,7 @@ class NFBIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') - page = self._download_webpage(url, video_id, 'Downloading film page') + page = self._download_webpage('https://www.nfb.ca/film/%s' % video_id, video_id, 'Downloading film page') uploader_id = self._html_search_regex(r' Date: Thu, 6 Feb 2014 15:45:47 +0100 Subject: [PATCH 04/12] release 2014.02.06.2 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 47a9a3635..18d179311 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.02.06.1' +__version__ = '2014.02.06.2' From e9ea0bf12347b785b352236cd0d0e0f25a8f26c5 Mon Sep 17 00:00:00 2001 From: "Sergey M." Date: Fri, 7 Feb 2014 00:35:26 +0700 Subject: [PATCH 05/12] [ndr] Add support for ndr.de (Closes #2325) --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/ndr.py | 89 ++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 youtube_dl/extractor/ndr.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 7a97c3279..a13b5cfb8 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -142,6 +142,7 @@ from .myvideo import MyVideoIE from .naver import NaverIE from .nba import NBAIE from .nbc import NBCNewsIE +from .ndr import NDRIE from .ndtv import NDTVIE from .newgrounds import NewgroundsIE from .nfb import NFBIE diff --git a/youtube_dl/extractor/ndr.py b/youtube_dl/extractor/ndr.py new file mode 100644 index 000000000..bf6782d7d --- /dev/null +++ b/youtube_dl/extractor/ndr.py @@ -0,0 +1,89 @@ +# encoding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + + +class NDRIE(InfoExtractor): + IE_NAME = 'ndr' + IE_DESC = 'NDR.de - Mediathek' + _VALID_URL = r'https?://www\.ndr\.de/.+?(?P\d+)\.html' + + _TESTS = [ + # video + { + 'url': 'http://www.ndr.de/fernsehen/sendungen/hallo_niedersachsen/media/hallonds19925.html', + 'md5': '20eba151ff165f386643dad9c1da08f7', + 'info_dict': { + 'id': '19925', + 'ext': 'mp4', + 'title': 'Hallo Niedersachsen ', + 'description': 'Bei Hallo Niedersachsen um 19:30 Uhr erfahren Sie alles, was am Tag in Niedersachsen los war.', + 'duration': 1722, + }, + }, + # audio + { + 'url': 'http://www.ndr.de/903/audio191719.html', + 'md5': '41ed601768534dd18a9ae34d84798129', + 'info_dict': { + 'id': '191719', + 'ext': 'mp3', + 'title': '"Es war schockierend"', + 'description': 'md5:ed7ff8364793545021a6355b97e95f10', + 'duration': 112, + } + } + ] + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + + page = self._download_webpage(url, video_id, 'Downloading page') + + title = self._og_search_title(page) + description = self._og_search_description(page) + + mobj = re.search( + r'
(?P\d+):(?P\d+)
', + page) + duration = int(mobj.group('minutes')) * 60 + int(mobj.group('seconds')) if mobj else None + + formats = [] + + mp3_url = re.search(r'''{src:'(?P