From dbda1b51473ddc452d75bc1e98b3edabf4a7f5e8 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sun, 18 Aug 2013 08:15:18 +0200 Subject: [PATCH 01/21] Add RTLnow extractor Supports http://rtl2now.rtl2.de and http://rtl-now.rtl.de --- youtube_dl/extractor/rtlnow.py | 88 ++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 youtube_dl/extractor/rtlnow.py diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py new file mode 100644 index 000000000..15f01a2e2 --- /dev/null +++ b/youtube_dl/extractor/rtlnow.py @@ -0,0 +1,88 @@ +# encoding: utf-8 +import re + +from .common import InfoExtractor +from ..utils import ExtractorError + +class RTLnowIE(InfoExtractor): + """Information Extractor for RTL(2)now""" + _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + _TESTS = [{ + u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', + u'file': u'90419.flv', + u'info_dict': { + u'upload_date': u'20070416', + u'title': u'Ahornallee - Folge 1 - Der Einzug', + u'description': u'Folge 1 - Der Einzug', + }, + u'params': { + u'skip_download': True, + }, + }, + { + u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', + u'file': u'69756.flv', + u'info_dict': { + u'upload_date': u'20120519', + u'title': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit...', + u'description': u'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.', + u'thumbnail': u'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg', + }, + u'params': { + u'skip_download': True, + }, + },] + + def _real_extract(self,url): + mobj = re.match(self._VALID_URL, url) + + webpage_url = u'http://' + mobj.group('url') + video_page_url = u'http://' + mobj.group('base_url') + video_id = mobj.group(u'video_id') + + webpage = self._download_webpage(webpage_url, video_id) + video_title = self._html_search_regex(r'(?P<title>[^<]+)', + webpage, u'title') + playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P[^\']+)\'', + webpage, u'playerdata_url') + + playerdata = self._download_webpage(playerdata_url, video_id) + mobj = re.search(r'<!\[CDATA\[(?P<description>.+?)\s+- (?:Sendung )?vom (?P<upload_date_d>[0-9]{2})\.(?P<upload_date_m>[0-9]{2})\.(?:(?P<upload_date_Y>[0-9]{4})|(?P<upload_date_y>[0-9]{2})) [0-9]{2}:[0-9]{2} Uhr\]\]>', playerdata) + if mobj: + video_description = mobj.group(u'description') + if mobj.group('upload_date_Y'): + video_upload_date = mobj.group('upload_date_Y') + else: + video_upload_date = u'20' + mobj.group('upload_date_y') + video_upload_date += mobj.group('upload_date_m')+mobj.group('upload_date_d') + else: + video_description = None + video_upload_date = None + self._downloader.report_warning(u'Unable to extract description and upload date') + + # Thumbnail: not every video has an thumbnail + mobj = re.search(r'', webpage) + if mobj: + video_thumbnail = mobj.group(u'thumbnail') + else: + video_thumbnail = None + + mobj = re.search(r']+>rtmpe://(?:[^/]+/){2})(?P[^\]]+)\]\]>', playerdata) + if mobj is None: + raise ExtractorError(u'Unable to extract media URL') + video_url = mobj.group(u'url') + video_play_path = u'mp4:' + mobj.group(u'play_path') + video_player_url = video_page_url + u'includes/vodplayer.swf' + + return [{ + 'id': video_id, + 'url': video_url, + 'play_path': video_play_path, + 'page_url': video_page_url, + 'player_url': video_player_url, + 'ext': 'flv', + 'title': video_title, + 'description': video_description, + 'upload_date': video_upload_date, + 'thumbnail': video_thumbnail, + }] From 01b32990da992a5f271532e7408f4c6d546e162c Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Sun, 18 Aug 2013 08:16:53 +0200 Subject: [PATCH 02/21] Add RTLnow extractor --- youtube_dl/extractor/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 84c02c2ed..5bb44e764 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -56,6 +56,7 @@ from .rbmaradio import RBMARadioIE from .redtube import RedTubeIE from .ringtv import RingTVIE from .roxwel import RoxwelIE +from .rtlnow import RTLnowIE from .sina import SinaIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE From ea55b2a4cac1d56c578380b6bcb21b5fbc496a57 Mon Sep 17 00:00:00 2001 From: rzhxeo Date: Mon, 19 Aug 2013 08:57:36 +0200 Subject: [PATCH 03/21] Add VOXnow to RTLnow extractor --- youtube_dl/extractor/rtlnow.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index 15f01a2e2..d993a990a 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -5,8 +5,8 @@ from .common import InfoExtractor from ..utils import ExtractorError class RTLnowIE(InfoExtractor): - """Information Extractor for RTL(2)now""" - _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' + """Information Extractor for RTLnow, RTL2now and VOXnow""" + _VALID_URL = r'(?:http://)?(?P(?Prtl(?:(?P2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)' _TESTS = [{ u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1', u'file': u'90419.flv', @@ -31,7 +31,19 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, - },] + }, + { + u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', + u'file': u'13883.flv', + u'info_dict': { + u'upload_date': u'20090627', + u'title': u'Voxtours - Südafrika-Reporter II', + u'description': u'Südafrika-Reporter II', + }, + u'params': { + u'skip_download': True, + }, + }] def _real_extract(self,url): mobj = re.match(self._VALID_URL, url) From d741e55a423a09c40b3c5e19551f432a050353d7 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 19 Aug 2013 10:27:42 +0200 Subject: [PATCH 04/21] [youtube] Support watch_popup URLs (Fixes #1275) --- test/test_all_urls.py | 1 + youtube_dl/extractor/youtube.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index c73d0e467..c54faa380 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -50,6 +50,7 @@ class TestAllURLsMatching(unittest.TestCase): self.assertEqual(YoutubeIE()._extract_id('http://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?&v=BaW_jenozKc'), 'BaW_jenozKc') self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc'), 'BaW_jenozKc') + self.assertEqual(YoutubeIE()._extract_id('https://www.youtube.com/watch_popup?v=BaW_jenozKc'), 'BaW_jenozKc') def test_no_duplicates(self): ies = gen_extractors() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index f74718950..843a973ca 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -141,7 +141,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ |(?: # or the v= param in all its forms - (?:watch|movie(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) + (?:(?:watch|movie)(?:_popup)?(?:\.php)?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! (?:.*?&)? # any other preceding param (like /?s=tuff&v=xxxx) v= From 87f78946a56d19fe3696725fe7329767fd910320 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Aug 2013 03:50:56 +0200 Subject: [PATCH 05/21] [collegehumor] Allow old-style videos (Fixes #1285) --- youtube_dl/extractor/collegehumor.py | 52 ++++++++++++++++++---------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/youtube_dl/extractor/collegehumor.py b/youtube_dl/extractor/collegehumor.py index 30b9c7549..8d4c93d6d 100644 --- a/youtube_dl/extractor/collegehumor.py +++ b/youtube_dl/extractor/collegehumor.py @@ -4,6 +4,7 @@ import xml.etree.ElementTree from .common import InfoExtractor from ..utils import ( compat_urllib_parse_urlparse, + determine_ext, ExtractorError, ) @@ -12,7 +13,7 @@ from ..utils import ( class CollegeHumorIE(InfoExtractor): _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/(video|embed|e)/(?P[0-9]+)/?(?P.*)$' - _TEST = { + _TESTS = [{ u'url': u'http://www.collegehumor.com/video/6902724/comic-con-cosplay-catastrophe', u'file': u'6902724.mp4', u'md5': u'1264c12ad95dca142a9f0bf7968105a0', @@ -20,7 +21,16 @@ class CollegeHumorIE(InfoExtractor): u'title': u'Comic-Con Cosplay Catastrophe', u'description': u'Fans get creative this year at San Diego. Too creative. And yes, that\'s really Joss Whedon.', }, - } + }, + { + u'url': u'http://www.collegehumor.com/video/3505939/font-conference', + u'file': u'3505939.mp4', + u'md5': u'c51ca16b82bb456a4397987791a835f5', + u'info_dict': { + u'title': u'Font Conference', + u'description': u'This video wasn\'t long enough, so we made it double-spaced.', + }, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -49,25 +59,29 @@ class CollegeHumorIE(InfoExtractor): info['description'] = videoNode.findall('./description')[0].text info['title'] = videoNode.findall('./caption')[0].text info['thumbnail'] = videoNode.findall('./thumbnail')[0].text - manifest_url = videoNode.findall('./file')[0].text + next_url = videoNode.findall('./file')[0].text except IndexError: raise ExtractorError(u'Invalid metadata XML file') - manifest_url += '?hdcore=2.10.3' - manifestXml = self._download_webpage(manifest_url, video_id, - u'Downloading XML manifest', - u'Unable to download video info XML') - - adoc = xml.etree.ElementTree.fromstring(manifestXml) - try: - media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] - node_id = media_node.attrib['url'] - video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text - except IndexError as err: - raise ExtractorError(u'Invalid manifest file') + if next_url.endswith(u'manifest.f4m'): + manifest_url = next_url + '?hdcore=2.10.3' + manifestXml = self._download_webpage(manifest_url, video_id, + u'Downloading XML manifest', + u'Unable to download video info XML') - url_pr = compat_urllib_parse_urlparse(info['thumbnail']) + adoc = xml.etree.ElementTree.fromstring(manifestXml) + try: + media_node = adoc.findall('./{http://ns.adobe.com/f4m/1.0}media')[0] + node_id = media_node.attrib['url'] + video_id = adoc.findall('./{http://ns.adobe.com/f4m/1.0}id')[0].text + except IndexError as err: + raise ExtractorError(u'Invalid manifest file') + url_pr = compat_urllib_parse_urlparse(info['thumbnail']) + info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') + info['ext'] = 'mp4' + else: + # Old-style direct links + info['url'] = next_url + info['ext'] = determine_ext(info['url']) - info['url'] = url_pr.scheme + '://' + url_pr.netloc + video_id[:-2].replace('.csmil','').replace(',','') - info['ext'] = 'mp4' - return [info] + return info From 79cb25776f46e0b9b1e95052fbd84a59440fa34f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Aug 2013 04:06:46 +0200 Subject: [PATCH 06/21] Cache suitable regular expressions This speeds up TestAllURLsMatching.test_no_duplicates by about 8000% at the cost of minimal memory overhead. --- youtube_dl/extractor/common.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index da50abfc1..8009c2d85 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -77,7 +77,13 @@ class InfoExtractor(object): @classmethod def suitable(cls, url): """Receives a URL and returns True if suitable for this IE.""" - return re.match(cls._VALID_URL, url) is not None + + # This does not use has/getattr intentionally - we want to know whether + # we have cached the regexp for *this* class, whereas getattr would also + # match the superclass + if '_VALID_URL_RE' not in cls.__dict__: + cls._VALID_URL_RE = re.compile(cls._VALID_URL) + return cls._VALID_URL_RE.match(url) is not None @classmethod def working(cls): From 3093468977e5c04d7f39016bbe983c483e47707f Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Aug 2013 04:31:57 +0200 Subject: [PATCH 07/21] [generic] Ignore stupid HTTP servers (#1284) --- youtube_dl/extractor/generic.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b633e896c..1c468f8f6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -107,8 +107,13 @@ class GenericIE(InfoExtractor): return new_url def _real_extract(self, url): - new_url = self._test_redirect(url) - if new_url: return [self.url_result(new_url)] + try: + new_url = self._test_redirect(url) + if new_url: + return [self.url_result(new_url)] + except compat_urllib_error.HTTPError: + # This may be a stupid server that doesn't like HEAD, our UA, or so + pass video_id = url.split('/')[-1] try: From 7fea7156cb41d4706059174f1fd00faa02278c8c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Aug 2013 04:32:22 +0200 Subject: [PATCH 08/21] [generic] support HTML5 video --- youtube_dl/extractor/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 1c468f8f6..da016f7ee 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -149,6 +149,9 @@ class GenericIE(InfoExtractor): # We only look in og:video if the MIME type is a video, don't try if it's a Flash player: if m_video_type is not None: mobj = re.search(r'.*? Date: Wed, 21 Aug 2013 04:33:57 +0200 Subject: [PATCH 09/21] release 2013.08.21 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 8c93a275c..58e26bc49 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.08.17' +__version__ = '2013.08.21' From 739674cd77d6a6c7025878701939d987fac5b446 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Aug 2013 05:24:58 +0200 Subject: [PATCH 10/21] [rtlnow] Add support for error message for queries from outside of Germany --- youtube_dl/extractor/rtlnow.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/rtlnow.py b/youtube_dl/extractor/rtlnow.py index d993a990a..2f134e6a7 100644 --- a/youtube_dl/extractor/rtlnow.py +++ b/youtube_dl/extractor/rtlnow.py @@ -2,7 +2,10 @@ import re from .common import InfoExtractor -from ..utils import ExtractorError +from ..utils import ( + clean_html, + ExtractorError, +) class RTLnowIE(InfoExtractor): """Information Extractor for RTLnow, RTL2now and VOXnow""" @@ -18,6 +21,7 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, + u'skip': u'Only works from Germany', }, { u'url': u'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5', @@ -31,6 +35,7 @@ class RTLnowIE(InfoExtractor): u'params': { u'skip_download': True, }, + u'skip': u'Only works from Germany', }, { u'url': u'www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17', @@ -53,6 +58,14 @@ class RTLnowIE(InfoExtractor): video_id = mobj.group(u'video_id') webpage = self._download_webpage(webpage_url, video_id) + + note_m = re.search(r'''(?sx) + (.*?) + ''', webpage) + if note_m: + msg = clean_html(note_m.group(1)) + raise ExtractorError(msg) + video_title = self._html_search_regex(r'(?P<title>[^<]+)', webpage, u'title') playerdata_url = self._html_search_regex(r'\'playerdata\': \'(?P[^\']+)\'', From 6c3e6e88d3aaaea64ca3d96c005da654c89c8a3a Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 21 Aug 2013 05:44:19 +0200 Subject: [PATCH 11/21] Allow hours in ETA display (Fixes #1280) --- youtube_dl/FileDownloader.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ea6b9d626..217c4a52f 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -79,9 +79,13 @@ class FileDownloader(object): rate = float(current) / dif eta = int((float(total) - float(current)) / rate) (eta_mins, eta_secs) = divmod(eta, 60) - if eta_mins > 99: - return '--:--' - return '%02d:%02d' % (eta_mins, eta_secs) + (eta_hours, eta_mins) = divmod(eta_mins, 60) + if eta_hours > 99: + return '--:--:--' + if eta_hours == 0: + return '%02d:%02d' % (eta_mins, eta_secs) + else: + return '%02d:%02d:%02d' % (eta_hours, eta_mins, eta_secs) @staticmethod def calc_speed(start, now, bytes): From a91b954bb4571b766f4bc01dfbe0be870a1b0a25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 13:48:19 +0200 Subject: [PATCH 12/21] [vimeo] extract information for Vimeo Pro videos from http://player.vimeo.com/video/{video_id} (fixes #1197) For some videos https://vimeo.com/{video_id} doesn't work --- youtube_dl/extractor/vimeo.py | 41 ++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index cc9c8d018..512e06e2a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -20,18 +20,31 @@ class VimeoIE(InfoExtractor): _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)(?:[?].*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' - _TEST = { - u'url': u'http://vimeo.com/56015672', - u'file': u'56015672.mp4', - u'md5': u'8879b6cc097e987f02484baf890129e5', - u'info_dict': { - u"upload_date": u"20121220", - u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", - u"uploader_id": u"user7108434", - u"uploader": u"Filippo Valsorda", - u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550" - } - } + _TESTS = [ + { + u'url': u'http://vimeo.com/56015672', + u'file': u'56015672.mp4', + u'md5': u'8879b6cc097e987f02484baf890129e5', + u'info_dict': { + u"upload_date": u"20121220", + u"description": u"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", + u"uploader_id": u"user7108434", + u"uploader": u"Filippo Valsorda", + u"title": u"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", + }, + }, + { + u'url': u'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', + u'file': u'68093876.mp4', + u'md5': u'3b5ca6aa22b60dfeeadf50b72e44ed82', + u'note': u'Vimeo Pro video (#1197)', + u'info_dict': { + u'uploader_id': u'openstreetmapus', + u'uploader': u'OpenStreetMap US', + u'title': u'Andy Allan - Putting the Carto into OpenStreetMap Cartography', + }, + }, + ] def _login(self): (username, password) = self._get_login_info() @@ -83,7 +96,9 @@ class VimeoIE(InfoExtractor): video_id = mobj.group('id') if not mobj.group('proto'): url = 'https://' + url - if mobj.group('direct_link') or mobj.group('pro'): + elif mobj.group('pro'): + url = 'http://player.vimeo.com/video/' + video_id + elif mobj.group('direct_link'): url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information From 668de34c6bbe48f574f23ad898fa904a7c1ad84b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 17:06:37 +0200 Subject: [PATCH 13/21] [soundcloud] Support widget urls (fixes #1252) --- youtube_dl/extractor/soundcloud.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 7c9f1c6b6..5f3a5540d 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -4,6 +4,7 @@ import re from .common import InfoExtractor from ..utils import ( compat_str, + compat_urlparse, ExtractorError, unified_strdate, @@ -22,6 +23,7 @@ class SoundcloudIE(InfoExtractor): _VALID_URL = r'''^(?:https?://)? (?:(?:(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)/?(?:[?].*)?$) |(?:api\.soundcloud\.com/tracks/(?P\d+)) + |(?Pw.soundcloud.com/player/?.*?url=.*) ) ''' IE_NAME = u'soundcloud' @@ -79,6 +81,9 @@ class SoundcloudIE(InfoExtractor): if track_id is not None: info_json_url = 'http://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID full_title = track_id + elif mobj.group('widget'): + query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + return self.url_result(query['url'][0], ie='Soundcloud') else: # extract uploader (which is in the url) uploader = mobj.group(1) From 75340ee3838580b9ac763db57b5a2b419a286718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 18:20:03 +0200 Subject: [PATCH 14/21] [vevo] Fix urls with a query (#1258) --- youtube_dl/extractor/vevo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/vevo.py b/youtube_dl/extractor/vevo.py index 14abd58e8..70408c4f0 100644 --- a/youtube_dl/extractor/vevo.py +++ b/youtube_dl/extractor/vevo.py @@ -11,14 +11,14 @@ class VevoIE(InfoExtractor): Accepts urls from vevo.com or in the format 'vevo:{id}' (currently used by MTVIE) """ - _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*)$' + _VALID_URL = r'((http://www.vevo.com/watch/.*?/.*?/)|(vevo:))(?P.*?)(\?|$)' _TEST = { u'url': u'http://www.vevo.com/watch/hurts/somebody-to-die-for/GB1101300280', u'file': u'GB1101300280.mp4', u'md5': u'06bea460acb744eab74a9d7dcb4bfd61', u'info_dict': { - u"upload_date": u"20130624", - u"uploader": u"Hurts", + u"upload_date": u"20130624", + u"uploader": u"Hurts", u"title": u"Somebody to Die For" } } From e0cfeb2ea7c2c9c597e974d13716425b0d4565c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 18:58:25 +0200 Subject: [PATCH 15/21] [funnyordie] fix extraction of video url and title --- youtube_dl/extractor/funnyordie.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/funnyordie.py b/youtube_dl/extractor/funnyordie.py index 67a7e5f76..4508f0dfa 100644 --- a/youtube_dl/extractor/funnyordie.py +++ b/youtube_dl/extractor/funnyordie.py @@ -21,17 +21,14 @@ class FunnyOrDieIE(InfoExtractor): video_id = mobj.group('id') webpage = self._download_webpage(url, video_id) - video_url = self._html_search_regex(r']*>\s*]*>\s*(?P.*?)</h1>", - r'<title>(?P<title>[^<]+?)'), webpage, 'title', flags=re.DOTALL) - info = { 'id': video_id, 'url': video_url, 'ext': 'mp4', - 'title': title, + 'title': self._og_search_title(webpage), 'description': self._og_search_description(webpage), } return [info] From 683e98a8a4adeda8339ba167baedd4a2b89dc026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 19:20:27 +0200 Subject: [PATCH 16/21] [statigram] change test video The old one cannot be accessed. --- youtube_dl/extractor/statigram.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/youtube_dl/extractor/statigram.py b/youtube_dl/extractor/statigram.py index b8e6b3bf9..1ea4a9f2f 100644 --- a/youtube_dl/extractor/statigram.py +++ b/youtube_dl/extractor/statigram.py @@ -5,13 +5,13 @@ from .common import InfoExtractor class StatigramIE(InfoExtractor): _VALID_URL = r'(?:http://)?(?:www\.)?statigr\.am/p/([^/]+)' _TEST = { - u'url': u'http://statigr.am/p/484091715184808010_284179915', - u'file': u'484091715184808010_284179915.mp4', - u'md5': u'deda4ff333abe2e118740321e992605b', + u'url': u'http://statigr.am/p/522207370455279102_24101272', + u'file': u'522207370455279102_24101272.mp4', + u'md5': u'6eb93b882a3ded7c378ee1d6884b1814', u'info_dict': { - u"uploader_id": u"videoseconds", - u"title": u"Instagram photo by @videoseconds" - } + u'uploader_id': u'aguynamedpatrick', + u'title': u'Instagram photo by @aguynamedpatrick (Patrick Janelle)', + }, } def _real_extract(self, url): From 45ed795cb0aada683963b74bd001a872edc6b06b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 19:25:54 +0200 Subject: [PATCH 17/21] [youtube] update uploader name for a test video: 'IconaPop' has changed to 'Icona Pop' --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 843a973ca..4c9bab459 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -255,7 +255,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): u"upload_date": u"20120506", u"title": u"Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]", u"description": u"md5:b085c9804f5ab69f4adea963a2dceb3c", - u"uploader": u"IconaPop", + u"uploader": u"Icona Pop", u"uploader_id": u"IconaPop" } }, From d81aef3adf76f67661264a773389baf8a458bf45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Wed, 21 Aug 2013 21:51:58 +0200 Subject: [PATCH 18/21] Add an extractor for tv.slashdot.org (closes #1192) It uses the ooyala platform, so it just extracts the ooyala url. --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/slashdot.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 youtube_dl/extractor/slashdot.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 5bb44e764..d836a22b5 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -58,6 +58,7 @@ from .ringtv import RingTVIE from .roxwel import RoxwelIE from .rtlnow import RTLnowIE from .sina import SinaIE +from .slashdot import SlashdotIE from .soundcloud import SoundcloudIE, SoundcloudSetIE from .spiegel import SpiegelIE from .stanfordoc import StanfordOpenClassroomIE diff --git a/youtube_dl/extractor/slashdot.py b/youtube_dl/extractor/slashdot.py new file mode 100644 index 000000000..2cba53076 --- /dev/null +++ b/youtube_dl/extractor/slashdot.py @@ -0,0 +1,23 @@ +import re + +from .common import InfoExtractor + + +class SlashdotIE(InfoExtractor): + _VALID_URL = r'https?://tv.slashdot.org/video/\?embed=(?P.*?)(&|$)' + + _TEST = { + u'url': u'http://tv.slashdot.org/video/?embed=JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz', + u'file': u'JscHMzZDplD0p-yNLOzTfzC3Q3xzJaUz.mp4', + u'md5': u'd2222e7a4a4c1541b3e0cf732fb26735', + u'info_dict': { + u'title': u' Meet the Stampede Supercomputing Cluster\'s Administrator', + }, + } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + webpage = self._download_webpage(url, video_id) + ooyala_url = self._search_regex(r'