From 5ecd7b0a922a07fb74bd32ac1b55cfd73181849c Mon Sep 17 00:00:00 2001 From: Sebastian Haas Date: Sun, 3 Aug 2014 20:47:56 +0200 Subject: [PATCH 01/16] [fm4] Add new extractor --- youtube_dl/extractor/__init__.py | 1 + youtube_dl/extractor/fm4.py | 49 ++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 youtube_dl/extractor/fm4.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 66c873789..e5ce08bc1 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -96,6 +96,7 @@ from .fktv import ( FKTVPosteckeIE, ) from .flickr import FlickrIE +from .fm4 import FM4IE from .fourtube import FourTubeIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE diff --git a/youtube_dl/extractor/fm4.py b/youtube_dl/extractor/fm4.py new file mode 100644 index 000000000..4eb63ffa9 --- /dev/null +++ b/youtube_dl/extractor/fm4.py @@ -0,0 +1,49 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import calendar +import datetime +import re + +from .common import InfoExtractor + +# audios on fm4.orf.at are only available for 7 days, so we can't +# add tests. + + +class FM4IE(InfoExtractor): + IE_DESC = 'fm4.orf.at' + _VALID_URL = r'http://fm4\.orf\.at/7tage#(?P[0-9]+)/(?P[\w]+)' + + def _extract_entry_dict(self, info, title, subtitle): + result = { + 'id': info['loopStreamId'].replace('.mp3', ''), + 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], + 'title': title, + 'description': subtitle, + 'duration': (info['end'] - info['start']) / 1000, + 'timestamp': info['start'] / 1000, + 'ext': 'mp3' + } + + return result + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_date = mobj.group('date') + show_id = mobj.group('show') + + data = self._download_json( + 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), + show_id + ) + + entries = [ self._extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] + + return { + '_type': 'playlist', + 'id': show_id, + 'title': data['title'], + 'description': data['subtitle'], + 'entries': entries + } From fb17b60811ea89fb857ab03a997d193898046466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sat, 9 Aug 2014 05:45:15 +0700 Subject: [PATCH 02/16] [arte] Do not filter formats when there are no videos of requested lang code (Closes #3433) --- youtube_dl/extractor/arte.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/youtube_dl/extractor/arte.py b/youtube_dl/extractor/arte.py index 9591bad8a..d86dbba8e 100644 --- a/youtube_dl/extractor/arte.py +++ b/youtube_dl/extractor/arte.py @@ -109,15 +109,19 @@ class ArteTVPlus7IE(InfoExtractor): regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l] return any(re.match(r, f['versionCode']) for r in regexes) # Some formats may not be in the same language as the url + # TODO: Might want not to drop videos that does not match requested language + # but to process those formats with lower precedence formats = filter(_match_lang, all_formats) - formats = list(formats) # in python3 filter returns an iterator + formats = list(formats) # in python3 filter returns an iterator if not formats: # Some videos are only available in the 'Originalversion' # they aren't tagged as being in French or German - if all(f['versionCode'] == 'VO' or f['versionCode'] == 'VA' for f in all_formats): - formats = all_formats - else: - raise ExtractorError(u'The formats list is empty') + # Sometimes there are neither videos of requested lang code + # nor original version videos available + # For such cases we just take all_formats as is + formats = all_formats + if not formats: + raise ExtractorError('The formats list is empty') if re.match(r'[A-Z]Q', formats[0]['quality']) is not None: def sort_key(f): From f5273890eeea6604c23367d91007fb8119768c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Aug 2014 01:04:10 +0700 Subject: [PATCH 03/16] [fm4] Remove unused imports and minor changes --- youtube_dl/extractor/fm4.py | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/fm4.py b/youtube_dl/extractor/fm4.py index 4eb63ffa9..c1e60774b 100644 --- a/youtube_dl/extractor/fm4.py +++ b/youtube_dl/extractor/fm4.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime import re from .common import InfoExtractor @@ -13,20 +11,7 @@ from .common import InfoExtractor class FM4IE(InfoExtractor): IE_DESC = 'fm4.orf.at' - _VALID_URL = r'http://fm4\.orf\.at/7tage#(?P[0-9]+)/(?P[\w]+)' - - def _extract_entry_dict(self, info, title, subtitle): - result = { - 'id': info['loopStreamId'].replace('.mp3', ''), - 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], - 'title': title, - 'description': subtitle, - 'duration': (info['end'] - info['start']) / 1000, - 'timestamp': info['start'] / 1000, - 'ext': 'mp3' - } - - return result + _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P[0-9]+)/(?P\w+)' def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -38,7 +23,18 @@ class FM4IE(InfoExtractor): show_id ) - entries = [ self._extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] + def extract_entry_dict(info, title, subtitle): + return { + 'id': info['loopStreamId'].replace('.mp3', ''), + 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], + 'title': title, + 'description': subtitle, + 'duration': (info['end'] - info['start']) / 1000, + 'timestamp': info['start'] / 1000, + 'ext': 'mp3' + } + + entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] return { '_type': 'playlist', From eb3680123a2f451e79e953ede5dd70fb7fb4c0ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Aug 2014 01:21:16 +0700 Subject: [PATCH 04/16] [orf] Move all ORF extractors in one place --- youtube_dl/extractor/__init__.py | 8 ++-- youtube_dl/extractor/fm4.py | 45 ------------------- youtube_dl/extractor/oe1.py | 40 ----------------- youtube_dl/extractor/orf.py | 76 +++++++++++++++++++++++++++++++- 4 files changed, 80 insertions(+), 89 deletions(-) delete mode 100644 youtube_dl/extractor/fm4.py delete mode 100644 youtube_dl/extractor/oe1.py diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 2401940c3..2ea2c73d2 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -96,7 +96,6 @@ from .fktv import ( FKTVPosteckeIE, ) from .flickr import FlickrIE -from .fm4 import FM4IE from .fourtube import FourTubeIE from .franceculture import FranceCultureIE from .franceinter import FranceInterIE @@ -226,9 +225,12 @@ from .nrk import ( from .ntv import NTVIE from .nytimes import NYTimesIE from .nuvid import NuvidIE -from .oe1 import OE1IE from .ooyala import OoyalaIE -from .orf import ORFIE +from .orf import ( + ORFTVthekIE, + ORFOE1IE, + ORFFM4IE, +) from .parliamentliveuk import ParliamentLiveUKIE from .pbs import PBSIE from .photobucket import PhotobucketIE diff --git a/youtube_dl/extractor/fm4.py b/youtube_dl/extractor/fm4.py deleted file mode 100644 index c1e60774b..000000000 --- a/youtube_dl/extractor/fm4.py +++ /dev/null @@ -1,45 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import re - -from .common import InfoExtractor - -# audios on fm4.orf.at are only available for 7 days, so we can't -# add tests. - - -class FM4IE(InfoExtractor): - IE_DESC = 'fm4.orf.at' - _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P[0-9]+)/(?P\w+)' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_date = mobj.group('date') - show_id = mobj.group('show') - - data = self._download_json( - 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), - show_id - ) - - def extract_entry_dict(info, title, subtitle): - return { - 'id': info['loopStreamId'].replace('.mp3', ''), - 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], - 'title': title, - 'description': subtitle, - 'duration': (info['end'] - info['start']) / 1000, - 'timestamp': info['start'] / 1000, - 'ext': 'mp3' - } - - entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] - - return { - '_type': 'playlist', - 'id': show_id, - 'title': data['title'], - 'description': data['subtitle'], - 'entries': entries - } diff --git a/youtube_dl/extractor/oe1.py b/youtube_dl/extractor/oe1.py deleted file mode 100644 index 38971ab4d..000000000 --- a/youtube_dl/extractor/oe1.py +++ /dev/null @@ -1,40 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -import calendar -import datetime -import re - -from .common import InfoExtractor - -# audios on oe1.orf.at are only available for 7 days, so we can't -# add tests. - - -class OE1IE(InfoExtractor): - IE_DESC = 'oe1.orf.at' - _VALID_URL = r'http://oe1\.orf\.at/programm/(?P[0-9]+)' - - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - show_id = mobj.group('id') - - data = self._download_json( - 'http://oe1.orf.at/programm/%s/konsole' % show_id, - show_id - ) - - timestamp = datetime.datetime.strptime('%s %s' % ( - data['item']['day_label'], - data['item']['time'] - ), '%d.%m.%Y %H:%M') - unix_timestamp = calendar.timegm(timestamp.utctimetuple()) - - return { - 'id': show_id, - 'title': data['item']['title'], - 'url': data['item']['url_stream'], - 'ext': 'mp3', - 'description': data['item'].get('info'), - 'timestamp': unix_timestamp - } diff --git a/youtube_dl/extractor/orf.py b/youtube_dl/extractor/orf.py index 03421d1d5..011e6be13 100644 --- a/youtube_dl/extractor/orf.py +++ b/youtube_dl/extractor/orf.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals import json import re +import calendar +import datetime from .common import InfoExtractor from ..utils import ( @@ -12,7 +14,9 @@ from ..utils import ( ) -class ORFIE(InfoExtractor): +class ORFTVthekIE(InfoExtractor): + IE_NAME = 'orf:tvthek' + IE_DESC = 'ORF TVthek' _VALID_URL = r'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P\d+)' _TEST = { @@ -105,3 +109,73 @@ class ORFIE(InfoExtractor): 'entries': entries, 'id': playlist_id, } + + +# Audios on ORF radio are only available for 7 days, so we can't add tests. + + +class ORFOE1IE(InfoExtractor): + IE_NAME = 'orf:oe1' + IE_DESC = 'Radio Österreich 1' + _VALID_URL = r'http://oe1\.orf\.at/programm/(?P[0-9]+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_id = mobj.group('id') + + data = self._download_json( + 'http://oe1.orf.at/programm/%s/konsole' % show_id, + show_id + ) + + timestamp = datetime.datetime.strptime('%s %s' % ( + data['item']['day_label'], + data['item']['time'] + ), '%d.%m.%Y %H:%M') + unix_timestamp = calendar.timegm(timestamp.utctimetuple()) + + return { + 'id': show_id, + 'title': data['item']['title'], + 'url': data['item']['url_stream'], + 'ext': 'mp3', + 'description': data['item'].get('info'), + 'timestamp': unix_timestamp + } + + +class ORFFM4IE(InfoExtractor): + IE_DESC = 'orf:fm4' + IE_DESC = 'radio FM4' + _VALID_URL = r'http://fm4\.orf\.at/7tage/?#(?P[0-9]+)/(?P\w+)' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + show_date = mobj.group('date') + show_id = mobj.group('show') + + data = self._download_json( + 'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date, show_id), + show_id + ) + + def extract_entry_dict(info, title, subtitle): + return { + 'id': info['loopStreamId'].replace('.mp3', ''), + 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info['loopStreamId'], + 'title': title, + 'description': subtitle, + 'duration': (info['end'] - info['start']) / 1000, + 'timestamp': info['start'] / 1000, + 'ext': 'mp3' + } + + entries = [extract_entry_dict(t, data['title'], data['subtitle']) for t in data['streams']] + + return { + '_type': 'playlist', + 'id': show_id, + 'title': data['title'], + 'description': data['subtitle'], + 'entries': entries + } \ No newline at end of file From 56ca04f662d2c34713d85b0f0dc576e7c51275a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Aug 2014 01:26:23 +0700 Subject: [PATCH 05/16] Credit @sehaas for ORF FM4 extractor (#3431) --- youtube_dl/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 9b41587e7..962aedbff 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -68,6 +68,7 @@ __authors__ = ( 'Hassaan Ali', 'Dobrosław Żybort', 'David Fabijan', + 'Sebastian Haas', ) __license__ = 'Public Domain' From c767dc74b8bdfdc75bba14d11b460a95f85ed08d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Aug 2014 01:41:01 +0700 Subject: [PATCH 06/16] [downloader/common] Fix typo --- youtube_dl/downloader/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 917f3450e..9ce97f5fe 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -292,7 +292,7 @@ class FileDownloader(object): def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" - raise NotImplementedError(u'This method must be implemented by sublcasses') + raise NotImplementedError(u'This method must be implemented by subclasses') def _hook_progress(self, status): for ph in self._progress_hooks: From 4dc5286e1341adcd8c6b9876b06f8efbe6b9d89e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Aug 2014 10:45:27 +0200 Subject: [PATCH 07/16] [reverbnation] Make sure that the thumbnail url contain the protocol They are protocol relative. --- youtube_dl/extractor/reverbnation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 49cf427a1..6435beb5c 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -17,7 +17,7 @@ class ReverbNationIE(InfoExtractor): "title": "MONA LISA", "uploader": "ALKILADOS", "uploader_id": 216429, - "thumbnail": "//gp1.wac.edgecastcdn.net/802892/production_public/Photo/13761700/image/1366002176_AVATAR_MONA_LISA.jpg" + "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$" }, }] @@ -39,7 +39,8 @@ class ReverbNationIE(InfoExtractor): 'url': api_res.get('url'), 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': api_res.get('artist', {}).get('id'), - 'thumbnail': api_res.get('image', api_res.get('thumbnail')), + 'thumbnail': self._proto_relative_url( + api_res.get('image', api_res.get('thumbnail'))), 'ext': 'mp3', 'vcodec': 'none', } From 85a699246aa7554e2a88d5a93793dccb9a0b8d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Aug 2014 10:56:37 +0200 Subject: [PATCH 08/16] [reverbnation] Modernize test --- youtube_dl/extractor/reverbnation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index 6435beb5c..c77849336 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -11,9 +11,10 @@ class ReverbNationIE(InfoExtractor): _VALID_URL = r'^https?://(?:www\.)?reverbnation\.com/.*?/song/(?P\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', - 'file': '16965047.mp3', 'md5': '3da12ebca28c67c111a7f8b262d3f7a7', 'info_dict': { + "id": "16965047", + "ext": "mp3", "title": "MONA LISA", "uploader": "ALKILADOS", "uploader_id": 216429, From 511c4325dc8e3a60d81d3c23e8eb330b3a706883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Aug 2014 10:58:22 +0200 Subject: [PATCH 09/16] [reverbnation] Simplify json download We can directly get a json file instead of the jsonp. --- youtube_dl/extractor/reverbnation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index c77849336..b93adfca5 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -1,7 +1,6 @@ from __future__ import unicode_literals import re -import time from .common import InfoExtractor from ..utils import strip_jsonp @@ -27,10 +26,8 @@ class ReverbNationIE(InfoExtractor): song_id = mobj.group('id') api_res = self._download_json( - 'https://api.reverbnation.com/song/%s?callback=api_response_5&_=%d' - % (song_id, int(time.time() * 1000)), + 'https://api.reverbnation.com/song/%s' % song_id, song_id, - transform_source=strip_jsonp, note='Downloading information of song %s' % song_id ) From 40a90862f49f89bdfcd6feea5340538efc2b6a5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Aug 2014 11:00:14 +0200 Subject: [PATCH 10/16] [reverbnation] The 'uploader_id' field must be a string --- youtube_dl/extractor/reverbnation.py | 6 +++--- youtube_dl/utils.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/reverbnation.py b/youtube_dl/extractor/reverbnation.py index b93adfca5..ec7e7df7b 100644 --- a/youtube_dl/extractor/reverbnation.py +++ b/youtube_dl/extractor/reverbnation.py @@ -3,7 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import strip_jsonp +from ..utils import str_or_none class ReverbNationIE(InfoExtractor): @@ -16,7 +16,7 @@ class ReverbNationIE(InfoExtractor): "ext": "mp3", "title": "MONA LISA", "uploader": "ALKILADOS", - "uploader_id": 216429, + "uploader_id": "216429", "thumbnail": "re:^https://gp1\.wac\.edgecastcdn\.net/.*?\.jpg$" }, }] @@ -36,7 +36,7 @@ class ReverbNationIE(InfoExtractor): 'title': api_res.get('name'), 'url': api_res.get('url'), 'uploader': api_res.get('artist', {}).get('name'), - 'uploader_id': api_res.get('artist', {}).get('id'), + 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 'thumbnail': self._proto_relative_url( api_res.get('image', api_res.get('thumbnail'))), 'ext': 'mp3', diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index e40b367c2..9b94407f4 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1275,6 +1275,9 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): v = getattr(v, get_attr, None) return default if v is None else (int(v) * invscale // scale) +def str_or_none(v, default=None): + return default if v is None else compat_str(v) + def str_to_int(int_str): if int_str is None: From 173a7026d59bacfbfe7a8eea92e10ef6e89d1798 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Aug 2014 11:08:56 +0200 Subject: [PATCH 11/16] [test/test_utils] Fix typo in method name --- test/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_utils.py b/test/test_utils.py index 51eb0b6b9..e26cc5b0c 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -280,7 +280,7 @@ class TestUtil(unittest.TestCase): d = json.loads(stripped) self.assertEqual(d, [{"id": "532cb", "x": 3}]) - def test_uppercase_escpae(self): + def test_uppercase_escape(self): self.assertEqual(uppercase_escape(u'aä'), u'aä') self.assertEqual(uppercase_escape(u'\\U0001d550'), u'𝕐') From a6da7b6b9657f621a927cb4c7bc46cf7c6c27b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Sun, 10 Aug 2014 11:55:24 +0200 Subject: [PATCH 12/16] [facebook] Allow '?' before '#!' (fixes #3477) --- test/test_all_urls.py | 1 + youtube_dl/extractor/facebook.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 0ff47cf1e..b1ad30bf1 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -99,6 +99,7 @@ class TestAllURLsMatching(unittest.TestCase): def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) + self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) def test_no_duplicates(self): ies = gen_extractors() diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index f0cd8f156..f7cf700b5 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -20,7 +20,7 @@ from ..utils import ( class FacebookIE(InfoExtractor): _VALID_URL = r'''(?x) https?://(?:\w+\.)?facebook\.com/ - (?:[^#?]*\#!/)? + (?:[^#]*?\#!/)? (?:video/video\.php|photo\.php|video/embed)\?(?:.*?) (?:v|video_id)=(?P[0-9]+) (?:.*)''' From 3a5beb0ca149cd0b1df5b29984033e20ccb85a61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 10 Aug 2014 17:53:17 +0700 Subject: [PATCH 13/16] [ard] Show error message for videos that are no longer available (#3422) --- youtube_dl/extractor/ard.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 957bdefcb..7f0da8ab6 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -51,6 +51,9 @@ class ARDIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + if '>Der gewünschte Beitrag ist nicht mehr verfügbar.<' in webpage: + raise ExtractorError('Video %s is no longer available' % video_id, expected=True) + title = self._html_search_regex( [r'(.*?)', r'', From 9572013de9b994e2c20e972f13ce0a54cbd3f886 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Aug 2014 13:04:45 +0200 Subject: [PATCH 14/16] [appletrailers] Support height-less videos --- youtube_dl/extractor/appletrailers.py | 5 +++-- youtube_dl/utils.py | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index dc8657b67..4359b88d1 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -6,6 +6,7 @@ import json from .common import InfoExtractor from ..utils import ( compat_urlparse, + int_or_none, ) @@ -110,8 +111,8 @@ class AppleTrailersIE(InfoExtractor): formats.append({ 'url': format_url, 'format': format['type'], - 'width': format['width'], - 'height': int(format['height']), + 'width': int_or_none(format['width']), + 'height': int_or_none(format['height']), }) self._sort_formats(formats) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 9b94407f4..65b492fb3 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -1273,8 +1273,11 @@ def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): if get_attr: if v is not None: v = getattr(v, get_attr, None) + if v == '': + v = None return default if v is None else (int(v) * invscale // scale) + def str_or_none(v, default=None): return default if v is None else compat_str(v) From 90e075da3a15052c628c06d4d13ff66e88ff7765 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sun, 10 Aug 2014 19:47:15 +0200 Subject: [PATCH 15/16] release 2014.08.10 --- youtube_dl/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 08b5339f6..2ef0d59e3 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.08.05' +__version__ = '2014.08.10' From 6f600ff5d6bda54d8128e6263a468a08edfc3353 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Tue, 12 Aug 2014 20:54:08 +0700 Subject: [PATCH 16/16] [ooyala] Try mobile player JS URLs for all available devices (Closes #3498) Looks like some videos are only available for particular devices (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 is only available for ipad) Working around with fetching URLs for all the devices found starting with 'unknown' until we succeed or eventually fail for each device. --- youtube_dl/extractor/ooyala.py | 66 +++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/youtube_dl/extractor/ooyala.py b/youtube_dl/extractor/ooyala.py index 13f12824c..2044e107e 100644 --- a/youtube_dl/extractor/ooyala.py +++ b/youtube_dl/extractor/ooyala.py @@ -3,23 +3,38 @@ import re import json from .common import InfoExtractor -from ..utils import unescapeHTML +from ..utils import ( + unescapeHTML, + ExtractorError, +) class OoyalaIE(InfoExtractor): _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P.+?)(&|$)' - _TEST = { - # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video - 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', - 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', - 'info_dict': { - 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', - 'ext': 'mp4', - 'title': 'Explaining Data Recovery from Hard Drives and SSDs', - 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + _TESTS = [ + { + # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video + 'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', + 'md5': '3f5cceb3a7bf461d6c29dc466cf8033c', + 'info_dict': { + 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8', + 'ext': 'mp4', + 'title': 'Explaining Data Recovery from Hard Drives and SSDs', + 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.', + }, + }, { + # Only available for ipad + 'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', + 'md5': '4b9754921fddb68106e48c142e2a01e6', + 'info_dict': { + 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0', + 'ext': 'mp4', + 'title': 'Simulation Overview - Levels of Simulation', + 'description': '', + }, }, - } + ] @staticmethod def _url_for_embed_code(embed_code): @@ -47,13 +62,30 @@ class OoyalaIE(InfoExtractor): player = self._download_webpage(player_url, embedCode) mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="', player, 'mobile player url') - mobile_player = self._download_webpage(mobile_url, embedCode) - videos_info = self._search_regex( - r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', - mobile_player, 'info').replace('\\"','"') - videos_more_info = self._search_regex(r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"','"') + # Looks like some videos are only available for particular devices + # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0 + # is only available for ipad) + # Working around with fetching URLs for all the devices found starting with 'unknown' + # until we succeed or eventually fail for each device. + devices = re.findall(r'device\s*=\s*"([^"]+)";', player) + devices.remove('unknown') + devices.insert(0, 'unknown') + for device in devices: + mobile_player = self._download_webpage( + '%s&device=%s' % (mobile_url, device), embedCode, + 'Downloading mobile player JS for %s device' % device) + videos_info = self._search_regex( + r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);', + mobile_player, 'info', fatal=False, default=None) + if videos_info: + break + if not videos_info: + raise ExtractorError('Unable to extract info') + videos_info = videos_info.replace('\\"', '"') + videos_more_info = self._search_regex( + r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"') videos_info = json.loads(videos_info) - videos_more_info =json.loads(videos_more_info) + videos_more_info = json.loads(videos_more_info) if videos_more_info.get('lineup'): videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]