From 475f8a458099c64d367356471069bd0ff2bd1b0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Thu, 29 Sep 2016 23:21:39 +0700 Subject: [PATCH] [vk] Add support for running live streams (Closes #10799) --- youtube_dl/extractor/vk.py | 47 ++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 1d089c9d7..9f7a593ef 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -253,6 +253,12 @@ class VKIE(VKBaseIE): 'duration': 5239, }, }, + { + # live stream, hls and rtmp links, most likely already finished live + # stream by the time you are reading this comment + 'url': 'https://vk.com/video-140332_456239111', + 'only_matching': True, + }, { # removed video, just testing that we match the pattern 'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', @@ -361,6 +367,11 @@ class VKIE(VKBaseIE): data_json = self._search_regex(r'var\s+vars\s*=\s*({.+?});', info_page, 'vars') data = json.loads(data_json) + title = unescapeHTML(data['md_title']) + + if data.get('live') == 2: + title = self._live_title(title) + # Extract upload date upload_date = None mobj = re.search(r'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page) @@ -377,25 +388,33 @@ class VKIE(VKBaseIE): r'([\d,.]+)', views, 'view count', fatal=False)) formats = [] - for k, v in data.items(): - if (not k.startswith('url') and not k.startswith('cache') - and k not in ('extra_data', 'live_mp4')): - continue - if not isinstance(v, compat_str) or not v.startswith('http'): + for format_id, format_url in data.items(): + if not isinstance(format_url, compat_str) or not format_url.startswith(('http', '//', 'rtmp')): continue - height = int_or_none(self._search_regex( - r'^(?:url|cache)(\d+)', k, 'height', default=None)) - formats.append({ - 'format_id': k, - 'url': v, - 'height': height, - }) + if format_id.startswith(('url', 
'cache')) or format_id in ('extra_data', 'live_mp4'): + height = int_or_none(self._search_regex( + r'^(?:url|cache)(\d+)', format_id, 'height', default=None)) + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'height': height, + }) + elif format_id == 'hls': + formats.extend(self._extract_m3u8_formats( + format_url, video_id, 'mp4', m3u8_id=format_id, + fatal=False, live=True)) + elif format_id == 'rtmp': + formats.append({ + 'format_id': format_id, + 'url': format_url, + 'ext': 'flv', + }) self._sort_formats(formats) return { - 'id': compat_str(data['vid']), + 'id': compat_str(data.get('vid') or video_id), 'formats': formats, - 'title': unescapeHTML(data['md_title']), + 'title': title, 'thumbnail': data.get('jpg'), 'uploader': data.get('md_author'), 'duration': data.get('duration'),