from __future__ import unicode_literals import re from .common import InfoExtractor from ..utils import ( int_or_none, js_to_json, mimetype2ext, ExtractorError, ) class ImgurIE(InfoExtractor): _VALID_URL = r'https?://(?:i\.)?imgur\.com/(?P[a-zA-Z0-9]+)(?:\.)?(?:mp4|gifv)?' _TESTS = [{ 'url': 'https://i.imgur.com/A61SaA1.gifv', 'info_dict': { 'id': 'A61SaA1', 'ext': 'mp4', 'title': 'MRW gifv is up and running without any bugs', 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', }, }, { 'url': 'https://imgur.com/A61SaA1', 'info_dict': { 'id': 'A61SaA1', 'ext': 'mp4', 'title': 'MRW gifv is up and running without any bugs', 'description': 'The Internet\'s visual storytelling community. Explore, share, and discuss the best visual stories the Internet has to offer.', }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) sources = re.findall(r'(.*?)', webpage, 'video elements') formats = [] for m in re.finditer(r'[^"]+)"\s+type="(?P[^"]+)"', video_elements): formats.append({ 'format_id': m.group('type').partition('/')[2], 'url': self._proto_relative_url(m.group('src')), 'ext': mimetype2ext(m.group('type')), 'acodec': 'none', 'width': width, 'height': height, 'http_headers': { 'User-Agent': 'youtube-dl (like wget)', }, }) gif_json = self._search_regex( r'(?s)var\s+videoItem\s*=\s*(\{.*?\})', webpage, 'GIF code', fatal=False) if gif_json: gifd = self._parse_json( gif_json, video_id, transform_source=js_to_json) formats.append({ 'format_id': 'gif', 'preference': -10, 'width': width, 'height': height, 'ext': 'gif', 'acodec': 'none', 'vcodec': 'gif', 'container': 'gif', 'url': self._proto_relative_url(gifd['gifUrl']), 'filesize': gifd.get('size'), 'http_headers': { 'User-Agent': 'youtube-dl (like wget)', }, }) self._sort_formats(formats) return { 'id': video_id, 'formats': formats, 'description': self._og_search_description(webpage), 'title': self._og_search_title(webpage), }