[tv2hu] improve extraction

pull/8/head
Remita Amine 8 years ago
parent 3ef1d0c733
commit e4d74e2778

@ -1031,7 +1031,7 @@ from .tv2 import (
TV2IE, TV2IE,
TV2ArticleIE, TV2ArticleIE,
) )
from .tv2hu import TV2HUIE from .tv2hu import TV2HuIE
from .tv3 import TV3IE from .tv3 import TV3IE
from .tv4 import TV4IE from .tv4 import TV4IE
from .tv5mondeplus import TV5MondePlusIE from .tv5mondeplus import TV5MondePlusIE

@ -1,29 +1,22 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import int_or_none
class TV2HUIE(InfoExtractor):
IE_NAME = 'tv2.hu'
_VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:musoraink/)?(?P<uploader>[^/]+)/(?:teljes_adasok/)?(?P<id>[0-9]+)_(.+?)\.html'
_JSON_URL = r'(?P<json_url>https?://.+?\.tv2\.hu/vod/(?P<upload_date>\d+)/id_(?P<upload_id>\d+).+?&type=json)'
class TV2HuIE(InfoExtractor):
IE_NAME = 'tv2.hu'
_VALID_URL = r'https?://(?:www\.)?tv2\.hu/(?:[^/]+/)+(?P<id>\d+)_[^/?#]+?\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html', 'url': 'http://tv2.hu/ezek_megorultek/217679_ezek-megorultek---1.-adas-1.-resz.html',
'md5': '585e58e2e090f34603804bb2c48e98d8',
'info_dict': { 'info_dict': {
'id': '217679', 'id': '217679',
'ext': 'mp4', 'ext': 'mp4',
'title': 'Ezek megőrültek! - 1. adás 1. rész', 'title': 'Ezek megőrültek! - 1. adás 1. rész',
'upload_id': '220289',
'upload_date': '20160826', 'upload_date': '20160826',
'uploader': 'ezek_megorultek',
'thumbnail': 're:^https?://.*\.jpg$' 'thumbnail': 're:^https?://.*\.jpg$'
},
'params': {
# m3u8 download
'skip_download': True,
} }
}, { }, {
'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html', 'url': 'http://tv2.hu/ezek_megorultek/teljes_adasok/217677_ezek-megorultek---1.-adas-2.-resz.html',
@ -35,44 +28,35 @@ class TV2HUIE(InfoExtractor):
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage( webpage = self._download_webpage(url, video_id)
url, video_id, 'Downloading info page') json_url = self._search_regex(
r'jsonUrl\s*=\s*"([^"]+)"', webpage, 'json url')
json_url = re.search(self._JSON_URL, webpage) json_data = self._download_json(json_url, video_id)
json_data = self._download_json( formats = []
json_url.group('json_url'), video_id, 'Downloading video info') for b in ('bitrates', 'backupBitrates'):
bitrates = json_data.get(b, {})
manifest_url = json_data['bitrates']['hls'] m3u8_url = bitrates.get('hls')
if m3u8_url:
formats = self._extract_m3u8_formats( formats.extend(self._extract_wowza_formats(
manifest_url, video_id, 'mp4', entry_protocol='m3u8_native') m3u8_url, video_id, skip_protocols=['rtmp', 'rtsp']))
for i in range(len(json_data['bitrates']['mp4'])): for mp4_url in bitrates.get('mp4', []):
quality = json_data.get('mp4Labels')[i] height = int_or_none(self._search_regex(
r'\.(\d+)p\.mp4', mp4_url, 'height', default=None))
if quality.lower() == 'auto': formats.append({
continue 'format_id': 'http' + ('-%d' % height if height else ''),
'url': mp4_url,
formats.append({ 'height': height,
'protocol': 'http', 'width': int_or_none(height / 9.0 * 16.0 if height else None),
'url': json_data['bitrates']['mp4'][i], })
'height': int(quality[:-1]),
'width': int(quality[:-1])/9*16,
'ext': 'mp4',
'format_id': quality,
'format_note': 'HTTP',
'preference': int(quality[:-1])
})
self._sort_formats(formats) self._sort_formats(formats)
return { return {
'id': video_id, 'id': video_id,
'title': self._og_search_title(webpage).strip(), 'title': self._og_search_title(webpage).strip(),
'thumbnail': self._og_search_property('image', webpage), 'thumbnail': self._og_search_thumbnail(webpage),
'uploader': self._search_regex(self._VALID_URL, url, 'uploader'), 'upload_date': self._search_regex(
'upload_id': json_url.group('upload_id'), r'/vod/(\d{8})/', json_url, 'upload_date', default=None),
'upload_date': json_url.group('upload_date'), 'formats': formats,
'formats': formats
} }

Loading…
Cancel
Save