From 5c610515c90d090b66aa3d86be86fb06dff8457f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 20 Jan 2021 00:35:50 +0530 Subject: [PATCH] [TrovoLive] Add extractor (partially fix #20) Only VOD extractor has been implemented Related: https://github.com/ytdl-org/youtube-dl/issues/26125 Related: https://github.com/blackjack4494/yt-dlc/issues/220 --- youtube_dlc/extractor/extractors.py | 1 + youtube_dlc/extractor/trovolive.py | 111 ++++++++++++++++++++++++++++ youtube_dlc/utils.py | 6 +- 3 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 youtube_dlc/extractor/trovolive.py diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index 8b322466b..ecb35dd1c 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1265,6 +1265,7 @@ from .toutv import TouTvIE from .toypics import ToypicsUserIE, ToypicsIE from .traileraddict import TrailerAddictIE from .trilulilu import TriluliluIE +from .trovolive import TrovoLiveIE from .trunews import TruNewsIE from .trutv import TruTVIE from .tube8 import Tube8IE diff --git a/youtube_dlc/extractor/trovolive.py b/youtube_dlc/extractor/trovolive.py new file mode 100644 index 000000000..8ad3ebeca --- /dev/null +++ b/youtube_dlc/extractor/trovolive.py @@ -0,0 +1,111 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor + +from ..utils import ( + js_to_json, + try_get, + int_or_none, + str_or_none, + url_or_none, +) +from ..compat import compat_str + + +class TrovoLiveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?trovo\.live/video/(?P[\w-]+)' + _TEST = { + 'url': 'https://trovo.live/video/ltv-100759829_100759829_1610625308', + 'md5': 'ea7b58427910e9af66a462d895201a30', + 'info_dict': { + 'id': 'ltv-100759829_100759829_1610625308', + 'ext': 'ts', + 'title': 'GTA RP ASTERIX doa najjaca', + 'uploader': 'Peroo42', + 'duration': 5872, + 'view_count': int, + 'like_count': int, + 'comment_count': int, + 'categories': list, + 'is_live': False, + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader_id': '100759829', + } + } + + def _real_extract(self, url): + video_id = self._match_id(url) + webpage = self._download_webpage(url, video_id) + nuxt = self._search_regex(r'\bwindow\.__NUXT__\s*=\s*(.+?);?\s*', webpage, 'nuxt', default='') + mobj = re.search(r'\((?P[^(]+)\)\s*{\s*return\s+(?P{.+})\s*\((?P.+?)\)\s*\)$', nuxt) + + vod_details = vod_info = {} + if mobj: + vod_details = self._parse_json( + js_to_json( + self._search_regex(r'VodDetailInfos\s*:({.+?}),\s*_', webpage, 'VodDetailInfos'), + dict(zip( + (i.strip() for i in mobj.group('arg_names').split(',')), + (i.strip() for i in mobj.group('args').split(','))))), + video_id) + vod_info = try_get(vod_details, lambda x: x['json'][video_id]['vodInfo'], dict) or {} + + player_info = self._parse_json( + self._search_regex( + r'_playerInfo\s*=\s*({.+?})\s*', webpage, 'player info'), + video_id) + + title = ( + vod_info.get('title') + or self._html_search_regex(r'

(.+?)

', webpage, 'title', fatal=False) + or self._og_search_title(webpage)) + uploader = ( + try_get(vod_details, lambda x: x['json'][video_id]['streamerInfo']['userName'], compat_str) + or self._search_regex(r']+userName\s=\s[\'"](.+?)[\'"]', webpage, 'uploader', fatal=False)) + + format_dicts = vod_info.get('playInfos') or player_info.get('urlArray') or [] + + def _extract_format_data(format_dict): + res = format_dict.get('desc') + enc = str_or_none(format_dict.get('encodeType')) + if enc: + notes = [enc.replace('VOD_ENCODE_TYPE_', '')] + level = str_or_none(format_dict.get('levelType')) + if level: + notes.append('level %s' % level) + height = int_or_none(res[:-1]) if res else None + bitrate = format_dict.get('bitrate') + fid = res or ('%sk' % str_or_none(bitrate) if bitrate else None) or ' '.join(notes) + + return { + 'url': format_dict['playUrl'], + 'format_id': fid, + 'format_note': ' '.join(notes), + 'height': height, + 'resolution': str_or_none(res), + 'tbr': int_or_none(bitrate), + 'filesize': int_or_none(format_dict.get('fileSize')), + 'vcodec': 'avc3', + 'acodec': 'aac', + 'ext': 'ts' + } + + formats = [_extract_format_data(f) for f in format_dicts] + self._sort_formats(formats) + return { + 'id': video_id, + 'title': title, + 'uploader': uploader, + 'duration': int_or_none(vod_info.get('duration')), + 'formats': formats, + 'view_count': int_or_none(vod_info.get('watchNum')), + 'like_count': int_or_none(vod_info.get('likeNum')), + 'comment_count': int_or_none(vod_info.get('commentNum')), + 'categories': [str_or_none(vod_info.get('categoryName'))], + 'is_live': try_get(player_info, lambda x: x['isLive'], bool), + 'thumbnail': url_or_none(vod_info.get('coverUrl')), + 'uploader_id': str_or_none(try_get(vod_details, lambda x: x['json'][video_id]['streamerInfo']['uid'])), + } \ No newline at end of file diff --git a/youtube_dlc/utils.py b/youtube_dlc/utils.py index a374a31bf..9ae105331 100644 --- a/youtube_dlc/utils.py +++ b/youtube_dlc/utils.py @@ -4099,7 +4099,8 @@ def strip_jsonp(code): r'\g', code) -def js_to_json(code): +def js_to_json(code, vars={}): + # vars is a dict of var, val pairs to substitute COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*' SKIP_RE = r'\s*(?:{comment})?\s*'.format(comment=COMMENT_RE) INTEGER_TABLE = ( @@ -4128,6 +4129,9 @@ def js_to_json(code): i = int(im.group(1), base) return '"%d":' % i if v.endswith(':') else '%d' % i + if v in vars: + return vars[v] + return '"%s"' % v return re.sub(r'''(?sx)