From f9f9699f2fa87ec54d59b1834cc56adb6147fec6 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Thu, 26 Nov 2020 12:56:49 +0100 Subject: [PATCH] [videa] improve extraction --- youtube_dl/extractor/videa.py | 99 +++++++++++++++++++---------------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/youtube_dl/extractor/videa.py b/youtube_dl/extractor/videa.py index a03614cc1..ab2c15cde 100644 --- a/youtube_dl/extractor/videa.py +++ b/youtube_dl/extractor/videa.py @@ -1,10 +1,9 @@ # coding: utf-8 from __future__ import unicode_literals -import re import random +import re import string -import struct from .common import InfoExtractor from ..utils import ( @@ -12,13 +11,14 @@ from ..utils import ( int_or_none, mimetype2ext, parse_codecs, + update_url_query, xpath_element, xpath_text, ) from ..compat import ( compat_b64decode, compat_ord, - compat_parse_qs, + compat_struct_pack, ) @@ -28,7 +28,7 @@ class VideaIE(InfoExtractor): videa(?:kid)?\.hu/ (?: videok/(?:[^/]+/)*[^?#&]+-| - player\?.*?\bv=| + (?:videojs_)?player\?.*?\bv=| player/v/ ) (?P[^?#&]+) @@ -62,6 +62,7 @@ class VideaIE(InfoExtractor): 'url': 'https://videakid.hu/player/v/8YfIAjxwWGwT8HVQ?autoplay=1', 'only_matching': True, }] + _STATIC_SECRET = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' @staticmethod def _extract_urls(webpage): @@ -69,75 +70,84 @@ class VideaIE(InfoExtractor): r']+src=(["\'])(?P(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1', webpage)] - def rc4(self, ciphertext, key): + @staticmethod + def rc4(cipher_text, key): res = b'' - keyLen = len(key) + key_len = len(key) S = list(range(256)) j = 0 for i in range(256): - j = (j + S[i] + ord(key[i % keyLen])) % 256 + j = (j + S[i] + ord(key[i % key_len])) % 256 S[i], S[j] = S[j], S[i] i = 0 j = 0 - for m in range(len(ciphertext)): + for m in range(len(cipher_text)): i = (i + 1) % 256 j = (j + S[i]) % 256 S[i], S[j] = S[j], S[i] k = S[(S[i] + S[j]) % 256] - res += struct.pack("B", k ^ compat_ord(ciphertext[m])) + res += compat_struct_pack('B', k ^ compat_ord(cipher_text[m])) - return res + return res.decode() def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id, fatal=True) - error = self._search_regex(r'

([^<]+)

', webpage, 'error', default=None) - if error: - raise ExtractorError(error, expected=True) - - video_src_params_raw = self._search_regex(r']+id="videa_player_iframe"[^>]+src="/player\?([^"]+)"', webpage, 'video_src_params') - video_src_params = compat_parse_qs(video_src_params_raw) - player_page = self._download_webpage("https://videa.hu/videojs_player?%s" % video_src_params_raw, video_id, fatal=True) - nonce = self._search_regex(r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce') - random_seed = ''.join(random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(8)) - static_secret = 'xHb0ZvME5q8CBcoQi6AngerDu3FGO9fkUlwPmLVY_RTzj2hJIS4NasXWKy1td7p' + query = {'v': video_id} + player_page = self._download_webpage( + 'https://videa.hu/player', video_id, query=query) + + nonce = self._search_regex( + r'_xt\s*=\s*"([^"]+)"', player_page, 'nonce') l = nonce[:32] s = nonce[32:] result = '' for i in range(0, 32): - result += s[i - (static_secret.index(l[i]) - 31)] - - video_src_params['_s'] = random_seed - video_src_params['_t'] = result[:16] - encryption_key_stem = result[16:] + random_seed - - [b64_info, handle] = self._download_webpage_handle( - 'http://videa.hu/videaplayer_get_xml.php', video_id, - query=video_src_params, fatal=True) - - encrypted_info = compat_b64decode(b64_info) - key = encryption_key_stem + handle.info()['x-videa-xs'] - info_str = self.rc4(encrypted_info, key).decode('utf8') - info = self._parse_xml(info_str, video_id) - - video = xpath_element(info, './/video', 'video', fatal=True) - sources = xpath_element(info, './/video_sources', 'sources', fatal=True) - hash_values = xpath_element(info, './/hash_values', 'hash_values', fatal=True) + result += s[i - (self._STATIC_SECRET.index(l[i]) - 31)] + + random_seed = ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(8)) + query['_s'] = random_seed + query['_t'] = result[:16] + + b64_info, handle = self._download_webpage_handle( + 'http://videa.hu/videaplayer_get_xml.php', video_id, query=query) + if b64_info.startswith('