diff --git a/yt_dlp/extractor/arnes.py b/yt_dlp/extractor/arnes.py index 115e6e887..6db4e37ce 100644 --- a/yt_dlp/extractor/arnes.py +++ b/yt_dlp/extractor/arnes.py @@ -1,20 +1,25 @@ +import binascii +import hashlib +import json +import os +import random +import time +from datetime import datetime + from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse_urlparse, ) +from ..dependencies import Cryptodome from ..utils import ( format_field, float_or_none, int_or_none, parse_iso8601, remove_start, - ExtractorError ) -from ..dependencies import Cryptodome -from ..networking import Request -import time, hashlib, random, os, json, binascii -from datetime import datetime + class ArnesIE(InfoExtractor): IE_NAME = 'video.arnes.si' @@ -39,6 +44,29 @@ class ArnesIE(InfoExtractor): 'view_count': int, 'tags': ['linearna_algebra'], 'start_time': 10, + 'thumbnail': 'https://video.arnes.si/attachments/video/a1/a1qrWTOQfVoU/transcoded/image/ZfOtoJ9CVl7AKcZIpK3jTEvB.480p.jpg' + } + }, { + # one needs to be registered to get this url on: https://bsf.si/sl/film/dan-ljubezni-epizoda-10/ + 'url': 'https://video-4.arnes.si/embed/4zdg767pl9sv?accessToken=ZONPEE6G6ZM36WYPAIC5M2BL&hideRelated=1', + 'only_matching': True, + 'md5': '84c1e19403a4e50fa2394451675563f3', + 'info_dict': { + 'id': '4zdg767pl9sv', + 'ext': 'mp4', + 'title': 'Dan ljubezni E10 - nova', + 'thumbnail': 'https://video-4.arnes.si/attachments/video/4z/4zdg767pl9sv/transcoded/image/Q1gZUVXgDgZZ.1nvm5msy0xkg.480p.jpg', + 'description': 'film', + 'license': 'CC_BY_NC_SA', + 'creator': 'Video Filmoteka', + 'timestamp': 1681422237, + 'channel': 'Video Filmoteka', + 'channel_id': 'd6rb30mw368k', + 'channel_url': 'https://video-4.arnes.si/?channel=d6rb30mw368k', + 'duration': 651.6, + 'view_count': 44, + 'upload_date': '20230413', + 'tags': [] } }, { 'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4', @@ -57,86 +85,87 @@ class ArnesIE(InfoExtractor): 'only_matching': True, }] - 
_PARSED_URL=None - _ACCESS_TOKEN=None + _PARSED_URL = None + _ACCESS_TOKEN = None + _TIME_OF_LAST_VID_ACCESS_COOKIE_CHANGED = 0 + + # 10s was found by watching how requests are sent and when the cookie changes + _TIME_BETWEEN_NEW_COOKIE = 10 def _generate_video_access_cookie(self): - key_text = "Xmw1MSIlpZYdyy1DlqIl" - key = hashlib.sha256(key_text.encode("utf-8")).digest() + # Text of the key (which is then SHA-256 hashed), taken from main-es2015.62cf37eb23d6552e6e54.js + # before video_access_cookie is added + key_text = 'Xmw1MSIlpZYdyy1DlqIl' + key = hashlib.sha256(key_text.encode('utf-8')).digest() iv = os.urandom(12) - # iv = bytearray([177, 70, 39, 213, 109, 49, 245, 24, 92, 85, 25, 187]) - time_component = int(datetime.now().timestamp()*1000)+random.randint(0,1000) - # time_component = 1695271197003 + time_component = int(datetime.now().timestamp() * 1000) + random.randint(0, 1000) - token_data = f"{self._ACCESS_TOKEN}|{time_component}" + token_data = f'{self._ACCESS_TOKEN}|{time_component}' encoded_token = token_data.encode('utf-8') encrypted_data, tag = Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, iv).encrypt_and_digest(encoded_token) - base16encrypted_data = binascii.hexlify(iv).decode() + binascii.hexlify(encrypted_data+tag).decode() + base16encrypted_data = binascii.hexlify(iv).decode() + binascii.hexlify(encrypted_data + tag).decode() return base16encrypted_data - def _generate_video_test_cookie(self): - # Generate a random timestamp - current_time = str(int(time.time()) + random.randint(1, 10000)) + def _set_access_cookie(self): + """ + Generates a new video access cookie and sets it. - return current_time + This function should be executed between each downloaded segment. 
+ """ + if self._ACCESS_TOKEN is None: + return - def _set_access_cookie(self): - vid_access_cookie=self._generate_video_access_cookie() - self._set_cookie(self._PARSED_URL.netloc,"video_access_cookie",vid_access_cookie) + current_time = time.time() + if current_time - self._TIME_OF_LAST_VID_ACCESS_COOKIE_CHANGED > self._TIME_BETWEEN_NEW_COOKIE: + vid_access_cookie = self._generate_video_access_cookie() + self._set_cookie(self._PARSED_URL.netloc, 'video_access_cookie', vid_access_cookie) + self._TIME_OF_LAST_VID_ACCESS_COOKIE_CHANGED = current_time + self.write_debug("New video_access_cookie generated.") + def _fragment_hook_before_download(self, frag_filename, frag_info_dict, ctx): + self._set_access_cookie() def _real_extract(self, url): video_id = self._match_id(url) self._PARSED_URL = compat_urllib_parse_urlparse(url) + at_list = compat_parse_qs(self._PARSED_URL.query).get('accessToken') + self._ACCESS_TOKEN = at_list[0] if at_list is not None and len(at_list) != 0 else None - base_url = f"{self._PARSED_URL.scheme}://{self._PARSED_URL.netloc}" - - self._ACCESS_TOKEN = compat_parse_qs(self._PARSED_URL.query).get("accessToken")[0] - - video_res = self._downloader.urlopen(base_url + '/api/public/video/' + video_id + f"?accessToken={self._ACCESS_TOKEN}") - header_date = video_res.headers["Date"] + base_url = f'{self._PARSED_URL.scheme}://{self._PARSED_URL.netloc}' + access_token_query = f'?accessToken={self._ACCESS_TOKEN}' if self._ACCESS_TOKEN is not None else '?' 
- date_object = datetime.strptime(header_date, '%a, %d %b %Y %H:%M:%S %Z') - timestamp = int(date_object.timestamp()*1000) - vid_access_cookie = self._generate_video_access_cookie() + video_res = self._downloader.urlopen(f'{base_url}/api/public/video/{video_id}{access_token_query}') video_res = json.loads(video_res.read().decode()) - #self._download_json(base_url + '/api/public/video/' + video_id + f"?accessToken={self._ACCESS_TOKEN}", video_id) video = video_res['data'] - vid_test_cookie = self._generate_video_test_cookie() - req_headers = {'Cookie': f'video_test_cookie={vid_test_cookie}; video_access_cookie={vid_access_cookie}'} - check_cookies_request = Request(base_url + f'/api/public/video/embed-support/{vid_test_cookie}', - headers = req_headers) - - check_cookies_data = self._download_json(check_cookies_request, video_id) - if check_cookies_data['status'] != 'OK' or check_cookies_data['data'] != True: - raise ExtractorError('Check cookies request did not succeed.') title = video['title'] - formats = [] - for media in (video.get('media') or []): - media_url = media.get('url') - if not media_url: - continue - formats.append({ - 'url': base_url + media_url, - 'format_id': remove_start(media.get('format'), 'FORMAT_'), - 'format_note': media.get('formatTranslation'), - 'width': int_or_none(media.get('width')), - 'height': int_or_none(media.get('height')), - }) - channel = video.get('channel') or {} channel_id = channel.get('url') thumbnail = video.get('thumbnailUrl') - m3u8_url = base_url+video.get('hls').get('url') + f'?accessToken={self._ACCESS_TOKEN}' - self._set_cookie(self._PARSED_URL.netloc,"video_access_cookie",vid_access_cookie) - formats = self._extract_m3u8_formats(m3u8_url,video_id) + formats = [] + if video.get('hls') is None: + for media in (video.get('media') or []): + media_url = media.get('url') + if not media_url: + continue + formats.append({ + 'url': base_url + media_url + access_token_query, + 'format_id': remove_start(media.get('format'), 
'FORMAT_'), + 'format_note': media.get('formatTranslation'), + 'width': int_or_none(media.get('width')), + 'height': int_or_none(media.get('height')), + }) + else: + m3u8_url = f'{base_url}{video.get("hls").get("url")}{access_token_query}' + self._set_access_cookie() + formats = self._extract_m3u8_formats(m3u8_url, video_id) + return { 'id': video_id, 'title': title, @@ -155,5 +184,6 @@ class ArnesIE(InfoExtractor): 'tags': video.get('hashtags'), 'start_time': int_or_none(compat_parse_qs( compat_urllib_parse_urlparse(url).query).get('t', [None])[0]), - '_execute_before_each_fragment': self._set_access_cookie + '_fragment_hook_before_dl': self._fragment_hook_before_download, + '_test': self._fragment_hook_before_download }