From d867f99622ef7fba690b08da56c39d739b822bb7 Mon Sep 17 00:00:00 2001 From: ChocoLZS <61224208+ChocoLZS@users.noreply.github.com> Date: Mon, 18 Nov 2024 02:41:57 +0800 Subject: [PATCH] [ie/PiaLive] Add extractor (#10811) Authored by: ChocoLZS --- yt_dlp/extractor/_extractors.py | 6 +- yt_dlp/extractor/pialive.py | 122 +++++++++++++++++++++++++++++ yt_dlp/extractor/piaulizaportal.py | 70 ----------------- yt_dlp/extractor/uliza.py | 113 ++++++++++++++++++++++++++ 4 files changed, 240 insertions(+), 71 deletions(-) create mode 100644 yt_dlp/extractor/pialive.py delete mode 100644 yt_dlp/extractor/piaulizaportal.py create mode 100644 yt_dlp/extractor/uliza.py diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 0d849c169..967010826 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -1520,8 +1520,8 @@ from .pgatour import PGATourIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE +from .pialive import PiaLiveIE from .piapro import PiaproIE -from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, @@ -2250,6 +2250,10 @@ from .ufctv import ( ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE +from .uliza import ( + UlizaPlayerIE, + UlizaPortalIE, +) from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE diff --git a/yt_dlp/extractor/pialive.py b/yt_dlp/extractor/pialive.py new file mode 100644 index 000000000..7469135c1 --- /dev/null +++ b/yt_dlp/extractor/pialive.py @@ -0,0 +1,122 @@ +from .common import InfoExtractor +from ..utils import ( + ExtractorError, + clean_html, + extract_attributes, + get_element_by_class, + get_element_html_by_class, + multipart_encode, + str_or_none, + unified_timestamp, + url_or_none, +) +from ..utils.traversal import traverse_obj + + +class PiaLiveIE(InfoExtractor): + _VALID_URL = 
r'https?://player\.pia-live\.jp/stream/(?P<id>[\w-]+)' + _PLAYER_ROOT_URL = 'https://player.pia-live.jp/' + _PIA_LIVE_API_URL = 'https://api.pia-live.jp' + _API_KEY = 'kfds)FKFps-dms9e' + _TESTS = [{ + 'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krUDqGOwN4d61dCWQYOd6CTxl4hjya9dsfEZGsM4uGOUdax60lEI4twsXGXf7crmz8Gk__GhupTrWxA7RFRVt76', + 'info_dict': { + 'id': '88f3109a-f503-4d0f-a9f7-9f39ac745d84', + 'display_id': '2431867_001', + 'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)', + 'live_status': 'was_live', + 'comment_count': int, + }, + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'skip': 'The video is no longer available', + }, { + 'url': 'https://player.pia-live.jp/stream/4JagFBEIM14s_hK9aXHKf3k3F3bY5eoHFQxu68TC6krJdu0GVBVbVy01IwpJ6J3qBEm3d9TCTt1d0eWpsZGj7DrOjVOmS7GAWGwyscMgiThopJvzgWC4H5b-7XQjAfRZ', + 'info_dict': { + 'id': '9ce8b8ba-f6d1-4d1f-83a0-18c3148ded93', + 'display_id': '2431867_002', + 'title': 'こながめでたい日2024の視聴ページ | PIA LIVE STREAM(ぴあライブストリーム)', + 'live_status': 'was_live', + 'comment_count': int, + }, + 'params': { + 'getcomments': True, + 'skip_download': True, + 'ignore_no_formats_error': True, + }, + 'skip': 'The video is no longer available', + }] + + def _extract_var(self, variable, html): + return self._search_regex( + rf'(?:var|const|let)\s+{variable}\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + html, f'variable {variable}', group='value') + + def _real_extract(self, url): + video_key = self._match_id(url) + webpage = self._download_webpage(url, video_key) + + program_code = self._extract_var('programCode', webpage) + article_code = self._extract_var('articleCode', webpage) + title = self._html_extract_title(webpage) + + if get_element_html_by_class('play-end', webpage): + raise ExtractorError('The video is no longer available', expected=True, video_id=program_code) + + if start_info := clean_html(get_element_by_class('play-waiting__date', webpage)): 
+ date, time = self._search_regex( + r'(?P\d{4}/\d{1,2}/\d{1,2})\([月火水木金土日]\)(?P