# Post-patch state of yt_dlp/extractor/sportdeutschland.py, taken from the
# patch "[extractor/SportDeutschland] Fix extractor (#6041)" (closes #3005).
from .common import InfoExtractor
from ..utils import (
    format_field,
    strip_or_none,
    traverse_obj,
    unified_timestamp,
)


class SportDeutschlandIE(InfoExtractor):
    """Extractor for asset pages on sportdeutschland.tv."""

    # The named group ``id`` is what ``_match_id`` reads; it captures an
    # optional ``<profile>/`` prefix plus the asset slug.
    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
    _TESTS = [{
        'url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport/buchholzer-formationswochenende-2023-samstag-1-bundesliga-landesliga',
        'info_dict': {
            'id': '983758e9-5829-454d-a3cf-eb27bccc3c94',
            'ext': 'mp4',
            'title': 'Buchholzer Formationswochenende 2023 - Samstag - 1. Bundesliga / Landesliga',
            'description': 'md5:a288c794a5ee69e200d8f12982f81a87',
            'live_status': 'was_live',
            'channel': 'Blau-Weiss Buchholz Tanzsport',
            'channel_url': 'https://sportdeutschland.tv/blauweissbuchholztanzsport',
            'channel_id': '93ec33c9-48be-43b6-b404-e016b64fdfa3',
            'display_id': '9839a5c7-0dbb-48a8-ab63-3b408adc7b54',
            'duration': 32447,
            'upload_date': '20230114',
            'timestamp': 1673730018.0,
        }
    }, {
        'url': 'https://sportdeutschland.tv/deutscherbadmintonverband/bwf-tour-1-runde-feld-1-yonex-gainward-german-open-2022-0',
        'info_dict': {
            'id': '95b97d9a-04f6-4880-9039-182985c33943',
            'ext': 'mp4',
            'title': 'BWF Tour: 1. Runde Feld 1 - YONEX GAINWARD German Open 2022',
            'description': 'md5:2afb5996ceb9ac0b2ac81f563d3a883e',
            'live_status': 'was_live',
            'channel': 'Deutscher Badminton Verband',
            'channel_url': 'https://sportdeutschland.tv/deutscherbadmintonverband',
            'channel_id': '93ca5866-2551-49fc-8424-6db35af58920',
            'display_id': '95c80c52-6b9a-4ae9-9197-984145adfced',
            'duration': 41097,
            'upload_date': '20220309',
            'timestamp': 1646860727.0,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the asset addressed by *url*.

        Fetches the asset metadata, copies the descriptive fields, and then
        attaches playable data: a ``multi_video`` entry list when the asset
        has several videos, or the formats of its single video and/or its
        livestream otherwise.
        """
        display_id = self._match_id(url)
        meta = self._download_json(
            'https://api.sportdeutschland.tv/api/stateless/frontend/assets/' + display_id,
            display_id, query={'access_token': 'true'})

        asset_id = traverse_obj(meta, 'id', 'uuid')

        info = {
            'id': asset_id,
            # Resolve the nested slug first: a tuple passed straight to
            # format_field appears to be treated as alternative keys rather
            # than a nested path (NOTE(review): confirm against utils).
            'channel_url': format_field(
                traverse_obj(meta, ('profile', 'slug')), None,
                'https://sportdeutschland.tv/%s', default=None),
            **traverse_obj(meta, {
                'title': (('title', 'name'), {strip_or_none}),
                'description': 'description',
                'channel': ('profile', 'name'),
                'channel_id': ('profile', 'id'),
                'is_live': 'currently_live',
                'was_live': 'was_live',
            }, get_all=False),
        }

        videos = meta.get('videos') or []

        if len(videos) > 1:
            base_title = info.get('title')
            info.update({
                '_type': 'multi_video',
                # Every entry carries its own id and title; parts are
                # numbered from 1 ("Teil" is German for "part").
                'entries': [{
                    **self.process_video_or_stream(asset_id, video),
                    'id': video['id'],
                    'title': f'{base_title} - Teil {part}' if base_title else None,
                } for part, video in enumerate(videos, 1)],
            })
        elif videos:
            info.update(self.process_video_or_stream(asset_id, videos[0]))

        livestream = meta.get('livestream')
        if livestream:
            info.update(self.process_video_or_stream(asset_id, livestream))

        return info

    def process_video_or_stream(self, asset_id, video):
        """Return the playback fields for one video or livestream object.

        Requests a playback token for *video* under *asset_id* and extracts
        the m3u8 formats from the tokenised stream URL.

        *video* must provide the keys ``id``, ``src`` and ``type``; a missing
        key raises ``KeyError``. The result always contains ``display_id``
        and ``formats``; ``duration`` and ``timestamp`` are added only when
        the type is ``mux_vod``.
        """
        video_id = video['id']
        video_src = video['src']
        video_type = video['type']

        token = self._download_json(
            f'https://api.sportdeutschland.tv/api/frontend/asset-token/{asset_id}',
            video_id, query={'type': video_type, 'playback_id': video_src})['token']
        formats = self._extract_m3u8_formats(
            f'https://stream.mux.com/{video_src}.m3u8?token={token}', video_id)

        video_data = {
            'display_id': video_id,
            'formats': formats,
        }
        if video_type == 'mux_vod':
            video_data.update({
                'duration': video.get('duration'),
                'timestamp': unified_timestamp(video.get('created_at')),
            })

        return video_data