From bd9d85a8cc40be1a33bac839f7ba059d57107121 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Fri, 16 May 2025 12:08:16 +0200 Subject: [PATCH 1/8] Fix playlist support for arte.tv --- yt_dlp/extractor/arte.py | 42 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 3f17da463d..e0ca52109e 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -293,33 +293,55 @@ class ArteTVPlaylistIE(ArteTVBaseIE): 'only_matching': True, }, { 'url': 'https://www.arte.tv/pl/videos/RC-014123/arte-reportage/', - 'playlist_mincount': 100, + 'playlist_mincount': 20, 'info_dict': { 'description': 'md5:84e7bf1feda248bc325ebfac818c476e', 'id': 'RC-014123', 'title': 'ARTE Reportage - najlepsze reportaże', }, + }, { + 'url': 'https://www.arte.tv/de/videos/RC-025470/ramy/', + 'playlist_mincount': 30, + 'info_dict': { + 'description': 'md5:8766d73504ddccd12dbd1395a1d56815', + 'id': 'RC-025470', + 'title': 'Ramy', + }, }] def _real_extract(self, url): lang, playlist_id = self._match_valid_url(url).group('lang', 'id') - playlist = self._download_json( - f'{self._API_BASE}/playlist/{lang}/{playlist_id}', playlist_id)['data']['attributes'] + webpage = self._download_webpage(url, playlist_id) + + unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') + playlist_data = self._search_json(r'\$L23.+?', webpage, 'series data', + playlist_id, + end_pattern=r'\],\[\[', + transform_source=unescape_func) + + playlist_item_filter = lambda _, v: re.match(rf'collection_(?:videos|subcollection)_{playlist_id}', v['code']) + collections = traverse_obj(playlist_data, + ('data', + 'zones', + playlist_item_filter, + 'content', + 'data', + ...)) entries = [{ '_type': 'url_transparent', - 'url': video['config']['url'], + 'url': f'https://www.arte.tv{video['url']}', 'ie_key': ArteTVIE.ie_key(), - 'id': video.get('providerId'), + 'id': 
video.get('id'), 'title': video.get('title'), 'alt_title': video.get('subtitle'), - 'thumbnail': url_or_none(traverse_obj(video, ('mainImage', 'url'))), - 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))), - } for video in traverse_obj(playlist, ('items', lambda _, v: v['config']['url']))] + 'duration': int_or_none(traverse_obj(video, ('duration'))), + 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), + } for video in collections] return self.playlist_result(entries, playlist_id, - traverse_obj(playlist, ('metadata', 'title')), - traverse_obj(playlist, ('metadata', 'description'))) + traverse_obj(playlist_data, ('data', 'metadata', 'title')), + traverse_obj(playlist_data, ('data', 'metadata', 'description'))) class ArteTVCategoryIE(ArteTVBaseIE): From d610488650f9a12ea28ec225c0ae5106b5c43e78 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Fri, 16 May 2025 13:04:42 +0200 Subject: [PATCH 2/8] Refactored to work with Python 3.9 --- yt_dlp/extractor/arte.py | 51 +++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index e0ca52109e..1a4670b778 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -309,16 +309,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): }, }] - def _real_extract(self, url): - lang, playlist_id = self._match_valid_url(url).group('lang', 'id') - webpage = self._download_webpage(url, playlist_id) - - unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') - playlist_data = self._search_json(r'\$L23.+?', webpage, 'series data', - playlist_id, - end_pattern=r'\],\[\[', - transform_source=unescape_func) - + def _entries(self, playlist_data, playlist_id): playlist_item_filter = lambda _, v: re.match(rf'collection_(?:videos|subcollection)_{playlist_id}', v['code']) collections = traverse_obj(playlist_data, ('data', @@ -328,20 +319,32 @@ class 
ArteTVPlaylistIE(ArteTVBaseIE): 'data', ...)) - entries = [{ - '_type': 'url_transparent', - 'url': f'https://www.arte.tv{video['url']}', - 'ie_key': ArteTVIE.ie_key(), - 'id': video.get('id'), - 'title': video.get('title'), - 'alt_title': video.get('subtitle'), - 'duration': int_or_none(traverse_obj(video, ('duration'))), - 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), - } for video in collections] - - return self.playlist_result(entries, playlist_id, - traverse_obj(playlist_data, ('data', 'metadata', 'title')), - traverse_obj(playlist_data, ('data', 'metadata', 'description'))) + for video in collections: + yield { + '_type': 'url_transparent', + 'url': 'https://www.arte.tv' + video['url'], + 'ie_key': ArteTVIE.ie_key(), + 'id': video['id'], + 'title': video.get('title'), + 'alt_title': video.get('subtitle'), + 'duration': int_or_none(traverse_obj(video, ('duration'))), + 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), + } + + def _real_extract(self, url): + lang, playlist_id = self._match_valid_url(url).group('lang', 'id') + webpage = self._download_webpage(url, playlist_id) + + unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') + json_data = self._search_json(r'\$L23.+?', webpage, 'series data', + playlist_id, + end_pattern=r'\],\[\[', + transform_source=unescape_func) + + return self.playlist_result(self._entries(json_data, playlist_id), + playlist_id, + traverse_obj(json_data, ('data', 'metadata', 'title')), + traverse_obj(json_data, ('data', 'metadata', 'description'))) class ArteTVCategoryIE(ArteTVBaseIE): From 15a2b1c5bb4b847c46ac3ac4a5b17f73166858ee Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Wed, 24 Sep 2025 21:08:03 +0200 Subject: [PATCH 3/8] Refactor playlist support for arte.tv to use API endpoint, instead of parsing HTML --- yt_dlp/extractor/arte.py | 58 +++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git 
a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 1a4670b778..3b2934ed9c 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -309,42 +309,40 @@ class ArteTVPlaylistIE(ArteTVBaseIE): }, }] - def _entries(self, playlist_data, playlist_id): - playlist_item_filter = lambda _, v: re.match(rf'collection_(?:videos|subcollection)_{playlist_id}', v['code']) - collections = traverse_obj(playlist_data, - ('data', - 'zones', - playlist_item_filter, - 'content', - 'data', - ...)) - - for video in collections: - yield { - '_type': 'url_transparent', - 'url': 'https://www.arte.tv' + video['url'], - 'ie_key': ArteTVIE.ie_key(), - 'id': video['id'], - 'title': video.get('title'), - 'alt_title': video.get('subtitle'), - 'duration': int_or_none(traverse_obj(video, ('duration'))), - 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), - } + def _entries(self, season_ids, lang, playlist_id): + for season_id in season_ids: + season_data = self._download_json(f'{self._API_BASE}/playlist/{lang}/{season_id}', season_id, headers={ + 'x-validated-age': '18', + }) + + collection = traverse_obj(season_data, ('data', 'attributes', 'items')) + + for video in collection: + yield { + '_type': 'url_transparent', + 'url': traverse_obj(video, ('link', 'url')), + 'ie_key': ArteTVIE.ie_key(), + 'id': video['providerId'], + 'title': video.get('title'), + 'alt_title': video.get('subtitle'), + 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))), + 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), + } def _real_extract(self, url): + _API_TOKEN = 'Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA' lang, playlist_id = self._match_valid_url(url).group('lang', 'id') - webpage = self._download_webpage(url, playlist_id) - unescape_func = lambda jstring: jstring.replace('\\"', '"').replace('\\\\', '\\') - json_data = self._search_json(r'\$L23.+?', webpage, 'series data', - playlist_id, - end_pattern=r'\],\[\[', - 
transform_source=unescape_func) + playlist_info = self._download_json(f'https://api.arte.tv/api/opa/v3/programs/{lang}/{playlist_id}', playlist_id, + headers={ + 'Authorization': f'Bearer {_API_TOKEN}', + }) - return self.playlist_result(self._entries(json_data, playlist_id), + season_ids = traverse_obj(playlist_info, ('programs', ..., 'children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId')) + return self.playlist_result(self._entries(season_ids, lang, playlist_id), playlist_id, - traverse_obj(json_data, ('data', 'metadata', 'title')), - traverse_obj(json_data, ('data', 'metadata', 'description'))) + traverse_obj(playlist_info, ('programs', ..., 'title')), + traverse_obj(playlist_info, ('programs', ..., 'shortDescription'))) class ArteTVCategoryIE(ArteTVBaseIE): From 0d46b5007b87fc9dd093df4420dc281ce62f6e1b Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Wed, 24 Sep 2025 21:13:50 +0200 Subject: [PATCH 4/8] Fix linter issue --- yt_dlp/extractor/arte.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 3b2934ed9c..25b2f2b1e4 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -335,8 +335,8 @@ class ArteTVPlaylistIE(ArteTVBaseIE): playlist_info = self._download_json(f'https://api.arte.tv/api/opa/v3/programs/{lang}/{playlist_id}', playlist_id, headers={ - 'Authorization': f'Bearer {_API_TOKEN}', - }) + 'Authorization': f'Bearer {_API_TOKEN}', + }) season_ids = traverse_obj(playlist_info, ('programs', ..., 'children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId')) return self.playlist_result(self._entries(season_ids, lang, playlist_id), From d4040befc9fa0fbf40e11afaa5a2343ed103d66c Mon Sep 17 00:00:00 2001 From: Frank Aurich <1100101@gmail.com> Date: Wed, 24 Sep 2025 21:19:38 +0200 Subject: [PATCH 5/8] Fix linter issue --- yt_dlp/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index 25b2f2b1e4..e9eabad6ef 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -336,7 +336,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): playlist_info = self._download_json(f'https://api.arte.tv/api/opa/v3/programs/{lang}/{playlist_id}', playlist_id, headers={ 'Authorization': f'Bearer {_API_TOKEN}', - }) + }) season_ids = traverse_obj(playlist_info, ('programs', ..., 'children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId')) return self.playlist_result(self._entries(season_ids, lang, playlist_id), From a20312ceef51df3792bfa43ed83b6a2a9b51f3b1 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Thu, 2 Oct 2025 11:07:30 +0200 Subject: [PATCH 6/8] fix: Some playlists don't have seasons, but are comprised of a number of shows ("mini series") --- yt_dlp/extractor/arte.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index e9eabad6ef..d828fc3401 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -307,9 +307,17 @@ class ArteTVPlaylistIE(ArteTVBaseIE): 'id': 'RC-025470', 'title': 'Ramy', }, + }, { + 'url': 'https://www.arte.tv/de/videos/RC-027148/zucker-genuss-um-welchen-preis/', + 'playlist_mincount': 1, + 'info_dict': { + 'description': 'md5:4c06c2b63970f78276bcadaea2d0df0b', + 'id': 'RC-027148', + 'title': 'Zucker, Genuss um welchen Preis?', + }, }] - def _entries(self, season_ids, lang, playlist_id): + def _season_entries(self, season_ids, lang): for season_id in season_ids: season_data = self._download_json(f'{self._API_BASE}/playlist/{lang}/{season_id}', season_id, headers={ 'x-validated-age': '18', @@ -337,12 +345,21 @@ class ArteTVPlaylistIE(ArteTVBaseIE): headers={ 'Authorization': f'Bearer {_API_TOKEN}', }) - - season_ids = traverse_obj(playlist_info, ('programs', ..., 'children', (lambda _, v: v['catalogType'] == 'SEASON'), 
'programId')) - return self.playlist_result(self._entries(season_ids, lang, playlist_id), + playlist_info = traverse_obj(playlist_info, ('programs', ...), get_all=False) + metadata = traverse_obj(playlist_info, {'title': 'title', 'description': 'shortDescription'}, get_all=False) + + # Check first if there are seasons + season_ids = traverse_obj(playlist_info, ('children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId')) + if season_ids: + return self.playlist_result(self._season_entries(season_ids, lang), + playlist_id, + **metadata) + + # It might be a mini series comprised of a few shows + shows = traverse_obj(playlist_info, ('videos', (lambda _, v: v['kind'] == 'SHOW'))) + return self.playlist_result([self.url_result(show['url'], ArteTVIE) for show in shows], playlist_id, - traverse_obj(playlist_info, ('programs', ..., 'title')), - traverse_obj(playlist_info, ('programs', ..., 'shortDescription'))) + **metadata) class ArteTVCategoryIE(ArteTVBaseIE): From 8f55ed431f737c1954f70cdc0f2734ab56a7bc76 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Sun, 7 Dec 2025 12:36:43 +0100 Subject: [PATCH 7/8] Fix issues found in review --- yt_dlp/extractor/arte.py | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index d828fc3401..b11cf36347 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -319,44 +319,48 @@ class ArteTVPlaylistIE(ArteTVBaseIE): def _season_entries(self, season_ids, lang): for season_id in season_ids: - season_data = self._download_json(f'{self._API_BASE}/playlist/{lang}/{season_id}', season_id, headers={ - 'x-validated-age': '18', - }) + season_data = self._download_json( + f'{self._API_BASE}/playlist/{lang}/{season_id}', season_id, + headers={'x-validated-age': '18'}) - collection = traverse_obj(season_data, ('data', 'attributes', 'items')) - - for video in collection: + for video in 
traverse_obj(season_data, ( + 'data', 'attributes', 'items', + lambda _, v: v['providerId'] and v['link']['url'])): yield { '_type': 'url_transparent', - 'url': traverse_obj(video, ('link', 'url')), 'ie_key': ArteTVIE.ie_key(), - 'id': video['providerId'], - 'title': video.get('title'), - 'alt_title': video.get('subtitle'), - 'duration': int_or_none(traverse_obj(video, ('duration', 'seconds'))), - 'age_limit': int_or_none(traverse_obj(video, 'ageRating')), + **traverse_obj(video, { + 'url': ('link', 'url', {str}), + 'id': ('providerId'), + 'title': ('title', {str}), + 'alt_title': ('subtitle', {str}), + 'duration': ('duration', 'seconds', {int_or_none}), + 'age_limit': ('ageRating', {int_or_none}), + }), } def _real_extract(self, url): _API_TOKEN = 'Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA' lang, playlist_id = self._match_valid_url(url).group('lang', 'id') - playlist_info = self._download_json(f'https://api.arte.tv/api/opa/v3/programs/{lang}/{playlist_id}', playlist_id, - headers={ - 'Authorization': f'Bearer {_API_TOKEN}', - }) + playlist_info = self._download_json( + f'https://api.arte.tv/api/opa/v3/programs/{lang}/{playlist_id}', playlist_id, + headers={'Authorization': f'Bearer {_API_TOKEN}'}) playlist_info = traverse_obj(playlist_info, ('programs', ...), get_all=False) - metadata = traverse_obj(playlist_info, {'title': 'title', 'description': 'shortDescription'}, get_all=False) + metadata = traverse_obj( + playlist_info, {'title': ('title', {str}), 'description': ('shortDescription', {str})}) # Check first if there are seasons - season_ids = traverse_obj(playlist_info, ('children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId')) + season_ids = traverse_obj( + playlist_info, ('children', (lambda _, v: v['catalogType'] == 'SEASON'), 'programId')) if season_ids: return self.playlist_result(self._season_entries(season_ids, lang), playlist_id, **metadata) # It might be a mini series comprised of a few shows - 
shows = traverse_obj(playlist_info, ('videos', (lambda _, v: v['kind'] == 'SHOW'))) + shows = traverse_obj(playlist_info, ( + 'videos', lambda _, v: v['kind'] == 'SHOW' and url_or_none(v['url']))) return self.playlist_result([self.url_result(show['url'], ArteTVIE) for show in shows], playlist_id, **metadata) From 2f10dc7332570aa351a2598054295e66413cdba2 Mon Sep 17 00:00:00 2001 From: 1100101 <1100101+automatic@gmail.com> Date: Sun, 7 Dec 2025 13:23:01 +0100 Subject: [PATCH 8/8] Forgot a fix --- yt_dlp/extractor/arte.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt_dlp/extractor/arte.py b/yt_dlp/extractor/arte.py index b11cf36347..52fab29f33 100644 --- a/yt_dlp/extractor/arte.py +++ b/yt_dlp/extractor/arte.py @@ -331,7 +331,7 @@ class ArteTVPlaylistIE(ArteTVBaseIE): 'ie_key': ArteTVIE.ie_key(), **traverse_obj(video, { 'url': ('link', 'url', {str}), - 'id': ('providerId'), + 'id': ('providerId', {str}), 'title': ('title', {str}), 'alt_title': ('subtitle', {str}), 'duration': ('duration', 'seconds', {int_or_none}),