import re from .common import InfoExtractor from ..utils import ( clean_html, extract_attributes, get_element_by_class, get_element_html_by_id, get_element_text_and_html_by_tag, parse_duration, strip_or_none, traverse_obj, try_call, ) class ListenNotesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?listennotes\.com/podcasts/[^/]+/[^/]+-(?P.+)/' _TESTS = [{ 'url': 'https://www.listennotes.com/podcasts/thriving-on-overload/tim-oreilly-on-noticing-KrDgvNb_u1n/', 'md5': '5b91a32f841e5788fb82b72a1a8af7f7', 'info_dict': { 'id': 'KrDgvNb_u1n', 'ext': 'mp3', 'title': 'md5:32236591a921adf17bbdbf0441b6c0e9', 'description': 'md5:c581ed197eeddcee55a67cdb547c8cbd', 'duration': 2148.0, 'channel': 'Thriving on Overload', 'channel_id': 'ed84wITivxF', 'episode_id': 'e1312583fa7b4e24acfbb5131050be00', 'thumbnail': 'https://production.listennotes.com/podcasts/thriving-on-overload-ross-dawson-1wb_KospA3P-ed84wITivxF.300x300.jpg', 'channel_url': 'https://www.listennotes.com/podcasts/thriving-on-overload-ross-dawson-ed84wITivxF/', 'cast': ['Tim O’Reilly', 'Cookie Monster', 'Lao Tzu', 'Wallace Steven', 'Eric Raymond', 'Christine Peterson', 'John Maynard Keyne', 'Ross Dawson'], } }, { 'url': 'https://www.listennotes.com/podcasts/ask-noah-show/episode-177-wireguard-with-lwEA3154JzG/', 'md5': '62fb4ffe7fc525632a1138bf72a5ce53', 'info_dict': { 'id': 'lwEA3154JzG', 'ext': 'mp3', 'title': 'Episode 177: WireGuard with Jason Donenfeld', 'description': 'md5:24744f36456a3e95f83c1193a3458594', 'duration': 3861.0, 'channel': 'Ask Noah Show', 'channel_id': '4DQTzdS5-j7', 'episode_id': '8c8954b95e0b4859ad1eecec8bf6d3a4', 'channel_url': 'https://www.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-4DQTzdS5-j7/', 'thumbnail': 'https://production.listennotes.com/podcasts/ask-noah-show-noah-j-chelliah-cfbRUw9Gs3F-4DQTzdS5-j7.300x300.jpg', 'cast': ['noah showlink', 'noah show', 'noah dashboard', 'jason donenfeld'], } }] def _clean_description(self, description): return clean_html(re.sub(r'(\s*)+', '

', description or '')) def _real_extract(self, url): audio_id = self._match_id(url) webpage = self._download_webpage(url, audio_id) data = self._search_json( r'