mirror of https://github.com/yt-dlp/yt-dlp
Merge branch 'yt-dlp:master' into teachable-fix-add-hotmart
commit
931a90e7da
@ -0,0 +1,21 @@
|
||||
import functools
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
from yt_dlp.networking import RequestHandler
|
||||
from yt_dlp.networking.common import _REQUEST_HANDLERS
|
||||
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def handler(request):
|
||||
RH_KEY = request.param
|
||||
if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
|
||||
handler = RH_KEY
|
||||
elif RH_KEY in _REQUEST_HANDLERS:
|
||||
handler = _REQUEST_HANDLERS[RH_KEY]
|
||||
else:
|
||||
pytest.skip(f'{RH_KEY} request handler is not available')
|
||||
|
||||
return functools.partial(handler, logger=FakeLogger)
|
@ -0,0 +1,13 @@
|
||||
# flake8: noqa: F405
|
||||
from types import * # noqa: F403
|
||||
|
||||
from .compat_utils import passthrough_module
|
||||
|
||||
passthrough_module(__name__, 'types')
|
||||
del passthrough_module
|
||||
|
||||
try:
|
||||
# NB: pypy has builtin NoneType, so checking NameError won't work
|
||||
from types import NoneType # >= 3.10
|
||||
except ImportError:
|
||||
NoneType = type(None)
|
@ -1,196 +0,0 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from .kaltura import KalturaIE
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
int_or_none,
|
||||
OnDemandPagedList,
|
||||
parse_age_limit,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class AsianCrushBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|(?:cocoro|retrocrush)\.tv))'
|
||||
_KALTURA_KEYS = [
|
||||
'video_url', 'progressive_url', 'download_url', 'thumbnail_url',
|
||||
'widescreen_thumbnail_url', 'screencap_widescreen',
|
||||
]
|
||||
_API_SUFFIX = {'retrocrush.tv': '-ott'}
|
||||
|
||||
def _call_api(self, host, endpoint, video_id, query, resource):
|
||||
return self._download_json(
|
||||
'https://api%s.%s/%s' % (self._API_SUFFIX.get(host, ''), host, endpoint), video_id,
|
||||
'Downloading %s JSON metadata' % resource, query=query,
|
||||
headers=self.geo_verification_headers())['objects']
|
||||
|
||||
def _download_object_data(self, host, object_id, resource):
|
||||
return self._call_api(
|
||||
host, 'search', object_id, {'id': object_id}, resource)[0]
|
||||
|
||||
def _get_object_description(self, obj):
|
||||
return strip_or_none(obj.get('long_description') or obj.get('short_description'))
|
||||
|
||||
def _parse_video_data(self, video):
|
||||
title = video['name']
|
||||
|
||||
entry_id, partner_id = [None] * 2
|
||||
for k in self._KALTURA_KEYS:
|
||||
k_url = video.get(k)
|
||||
if k_url:
|
||||
mobj = re.search(r'/p/(\d+)/.+?/entryId/([^/]+)/', k_url)
|
||||
if mobj:
|
||||
partner_id, entry_id = mobj.groups()
|
||||
break
|
||||
|
||||
meta_categories = try_get(video, lambda x: x['meta']['categories'], list) or []
|
||||
categories = list(filter(None, [c.get('name') for c in meta_categories]))
|
||||
|
||||
show_info = video.get('show_info') or {}
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'url': 'kaltura:%s:%s' % (partner_id, entry_id),
|
||||
'ie_key': KalturaIE.ie_key(),
|
||||
'id': entry_id,
|
||||
'title': title,
|
||||
'description': self._get_object_description(video),
|
||||
'age_limit': parse_age_limit(video.get('mpaa_rating') or video.get('tv_rating')),
|
||||
'categories': categories,
|
||||
'series': show_info.get('show_name'),
|
||||
'season_number': int_or_none(show_info.get('season_num')),
|
||||
'season_id': show_info.get('season_id'),
|
||||
'episode_number': int_or_none(show_info.get('episode_num')),
|
||||
}
|
||||
|
||||
|
||||
class AsianCrushIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/video/004289v/women-who-flirt',
|
||||
'md5': 'c3b740e48d0ba002a42c0b72857beae6',
|
||||
'info_dict': {
|
||||
'id': '1_y4tmjm5r',
|
||||
'ext': 'mp4',
|
||||
'title': 'Women Who Flirt',
|
||||
'description': 'md5:b65c7e0ae03a85585476a62a186f924c',
|
||||
'timestamp': 1496936429,
|
||||
'upload_date': '20170608',
|
||||
'uploader_id': 'craig@crifkin.com',
|
||||
'age_limit': 13,
|
||||
'categories': 'count:5',
|
||||
'duration': 5812,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/video/true-tears/012328v-i...gave-away-my-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, video_id = self._match_valid_url(url).groups()
|
||||
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
embed_vars = self._parse_json(self._search_regex(
|
||||
r'iEmbedVars\s*=\s*({.+?})', webpage, 'embed vars',
|
||||
default='{}'), video_id, fatal=False) or {}
|
||||
video_id = embed_vars.get('entry_id') or video_id
|
||||
|
||||
video = self._download_object_data(host, video_id, 'video')
|
||||
return self._parse_video_data(video)
|
||||
|
||||
|
||||
class AsianCrushPlaylistIE(AsianCrushBaseIE):
|
||||
_VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushBaseIE._VALID_URL_BASE
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/series/006447s/fruity-samurai',
|
||||
'info_dict': {
|
||||
'id': '6447',
|
||||
'title': 'Fruity Samurai',
|
||||
'description': 'md5:7535174487e4a202d3872a7fc8f2f154',
|
||||
},
|
||||
'playlist_count': 13,
|
||||
}, {
|
||||
'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/series/012355s/true-tears',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_PAGE_SIZE = 1000000000
|
||||
|
||||
def _fetch_page(self, domain, parent_id, page):
|
||||
videos = self._call_api(
|
||||
domain, 'getreferencedobjects', parent_id, {
|
||||
'max': self._PAGE_SIZE,
|
||||
'object_type': 'video',
|
||||
'parent_id': parent_id,
|
||||
'start': page * self._PAGE_SIZE,
|
||||
}, 'page %d' % (page + 1))
|
||||
for video in videos:
|
||||
yield self._parse_video_data(video)
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, playlist_id = self._match_valid_url(url).groups()
|
||||
|
||||
if host == 'cocoro.tv':
|
||||
webpage = self._download_webpage(url, playlist_id)
|
||||
|
||||
entries = []
|
||||
|
||||
for mobj in re.finditer(
|
||||
r'<a[^>]+href=(["\'])(?P<url>%s.*?)\1[^>]*>' % AsianCrushIE._VALID_URL,
|
||||
webpage):
|
||||
attrs = extract_attributes(mobj.group(0))
|
||||
if attrs.get('class') == 'clearfix':
|
||||
entries.append(self.url_result(
|
||||
mobj.group('url'), ie=AsianCrushIE.ie_key()))
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'(?s)<h1\b[^>]\bid=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
|
||||
'title', default=None) or self._og_search_title(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:title', webpage, 'title',
|
||||
default=None) or self._html_extract_title(webpage)
|
||||
if title:
|
||||
title = re.sub(r'\s*\|\s*.+?$', '', title)
|
||||
|
||||
description = self._og_search_description(
|
||||
webpage, default=None) or self._html_search_meta(
|
||||
'twitter:description', webpage, 'description', fatal=False)
|
||||
else:
|
||||
show = self._download_object_data(host, playlist_id, 'show')
|
||||
title = show.get('name')
|
||||
description = self._get_object_description(show)
|
||||
entries = OnDemandPagedList(
|
||||
functools.partial(self._fetch_page, host, playlist_id),
|
||||
self._PAGE_SIZE)
|
||||
|
||||
return self.playlist_result(entries, playlist_id, title, description)
|
@ -0,0 +1,87 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
js_to_json,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class AxsIE(InfoExtractor):
|
||||
IE_NAME = 'axs.tv'
|
||||
_VALID_URL = r'https?://(?:www\.)?axs\.tv/(?:channel/(?:[^/?#]+/)+)?video/(?P<id>[^/?#]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://www.axs.tv/video/5f4dc776b70e4f1c194f22ef/',
|
||||
'md5': '8d97736ae8e50c64df528e5e676778cf',
|
||||
'info_dict': {
|
||||
'id': '5f4dc776b70e4f1c194f22ef',
|
||||
'title': 'Small Town',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e314d28bfaa227a4d7ec965fae19997f',
|
||||
'upload_date': '20230602',
|
||||
'timestamp': 1685729564,
|
||||
'duration': 1284.216,
|
||||
'series': 'Rock & Roll Road Trip with Sammy Hagar',
|
||||
'season': 2,
|
||||
'episode': '3',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4e9d330a0c3b295a7e8394',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.axs.tv/channel/rock-star-interview/video/daryl-hall',
|
||||
'md5': '300ae795cd8f9984652c0949734ffbdc',
|
||||
'info_dict': {
|
||||
'id': '5f488148b70e4f392572977c',
|
||||
'display_id': 'daryl-hall',
|
||||
'title': 'Daryl Hall',
|
||||
'ext': 'mp4',
|
||||
'description': 'md5:e54ecaa0f4b5683fc9259e9e4b196628',
|
||||
'upload_date': '20230214',
|
||||
'timestamp': 1676403615,
|
||||
'duration': 2570.668,
|
||||
'series': 'The Big Interview with Dan Rather',
|
||||
'season': 3,
|
||||
'episode': '5',
|
||||
'thumbnail': 'https://images.dotstudiopro.com/5f4d1901f340b50d937cec32',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
webpage_json_data = self._search_json(
|
||||
r'mountObj\s*=', webpage, 'video ID data', display_id,
|
||||
transform_source=js_to_json)
|
||||
video_id = webpage_json_data['video_id']
|
||||
company_id = webpage_json_data['company_id']
|
||||
|
||||
meta = self._download_json(
|
||||
f'https://api.myspotlight.tv/dotplayer/video/{company_id}/{video_id}',
|
||||
video_id, query={'device_type': 'desktop_web'})['video']
|
||||
|
||||
formats = self._extract_m3u8_formats(
|
||||
meta['video_m3u8'], video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
subtitles = {}
|
||||
for cc in traverse_obj(meta, ('closeCaption', lambda _, v: url_or_none(v['srtPath']))):
|
||||
subtitles.setdefault(cc.get('srtShortLang') or 'en', []).append(
|
||||
{'ext': cc.get('srtExt'), 'url': cc['srtPath']})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(meta, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'series': ('seriestitle', {str}),
|
||||
'season': ('season', {int}),
|
||||
'episode': ('episode', {str}),
|
||||
'duration': ('duration', {float_or_none}),
|
||||
'timestamp': ('updated_at', {parse_iso8601}),
|
||||
'thumbnail': ('thumb', {url_or_none}),
|
||||
}),
|
||||
'subtitles': subtitles,
|
||||
}
|
@ -1,56 +1,170 @@
|
||||
import functools
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
get_element_text_and_html_by_tag,
|
||||
get_elements_by_class,
|
||||
join_nonempty,
|
||||
js_to_json,
|
||||
determine_ext,
|
||||
mimetype2ext,
|
||||
unified_strdate,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
variadic,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
def html_get_element(tag=None, cls=None):
|
||||
assert tag or cls, 'One of tag or class is required'
|
||||
|
||||
if cls:
|
||||
func = functools.partial(get_elements_by_class, cls, tag=tag)
|
||||
else:
|
||||
func = functools.partial(get_element_text_and_html_by_tag, tag)
|
||||
|
||||
def html_get_element_wrapper(html):
|
||||
return variadic(func(html))[0]
|
||||
|
||||
return html_get_element_wrapper
|
||||
|
||||
|
||||
class BpbIE(InfoExtractor):
|
||||
IE_DESC = 'Bundeszentrale für politische Bildung'
|
||||
_VALID_URL = r'https?://(?:www\.)?bpb\.de/mediathek/(?P<id>[0-9]+)/'
|
||||
_VALID_URL = r'https?://(?:www\.|m\.)?bpb\.de/(?:[^/?#]+/)*(?P<id>\d+)(?:[/?#]|$)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
'url': 'http://www.bpb.de/mediathek/297/joachim-gauck-zu-1989-und-die-erinnerung-an-die-ddr',
|
||||
'md5': 'c4f84c8a8044ca9ff68bb8441d300b3f',
|
||||
'info_dict': {
|
||||
'id': '297',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Kooperative Berlin',
|
||||
'description': 'md5:f4f75885ba009d3e2b156247a8941ce6',
|
||||
'release_date': '20160115',
|
||||
'series': 'Interview auf dem Geschichtsforum 1989 | 2009',
|
||||
'tags': ['Friedliche Revolution', 'Erinnerungskultur', 'Vergangenheitspolitik', 'DDR 1949 - 1990', 'Freiheitsrecht', 'BStU', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/7/297_teaser_16x9_1240.jpg?8839D',
|
||||
'title': 'Joachim Gauck zu 1989 und die Erinnerung an die DDR',
|
||||
'description': 'Joachim Gauck, erster Beauftragter für die Stasi-Unterlagen, spricht auf dem Geschichtsforum über die friedliche Revolution 1989 und eine "gewisse Traurigkeit" im Umgang mit der DDR-Vergangenheit.'
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/video/522184/krieg-flucht-und-falschmeldungen-wirstattdesinformation-2/',
|
||||
'info_dict': {
|
||||
'id': '522184',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:f83c795ff8f825a69456a9e51fc15903',
|
||||
'release_date': '20230621',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/4/522184_teaser_16x9_1240.png?EABFB',
|
||||
'title': 'md5:9b01ccdbf58dbf9e5c9f6e771a803b1c',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/lernen/bewegtbild-und-politische-bildung/webvideo/518789/krieg-flucht-und-falschmeldungen-wirstattdesinformation-1/',
|
||||
'info_dict': {
|
||||
'id': '518789',
|
||||
'ext': 'mp4',
|
||||
'creator': 'Institute for Strategic Dialogue Germany gGmbH (ISD)',
|
||||
'description': 'md5:85228aed433e84ff0ff9bc582abd4ea8',
|
||||
'release_date': '20230302',
|
||||
'tags': ['Desinformation', 'Ukraine', 'Russland', 'Geflüchtete'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/9/518789_teaser_16x9_1240.jpeg?56D0D',
|
||||
'title': 'md5:3e956f264bb501f6383f10495a401da4',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/podcasts/apuz-podcast/539727/apuz-20-china/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/audio/315813/folge-1-eine-einfuehrung/',
|
||||
'info_dict': {
|
||||
'id': '315813',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Axel Schröder',
|
||||
'description': 'md5:eda9d1af34e5912efef5baf54fba4427',
|
||||
'release_date': '20200921',
|
||||
'series': 'Auf Endlagersuche. Der deutsche Weg zu einem sicheren Atommülllager',
|
||||
'tags': ['Atomenergie', 'Endlager', 'hoch-radioaktiver Abfall', 'Endlagersuche', 'Atommüll', 'Atomendlager', 'Gorleben', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/3/315813_teaser_16x9_1240.png?92A94',
|
||||
'title': 'Folge 1: Eine Einführung',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/517806/die-weltanschauung-der-neuen-rechten/',
|
||||
'info_dict': {
|
||||
'id': '517806',
|
||||
'ext': 'mp3',
|
||||
'creator': 'Bundeszentrale für politische Bildung',
|
||||
'description': 'md5:594689600e919912aade0b2871cc3fed',
|
||||
'release_date': '20230127',
|
||||
'series': 'Vorträge des Fachtags "Modernisierer. Grenzgänger. Anstifter. Sechs Jahrzehnte \'Neue Rechte\'"',
|
||||
'tags': ['Rechtsextremismus', 'Konservatismus', 'Konservativismus', 'neue Rechte', 'Rechtspopulismus', 'Schnellroda', 'Deutschland'],
|
||||
'thumbnail': 'https://www.bpb.de/cache/images/6/517806_teaser_16x9_1240.png?7A7A0',
|
||||
'title': 'Die Weltanschauung der "Neuen Rechten"',
|
||||
'uploader': 'Bundeszentrale für politische Bildung',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bpb.de/mediathek/reihen/zahlen-und-fakten-soziale-situation-filme/520153/zahlen-und-fakten-die-soziale-situation-in-deutschland-migration/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_TITLE_RE = re.compile('(?P<title>[^<]*)<[^>]+>(?P<series>[^<]*)')
|
||||
|
||||
def _parse_vue_attributes(self, name, string, video_id):
|
||||
attributes = extract_attributes(self._search_regex(rf'(<{name}(?:"[^"]*?"|[^>])*>)', string, name))
|
||||
|
||||
for key, value in attributes.items():
|
||||
if key.startswith(':'):
|
||||
attributes[key] = self._parse_json(value, video_id, transform_source=js_to_json, fatal=False)
|
||||
|
||||
return attributes
|
||||
|
||||
@staticmethod
|
||||
def _process_source(source):
|
||||
url = url_or_none(source['src'])
|
||||
if not url:
|
||||
return None
|
||||
|
||||
source_type = source.get('type', '')
|
||||
extension = mimetype2ext(source_type)
|
||||
is_video = source_type.startswith('video')
|
||||
note = url.rpartition('.')[0].rpartition('_')[2] if is_video else None
|
||||
|
||||
return {
|
||||
'url': url,
|
||||
'ext': extension,
|
||||
'vcodec': None if is_video else 'none',
|
||||
'quality': 10 if note == 'high' else 0,
|
||||
'format_note': note,
|
||||
'format_id': join_nonempty(extension, note),
|
||||
}
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
title = self._html_search_regex(
|
||||
r'<h2 class="white">(.*?)</h2>', webpage, 'title')
|
||||
video_info_dicts = re.findall(
|
||||
r"({\s*src\s*:\s*'https?://film\.bpb\.de/[^}]+})", webpage)
|
||||
|
||||
formats = []
|
||||
for video_info in video_info_dicts:
|
||||
video_info = self._parse_json(
|
||||
video_info, video_id, transform_source=js_to_json, fatal=False)
|
||||
if not video_info:
|
||||
continue
|
||||
video_url = video_info.get('src')
|
||||
if not video_url:
|
||||
continue
|
||||
quality = 'high' if '_high' in video_url else 'low'
|
||||
formats.append({
|
||||
'url': video_url,
|
||||
'quality': 10 if quality == 'high' else 0,
|
||||
'format_note': quality,
|
||||
'format_id': '%s-%s' % (quality, determine_ext(video_url)),
|
||||
})
|
||||
title_result = traverse_obj(webpage, ({html_get_element(cls='opening-header__title')}, {self._TITLE_RE.match}))
|
||||
json_lds = list(self._yield_json_ld(webpage, video_id, fatal=False))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'title': title,
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': traverse_obj(title_result, ('title', {str.strip})) or None,
|
||||
# This metadata could be interpreted otherwise, but it fits "series" the most
|
||||
'series': traverse_obj(title_result, ('series', {str.strip})) or None,
|
||||
'description': join_nonempty(*traverse_obj(webpage, [(
|
||||
{html_get_element(cls='opening-intro')},
|
||||
[{html_get_element(tag='bpb-accordion-item')}, {html_get_element(cls='text-content')}],
|
||||
), {clean_html}]), delim='\n\n') or None,
|
||||
'creator': self._html_search_meta('author', webpage),
|
||||
'uploader': self._html_search_meta('publisher', webpage),
|
||||
'release_date': unified_strdate(self._html_search_meta('date', webpage)),
|
||||
'tags': traverse_obj(json_lds, (..., 'keywords', {lambda x: x.split(',')}, ...)),
|
||||
**traverse_obj(self._parse_vue_attributes('bpb-player', webpage, video_id), {
|
||||
'formats': (':sources', ..., {self._process_source}),
|
||||
'thumbnail': ('poster', {lambda x: urljoin(url, x)}),
|
||||
}),
|
||||
}
|
||||
|
@ -0,0 +1,127 @@
|
||||
import hashlib
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
traverse_obj,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
class BrilliantpalaBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'brilliantpala'
|
||||
_DOMAIN = '{subdomain}.brilliantpala.org'
|
||||
|
||||
def _initialize_pre_login(self):
|
||||
self._HOMEPAGE = f'https://{self._DOMAIN}'
|
||||
self._LOGIN_API = f'{self._HOMEPAGE}/login/'
|
||||
self._LOGOUT_DEVICES_API = f'{self._HOMEPAGE}/logout_devices/?next=/'
|
||||
self._CONTENT_API = f'{self._HOMEPAGE}/api/v2.4/contents/{{content_id}}/'
|
||||
self._HLS_AES_URI = f'{self._HOMEPAGE}/api/v2.5/video_contents/{{content_id}}/key/'
|
||||
|
||||
def _get_logged_in_username(self, url, video_id):
|
||||
webpage, urlh = self._download_webpage_handle(url, video_id)
|
||||
if self._LOGIN_API == urlh.url:
|
||||
self.raise_login_required()
|
||||
return self._html_search_regex(
|
||||
r'"username"\s*:\s*"(?P<username>[^"]+)"', webpage, 'stream page info', 'username')
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
login_form = self._hidden_inputs(self._download_webpage(
|
||||
self._LOGIN_API, None, 'Downloading login page'))
|
||||
login_form.update({
|
||||
'username': username,
|
||||
'password': password,
|
||||
})
|
||||
self._set_cookie(self._DOMAIN, 'csrftoken', login_form['csrfmiddlewaretoken'])
|
||||
|
||||
logged_page = self._download_webpage(
|
||||
self._LOGIN_API, None, note='Logging in', headers={'Referer': self._LOGIN_API},
|
||||
data=urlencode_postdata(login_form))
|
||||
|
||||
if self._html_search_regex(
|
||||
r'(Your username / email and password)', logged_page, 'auth fail', default=None):
|
||||
raise ExtractorError('wrong username or password', expected=True)
|
||||
|
||||
# the maximum number of logins is one
|
||||
if self._html_search_regex(
|
||||
r'(Logout Other Devices)', logged_page, 'logout devices button', default=None):
|
||||
logout_device_form = self._hidden_inputs(logged_page)
|
||||
self._download_webpage(
|
||||
self._LOGOUT_DEVICES_API, None, headers={'Referer': self._LOGIN_API},
|
||||
note='Logging out other devices', data=urlencode_postdata(logout_device_form))
|
||||
|
||||
def _real_extract(self, url):
|
||||
course_id, content_id = self._match_valid_url(url).group('course_id', 'content_id')
|
||||
video_id = f'{course_id}-{content_id}'
|
||||
|
||||
username = self._get_logged_in_username(url, video_id)
|
||||
|
||||
content_json = self._download_json(
|
||||
self._CONTENT_API.format(content_id=content_id), video_id,
|
||||
note='Fetching content info', errnote='Unable to fetch content info')
|
||||
|
||||
entries = []
|
||||
for stream in traverse_obj(content_json, ('video', 'streams', lambda _, v: v['id'] and v['url'])):
|
||||
formats = self._extract_m3u8_formats(stream['url'], video_id, fatal=False)
|
||||
if not formats:
|
||||
continue
|
||||
entries.append({
|
||||
'id': str(stream['id']),
|
||||
'title': content_json.get('title'),
|
||||
'formats': formats,
|
||||
'hls_aes': {'uri': self._HLS_AES_URI.format(content_id=content_id)},
|
||||
'http_headers': {'X-Key': hashlib.sha256(username.encode('ascii')).hexdigest()},
|
||||
'thumbnail': content_json.get('cover_image'),
|
||||
})
|
||||
|
||||
return self.playlist_result(
|
||||
entries, playlist_id=video_id, playlist_title=content_json.get('title'))
|
||||
|
||||
|
||||
class BrilliantpalaElearnIE(BrilliantpalaBaseIE):
|
||||
IE_NAME = 'Brilliantpala:Elearn'
|
||||
IE_DESC = 'VoD on elearn.brilliantpala.org'
|
||||
_VALID_URL = r'https?://elearn\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://elearn.brilliantpala.org/courses/42/contents/12345/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://elearn.brilliantpala.org/courses/98/contents/36683/',
|
||||
'info_dict': {
|
||||
'id': '23577',
|
||||
'ext': 'mp4',
|
||||
'title': 'Physical World, Units and Measurements - 1',
|
||||
'thumbnail': 'https://d1j3vi2u94ebt0.cloudfront.net/institute/brilliantpalalms/chapter_contents/26237/e657f81b90874be19795c7ea081f8d5c.png',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='elearn')
|
||||
|
||||
|
||||
class BrilliantpalaClassesIE(BrilliantpalaBaseIE):
|
||||
IE_NAME = 'Brilliantpala:Classes'
|
||||
IE_DESC = 'VoD on classes.brilliantpala.org'
|
||||
_VALID_URL = r'https?://classes\.brilliantpala\.org/courses/(?P<course_id>\d+)/contents/(?P<content_id>\d+)/?'
|
||||
_TESTS = [{
|
||||
'url': 'https://classes.brilliantpala.org/courses/42/contents/12345/',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://classes.brilliantpala.org/courses/416/contents/25445/',
|
||||
'info_dict': {
|
||||
'id': '9128',
|
||||
'ext': 'mp4',
|
||||
'title': 'Motion in a Straight Line - Class 1',
|
||||
'thumbnail': 'https://d3e4y8hquds3ek.cloudfront.net/institute/brilliantpalaelearn/chapter_contents/ff5ba838d0ec43419f67387fe1a01fa8.png',
|
||||
'live_status': 'not_live',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
}]
|
||||
|
||||
_DOMAIN = BrilliantpalaBaseIE._DOMAIN.format(subdomain='classes')
|
@ -0,0 +1,39 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class Canal1IE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.|noticias\.)?canal1\.com\.co/(?:[^?#&])+/(?P<id>[\w-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://canal1.com.co/noticias/napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco/',
|
||||
'info_dict': {
|
||||
'id': '63b39f6b354977084b85ab54',
|
||||
'display_id': 'napa-i-una-cadena-de-produccion-de-arroz-que-se-quedo-en-veremos-y-abandonada-en-el-departamento-del-choco',
|
||||
'title': 'Ñapa I Una cadena de producción de arroz que se quedó en veremos y abandonada en el departamento del Chocó',
|
||||
'description': 'md5:bc49c6d64d20610ea1e7daf079a0d013',
|
||||
'thumbnail': r're:^https?://[^?#]+63b39f6b354977084b85ab54',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://noticias.canal1.com.co/noticias/tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter/',
|
||||
'info_dict': {
|
||||
'id': '63b39e93f5fd223aa32250fb',
|
||||
'display_id': 'tres-i-el-triste-record-que-impuso-elon-musk-el-dueno-de-tesla-y-de-twitter',
|
||||
'title': 'Tres I El triste récord que impuso Elon Musk, el dueño de Tesla y de Twitter',
|
||||
'description': 'md5:d9f691f131a21ce6767ca6c05d17d791',
|
||||
'thumbnail': r're:^https?://[^?#]+63b39e93f5fd223aa32250fb',
|
||||
'ext': 'mp4',
|
||||
},
|
||||
}, {
|
||||
# Geo-restricted to Colombia
|
||||
'url': 'https://canal1.com.co/programas/guerreros-canal-1/video-inedito-guerreros-despedida-kewin-zarate/',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
return self.url_result(
|
||||
self._search_regex(r'"embedUrl"\s*:\s*"([^"]+)', webpage, 'embed url'),
|
||||
display_id=display_id, url_transparent=True)
|
@ -0,0 +1,136 @@
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
traverse_obj,
|
||||
urljoin,
|
||||
)
|
||||
|
||||
|
||||
class CaracolTvPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://play\.caracoltv\.com/videoDetails/(?P<id>[^/?#]+)'
|
||||
_NETRC_MACHINE = 'caracoltv-play'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||
'info_dict': {
|
||||
'id': 'OTo4NGFmNjUwOWQ2ZmM0NTg2YWRiOWU0MGNhOWViOWJkYQ==',
|
||||
'title': 'La teoría del promedio',
|
||||
'description': 'md5:1cdd6d2c13f19ef0d9649ab81a023ac3',
|
||||
},
|
||||
'playlist_count': 6,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==/ella?season=0',
|
||||
'info_dict': {
|
||||
'id': 'OTo3OWM4ZTliYzQxMmM0MTMxYTk4Mjk2YjdjNGQ4NGRkOQ==',
|
||||
'title': 'Ella',
|
||||
'description': 'md5:a639b1feb5ddcc0cff92a489b4e544b8',
|
||||
},
|
||||
'playlist_count': 10,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==/la-vuelta-al-mundo-en-80-risas-2022?season=0',
|
||||
'info_dict': {
|
||||
'id': 'OTpiYTY1YTVmOTI5MzI0ZWJhOGZiY2Y3MmRlOWZlYmJkOA==',
|
||||
'title': 'La vuelta al mundo en 80 risas 2022',
|
||||
'description': 'md5:e97aac36106e5c37ebf947b3350106a4',
|
||||
},
|
||||
'playlist_count': 17,
|
||||
}, {
|
||||
'url': 'https://play.caracoltv.com/videoDetails/MzoxX3BwbjRmNjB1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
_USER_TOKEN = None
|
||||
|
||||
def _extract_app_token(self, webpage):
|
||||
config_js_path = self._search_regex(
|
||||
r'<script[^>]+src\s*=\s*"([^"]+coreConfig.js[^"]+)', webpage, 'config js url', fatal=False)
|
||||
|
||||
mediation_config = {} if not config_js_path else self._search_json(
|
||||
r'mediation\s*:', self._download_webpage(
|
||||
urljoin('https://play.caracoltv.com/', config_js_path), None, fatal=False, note='Extracting JS config'),
|
||||
'mediation_config', None, transform_source=js_to_json, fatal=False)
|
||||
|
||||
key = traverse_obj(
|
||||
mediation_config, ('live', 'key')) or '795cd9c089a1fc48094524a5eba85a3fca1331817c802f601735907c8bbb4f50'
|
||||
secret = traverse_obj(
|
||||
mediation_config, ('live', 'secret')) or '64dec00a6989ba83d087621465b5e5d38bdac22033b0613b659c442c78976fa0'
|
||||
|
||||
return base64.b64encode(f'{key}:{secret}'.encode()).decode()
|
||||
|
||||
def _perform_login(self, email, password):
|
||||
webpage = self._download_webpage('https://play.caracoltv.com/', None, fatal=False)
|
||||
app_token = self._extract_app_token(webpage)
|
||||
|
||||
bearer_token = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/applications/oauth', None, data=b'', note='Retrieving bearer token',
|
||||
headers={'Authorization': f'Basic {app_token}'})['token']
|
||||
|
||||
self._USER_TOKEN = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/user/login', None, note='Performing login', headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': f'Bearer {bearer_token}',
|
||||
}, data=json.dumps({
|
||||
'device_data': {
|
||||
'device_id': str(uuid.uuid4()),
|
||||
'device_token': '',
|
||||
'device_type': 'web'
|
||||
},
|
||||
'login_data': {
|
||||
'enabled': True,
|
||||
'email': email,
|
||||
'password': password,
|
||||
}
|
||||
}).encode())['user_token']
|
||||
|
||||
def _extract_video(self, video_data, series_id=None, season_id=None, season_number=None):
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_data['stream_url'], series_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': video_data['id'],
|
||||
'title': video_data.get('name'),
|
||||
'description': video_data.get('description'),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnails': traverse_obj(
|
||||
video_data, ('extra_thumbs', ..., {'url': 'thumb_url', 'height': 'height', 'width': 'width'})),
|
||||
'series_id': series_id,
|
||||
'season_id': season_id,
|
||||
'season_number': int_or_none(season_number),
|
||||
'episode_number': int_or_none(video_data.get('item_order')),
|
||||
'is_live': video_data.get('entry_type') == 3,
|
||||
}
|
||||
|
||||
def _extract_series_seasons(self, seasons, series_id):
|
||||
for season in seasons:
|
||||
api_response = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/feed', series_id, query={'season_id': season['id']},
|
||||
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})
|
||||
|
||||
season_number = season.get('order')
|
||||
for episode in api_response['items']:
|
||||
yield self._extract_video(episode, series_id, season['id'], season_number)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_id = self._match_id(url)
|
||||
|
||||
if self._USER_TOKEN is None:
|
||||
self._perform_login('guest@inmobly.com', 'Test@gus1')
|
||||
|
||||
api_response = self._download_json(
|
||||
'https://eu-gateway.inmobly.com/feed', series_id, query={'include_ids': series_id},
|
||||
headers={'Authorization': f'Bearer {self._USER_TOKEN}'})['items'][0]
|
||||
|
||||
if not api_response.get('seasons'):
|
||||
return self._extract_video(api_response)
|
||||
|
||||
return self.playlist_result(
|
||||
self._extract_series_seasons(api_response['seasons'], series_id),
|
||||
series_id, **traverse_obj(api_response, {
|
||||
'title': 'name',
|
||||
'description': 'description',
|
||||
}))
|
@ -0,0 +1,136 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
filter_dict,
|
||||
int_or_none,
|
||||
parse_age_limit,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
unsmuggle_url,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class CineverseBaseIE(InfoExtractor):
|
||||
_VALID_URL_BASE = r'https://www\.(?P<host>%s)' % '|'.join(map(re.escape, (
|
||||
'cineverse.com',
|
||||
'asiancrush.com',
|
||||
'dovechannel.com',
|
||||
'screambox.com',
|
||||
'midnightpulp.com',
|
||||
'fandor.com',
|
||||
'retrocrush.tv',
|
||||
)))
|
||||
|
||||
|
||||
class CineverseIE(CineverseBaseIE):
|
||||
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/watch/(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.asiancrush.com/watch/DMR00018919/Women-Who-Flirt',
|
||||
'skip': 'geo-blocked',
|
||||
'info_dict': {
|
||||
'title': 'Women Who Flirt',
|
||||
'ext': 'mp4',
|
||||
'id': 'DMR00018919',
|
||||
'modified_timestamp': 1678744575289,
|
||||
'cast': ['Xun Zhou', 'Xiaoming Huang', 'Yi-Lin Sie', 'Sonia Sui', 'Quniciren'],
|
||||
'duration': 5811.597,
|
||||
'description': 'md5:892fd62a05611d394141e8394ace0bc6',
|
||||
'age_limit': 13,
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.retrocrush.tv/watch/1000000023016/Archenemy! Crystal Bowie',
|
||||
'skip': 'geo-blocked',
|
||||
'info_dict': {
|
||||
'title': 'Archenemy! Crystal Bowie',
|
||||
'ext': 'mp4',
|
||||
'id': '1000000023016',
|
||||
'episode_number': 3,
|
||||
'season_number': 1,
|
||||
'cast': ['Nachi Nozawa', 'Yoshiko Sakakibara', 'Toshiko Fujita'],
|
||||
'age_limit': 0,
|
||||
'episode': 'Episode 3',
|
||||
'season': 'Season 1',
|
||||
'duration': 1485.067,
|
||||
'description': 'Cobra meets a beautiful bounty hunter by the name of Jane Royal.',
|
||||
'series': 'Space Adventure COBRA (Original Japanese)',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
url, smuggled_data = unsmuggle_url(url, default={})
|
||||
self._initialize_geo_bypass({
|
||||
'countries': smuggled_data.get('geo_countries'),
|
||||
})
|
||||
video_id = self._match_id(url)
|
||||
html = self._download_webpage(url, video_id)
|
||||
idetails = self._search_nextjs_data(html, video_id)['props']['pageProps']['idetails']
|
||||
|
||||
if idetails.get('err_code') == 1200:
|
||||
self.raise_geo_restricted(
|
||||
'This video is not available from your location due to geo restriction. '
|
||||
'You may be able to bypass it by using the /details/ page instead of the /watch/ page',
|
||||
countries=smuggled_data.get('geo_countries'))
|
||||
|
||||
return {
|
||||
'subtitles': filter_dict({
|
||||
'en': traverse_obj(idetails, (('cc_url_vtt', 'subtitle_url'), {'url': {url_or_none}})) or None,
|
||||
}),
|
||||
'formats': self._extract_m3u8_formats(idetails['url'], video_id),
|
||||
**traverse_obj(idetails, {
|
||||
'title': 'title',
|
||||
'id': ('details', 'item_id'),
|
||||
'description': ('details', 'description'),
|
||||
'duration': ('duration', {lambda x: x / 1000}),
|
||||
'cast': ('details', 'cast', {lambda x: x.split(', ')}),
|
||||
'modified_timestamp': ('details', 'updated_by', 0, 'update_time', 'time', {int_or_none}),
|
||||
'season_number': ('details', 'season', {int_or_none}),
|
||||
'episode_number': ('details', 'episode', {int_or_none}),
|
||||
'age_limit': ('details', 'rating_code', {parse_age_limit}),
|
||||
'series': ('details', 'series_details', 'title'),
|
||||
}),
|
||||
}
|
||||
|
||||
|
||||
class CineverseDetailsIE(CineverseBaseIE):
|
||||
_VALID_URL = rf'{CineverseBaseIE._VALID_URL_BASE}/details/(?P<id>[A-Z0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.retrocrush.tv/details/1000000023012/Space-Adventure-COBRA-(Original-Japanese)',
|
||||
'playlist_mincount': 30,
|
||||
'info_dict': {
|
||||
'title': 'Space Adventure COBRA (Original Japanese)',
|
||||
'id': '1000000023012',
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.asiancrush.com/details/NNVG4938/Hansel-and-Gretel',
|
||||
'info_dict': {
|
||||
'id': 'NNVG4938',
|
||||
'ext': 'mp4',
|
||||
'title': 'Hansel and Gretel',
|
||||
'description': 'md5:e3e4c35309c2e82aee044f972c2fb05d',
|
||||
'cast': ['Jeong-myeong Cheon', 'Eun Won-jae', 'Shim Eun-gyeong', 'Ji-hee Jin', 'Hee-soon Park', 'Lydia Park', 'Kyeong-ik Kim'],
|
||||
'duration': 7030.732,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
host, series_id = self._match_valid_url(url).group('host', 'id')
|
||||
html = self._download_webpage(url, series_id)
|
||||
pageprops = self._search_nextjs_data(html, series_id)['props']['pageProps']
|
||||
|
||||
geo_countries = traverse_obj(pageprops, ('itemDetailsData', 'geo_country', {lambda x: x.split(', ')}))
|
||||
geoblocked = traverse_obj(pageprops, (
|
||||
'itemDetailsData', 'playback_err_msg')) == 'This title is not available in your location.'
|
||||
|
||||
def item_result(item):
|
||||
item_url = f'https://www.{host}/watch/{item["item_id"]}/{item["title"]}'
|
||||
if geoblocked:
|
||||
item_url = smuggle_url(item_url, {'geo_countries': geo_countries})
|
||||
return self.url_result(item_url, CineverseIE)
|
||||
|
||||
season = traverse_obj(pageprops, ('seasonEpisodes', ..., 'episodes', lambda _, v: v['item_id'] and v['title']))
|
||||
if season:
|
||||
return self.playlist_result([item_result(ep) for ep in season], playlist_id=series_id,
|
||||
playlist_title=traverse_obj(pageprops, ('itemDetailsData', 'title')))
|
||||
return item_result(pageprops['itemDetailsData'])
|
@ -0,0 +1,63 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class ErocastIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?erocast\.me/track/(?P<id>[0-9]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://erocast.me/track/9787/f',
|
||||
'md5': 'af63b91f5f231096aba54dd682abea3b',
|
||||
'info_dict': {
|
||||
'id': '9787',
|
||||
'title': '[F4M] Your roommate, who is definitely not possessed by an alien, suddenly wants to fuck you',
|
||||
'url': 'https://erocast.s3.us-east-2.wasabisys.com/1220419/track.m3u8',
|
||||
'ext': 'm4a',
|
||||
'age_limit': 18,
|
||||
'release_timestamp': 1696178652,
|
||||
'release_date': '20231001',
|
||||
'modified_timestamp': int,
|
||||
'modified_date': str,
|
||||
'description': 'ExtraTerrestrial Tuesday!',
|
||||
'uploader': 'clarissaisshy',
|
||||
'uploader_id': '8113',
|
||||
'uploader_url': 'https://erocast.me/clarissaisshy',
|
||||
'thumbnail': 'https://erocast.s3.us-east-2.wasabisys.com/1220418/conversions/1696179247-lg.jpg',
|
||||
'duration': 2307,
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'webpage_url': 'https://erocast.me/track/9787/f4m-your-roommate-who-is-definitely-not-possessed-by-an-alien-suddenly-wants-to-fuck-you',
|
||||
}
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
data = self._search_json(
|
||||
rf'<script>\s*var song_data_{video_id}\s*=', webpage, 'data', video_id, end_pattern=r'</script>')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
data.get('file_url') or data['stream_url'], video_id, 'm4a', m3u8_id='hls'),
|
||||
'age_limit': 18,
|
||||
**traverse_obj(data, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'release_timestamp': ('created_at', {parse_iso8601}),
|
||||
'modified_timestamp': ('updated_at', {parse_iso8601}),
|
||||
'uploader': ('user', 'name', {str}),
|
||||
'uploader_id': ('user', 'id', {str_or_none}),
|
||||
'uploader_url': ('user', 'permalink_url', {url_or_none}),
|
||||
'thumbnail': ('artwork_url', {url_or_none}),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'view_count': ('plays', {int_or_none}),
|
||||
'comment_count': ('comment_count', {int_or_none}),
|
||||
'webpage_url': ('permalink_url', {url_or_none}),
|
||||
}),
|
||||
}
|
@ -0,0 +1,156 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_duration,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class JTBCIE(InfoExtractor):
|
||||
IE_DESC = 'jtbc.co.kr'
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://(?:
|
||||
vod\.jtbc\.co\.kr/player/(?:program|clip)
|
||||
|tv\.jtbc\.co\.kr/(?:replay|trailer|clip)/pr\d+/pm\d+
|
||||
)/(?P<id>(?:ep|vo)\d+)'''
|
||||
_GEO_COUNTRIES = ['KR']
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.jtbc.co.kr/replay/pr10011629/pm10067930/ep20216321/view',
|
||||
'md5': 'e6ade71d8c8685bbfd6e6ce4167c6a6c',
|
||||
'info_dict': {
|
||||
'id': 'VO10721192',
|
||||
'display_id': 'ep20216321',
|
||||
'ext': 'mp4',
|
||||
'title': '힘쎈여자 강남순 2회 다시보기',
|
||||
'description': 'md5:043c1d9019100ce271dba09995dbd1e2',
|
||||
'duration': 3770.0,
|
||||
'release_date': '20231008',
|
||||
'age_limit': 15,
|
||||
'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/drama/stronggirlnamsoon/img/20231008_163541_522_1.jpg',
|
||||
'series': '힘쎈여자 강남순',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vod.jtbc.co.kr/player/program/ep20216733',
|
||||
'md5': '217a6d190f115a75e4bda0ceaa4cd7f4',
|
||||
'info_dict': {
|
||||
'id': 'VO10721429',
|
||||
'display_id': 'ep20216733',
|
||||
'ext': 'mp4',
|
||||
'title': '헬로 마이 닥터 친절한 진료실 149회 다시보기',
|
||||
'description': 'md5:1d70788a982dd5de26874a92fcffddb8',
|
||||
'duration': 2720.0,
|
||||
'release_date': '20231009',
|
||||
'age_limit': 15,
|
||||
'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/culture/hellomydoctor/img/20231009_095002_528_1.jpg',
|
||||
'series': '헬로 마이 닥터 친절한 진료실',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://vod.jtbc.co.kr/player/clip/vo10721270',
|
||||
'md5': '05782e2dc22a9c548aebefe62ae4328a',
|
||||
'info_dict': {
|
||||
'id': 'VO10721270',
|
||||
'display_id': 'vo10721270',
|
||||
'ext': 'mp4',
|
||||
'title': '뭉쳐야 찬다3 2회 예고편 - A매치로 향하는 마지막 관문💥',
|
||||
'description': 'md5:d48b51a8655c84843b4ed8d0c39aae68',
|
||||
'duration': 46.0,
|
||||
'release_date': '20231015',
|
||||
'age_limit': 15,
|
||||
'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/enter/soccer3/img/20231008_210957_775_1.jpg',
|
||||
'series': '뭉쳐야 찬다3',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://tv.jtbc.co.kr/trailer/pr10010392/pm10032526/vo10720912/view',
|
||||
'md5': '367d480eb3ef54a9cd7a4b4d69c4b32d',
|
||||
'info_dict': {
|
||||
'id': 'VO10720912',
|
||||
'display_id': 'vo10720912',
|
||||
'ext': 'mp4',
|
||||
'title': '아는 형님 404회 예고편 | 10월 14일(토) 저녁 8시 50분 방송!',
|
||||
'description': 'md5:2743bb1079ceb85bb00060f2ad8f0280',
|
||||
'duration': 148.0,
|
||||
'release_date': '20231014',
|
||||
'age_limit': 15,
|
||||
'thumbnail': 'https://fs.jtbc.co.kr//joydata/CP00000001/prog/enter/jtbcbros/img/20231006_230023_802_1.jpg',
|
||||
'series': '아는 형님',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
if display_id.startswith('vo'):
|
||||
video_id = display_id.upper()
|
||||
else:
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
video_id = self._search_regex(r'data-vod="(VO\d+)"', webpage, 'vod id')
|
||||
|
||||
playback_data = self._download_json(
|
||||
f'https://api.jtbc.co.kr/vod/{video_id}', video_id, note='Downloading VOD playback data')
|
||||
|
||||
subtitles = {}
|
||||
for sub in traverse_obj(playback_data, ('tracks', lambda _, v: v['file'])):
|
||||
subtitles.setdefault(sub.get('label', 'und'), []).append({'url': sub['file']})
|
||||
|
||||
formats = []
|
||||
for stream_url in traverse_obj(playback_data, ('sources', 'HLS', ..., 'file', {url_or_none})):
|
||||
stream_url = re.sub(r'/playlist(?:_pd\d+)?\.m3u8', '/index.m3u8', stream_url)
|
||||
formats.extend(self._extract_m3u8_formats(stream_url, video_id, fatal=False))
|
||||
|
||||
metadata = self._download_json(
|
||||
'https://now-api.jtbc.co.kr/v1/vod/detail', video_id,
|
||||
note='Downloading mobile details', fatal=False, query={'vodFileId': video_id})
|
||||
return {
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
**traverse_obj(metadata, ('vodDetail', {
|
||||
'title': 'vodTitleView',
|
||||
'series': 'programTitle',
|
||||
'age_limit': ('watchAge', {int_or_none}),
|
||||
'release_date': ('broadcastDate', {lambda x: re.match(r'\d{8}', x.replace('.', ''))}, 0),
|
||||
'description': 'episodeContents',
|
||||
'thumbnail': ('imgFileUrl', {url_or_none}),
|
||||
})),
|
||||
'duration': parse_duration(playback_data.get('playTime')),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
}
|
||||
|
||||
|
||||
class JTBCProgramIE(InfoExtractor):
|
||||
IE_NAME = 'JTBC:program'
|
||||
_VALID_URL = r'https?://(?:vod\.jtbc\.co\.kr/program|tv\.jtbc\.co\.kr/replay)/(?P<id>pr\d+)/(?:replay|pm\d+)/?(?:$|[?#])'
|
||||
|
||||
_TESTS = [{
|
||||
'url': 'https://tv.jtbc.co.kr/replay/pr10010392/pm10032710',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'pr10010392',
|
||||
},
|
||||
'playlist_count': 398,
|
||||
}, {
|
||||
'url': 'https://vod.jtbc.co.kr/program/pr10011491/replay',
|
||||
'info_dict': {
|
||||
'_type': 'playlist',
|
||||
'id': 'pr10011491',
|
||||
},
|
||||
'playlist_count': 59,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
program_id = self._match_id(url)
|
||||
|
||||
vod_list = self._download_json(
|
||||
'https://now-api.jtbc.co.kr/v1/vodClip/programHome/programReplayVodList', program_id,
|
||||
note='Downloading program replay list', query={
|
||||
'programId': program_id,
|
||||
'rowCount': '10000',
|
||||
})
|
||||
|
||||
entries = [self.url_result(f'https://vod.jtbc.co.kr/player/program/{video_id}', JTBCIE, video_id)
|
||||
for video_id in traverse_obj(vod_list, ('programReplayVodList', ..., 'episodeId'))]
|
||||
return self.playlist_result(entries, program_id)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue