mirror of https://github.com/yt-dlp/yt-dlp
Merge branch 'yt-dlp:master' into generic
commit
a111b3ab58
@ -0,0 +1,5 @@
|
||||
[build-system]
|
||||
build-backend = 'setuptools.build_meta'
|
||||
# https://github.com/yt-dlp/yt-dlp/issues/5941
|
||||
# https://github.com/pypa/distutils/issues/17
|
||||
requires = ['setuptools > 50']
|
@ -0,0 +1,227 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
# Allow direct execution
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
import unittest.mock
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
import contextlib
|
||||
import itertools
|
||||
from pathlib import Path
|
||||
|
||||
from yt_dlp.compat import compat_expanduser
|
||||
from yt_dlp.options import create_parser, parseOpts
|
||||
from yt_dlp.utils import Config, get_executable_path
|
||||
|
||||
# Baseline environment applied by ``set_environ``.
# A value of ``None`` means the variable is removed from the environment.
ENVIRON_DEFAULTS = {
    'HOME': None,
    'XDG_CONFIG_HOME': '/_xdg_config_home/',
    'USERPROFILE': 'C:/Users/testing/',
    'APPDATA': 'C:/Users/testing/AppData/Roaming/',
    'HOMEDRIVE': 'C:/',
    'HOMEPATH': 'Users/testing/',
}


@contextlib.contextmanager
def set_environ(**kwargs):
    """Temporarily replace environment variables for the duration of the block.

    ``ENVIRON_DEFAULTS`` is applied first and then overridden by ``kwargs``.
    A value of ``None`` removes the variable, any other value is assigned.
    The complete previous environment is restored on exit, including when
    the managed block (or the setup itself) raises.
    """
    saved_environ = os.environ.copy()

    try:
        for name, value in {**ENVIRON_DEFAULTS, **kwargs}.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = value

        yield
    finally:
        # Must run even when a test assertion fails inside the block,
        # otherwise the patched environment leaks into later tests
        os.environ.clear()
        os.environ.update(saved_environ)
|
||||
|
||||
|
||||
def _generate_expected_groups():
    """Return the expected config file locations for the current environment.

    The result maps a group name ('Portable', 'Home', 'User', 'System') to
    the ordered list of ``Path`` objects expected within that group.
    """
    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    appdata_dir = os.getenv('appdata')
    home_dir = compat_expanduser('~')

    # The appdata locations are only expected when the variable is set
    user_bases = [xdg_config_home]
    if appdata_dir:
        user_bases.append(appdata_dir)

    user_paths = []
    for base in user_bases:
        user_paths.append(Path(base, 'yt-dlp.conf'))
        user_paths.append(Path(base, 'yt-dlp', 'config'))
        user_paths.append(Path(base, 'yt-dlp', 'config.txt'))
    user_paths.extend(
        Path(home_dir, *parts) for parts in (
            ('yt-dlp.conf',),
            ('yt-dlp.conf.txt',),
            ('.yt-dlp', 'config'),
            ('.yt-dlp', 'config.txt'),
        ))

    groups = {}
    groups['Portable'] = [Path(get_executable_path(), 'yt-dlp.conf')]
    groups['Home'] = [Path('yt-dlp.conf')]
    groups['User'] = user_paths
    groups['System'] = [
        Path('/etc/yt-dlp.conf'),
        Path('/etc/yt-dlp/config'),
        Path('/etc/yt-dlp/config.txt'),
    ]
    return groups
|
||||
|
||||
|
||||
class TestConfig(unittest.TestCase):
    """Tests for which config files ``parseOpts`` reads, and in what order."""

    maxDiff = None

    @set_environ()
    def test_config__ENVIRON_DEFAULTS_sanity(self):
        """The default environment must not produce duplicate expected locations."""
        locations = make_expected()
        self.assertCountEqual(
            set(locations), locations,
            'ENVIRON_DEFAULTS produces non unique names')

    def test_config_all_environ_values(self):
        """Repeat the grouping test with each variable unset, empty, '.' and set."""
        for env_name, default_value in ENVIRON_DEFAULTS.items():
            candidates = (None, '', '.', default_value or '/some/dir')
            for candidate in candidates:
                with set_environ(**{env_name: candidate}):
                    self._simple_grouping_test()

    def test_config_default_expected_locations(self):
        """Without any existing file, every expected location is checked."""
        visited, _ = self._simple_config_test()
        self.assertEqual(
            visited, make_expected(),
            'Not all expected locations have been checked')

    def test_config_default_grouping(self):
        self._simple_grouping_test()

    def _simple_grouping_test(self):
        """Pretend each expected path exists in turn and check the lookup result."""
        groups = make_expected_groups()
        for group_name, group_paths in groups.items():
            for position, existing_path in enumerate(group_paths):
                visited, opts = self._simple_config_test(existing_path)
                wanted = expected_from_expected_groups(groups, existing_path)
                self.assertEqual(
                    visited, wanted,
                    f'The checked locations do not match the expected ({group_name}, {position})')
                self.assertEqual(
                    opts.outtmpl['default'], '1',
                    f'The used result value was incorrect ({group_name}, {position})')

    def _simple_config_test(self, *stop_paths):
        """Parse empty arguments while recording every config path that is read.

        Paths listed in ``stop_paths`` act as existing files whose content is
        ``-o <count>``, where count is the number of such files hit so far.
        Returns the list of visited paths and the parsed options.
        """
        hits = 0
        visited = []

        def read_file(filename, default=[]):
            nonlocal hits
            location = Path(filename)
            visited.append(location)
            if location not in stop_paths:
                return None
            hits += 1
            return ['-o', f'{hits}']

        with ConfigMock(read_file):
            _, opts, _ = parseOpts([], False)

        return visited, opts

    @set_environ()
    def test_config_early_exit_commandline(self):
        self._early_exit_test(0, '--ignore-config')

    @set_environ()
    def test_config_early_exit_files(self):
        for allowed in range(1, len(make_expected()) + 1):
            self._early_exit_test(allowed)

    def _early_exit_test(self, allowed_reads, *args):
        """Fail if any config is read after the one returning ``--ignore-config``."""
        read_count = 0

        def read_file(filename, default=[]):
            nonlocal read_count
            read_count += 1

            if read_count == allowed_reads:
                return ['--ignore-config']
            if read_count > allowed_reads:
                self.fail('The remaining config was not ignored')
            return None

        with ConfigMock(read_file):
            parseOpts(args, False)

    @set_environ()
    def test_config_override_commandline(self):
        self._override_test(0, '-o', 'pass')

    @set_environ()
    def test_config_override_files(self):
        for start in range(1, len(make_expected()) + 1):
            self._override_test(start)

    def _override_test(self, start_index, *args):
        """Check that the config read at ``start_index`` wins over later ones."""
        read_count = 0

        def read_file(filename, default=[]):
            nonlocal read_count
            read_count += 1

            if read_count == start_index:
                return ['-o', 'pass']
            if read_count > start_index:
                return ['-o', 'fail']
            return None

        with ConfigMock(read_file):
            _, opts, _ = parseOpts(args, False)

        self.assertEqual(
            opts.outtmpl['default'], 'pass',
            'The earlier group did not override the later ones')
|
||||
|
||||
|
||||
@contextlib.contextmanager
def ConfigMock(read_file=None):
    """Patch ``yt_dlp.options.Config`` with a mock for the duration of the block.

    Calling the mock returns a real ``Config`` built on a fresh parser.
    When ``read_file`` is given it is installed as the mock's ``read_file``.
    Yields the mock.
    """
    patcher = unittest.mock.patch('yt_dlp.options.Config')
    with patcher as config_mock:
        config_mock.return_value = Config(create_parser())
        if read_file is not None:
            config_mock.read_file = read_file

        yield config_mock
|
||||
|
||||
|
||||
def make_expected(*filepaths):
    """Return the flat list of expected config paths for the current environment."""
    groups = _generate_expected_groups()
    return expected_from_expected_groups(groups, *filepaths)
|
||||
|
||||
|
||||
def make_expected_groups(*filepaths):
    """Return the expected config paths per group, truncated at ``filepaths``."""
    groups = _generate_expected_groups()
    return _filter_expected_groups(groups, filepaths)
|
||||
|
||||
|
||||
def expected_from_expected_groups(expected_groups, *filepaths):
    """Flatten the (filtered) groups into a single list, in group order."""
    filtered = _filter_expected_groups(expected_groups, filepaths)
    flat = []
    for group_paths in filtered.values():
        flat.extend(group_paths)
    return flat


def _filter_expected_groups(expected, filepaths):
    """Cut every group off after its first path that is listed in ``filepaths``.

    The matching path itself is kept. Groups without a match are copied
    whole. When ``filepaths`` is empty, ``expected`` is returned unchanged.
    """
    if not filepaths:
        return expected

    def up_to_first_match(paths):
        kept = []
        for candidate in paths:
            kept.append(candidate)
            if candidate in filepaths:
                break
        return kept

    return {group: up_to_first_match(paths) for group, paths in expected.items()}
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,73 @@
|
||||
import importlib
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata')
|
||||
sys.path.append(str(TEST_DATA_DIR))
|
||||
importlib.invalidate_caches()
|
||||
|
||||
from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins
|
||||
|
||||
|
||||
class TestPlugins(unittest.TestCase):
    """Tests for plugin discovery and loading from the test data directory."""

    TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME

    def test_directories_containing_plugins(self):
        plugin_dirs = map(Path, directories())
        self.assertIn(self.TEST_PLUGIN_DIR, plugin_dirs)

    def test_extractor_classes(self):
        # Drop already imported plugin extractor modules before loading
        prefix = f'{PACKAGE_NAME}.extractor'
        stale_modules = [name for name in sys.modules if name.startswith(prefix)]
        for name in stale_modules:
            del sys.modules[name]
        plugins_ie = load_plugins('extractor', 'IE')

        self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys())
        self.assertIn('NormalPluginIE', plugins_ie.keys())

        # don't load modules with underscore prefix
        ignored_module = f'{PACKAGE_NAME}.extractor._ignore'
        self.assertFalse(
            ignored_module in sys.modules.keys(),
            'loaded module beginning with underscore')
        self.assertNotIn('IgnorePluginIE', plugins_ie.keys())

        # Don't load extractors with underscore prefix
        self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys())

        # Don't load extractors not specified in __all__ (if supplied)
        self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys())
        self.assertIn('InAllPluginIE', plugins_ie.keys())

    def test_postprocessor_classes(self):
        plugins_pp = load_plugins('postprocessor', 'PP')
        self.assertIn('NormalPluginPP', plugins_pp.keys())

    def test_importing_zipped_module(self):
        """Zip the plugin test directory, import from the archive, then clean up."""
        zip_path = TEST_DATA_DIR / 'zipped_plugins.zip'
        archive_base = str(zip_path)[:-4]  # path without the '.zip' suffix
        shutil.make_archive(archive_base, 'zip', archive_base)
        sys.path.append(str(zip_path))  # add zip to search paths
        importlib.invalidate_caches()  # reset the import caches

        try:
            for plugin_type in ('extractor', 'postprocessor'):
                package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}')
                package_paths = map(Path, package.__path__)
                self.assertIn(zip_path / PACKAGE_NAME / plugin_type, package_paths)

            plugins_ie = load_plugins('extractor', 'IE')
            self.assertIn('ZippedPluginIE', plugins_ie.keys())

            plugins_pp = load_plugins('postprocessor', 'PP')
            self.assertIn('ZippedPluginPP', plugins_pp.keys())

        finally:
            sys.path.remove(str(zip_path))
            os.remove(zip_path)
            importlib.invalidate_caches()  # reset the import caches
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,5 @@
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
|
||||
class IgnorePluginIE(InfoExtractor):
    """Empty extractor used as plugin test data."""
|
@ -0,0 +1,12 @@
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
|
||||
class IgnoreNotInAllPluginIE(InfoExtractor):
    """Empty extractor that is deliberately left out of ``__all__``."""
|
||||
|
||||
|
||||
class InAllPluginIE(InfoExtractor):
    """Empty extractor that is the only name exported through ``__all__``."""


__all__ = ['InAllPluginIE']
|
@ -0,0 +1,9 @@
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
|
||||
class NormalPluginIE(InfoExtractor):
    """Empty extractor used as plugin test data."""
|
||||
|
||||
|
||||
class _IgnoreUnderscorePluginIE(InfoExtractor):
    """Empty extractor whose name starts with an underscore."""
|
@ -0,0 +1,5 @@
|
||||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
|
||||
|
||||
class NormalPluginPP(PostProcessor):
    """Empty postprocessor used as plugin test data."""
|
@ -0,0 +1,5 @@
|
||||
from yt_dlp.extractor.common import InfoExtractor
|
||||
|
||||
|
||||
class ZippedPluginIE(InfoExtractor):
    """Empty extractor used as zipped plugin test data."""
|
@ -0,0 +1,5 @@
|
||||
from yt_dlp.postprocessor.common import PostProcessor
|
||||
|
||||
|
||||
class ZippedPluginPP(PostProcessor):
    """Empty postprocessor used as zipped plugin test data."""
|
@ -0,0 +1,96 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_iso8601,
|
||||
traverse_obj
|
||||
)
|
||||
|
||||
|
||||
class AirTVIE(InfoExtractor):
    """Extractor for air.tv watch pages; defers to YouTube when the page names a youtube_id."""

    _VALID_URL = r'https?://www\.air\.tv/watch\?v=(?P<id>\w+)'
    _TESTS = [{
        # without youtube_id
        'url': 'https://www.air.tv/watch?v=W87jcWleSn2hXZN47zJZsQ',
        'info_dict': {
            'id': 'W87jcWleSn2hXZN47zJZsQ',
            'ext': 'mp4',
            'release_date': '20221003',
            'release_timestamp': 1664792603,
            'channel_id': 'vgfManQlRQKgoFQ8i8peFQ',
            'title': 'md5:c12d49ed367c3dadaa67659aff43494c',
            'upload_date': '20221003',
            'duration': 151,
            'view_count': int,
            'thumbnail': 'https://cdn-sp-gcs.air.tv/videos/W/8/W87jcWleSn2hXZN47zJZsQ/b13fc56464f47d9d62a36d110b9b5a72-4096x2160_9.jpg',
            'timestamp': 1664792603,
        }
    }, {
        # with youtube_id
        'url': 'https://www.air.tv/watch?v=sv57EC8tRXG6h8dNXFUU1Q',
        'info_dict': {
            'id': '2ZTqmpee-bQ',
            'ext': 'mp4',
            'comment_count': int,
            'tags': 'count:11',
            'channel_follower_count': int,
            'like_count': int,
            'uploader': 'Newsflare',
            'thumbnail': 'https://i.ytimg.com/vi_webp/2ZTqmpee-bQ/maxresdefault.webp',
            'availability': 'public',
            'title': 'Geese Chase Alligator Across Golf Course',
            'uploader_id': 'NewsflareBreaking',
            'channel_url': 'https://www.youtube.com/channel/UCzSSoloGEz10HALUAbYhngQ',
            'description': 'md5:99b21d9cea59330149efbd9706e208f5',
            'age_limit': 0,
            'channel_id': 'UCzSSoloGEz10HALUAbYhngQ',
            'uploader_url': 'http://www.youtube.com/user/NewsflareBreaking',
            'view_count': int,
            'categories': ['News & Politics'],
            'live_status': 'not_live',
            'playable_in_embed': True,
            'channel': 'Newsflare',
            'duration': 37,
            'upload_date': '20180511',
        }
    }]

    def _get_formats_and_subtitle(self, json_data, video_id):
        """Build formats and subtitles from the 'sources' (or 'sources_desktop') list.

        Returns a ``(formats, subtitles)`` tuple; both are empty when the
        video data carries no usable source list.
        """
        formats, subtitles = [], {}
        # `...` must not be passed as a further alternative path: when neither key
        # exists it would yield every value of json_data (strings, ints, ...)
        sources = traverse_obj(json_data, 'sources', 'sources_desktop', expected_type=list) or []
        for source in sources:
            src = source.get('src') if isinstance(source, dict) else None
            if not src:
                # Nothing downloadable in this entry
                continue
            ext = determine_ext(src, mimetype2ext(source.get('type')))
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(src, video_id)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({'url': src, 'ext': ext})
        return formats, subtitles

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        nextjs_json = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['initialState']['videos'][display_id]
        if nextjs_json.get('youtube_id'):
            # The video is hosted on YouTube; hand it over to that extractor
            return self.url_result(
                f'https://www.youtube.com/watch?v={nextjs_json.get("youtube_id")}', YoutubeIE)

        formats, subtitles = self._get_formats_and_subtitle(nextjs_json, display_id)
        return {
            'id': display_id,
            'title': nextjs_json.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            'description': nextjs_json.get('description') or None,
            'duration': int_or_none(nextjs_json.get('duration')),
            'thumbnails': [
                {'url': thumbnail}
                for thumbnail in traverse_obj(nextjs_json, ('default_thumbnails', ...))],
            'channel_id': traverse_obj(nextjs_json, 'channel', 'channel_slug'),
            'timestamp': parse_iso8601(nextjs_json.get('created')),
            'release_timestamp': parse_iso8601(nextjs_json.get('published')),
            'view_count': int_or_none(nextjs_json.get('views')),
        }
|
@ -0,0 +1,290 @@
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import ExtractorError, int_or_none, traverse_obj, try_get
|
||||
|
||||
|
||||
class AmazonMiniTVBaseIE(InfoExtractor):
    """Shared session setup and API access for the Amazon MiniTV extractors."""

    def _real_initialize(self):
        # Loading the landing page sets the guest 'session-id' cookie read below
        self._download_webpage(
            'https://www.amazon.in/minitv', None,
            note='Fetching guest session cookies')
        cookies = self._get_cookies('https://www.amazon.in')
        AmazonMiniTVBaseIE.session_id = cookies['session-id'].value

    def _call_api(self, asin, data=None, note=None):
        """Query the MiniTV web API.

        With ``data`` the request is a GraphQL POST and the value stored
        under ``data['operationName']`` in the response data is returned.
        Without it, the 'prs' endpoint is queried and the whole response is
        returned. Raises ExtractorError if the response lists errors.
        """
        device = {'clientId': 'ATVIN', 'deviceLocale': 'en_GB'}
        if data:
            endpoint = 'graphql'
            data['variables'].update({
                'contentType': 'VOD',
                'sessionIdToken': self.session_id,
                **device,
            })
            payload = json.dumps(data).encode()
            query = None
        else:
            endpoint = 'prs'
            payload = None
            query = {
                'deviceType': 'A1WMMUXPCUJL4N',
                'contentId': asin,
                **device,
            }

        resp = self._download_json(
            f'https://www.amazon.in/minitv/api/web/{endpoint}',
            asin, note=note, headers={'Content-Type': 'application/json'},
            data=payload, query=query)

        if resp.get('errors'):
            raise ExtractorError(f'MiniTV said: {resp["errors"][0]["message"]}')
        if not data:
            return resp
        return resp['data'][data['operationName']]
|
||||
|
||||
|
||||
class AmazonMiniTVIE(AmazonMiniTVBaseIE):
    """Extractor for a single Amazon MiniTV title (episode or movie)."""

    _VALID_URL = r'(?:https?://(?:www\.)?amazon\.in/minitv/tp/|amazonminitv:(?:amzn1\.dv\.gti\.)?)(?P<id>[a-f0-9-]+)'
    _TESTS = [{
        'url': 'https://www.amazon.in/minitv/tp/75fe3a75-b8fe-4499-8100-5c9424344840?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
        'info_dict': {
            'id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
            'ext': 'mp4',
            'title': 'May I Kiss You?',
            'language': 'Hindi',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:a549bfc747973e04feb707833474e59d',
            'release_timestamp': 1644710400,
            'release_date': '20220213',
            'duration': 846,
            'chapters': 'count:2',
            'series': 'Couple Goals',
            'series_id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
            'season': 'Season 3',
            'season_number': 3,
            'season_id': 'amzn1.dv.gti.20331016-d9b9-4968-b991-c89fa4927a36',
            'episode': 'May I Kiss You?',
            'episode_number': 2,
            'episode_id': 'amzn1.dv.gti.75fe3a75-b8fe-4499-8100-5c9424344840',
        },
    }, {
        'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab?referrer=https%3A%2F%2Fwww.amazon.in%2Fminitv',
        'info_dict': {
            'id': 'amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
            'ext': 'mp4',
            'title': 'Jahaan',
            'language': 'Hindi',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:05eb765a77bf703f322f120ec6867339',
            'release_timestamp': 1647475200,
            'release_date': '20220317',
            'duration': 783,
            'chapters': [],
        },
    }, {
        'url': 'https://www.amazon.in/minitv/tp/280d2564-584f-452f-9c98-7baf906e01ab',
        'only_matching': True,
    }, {
        'url': 'amazonminitv:amzn1.dv.gti.280d2564-584f-452f-9c98-7baf906e01ab',
        'only_matching': True,
    }, {
        'url': 'amazonminitv:280d2564-584f-452f-9c98-7baf906e01ab',
        'only_matching': True,
    }]

    _GRAPHQL_QUERY_CONTENT = '''
query content($sessionIdToken: String!, $deviceLocale: String, $contentId: ID!, $contentType: ContentType!, $clientId: String) {
  content(
    applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
    contentId: $contentId
    contentType: $contentType
  ) {
    contentId
    name
    ... on Episode {
      contentId
      vodType
      name
      images
      description {
        synopsis
        contentLengthInSeconds
      }
      publicReleaseDateUTC
      audioTracks
      seasonId
      seriesId
      seriesName
      seasonNumber
      episodeNumber
      timecode {
        endCreditsTime
      }
    }
    ... on MovieContent {
      contentId
      vodType
      name
      description {
        synopsis
        contentLengthInSeconds
      }
      images
      publicReleaseDateUTC
      audioTracks
    }
  }
}'''

    def _real_extract(self, url):
        asin = f'amzn1.dv.gti.{self._match_id(url)}'
        prs = self._call_api(asin, note='Downloading playback info')

        formats, subtitles = [], {}
        for type_, asset in prs['playbackAssets'].items():
            # Assets without a manifest URL cannot be downloaded
            if not traverse_obj(asset, 'manifestUrl'):
                continue
            if type_ == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    asset['manifestUrl'], asin, ext='mp4', entry_protocol='m3u8_native',
                    m3u8_id=type_, fatal=False)
            elif type_ == 'dash':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    asset['manifestUrl'], asin, mpd_id=type_, fatal=False)
            else:
                self.report_warning(f'Unknown asset type: {type_}')
                continue
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        title_info = self._call_api(
            asin, note='Downloading title info', data={
                'operationName': 'content',
                'variables': {'contentId': asin},
                'query': self._GRAPHQL_QUERY_CONTENT,
            })
        # The API values are divided by 1000 before being used as seconds
        credits_time = try_get(title_info, lambda x: x['timecode']['endCreditsTime'] / 1000)
        is_episode = title_info.get('vodType') == 'EPISODE'

        thumbnails = [
            {'id': image_id, 'url': image_url}
            for image_id, image_url in (title_info.get('images') or {}).items()]
        chapters = []
        if credits_time:
            chapters.append({
                'start_time': credits_time,
                'title': 'End Credits',
            })

        return {
            'id': asin,
            'title': title_info.get('name'),
            'formats': formats,
            'subtitles': subtitles,
            'language': traverse_obj(title_info, ('audioTracks', 0)),
            'thumbnails': thumbnails,
            'description': traverse_obj(title_info, ('description', 'synopsis')),
            'release_timestamp': int_or_none(try_get(title_info, lambda x: x['publicReleaseDateUTC'] / 1000)),
            'duration': traverse_obj(title_info, ('description', 'contentLengthInSeconds')),
            'chapters': chapters,
            'series': title_info.get('seriesName'),
            'series_id': title_info.get('seriesId'),
            'season_number': title_info.get('seasonNumber'),
            'season_id': title_info.get('seasonId'),
            'episode': title_info.get('name') if is_episode else None,
            'episode_number': title_info.get('episodeNumber'),
            'episode_id': asin if is_episode else None,
        }
|
||||
|
||||
|
||||
class AmazonMiniTVSeasonIE(AmazonMiniTVBaseIE):
    """Playlist extractor yielding every episode of one Amazon MiniTV season."""

    IE_NAME = 'amazonminitv:season'
    _VALID_URL = r'amazonminitv:season:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
    # Describes a season (not a series) and names the prefix _VALID_URL actually accepts
    IE_DESC = 'Amazon MiniTV Season, "amazonminitv:season:" prefix'
    _TESTS = [{
        'url': 'amazonminitv:season:amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        'playlist_mincount': 6,
        'info_dict': {
            'id': 'amzn1.dv.gti.0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        },
    }, {
        'url': 'amazonminitv:season:0aa996eb-6a1b-4886-a342-387fbd2f1db0',
        'only_matching': True,
    }]

    _GRAPHQL_QUERY = '''
query getEpisodes($sessionIdToken: String!, $clientId: String, $episodeOrSeasonId: ID!, $deviceLocale: String) {
  getEpisodes(
    applicationContextInput: {sessionIdToken: $sessionIdToken, deviceLocale: $deviceLocale, clientId: $clientId}
    episodeOrSeasonId: $episodeOrSeasonId
  ) {
    episodes {
      ... on Episode {
        contentId
        name
        images
        seriesName
        seasonId
        seriesId
        seasonNumber
        episodeNumber
        description {
          synopsis
          contentLengthInSeconds
        }
        publicReleaseDateUTC
      }
    }
  }
}
'''

    def _entries(self, asin):
        """Yield one url_result per episode reported for the season ``asin``."""
        season_info = self._call_api(
            asin, note='Downloading season info', data={
                'operationName': 'getEpisodes',
                'variables': {'episodeOrSeasonId': asin},
                'query': self._GRAPHQL_QUERY,
            })

        for episode in season_info['episodes']:
            yield self.url_result(
                f'amazonminitv:{episode["contentId"]}', AmazonMiniTVIE, episode['contentId'])

    def _real_extract(self, url):
        asin = f'amzn1.dv.gti.{self._match_id(url)}'
        return self.playlist_result(self._entries(asin), asin)
|
||||
|
||||
|
||||
class AmazonMiniTVSeriesIE(AmazonMiniTVBaseIE):
    """Playlist extractor yielding every season of one Amazon MiniTV series."""

    IE_NAME = 'amazonminitv:series'
    _VALID_URL = r'amazonminitv:series:(?:amzn1\.dv\.gti\.)?(?P<id>[a-f0-9-]+)'
    _TESTS = [{
        'url': 'amazonminitv:series:amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
        'playlist_mincount': 3,
        'info_dict': {
            'id': 'amzn1.dv.gti.56521d46-b040-4fd5-872e-3e70476a04b0',
        },
    }, {
        'url': 'amazonminitv:series:56521d46-b040-4fd5-872e-3e70476a04b0',
        'only_matching': True,
    }]

    _GRAPHQL_QUERY = '''
query getSeasons($sessionIdToken: String!, $deviceLocale: String, $episodeOrSeasonOrSeriesId: ID!, $clientId: String) {
  getSeasons(
    applicationContextInput: {deviceLocale: $deviceLocale, sessionIdToken: $sessionIdToken, clientId: $clientId}
    episodeOrSeasonOrSeriesId: $episodeOrSeasonOrSeriesId
  ) {
    seasons {
      seasonId
    }
  }
}
'''

    def _entries(self, asin):
        """Yield one url_result per season reported for the series ``asin``."""
        request = {
            'operationName': 'getSeasons',
            'variables': {'episodeOrSeasonOrSeriesId': asin},
            'query': self._GRAPHQL_QUERY,
        }
        series_info = self._call_api(asin, note='Downloading series info', data=request)

        for season in series_info['seasons']:
            season_id = season['seasonId']
            yield self.url_result(f'amazonminitv:season:{season_id}', AmazonMiniTVSeasonIE, season_id)

    def _real_extract(self, url):
        series_asin = f'amzn1.dv.gti.{self._match_id(url)}'
        entries = self._entries(series_asin)
        return self.playlist_result(entries, series_asin)
|
@ -0,0 +1,101 @@
|
||||
from .common import InfoExtractor
|
||||
from .youtube import YoutubeIE, YoutubeTabIE
|
||||
|
||||
|
||||
class BeatBumpVideoIE(InfoExtractor):
    """Redirects beatbump.ml listen URLs to the matching music.youtube.com watch URL."""

    _VALID_URL = r'https://beatbump\.ml/listen\?id=(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/listen?id=MgNrAu2pzNs',
        'md5': '5ff3fff41d3935b9810a9731e485fe66',
        'info_dict': {
            'id': 'MgNrAu2pzNs',
            'ext': 'mp4',
            'uploader_url': 'http://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
            'artist': 'Stephen',
            'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp',
            'channel_url': 'https://www.youtube.com/channel/UC-pWHpBjdGG69N9mM2auIAA',
            'upload_date': '20190312',
            'categories': ['Music'],
            'playable_in_embed': True,
            'duration': 169,
            'like_count': int,
            'alt_title': 'Voyeur Girl',
            'view_count': int,
            'track': 'Voyeur Girl',
            'uploader': 'Stephen - Topic',
            'title': 'Voyeur Girl',
            'channel_follower_count': int,
            'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
            'age_limit': 0,
            'availability': 'public',
            'live_status': 'not_live',
            'album': 'it\'s too much love to know my dear',
            'channel': 'Stephen',
            'comment_count': int,
            'description': 'md5:7ae382a65843d6df2685993e90a8628f',
            'tags': 'count:11',
            'creator': 'Stephen',
            'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA',
        }
    }]

    def _real_extract(self, url):
        # The id in the URL is passed on unchanged as the YouTube video id
        video_id = self._match_id(url)
        youtube_url = f'https://music.youtube.com/watch?v={video_id}'
        return self.url_result(youtube_url, YoutubeIE, video_id)
|
||||
|
||||
|
||||
class BeatBumpPlaylistIE(InfoExtractor):
    """Redirects beatbump.ml release/artist/playlist URLs to music.youtube.com browse URLs."""

    _VALID_URL = r'https://beatbump\.ml/(?:release\?id=|artist/|playlist/)(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://beatbump.ml/release?id=MPREb_gTAcphH99wE',
        'playlist_count': 50,
        'info_dict': {
            'id': 'OLAK5uy_l1m0thk3g31NmIIz_vMIbWtyv7eZixlH0',
            'availability': 'unlisted',
            'view_count': int,
            'title': 'Album - Royalty Free Music Library V2 (50 Songs)',
            'description': '',
            'tags': [],
            'modified_date': '20221223',
        }
    }, {
        'url': 'https://beatbump.ml/artist/UC_aEa8K-EOJ3D6gOs7HcyNg',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'channel_url': 'https://www.youtube.com/channel/UC_aEa8K-EOJ3D6gOs7HcyNg',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'channel_follower_count': int,
            'title': 'NoCopyrightSounds - Videos',
            'uploader': 'NoCopyrightSounds',
            'description': 'md5:cd4fd53d81d363d05eee6c1b478b491a',
            'channel': 'NoCopyrightSounds',
            'tags': 'count:12',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
        },
    }, {
        'url': 'https://beatbump.ml/playlist/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
        'playlist_mincount': 1,
        'params': {'flatplaylist': True},
        'info_dict': {
            'id': 'PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq',
            'uploader_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'description': 'Providing you with copyright free / safe music for gaming, live streaming, studying and more!',
            'view_count': int,
            'channel_url': 'https://www.youtube.com/@NoCopyrightSounds',
            'uploader_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
            'title': 'NCS : All Releases 💿',
            'uploader': 'NoCopyrightSounds',
            'availability': 'public',
            'channel': 'NoCopyrightSounds',
            'tags': [],
            'modified_date': '20221225',
            'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
        }
    }]

    def _real_extract(self, url):
        # The id in the URL is passed on unchanged as the YouTube Music browse id
        browse_id = self._match_id(url)
        youtube_url = f'https://music.youtube.com/browse/{browse_id}'
        return self.url_result(youtube_url, YoutubeTabIE, browse_id)
|
@ -1,24 +1,80 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_urllib_parse_unquote
|
||||
from .youtube import YoutubeTabIE
|
||||
from ..utils import parse_qs, smuggle_url, traverse_obj
|
||||
|
||||
|
||||
class EmbedlyIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?url=(?P<id>[^#&]+)'
|
||||
_VALID_URL = r'https?://(?:www|cdn\.)?embedly\.com/widgets/media\.html\?(?:[^#]*?&)?(?:src|url)=(?:[^#&]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
|
||||
'info_dict': {
|
||||
'id': 'UUGLim4T2loE5rwCMdpCIPVg',
|
||||
'modified_date': '20221225',
|
||||
'view_count': int,
|
||||
'uploader_url': 'https://www.youtube.com/@TraciHinesMusic',
|
||||
'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
|
||||
'uploader': 'TraciJHines',
|
||||
'channel_url': 'https://www.youtube.com/@TraciHinesMusic',
|
||||
'channel': 'TraciJHines',
|
||||
'availability': 'public',
|
||||
'uploader_id': 'UCGLim4T2loE5rwCMdpCIPVg',
|
||||
'description': '',
|
||||
'tags': [],
|
||||
'title': 'Uploads from TraciJHines',
|
||||
},
|
||||
'playlist_mincount': 10,
|
||||
}, {
|
||||
'url': 'https://cdn.embedly.com/widgets/media.html?src=http%3A%2F%2Fwww.youtube.com%2Fembed%2Fvideoseries%3Flist%3DUUGLim4T2loE5rwCMdpCIPVg&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DSU4fj_aEMVw%26list%3DUUGLim4T2loE5rwCMdpCIPVg&image=http%3A%2F%2Fi.ytimg.com%2Fvi%2FSU4fj_aEMVw%2Fhqdefault.jpg&key=8ee8a2e6a8cc47aab1a5ee67f9a178e0&type=text%2Fhtml&schema=youtube&autoplay=1',
|
||||
'params': {'noplaylist': True},
|
||||
'info_dict': {
|
||||
'id': 'SU4fj_aEMVw',
|
||||
'ext': 'mp4',
|
||||
'title': 'I\'m on Patreon!',
|
||||
'age_limit': 0,
|
||||
'categories': ['Entertainment'],
|
||||
'thumbnail': 'https://i.ytimg.com/vi_webp/SU4fj_aEMVw/maxresdefault.webp',
|
||||
'live_status': 'not_live',
|
||||
'playable_in_embed': True,
|
||||
'channel': 'TraciJHines',
|
||||
'uploader_id': 'TraciJHines',
|
||||
'channel_url': 'https://www.youtube.com/channel/UCGLim4T2loE5rwCMdpCIPVg',
|
||||
'uploader_url': 'http://www.youtube.com/user/TraciJHines',
|
||||
'upload_date': '20150211',
|
||||
'duration': 282,
|
||||
'availability': 'public',
|
||||
'channel_follower_count': int,
|
||||
'tags': 'count:39',
|
||||
'view_count': int,
|
||||
'comment_count': int,
|
||||
'channel_id': 'UCGLim4T2loE5rwCMdpCIPVg',
|
||||
'like_count': int,
|
||||
'uploader': 'TraciJHines',
|
||||
'description': 'md5:8af6425f50bd46fbf29f3db0fc3a8364',
|
||||
'chapters': list,
|
||||
|
||||
},
|
||||
}, {
|
||||
'url': 'https://cdn.embedly.com/widgets/media.html?src=https://player.vimeo.com/video/1234567?h=abcdefgh',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@classmethod
def _extract_from_webpage(cls, url, webpage):
    """Yield a url result for every Embedly card or embed found in *webpage*.

    Two markup shapes are recognised:
      * elements with class "embedly-card", whose ``href`` is the target URL;
      * elements with class "embedly-embed", whose ``src`` carries the
        percent-encoded target in its ``url=`` query parameter.

    ``url`` (the page address) is accepted for interface compatibility but
    is not used here.
    """
    # Bypass "ie=cls" and suitable check
    for mobj in re.finditer(r'class=["\']embedly-card["\'][^>]href=["\'](?P<url>[^"\']+)', webpage):
        yield cls.url_result(mobj.group('url'))

    for mobj in re.finditer(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage):
        # The embedded target is percent-encoded inside the src attribute
        yield cls.url_result(urllib.parse.unquote(mobj.group('url')))
|
||||
|
||||
def _real_extract(self, url):
    """Resolve an Embedly widget URL to the media URL it wraps.

    The ``url`` query parameter is tried first: when it is something
    YoutubeTabIE accepts, the request is delegated to that extractor.
    Otherwise the ``src`` parameter (falling back to ``url``) is returned
    as a url result with the widget URL smuggled in as the Referer header.
    """
    qs = parse_qs(url)
    src = urllib.parse.unquote(traverse_obj(qs, ('url', 0)) or '')
    if src and YoutubeTabIE.suitable(src):
        return self.url_result(src, YoutubeTabIE)
    return self.url_result(smuggle_url(
        urllib.parse.unquote(traverse_obj(qs, ('src', 0), ('url', 0))),
        {'http_headers': {'Referer': url}}))
|
||||
|
@ -1,31 +1,51 @@
|
||||
from .common import InfoExtractor
|
||||
from .uplynk import UplynkPreplayIE
|
||||
from ..utils import HEADRequest, float_or_none, make_archive_id, smuggle_url
|
||||
|
||||
|
||||
class FoxSportsIE(InfoExtractor):
    """Extractor for foxsports.com ``/watch/<id>`` clip pages.

    Metadata comes from the page's JSON-LD and the Fox "sportsclip" API; the
    media itself is delegated to UplynkPreplayIE via a url_transparent result.
    """

    _VALID_URL = r'https?://(?:www\.)?foxsports\.com/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://www.foxsports.com/watch/play-612168c6700004b',
        'info_dict': {
            'id': 'b72f5bd8658140baa5791bb676433733',
            'ext': 'mp4',
            'display_id': 'play-612168c6700004b',
            'title': 'md5:e0c4ecac3a1f25295b4fae22fb5c126a',
            'description': 'md5:371bc43609708ae2b9e1a939229762af',
            'uploader_id': '06b4a36349624051a9ba52ac3a91d268',
            'upload_date': '20221205',
            'timestamp': 1670262586,
            'duration': 31.7317,
            'thumbnail': r're:^https?://.*\.jpg$',
            'extra_param_to_segment_url': str,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Return a url_transparent result pointing at the clip's preplay URL."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # JSON-LD is optional: default={} lets the .get() fallbacks below work
        json_ld = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        data = self._download_json(
            f'https://api3.fox.com/v2.0/vodplayer/sportsclip/{video_id}',
            video_id, note='Downloading API JSON', headers={
                'x-api-key': 'cf289e299efdfa39fb6316f259d1de93',
            })
        # A HEAD request is enough here: only the URL reported by the
        # response is needed, not its body
        preplay_url = self._request_webpage(
            HEADRequest(data['url']), video_id, 'Fetching preplay URL').geturl()

        return {
            '_type': 'url_transparent',
            'ie_key': UplynkPreplayIE.ie_key(),
            'url': smuggle_url(preplay_url, {'Origin': 'https://www.foxsports.com'}),
            'display_id': video_id,
            'title': data.get('name') or json_ld.get('title'),
            'description': data.get('description') or json_ld.get('description'),
            'duration': float_or_none(data.get('durationInSeconds')),
            'timestamp': json_ld.get('timestamp'),
            'thumbnails': json_ld.get('thumbnails'),
            # NOTE(review): presumably keeps download-archive entries keyed by
            # the page id recognisable -- confirm against make_archive_id
            '_old_archive_ids': [make_archive_id(self, video_id)],
        }
|
||||
|
@ -0,0 +1,66 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
join_nonempty,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class Kanal2IE(InfoExtractor):
    """Extractor for kanal2.postimees.ee pages addressed by an ``id`` query parameter."""

    _VALID_URL = r'https?://kanal2\.postimees\.ee/[^?#]+\?([^#]+&)?id=(?P<id>\d+)'
    _TESTS = [{
        'note': 'Test standard url (#5575)',
        'url': 'https://kanal2.postimees.ee/pluss/video/?id=40792',
        'md5': '7ea7b16266ec1798743777df241883dd',
        'info_dict': {
            'id': '40792',
            'ext': 'mp4',
            'title': 'Aedniku aabits / Osa 53 (05.08.2016 20:00)',
            'thumbnail': r're:https?://.*\.jpg$',
            'description': 'md5:53cabf3c5d73150d594747f727431248',
            'upload_date': '20160805',
            'timestamp': 1470420000,
        },
    }]

    def _real_extract(self, url):
        """Download the player playlist JSON for the video and build its info dict."""
        video_id = self._match_id(url)
        playlist = self._download_json(
            f'https://kanal2.postimees.ee/player/playlist/{video_id}', video_id,
            query={'type': 'episodes'}, headers={'X-Requested-With': 'XMLHttpRequest'})

        title_parts = traverse_obj(playlist, ('info', ('title', 'subtitle')))
        description = traverse_obj(playlist, ('info', 'description'))
        thumbnail = traverse_obj(playlist, ('data', 'image'))
        formats = self.get_formats(playlist, video_id)

        # The air date is only present at the end of the subtitle, in the
        # form "(05.08.2016 20:00)"; a fixed +0200 offset is appended to it
        air_date = self._search_regex(
            r'\((\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2})\)$',
            traverse_obj(playlist, ('info', 'subtitle')), 'timestamp', default='')

        return {
            'id': video_id,
            'title': join_nonempty(*title_parts, delim=' / '),
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
            'timestamp': unified_timestamp(air_date + ' +0200'),
        }

    def get_formats(self, playlist, video_id):
        """Register a streaming session and return HLS formats for every stream.

        Raises ExtractorError when the playlist has no ``data.path`` or when
        the session endpoint does not answer with reason "OK" and a session id.
        """
        path = traverse_obj(playlist, ('data', 'path'))
        if not path:
            raise ExtractorError('Path value not found in playlist JSON response')

        request_headers = {
            'X-Original-URI': path,
            'Accept': 'application/json',
        }
        session = self._download_json(
            'https://sts.postimees.ee/session/register', video_id,
            note='Creating session', errnote='Error creating session',
            headers=request_headers)

        session_id = session.get('session')
        if not (session.get('reason') == 'OK' and session_id):
            reason = session.get('reason', 'unknown error')
            raise ExtractorError(f'Unable to obtain session: {reason}')

        # Every stream URL must carry the session id in its "s" parameter
        return [
            fmt
            for stream_url in traverse_obj(playlist, ('data', 'streams', ..., 'file'))
            for fmt in self._extract_m3u8_formats(
                update_url_query(stream_url, {'s': session_id}), video_id, 'mp4')
        ]
|
@ -0,0 +1,127 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
from ..utils import (
|
||||
HEADRequest,
|
||||
UserNotLive,
|
||||
float_or_none,
|
||||
merge_dicts,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class KickBaseIE(InfoExtractor):
    """Shared session setup and API helper for the kick.com extractors."""

    # Replaced by _real_initialize(); the empty default keeps _call_api()
    # from failing with AttributeError if it runs before initialization.
    _API_HEADERS = {}

    def _real_initialize(self):
        """Fetch the kick.com front page and derive API headers from its XSRF cookie."""
        self._request_webpage(HEADRequest('https://kick.com/'), None, 'Setting up session')
        xsrf_token = self._get_cookies('https://kick.com/').get('XSRF-TOKEN')
        if not xsrf_token:
            self.write_debug('kick.com did not set XSRF-TOKEN cookie')
        # Stored on the base class so that all subclasses share the headers
        KickBaseIE._API_HEADERS = {
            'Authorization': f'Bearer {xsrf_token.value}',
            'X-XSRF-TOKEN': xsrf_token.value,
        } if xsrf_token else {}

    def _call_api(self, path, display_id, note='Downloading API JSON', headers=None, **kwargs):
        """Download and return JSON from ``https://kick.com/api/v1/<path>``.

        ``headers`` are optional extra request headers; they are merged with
        the session headers prepared by ``_real_initialize``. Remaining
        keyword arguments are forwarded to ``_download_json``.
        """
        return self._download_json(
            f'https://kick.com/api/v1/{path}', display_id, note=note,
            headers=merge_dicts(headers or {}, self._API_HEADERS), **kwargs)
|
||||
|
||||
|
||||
class KickIE(KickBaseIE):
    """Extractor for live streams on kick.com channel pages."""

    _VALID_URL = r'https?://(?:www\.)?kick\.com/(?!(?:video|categories|search|auth)(?:[/?#]|$))(?P<id>[\w_]+)'
    _TESTS = [{
        'url': 'https://kick.com/yuppy',
        'info_dict': {
            'id': '6cde1-kickrp-joe-flemmingskick-info-heremust-knowmust-see21',
            'ext': 'mp4',
            'title': str,
            'description': str,
            'channel': 'yuppy',
            'channel_id': '33538',
            'uploader': 'Yuppy',
            'uploader_id': '33793',
            'upload_date': str,
            'live_status': 'is_live',
            'timestamp': int,
            'thumbnail': r're:^https?://.*\.jpg',
            'categories': list,
        },
        'skip': 'livestream',
    }, {
        'url': 'https://kick.com/kmack710',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the channel's current livestream.

        Raises UserNotLive when the channel API response carries no dict
        under ``livestream``.
        """
        channel = self._match_id(url)
        response = self._call_api(f'channels/{channel}', channel)
        if not traverse_obj(response, 'livestream', expected_type=dict):
            raise UserNotLive(video_id=channel)

        # Prefer the livestream slug, then its id, then the channel name
        stream_id = traverse_obj(
            response, ('livestream', ('slug', 'id')), get_all=False, default=channel)
        formats = self._extract_m3u8_formats(
            response['playback_url'], channel, 'mp4', live=True)
        title = traverse_obj(
            response, ('livestream', ('session_title', 'slug')), get_all=False, default='')
        thumbnail = traverse_obj(
            response, ('livestream', 'thumbnail', 'url'), expected_type=url_or_none)

        info = {
            'id': str(stream_id),
            'formats': formats,
            'title': title,
            'description': traverse_obj(response, ('user', 'bio')),
            'channel': channel,
            'channel_id': str_or_none(traverse_obj(response, 'id', ('livestream', 'channel_id'))),
            'uploader': traverse_obj(response, 'name', ('user', 'username')),
            'uploader_id': str_or_none(traverse_obj(response, 'user_id', ('user', 'id'))),
            'is_live': True,
            'timestamp': unified_timestamp(traverse_obj(response, ('livestream', 'created_at'))),
            'thumbnail': thumbnail,
            'categories': traverse_obj(response, ('recent_categories', ..., 'name')),
        }
        return info
|
||||
|
||||
|
||||
class KickVODIE(KickBaseIE):
    """Extractor for kick.com ``/video/<uuid>`` recordings."""

    _VALID_URL = r'https?://(?:www\.)?kick\.com/video/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://kick.com/video/54244b5e-050a-4df4-a013-b2433dafbe35',
        'md5': '73691206a6a49db25c5aa1588e6538fc',
        'info_dict': {
            'id': '54244b5e-050a-4df4-a013-b2433dafbe35',
            'ext': 'mp4',
            'title': 'Making 710-carBoosting. Kinda No Pixel inspired. !guilded - !links',
            'description': 'md5:a0d3546bf7955d0a8252ffe0fd6f518f',
            'channel': 'kmack710',
            'channel_id': '16278',
            'uploader': 'Kmack710',
            'uploader_id': '16412',
            'upload_date': '20221206',
            'timestamp': 1670318289,
            'duration': 40104.0,
            'thumbnail': r're:^https?://.*\.jpg',
            'categories': ['Grand Theft Auto V'],
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Query the video API endpoint and build the info dict from its response."""
        video_id = self._match_id(url)
        response = self._call_api(f'video/{video_id}', video_id)

        formats = self._extract_m3u8_formats(response['source'], video_id, 'mp4')
        title = traverse_obj(
            response, ('livestream', ('session_title', 'slug')), get_all=False, default='')
        # The API value is divided by 1000 to obtain seconds
        duration = float_or_none(traverse_obj(response, ('livestream', 'duration')), scale=1000)
        thumbnail = traverse_obj(
            response, ('livestream', 'thumbnail'), expected_type=url_or_none)

        info = {
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': traverse_obj(response, ('livestream', 'channel', 'user', 'bio')),
            'channel': traverse_obj(response, ('livestream', 'channel', 'slug')),
            'channel_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'id'))),
            'uploader': traverse_obj(response, ('livestream', 'channel', 'user', 'username')),
            'uploader_id': str_or_none(traverse_obj(response, ('livestream', 'channel', 'user_id'))),
            'timestamp': unified_timestamp(response.get('created_at')),
            'duration': duration,
            'thumbnail': thumbnail,
            'categories': traverse_obj(response, ('livestream', 'categories', ..., 'name')),
        }
        return info
|
@ -0,0 +1,155 @@
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import clean_html, get_element_html_by_class
|
||||
|
||||
|
||||
class MediaStreamIE(InfoExtractor):
    """Extractor for mdstrm.com ``embed`` and ``live-stream`` player pages.

    Also locates mdstrm.com players embedded in third-party pages through
    ``_extract_embed_urls``.
    """

    _VALID_URL = r'https?://mdstrm\.com/(?:embed|live-stream)/(?P<id>\w+)'

    _TESTS = [{
        'url': 'https://mdstrm.com/embed/6318e3f1d1d316083ae48831',
        'md5': '97b4f2634b8e8612cc574dfcd504df05',
        'info_dict': {
            'id': '6318e3f1d1d316083ae48831',
            'title': 'Video: Así fue el despido de Thomas Tuchel del Chelsea',
            'description': 'md5:358ce1e1396010d50a1ece1be3633c95',
            'thumbnail': r're:^https?://[^?#]+6318e3f1d1d316083ae48831',
            'ext': 'mp4',
        },
    }]

    _WEBPAGE_TESTS = [{
        'url': 'https://www.multimedios.com/video/costa-rica-tv-en-vivo/v2616',
        'info_dict': {
            'id': '5a7b1e63a8da282c34d65445',
            'title': 're:mmtv-costarica',
            'description': 'mmtv-costarica',
            'thumbnail': 're:^https?://[^?#]+5a7b1e63a8da282c34d65445',
            'ext': 'mp4',
            'live_status': 'is_live',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }, {
        'url': 'https://www.multimedios.com/television/clases-de-llaves-y-castigos-quien-sabe-mas',
        'md5': 'de31f0b1ecc321fb35bf22d58734ea40',
        'info_dict': {
            'id': '63731bab8ec9b308a2c9ed28',
            'title': 'Clases de llaves y castigos ¿Quién sabe más?',
            'description': 'md5:1b49aa1ee5a4b32fbd66104b2d629e9d',
            'thumbnail': 're:^https?://[^?#]+63731bab8ec9b308a2c9ed28',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.americatv.com.pe/videos/esto-es-guerra/facundo-gonzalez-sufrio-fuerte-golpe-durante-competencia-frente-hugo-garcia-eeg-noticia-139120',
        'info_dict': {
            'id': '63756df1c638b008a5659dec',
            'title': 'Facundo González sufrió fuerte golpe durante competencia frente a Hugo García en EEG',
            'description': 'md5:9490c034264afd756eef7b2c3adee69e',
            'thumbnail': 're:^https?://[^?#]+63756df1c638b008a5659dec',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.americatv.com.pe/videos/al-fondo-hay-sitio/nuevas-lomas-town-bernardo-mata-se-enfrento-sujeto-luchar-amor-macarena-noticia-139083',
        'info_dict': {
            'id': '637307669609130f74cd3a6e',
            'title': 'Las Nuevas Lomas Town: Bernardo De La Mata se enfrentó a sujeto para luchar por el amor de Macarena',
            'description': 'md5:60d71772f1e1496923539ae58aa17124',
            'thumbnail': 're:^https?://[^?#]+637307669609130f74cd3a6e',
            'ext': 'mp4',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield mdstrm.com player URLs for each player found in *webpage*.

        Three embedding styles are handled: an inline script calling
        ``playerMdStream.mdstreamVideo('<id>')``, a plain ``<iframe>`` whose
        ``src`` points at mdstrm.com, and ``div``/``ps-mediastream`` elements
        of class ``MediaStreamVideoPlayer`` carrying ``data-video-id``.
        """
        for mobj in re.finditer(r'<script[^>]+>[^>]*playerMdStream\.mdstreamVideo\(\s*[\'"](?P<video_id>\w+)', webpage):
            yield f'https://mdstrm.com/embed/{mobj.group("video_id")}'

        # "[^>]+" (rather than a single character) lets other attributes
        # precede src inside the iframe tag
        yield from re.findall(
            r'<iframe[^>]+src\s*=\s*"(https://mdstrm\.com/[\w-]+/\w+)', webpage)

        for mobj in re.finditer(
            r'''(?x)
                <(?:div|ps-mediastream)[^>]+
                class\s*=\s*"[^"]*MediaStreamVideoPlayer[^"]*"[^>]+
                data-video-id\s*=\s*"(?P<video_id>\w+)\s*"
                (?:\s*data-video-type\s*=\s*"(?P<video_type>[^"]+))?
                ''', webpage):

            # Only an explicit data-video-type of "live" selects the live player
            video_type = 'live-stream' if mobj.group('video_type') == 'live' else 'embed'
            yield f'https://mdstrm.com/{video_type}/{mobj.group("video_id")}'

    def _real_extract(self, url):
        """Read the player options from the page and collect all advertised formats.

        Raises a geo-restriction error when the page shows the location notice.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        if 'Debido a tu ubicación no puedes ver el contenido' in webpage:
            self.raise_geo_restricted()

        player_config = self._search_json(r'window\.MDSTRM\.OPTIONS\s*=', webpage, 'metadata', video_id)

        formats, subtitles = [], {}
        for format_id, format_url in player_config['src'].items():
            if format_id == 'hls':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(format_url, video_id)
            elif format_id == 'mpd':
                fmts, subs = self._extract_mpd_formats_and_subtitles(format_url, video_id)
            else:
                # Any other entry is used as a direct media URL
                fmts, subs = [{'url': format_url}], {}
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage) or player_config.get('title'),
            'description': self._og_search_description(webpage),
            'formats': formats,
            'subtitles': subtitles,
            'is_live': player_config.get('type') == 'live',
            'thumbnail': self._og_search_thumbnail(webpage),
        }
|
||||
|
||||
|
||||
class WinSportsVideoIE(InfoExtractor):
    """Extractor for winsports.co video pages, which wrap a MediaStream player."""

    _VALID_URL = r'https?://www\.winsports\.co/videos/(?P<display_id>[\w-]+)-(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.winsports.co/videos/siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco-60536',
        'info_dict': {
            'id': '62dc8357162c4b0821fcfb3c',
            'display_id': 'siempre-castellanos-gran-atajada-del-portero-cardenal-para-evitar-la-caida-de-su-arco',
            'title': '¡Siempre Castellanos! Gran atajada del portero \'cardenal\' para evitar la caída de su arco',
            'description': 'md5:eb811b2b2882bdc59431732c06b905f2',
            'thumbnail': r're:^https?://[^?#]+62dc8357162c4b0821fcfb3c',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://www.winsports.co/videos/observa-aqui-los-goles-del-empate-entre-tolima-y-nacional-60548',
        'info_dict': {
            'id': '62dcb875ef12a5526790b552',
            'display_id': 'observa-aqui-los-goles-del-empate-entre-tolima-y-nacional',
            'title': 'Observa aquí los goles del empate entre Tolima y Nacional',
            'description': 'md5:b19402ba6e46558b93fd24b873eea9c9',
            'thumbnail': r're:^https?://[^?#]+62dcb875ef12a5526790b552',
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Look up the MediaStream id in the Drupal settings and delegate to MediaStreamIE."""
        mobj = self._match_valid_url(url)
        display_id = mobj.group('display_id')
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, display_id)

        drupal_settings = self._search_json(
            r'<script\s*[^>]+data-drupal-selector="drupal-settings-json">', webpage, 'drupal-setting-json', display_id)

        # The formatter settings are keyed by the numeric id from the page URL
        formatter = drupal_settings['settings']['mediastream_formatter']
        mediastream_id = formatter[video_id]['mediastream_id']
        page_title = clean_html(get_element_html_by_class('title-news', webpage))

        return self.url_result(
            f'https://mdstrm.com/embed/{mediastream_id}', MediaStreamIE, video_id,
            url_transparent=True, display_id=display_id, video_title=page_title)
|
@ -0,0 +1,116 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
determine_ext,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
traverse_obj,
|
||||
variadic,
|
||||
)
|
||||
|
||||
|
||||
class NoicePodcastIE(InfoExtractor):
    """Extractor for podcast episodes on open.noice.id content pages."""

    _VALID_URL = r'https?://open\.noice\.id/content/(?P<id>[a-fA-F0-9-]+)'
    _TESTS = [{
        'url': 'https://open.noice.id/content/7694bb04-ff0f-40fa-a60b-5b39f29584b2',
        'info_dict': {
            'id': '7694bb04-ff0f-40fa-a60b-5b39f29584b2',
            'ext': 'm4a',
            'season': 'Season 1',
            'description': 'md5:58d1274e6857b6fbbecf47075885380d',
            'release_date': '20221115',
            'timestamp': 1668496642,
            'season_number': 1,
            'upload_date': '20221115',
            'release_timestamp': 1668496642,
            'title': 'Eps 1. Belajar dari Wishnutama: Kreatif Bukan Followers! (bersama Wishnutama)',
            'modified_date': '20221121',
            'categories': ['Bisnis dan Keuangan'],
            'duration': 3567,
            'modified_timestamp': 1669030647,
            'thumbnail': 'https://images.noiceid.cc/catalog/content-1668496302560',
            'channel_id': '9dab1024-5b92-4265-ae1c-63da87359832',
            'like_count': int,
            'channel': 'Noice Space Talks',
            'comment_count': int,
            'dislike_count': int,
            'channel_follower_count': int,
        }
    }, {
        'url': 'https://open.noice.id/content/222134e4-99f2-456f-b8a2-b8be404bf063',
        'info_dict': {
            'id': '222134e4-99f2-456f-b8a2-b8be404bf063',
            'ext': 'm4a',
            'release_timestamp': 1653488220,
            'description': 'md5:35074f6190cef52b05dd133bb2ef460e',
            'upload_date': '20220525',
            'timestamp': 1653460637,
            'release_date': '20220525',
            'thumbnail': 'https://images.noiceid.cc/catalog/content-1653460337625',
            'title': 'Eps 1: Dijodohin Sama Anak Pak RT',
            'modified_timestamp': 1669030647,
            'season_number': 1,
            'modified_date': '20221121',
            'categories': ['Cerita dan Drama'],
            'duration': 1830,
            'season': 'Season 1',
            'channel_id': '60193f6b-d24d-4b23-913b-ceed5a731e74',
            'dislike_count': int,
            'like_count': int,
            'comment_count': int,
            'channel': 'Dear Jerome',
            'channel_follower_count': int,
        }
    }]

    def _get_formats_and_subtitles(self, media_url, video_id):
        """Turn one or more media URLs into ``(formats, subtitles)``.

        URLs whose extension is ``m3u8`` are expanded as HLS manifests; every
        other URL is added as a single audio-only mp3 format.
        """
        formats, subtitles = [], {}
        for source_url in variadic(media_url):
            if determine_ext(source_url) != 'm3u8':
                formats.append({
                    'url': source_url,
                    'ext': 'mp3',
                    'vcodec': 'none',
                    'acodec': 'mp3',
                })
                continue
            hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(source_url, video_id)
            formats.extend(hls_formats)
            self._merge_subtitles(hls_subtitles, target=subtitles)
        return formats, subtitles

    def _real_extract(self, url):
        """Build the info dict from the page's Next.js ``contentDetails`` payload."""
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        page_props = self._search_nextjs_data(webpage, display_id)['props']['pageProps']
        nextjs_data = page_props['contentDetails']

        # Both keys may hold a playable URL; collect whichever are present
        media_url_list = traverse_obj(nextjs_data, (('rawContentUrl', 'url'), ))
        formats, subtitles = self._get_formats_and_subtitles(media_url_list, display_id)

        # Page <meta> tags are only consulted when the JSON lacks a value
        info = {
            'id': nextjs_data.get('id') or display_id,
            'title': nextjs_data.get('title') or self._html_search_meta('og:title', webpage),
            'formats': formats,
            'subtitles': subtitles,
            'description': (nextjs_data.get('description') or clean_html(nextjs_data.get('htmlDescription'))
                            or self._html_search_meta(['description', 'og:description'], webpage)),
            'thumbnail': nextjs_data.get('image') or self._html_search_meta('og:image', webpage),
            'timestamp': parse_iso8601(nextjs_data.get('createdAt')),
            'release_timestamp': parse_iso8601(nextjs_data.get('publishedAt')),
            'modified_timestamp': parse_iso8601(
                nextjs_data.get('updatedAt') or self._html_search_meta('og:updated_time', webpage)),
            'duration': int_or_none(nextjs_data.get('duration')),
            'categories': traverse_obj(nextjs_data, ('genres', ..., 'name')),
            'season': nextjs_data.get('seasonName'),
            'season_number': int_or_none(nextjs_data.get('seasonNumber')),
            'channel': traverse_obj(nextjs_data, ('catalog', 'title')),
            'channel_id': traverse_obj(nextjs_data, ('catalog', 'id'), 'catalogId'),
        }
        # Engagement counters live under meta.aggregations
        info.update(traverse_obj(nextjs_data, ('meta', 'aggregations', {
            'like_count': 'likes',
            'dislike_count': 'dislikes',
            'comment_count': 'comments',
            'channel_follower_count': 'followers',
        })))
        return info
|
@ -0,0 +1,43 @@
|
||||
from .common import InfoExtractor
|
||||
|
||||
|
||||
class OnePlacePodcastIE(InfoExtractor):
    """Extractor for oneplace.com ``.../listen/<slug>-<id>`` audio pages."""

    _VALID_URL = r'https?://www\.oneplace\.com/[\w]+/[^/]+/listen/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.oneplace.com/ministries/a-daily-walk/listen/living-in-the-last-days-part-2-958461.html',
        'info_dict': {
            'id': '958461',
            'ext': 'mp3',
            'title': 'Living in the Last Days Part 2 | A Daily Walk with John Randall',
            'description': 'md5:fbb8f1cf21447ac54ecaa2887fc20c6e',
        }
    }, {
        'url': 'https://www.oneplace.com/ministries/ankerberg-show/listen/ep-3-relying-on-the-constant-companionship-of-the-holy-spirit-part-2-922513.html',
        'info_dict': {
            'id': '922513',
            'ext': 'mp3',
            'description': 'md5:8b810b4349aa40a5d033b4536fe428e1',
            'title': 'md5:ce10f7d8d5ddcf485ed8905ef109659d',
        }
    }]

    def _real_extract(self, url):
        """Scrape the media URL, title and description straight from the page HTML."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The media URL is mandatory; the lookup is left fatal on purpose
        media_url_patterns = (
            r'mp3-url\s*=\s*"([^"]+)',
            r'<div[^>]+id\s*=\s*"player"[^>]+data-media-url\s*=\s*"(?P<media_url>[^"]+)',
        )
        media_url = self._search_regex(media_url_patterns, webpage, 'media url')

        # Title: the details heading first, then the og:title / title meta tags
        title_patterns = (
            r'<div[^>]class\s*=\s*"details"[^>]+>[^<]<h2[^>]+>(?P<content>[^>]+)>',
            self._meta_regex('og:title'), self._meta_regex('title'),
        )
        title = self._html_search_regex(
            title_patterns, webpage, 'title', group='content', default=None)

        description = self._html_search_regex(
            r'<div[^>]+class="[^"]+epDesc"[^>]*>\s*(?P<desc>.+?)\s*</div>',
            webpage, 'description', default=None)

        return {
            'id': video_id,
            'url': media_url,
            'ext': 'mp3',
            'vcodec': 'none',
            'title': title,
            'description': description,
        }
|
@ -0,0 +1,52 @@
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import traverse_obj, update_url_query
|
||||
|
||||
|
||||
class ScreencastifyIE(InfoExtractor):
    """Extractor for watch.screencastify.com ``/v/<id>`` recordings."""

    _VALID_URL = r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
        'info_dict': {
            'id': 'sYVkZip3quLKhHw4Ybk8',
            'ext': 'mp4',
            'title': 'Inserting and Aligning the Case Top and Bottom',
            'description': '',
            'uploader': 'Paul Gunn',
            'extra_param_to_segment_url': str,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Fetch the watch API record and extract its DASH and HLS manifests.

        The auth query string supplied by the API is sent with the manifest
        requests, appended to every format URL, and returned as
        ``extra_param_to_segment_url``.
        """
        video_id = self._match_id(url)
        info = self._download_json(
            f'https://umbrella.svc.screencastify.com/api/umbrellaService/watch/{video_id}', video_id)

        auth_query_string = traverse_obj(info, ('manifest', 'auth', 'query'))
        auth_query = urllib.parse.parse_qs(auth_query_string)

        collected = []
        mpd_url = traverse_obj(info, ('manifest', 'url'))
        if mpd_url:
            collected += self._extract_mpd_formats(
                mpd_url, video_id, mpd_id='dash', query=auth_query, fatal=False)
        m3u8_url = traverse_obj(info, ('manifest', 'hlsUrl'))
        if m3u8_url:
            collected += self._extract_m3u8_formats(
                m3u8_url, video_id, ext='mp4', m3u8_id='hls', query=auth_query, fatal=False)

        # Re-attach the auth parameters to each resolved format URL
        for fmt in collected:
            fmt['url'] = update_url_query(fmt['url'], auth_query)

        return {
            'id': video_id,
            'title': info.get('title'),
            'description': info.get('description'),
            'uploader': info.get('userName'),
            'formats': collected,
            'extra_param_to_segment_url': auth_query_string,
        }
|
@ -1,103 +1,567 @@
|
||||
import re
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
bool_or_none,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_qs,
|
||||
smuggle_url,
|
||||
try_get,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
|
||||
|
||||
class SlidesLiveIE(InfoExtractor):
|
||||
# Accepts plain presentation URLs as well as the /embed/ and
# /embed/presentation/ forms; the named group `id` is the numeric presentation id.
_VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
|
||||
# Download test cases. The leading comment of each case names the backing
# video service and the slides layout that the case exercises.
_TESTS = [{
    # service_name = yoda, only XML slides info
    'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
    'md5': 'b29fcd6c6952d0c79c5079b0e7a07e6f',
    'info_dict': {
        'id': '38902413',
        'ext': 'mp4',
        'title': 'GCC IA16 backend',
        'timestamp': 1648189972,
        'upload_date': '20220325',
        'thumbnail': r're:^https?://.*\.jpg',
        'thumbnails': 'count:42',
        'chapters': 'count:41',
        'duration': 1638,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # service_name = yoda, /v7/ slides
    'url': 'https://slideslive.com/38935785',
    'md5': '575cd7a6c0acc6e28422fe76dd4bcb1a',
    'info_dict': {
        'id': '38935785',
        'ext': 'mp4',
        'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
        'upload_date': '20211115',
        'timestamp': 1636996003,
        'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        'thumbnails': 'count:640',
        'chapters': 'count:639',
        'duration': 9832,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # service_name = yoda, /v1/ slides
    'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
    'info_dict': {
        'id': '38973182',
        'ext': 'mp4',
        'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
        'upload_date': '20220201',
        'thumbnail': r're:^https?://.*\.jpg',
        'timestamp': 1643728135,
        'thumbnails': 'count:3',
        'chapters': 'count:2',
        'duration': 5889,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # service_name = youtube, only XML slides info
    'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
    'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
    'info_dict': {
        'id': 'jmg02wCJD5M',
        'display_id': '38897546',
        'ext': 'mp4',
        'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
        'description': 'Watch full version of this video at https://slideslive.com/38897546.',
        'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
        'channel': 'SlidesLive Videos - G1',
        'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
        'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
        'uploader': 'SlidesLive Videos - G1',
        'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
        'live_status': 'not_live',
        'upload_date': '20160710',
        'timestamp': 1618786715,
        'duration': 6827,
        'like_count': int,
        'view_count': int,
        'comment_count': int,
        'channel_follower_count': int,
        'age_limit': 0,
        'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
        'thumbnails': 'count:169',
        'playable_in_embed': True,
        'availability': 'unlisted',
        'tags': [],
        'categories': ['People & Blogs'],
        'chapters': 'count:168',
    },
}, {
    # embed-only presentation, only XML slides info
    'url': 'https://slideslive.com/embed/presentation/38925850',
    'info_dict': {
        'id': '38925850',
        'ext': 'mp4',
        'title': 'Towards a Deep Network Architecture for Structured Smoothness',
        'thumbnail': r're:^https?://.*\.jpg',
        'thumbnails': 'count:8',
        'timestamp': 1629671508,
        'upload_date': '20210822',
        'chapters': 'count:7',
        'duration': 326,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # embed-only presentation, only JSON slides info, /v5/ slides (.png)
    'url': 'https://slideslive.com/38979920/',
    'info_dict': {
        'id': '38979920',
        'ext': 'mp4',
        'title': 'MoReL: Multi-omics Relational Learning',
        'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        'thumbnails': 'count:7',
        'timestamp': 1654714970,
        'upload_date': '20220608',
        'chapters': 'count:6',
        'duration': 171,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v2/ slides (.jpg)
    'url': 'https://slideslive.com/38954074',
    'info_dict': {
        'id': '38954074',
        'ext': 'mp4',
        'title': 'Decentralized Attribution of Generative Models',
        'thumbnail': r're:^https?://.*\.jpg',
        'thumbnails': 'count:16',
        'timestamp': 1622806321,
        'upload_date': '20210604',
        'chapters': 'count:15',
        'duration': 306,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v4/ slides (.png)
    'url': 'https://slideslive.com/38979570/',
    'info_dict': {
        'id': '38979570',
        'ext': 'mp4',
        'title': 'Efficient Active Search for Combinatorial Optimization Problems',
        'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        'thumbnails': 'count:9',
        'timestamp': 1654714896,
        'upload_date': '20220608',
        'chapters': 'count:8',
        'duration': 295,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v10/ slides
    'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
    'info_dict': {
        'id': '38979880',
        'ext': 'mp4',
        'title': 'The Representation Power of Neural Networks',
        'timestamp': 1654714962,
        'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        'thumbnails': 'count:22',
        'upload_date': '20220608',
        'chapters': 'count:21',
        'duration': 294,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v7/ slides, 2 video slides
    'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
    'playlist_count': 3,
    'info_dict': {
        'id': '38979682-playlist',
        'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
    },
    'playlist': [{
        'info_dict': {
            'id': '38979682',
            'ext': 'mp4',
            'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
            'timestamp': 1654714920,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:30',
            'upload_date': '20220608',
            'chapters': 'count:31',
            'duration': 272,
        },
    }, {
        'info_dict': {
            'id': '38979682-021',
            'ext': 'mp4',
            'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
            'duration': 3,
            'timestamp': 1654714920,
            'upload_date': '20220608',
        },
    }, {
        'info_dict': {
            'id': '38979682-024',
            'ext': 'mp4',
            'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
            'duration': 4,
            'timestamp': 1654714920,
            'upload_date': '20220608',
        },
    }],
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v6/ slides, 1 video slide, edit.videoken.com embed
    'url': 'https://slideslive.com/38979481/',
    'playlist_count': 2,
    'info_dict': {
        'id': '38979481-playlist',
        'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
    },
    'playlist': [{
        'info_dict': {
            'id': '38979481',
            'ext': 'mp4',
            'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
            'timestamp': 1654714877,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:43',
            'upload_date': '20220608',
            'chapters': 'count:43',
            'duration': 315,
        },
    }, {
        'info_dict': {
            'id': '38979481-013',
            'ext': 'mp4',
            'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
            'duration': 3,
            'timestamp': 1654714877,
            'upload_date': '20220608',
        },
    }],
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v3/ slides, .jpg and .png, service_name = youtube
    'url': 'https://slideslive.com/embed/38932460/',
    'info_dict': {
        'id': 'RTPdrgkyTiE',
        'display_id': '38932460',
        'ext': 'mp4',
        'title': 'Active Learning for Hierarchical Multi-Label Classification',
        'description': 'Watch full version of this video at https://slideslive.com/38932460.',
        'channel': 'SlidesLive Videos - A',
        'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
        'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
        'uploader': 'SlidesLive Videos - A',
        'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
        'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
        'upload_date': '20200903',
        'timestamp': 1602599092,
        'duration': 942,
        'age_limit': 0,
        'live_status': 'not_live',
        'playable_in_embed': True,
        'availability': 'unlisted',
        'categories': ['People & Blogs'],
        'tags': [],
        'channel_follower_count': int,
        'like_count': int,
        'view_count': int,
        'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)',
        'thumbnails': 'count:21',
        'chapters': 'count:20',
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # /v3/ slides, .png only, service_name = yoda
    'url': 'https://slideslive.com/38983994',
    'info_dict': {
        'id': '38983994',
        'ext': 'mp4',
        'title': 'Zero-Shot AutoML with Pretrained Models',
        'timestamp': 1662384834,
        'upload_date': '20220905',
        'thumbnail': r're:^https?://.*\.(?:jpg|png)',
        'thumbnails': 'count:23',
        'chapters': 'count:22',
        'duration': 295,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}, {
    # service_name = yoda
    'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
    'only_matching': True,
}, {
    # dead link, service_name = url
    'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
    'only_matching': True,
}, {
    # dead link, service_name = vimeo
    'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
    'only_matching': True,
}]
|
||||
|
||||
# Test case for a third-party page that embeds a SlidesLive presentation.
_WEBPAGE_TESTS = [{
    # only XML slides info
    'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
    'info_dict': {
        'id': '38925850',
        'ext': 'mp4',
        'title': 'Towards a Deep Network Architecture for Structured Smoothness',
        'thumbnail': r're:^https?://.*\.jpg',
        'thumbnails': 'count:8',
        'timestamp': 1629671508,
        'upload_date': '20210822',
        'chapters': 'count:7',
        'duration': 326,
    },
    'params': {
        'skip_download': 'm3u8',
    },
}]
|
||||
|
||||
@classmethod
def _extract_embed_urls(cls, url, webpage):
    """Yield an embed URL for every SlidesLive player instantiated in *webpage*.

    Each ``new SlidesLiveEmbed(... presentationId: "<digits>" ...)`` call found
    in the page produces one ``/embed/presentation/<id>`` URL. The embedding
    page *url* and its scheme://host origin are attached as the
    ``embed_parent_url`` and ``embed_container_origin`` query parameters.
    """
    # Reference: https://slideslive.com/embed_presentation.js
    embed_ids = re.findall(
        r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']', webpage)
    if not embed_ids:
        return

    # The origin depends only on the embedding page, so derive it once
    # instead of re-parsing the URL for every match.
    url_parsed = urllib.parse.urlparse(url)
    origin = f'{url_parsed.scheme}://{url_parsed.netloc}'
    for embed_id in embed_ids:
        yield update_url_query(
            f'https://slideslive.com/embed/presentation/{embed_id}', {
                'embed_parent_url': url,
                'embed_container_origin': origin,
            })
|
||||
|
||||
def _download_embed_webpage_handle(self, video_id, headers):
    """Download the embed player page for *video_id*.

    *headers* is sent with the request, and its ``Referer`` and ``Origin``
    entries are additionally passed as the ``embed_parent_url`` and
    ``embed_container_origin`` query parameters.

    Returns the result of ``_download_webpage_handle`` unchanged.
    """
    return self._download_webpage_handle(
        f'https://slideslive.com/embed/presentation/{video_id}', video_id,
        headers=headers, query=traverse_obj(headers, {
            'embed_parent_url': 'Referer',
            'embed_container_origin': 'Origin',
        }))
|
||||
|
||||
def _extract_custom_m3u8_info(self, m3u8_data):
    """Collect the ``#EXT-SL-*`` tags of a player manifest into a dict.

    Every line of *m3u8_data* of the form ``#EXT-SL-<NAME>:<value>`` whose
    NAME is listed in the lookup table below is stored under the mapped key;
    all other lines are ignored. The value is everything after the first
    colon. The ``video_servers`` and ``subtitles`` values are parsed as JSON
    and fall back to an empty list when parsing yields nothing.

    Returns the resulting dict (empty when no known tag is present).
    """
    prefix = '#EXT-SL-'
    lookup = {
        'PRESENTATION-TITLE': 'title',
        'PRESENTATION-UPDATED-AT': 'timestamp',
        'PRESENTATION-THUMBNAIL': 'thumbnail',
        'PLAYLIST-TYPE': 'playlist_type',
        'VOD-VIDEO-SERVICE-NAME': 'service_name',
        'VOD-VIDEO-ID': 'service_id',
        'VOD-VIDEO-SERVERS': 'video_servers',
        'VOD-SUBTITLES': 'subtitles',
        'VOD-SLIDES-JSON-URL': 'slides_json_url',
        'VOD-SLIDES-XML-URL': 'slides_xml_url',
    }

    m3u8_dict = {}
    for line in m3u8_data.splitlines():
        if not line.startswith(prefix):
            continue
        tag, _, value = line.partition(':')
        # Slice the prefix off rather than using str.lstrip(): lstrip() removes
        # any leading run of the given *characters*, so it would also eat the
        # start of a tag name that begins with one of '#', 'E', 'X', 'T', '-',
        # 'S' or 'L'.
        key = lookup.get(tag[len(prefix):])
        if not key:
            continue
        m3u8_dict[key] = value

    # Some values are stringified JSON arrays
    for key in ('video_servers', 'subtitles'):
        if key in m3u8_dict:
            m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []

    return m3u8_dict
|
||||
|
||||
def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
    """Extract HLS and DASH formats (and optionally the duration) of one video.

    Both manifests are looked up at ``https://<cdn_hostname>/<path>/master.<ext>``
    and extracted non-fatally. Unless *skip_duration* is set, the duration is
    read from the first HLS format's manifest; the DASH manifest is consulted
    only when that produced no duration.

    Returns a ``(formats, duration)`` tuple; *duration* is None when skipped
    or when no formats were found.
    """
    base_url = f'https://{cdn_hostname}/{path}'
    mpd_url = f'{base_url}/master.mpd'
    formats, duration = [], None

    hls_formats = self._extract_m3u8_formats(
        f'{base_url}/master.m3u8',
        video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
    if hls_formats:
        if not skip_duration:
            duration = self._extract_m3u8_vod_duration(
                hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')
        formats.extend(hls_formats)

    dash_formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash', fatal=False)
    if dash_formats:
        # Only fall back to DASH when HLS did not already give a duration
        if not duration and not skip_duration:
            duration = self._extract_mpd_vod_duration(
                mpd_url, video_id, note='Extracting duration from DASH manifest')
        formats.extend(dash_formats)

    return formats, duration
|
||||
|
||||
def _real_extract(self, url):
    """Extract a SlidesLive presentation.

    Loads the embed page (retrying with an allowed domain as Referer/Origin
    when the request is redirected to a ``domain_not_allowed`` URL), reads the
    player token from it, then downloads the player info and the slide
    listing. Slides become thumbnails and chapters.

    Returns a single info dict, or, when the slide listing contains video
    slides, a playlist whose first entry is the main presentation followed by
    one entry per ``yoda``-hosted video slide.

    Raises ExtractorError when the presentation is embed-only and the redirect
    does not name an allowed domain.
    """
    video_id = self._match_id(url)
    webpage, urlh = self._download_embed_webpage_handle(
        video_id, headers=traverse_obj(parse_qs(url), {
            'Referer': ('embed_parent_url', -1),
            'Origin': ('embed_container_origin', -1)}))
    redirect_url = urlh.geturl()
    if 'domain_not_allowed' in redirect_url:
        domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
        if not domain:
            raise ExtractorError(
                'This is an embed-only presentation. Try passing --referer', expected=True)
        webpage, _ = self._download_embed_webpage_handle(video_id, headers={
            'Referer': f'https://{domain}/',
            'Origin': f'https://{domain}',
        })

    player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
    player_data = self._download_webpage(
        f'https://ben.slideslive.com/player/{video_id}', video_id,
        note='Downloading player info', query={'player_token': player_token})
    player_info = self._extract_custom_m3u8_info(player_data)

    service_name = player_info['service_name'].lower()
    assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
    service_id = player_info['service_id']

    slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
    slides, slides_info = {}, []

    # Prefer the JSON slide listing; each slides_info entry is
    # (1-based slide number, image name, image extension, start time in seconds)
    if player_info.get('slides_json_url'):
        slides = self._download_json(
            player_info['slides_json_url'], video_id, fatal=False,
            note='Downloading slides JSON', errnote=False) or {}
        slide_ext_default = '.png'
        slide_quality = traverse_obj(slides, ('slide_qualities', 0))
        if slide_quality:
            slide_ext_default = '.jpg'
            slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
        for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
            slides_info.append((
                slide_id, traverse_obj(slide, ('image', 'name')),
                traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
                int_or_none(slide.get('time'), scale=1000)))

    # Fall back to the XML listing when the JSON one is missing or empty
    if not slides and player_info.get('slides_xml_url'):
        slides = self._download_xml(
            player_info['slides_xml_url'], video_id, fatal=False,
            note='Downloading slides XML', errnote='Failed to download slides info')
        slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
        # The download is non-fatal, so `slides` may not be a parsed document.
        # Check for the method instead of truthiness: an XML element without
        # children is falsy and truth-testing elements is deprecated.
        xml_slides = slides.findall('./slide') if hasattr(slides, 'findall') else []
        for slide_id, slide in enumerate(xml_slides, 1):
            slides_info.append((
                slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
                int_or_none(xpath_text(slide, './timeSec', 'time'))))

    chapters, thumbnails = [], []
    if url_or_none(player_info.get('thumbnail')):
        thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
    for slide_id, slide_path, slide_ext, start_time in slides_info:
        if slide_path:
            thumbnails.append({
                'id': f'{slide_id:03d}',
                'url': slide_url_template % (video_id, slide_path, slide_ext),
            })
        chapters.append({
            'title': f'Slide {slide_id:03d}',
            'start_time': start_time,
        })

    subtitles = {}
    for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
        webvtt_url = url_or_none(sub.get('webvtt_url'))
        if not webvtt_url:
            continue
        subtitles.setdefault(sub.get('language') or 'en', []).append({
            'url': webvtt_url,
            'ext': 'vtt',
        })

    info = {
        'id': video_id,
        'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
        'timestamp': unified_timestamp(player_info.get('timestamp')),
        'is_live': player_info.get('playlist_type') != 'vod',
        'thumbnails': thumbnails,
        'chapters': chapters,
        'subtitles': subtitles,
    }

    if service_name == 'url':
        info['url'] = service_id
    elif service_name == 'yoda':
        formats, duration = self._extract_formats_and_duration(
            player_info['video_servers'][0], service_id, video_id)
        info.update({
            'duration': duration,
            'formats': formats,
        })
    else:
        # vimeo / youtube: hand over to the matching extractor
        info.update({
            '_type': 'url_transparent',
            'url': service_id,
            'ie_key': service_name.capitalize(),
            'display_id': video_id,
        })
        if service_name == 'vimeo':
            info['url'] = smuggle_url(
                f'https://player.vimeo.com/video/{service_id}',
                {'http_headers': {'Referer': url}})

    video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
    if not video_slides:
        return info

    def entries():
        yield info

        service_data = self._download_json(
            f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
            video_id, fatal=False, query={
                'player_token': player_token,
                'videos': ','.join(video_slides),
            }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}

        for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
            if traverse_obj(slide, ('video', 'service')) != 'yoda':
                continue
            video_path = traverse_obj(slide, ('video', 'id'))
            cdn_hostname = traverse_obj(service_data, (
                video_path, 'video_servers', ...), get_all=False)
            if not cdn_hostname or not video_path:
                continue
            formats, _ = self._extract_formats_and_duration(
                cdn_hostname, video_path, video_id, skip_duration=True)
            if not formats:
                continue
            yield {
                'id': f'{video_id}-{slide_id:03d}',
                'title': f'{info["title"]} - Slide {slide_id:03d}',
                'timestamp': info['timestamp'],
                'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
                'formats': formats,
            }

    return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue