Updated to release 2020.11.26

pull/7/head
pukkandan 4 years ago
parent 02ced43cbf
commit 38d7028407

@ -477,6 +477,7 @@
- **massengeschmack.tv** - **massengeschmack.tv**
- **MatchTV** - **MatchTV**
- **MDR**: MDR.DE and KiKA - **MDR**: MDR.DE and KiKA
- **MedalTV**
- **media.ccc.de** - **media.ccc.de**
- **media.ccc.de:lists** - **media.ccc.de:lists**
- **Medialaan** - **Medialaan**
@ -846,6 +847,10 @@
- **Sport5** - **Sport5**
- **SportBox** - **SportBox**
- **SportDeutschland** - **SportDeutschland**
- **Spreaker**
- **SpreakerPage**
- **SpreakerShow**
- **SpreakerShowPage**
- **SpringboardPlatform** - **SpringboardPlatform**
- **Sprout** - **Sprout**
- **sr:mediathek**: Saarländischer Rundfunk - **sr:mediathek**: Saarländischer Rundfunk
@ -1064,7 +1069,7 @@
- **vk:wallpost** - **vk:wallpost**
- **vlive** - **vlive**
- **vlive:channel** - **vlive:channel**
- **vlive:playlist** - **vlive:post**
- **Vodlocker** - **Vodlocker**
- **VODPl** - **VODPl**
- **VODPlatform** - **VODPlatform**

@ -97,12 +97,15 @@ class FragmentFD(FileDownloader):
def _download_fragment(self, ctx, frag_url, info_dict, headers=None): def _download_fragment(self, ctx, frag_url, info_dict, headers=None):
fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index'])
success = ctx['dl'].download(fragment_filename, { fragment_info_dict = {
'url': frag_url, 'url': frag_url,
'http_headers': headers or info_dict.get('http_headers'), 'http_headers': headers or info_dict.get('http_headers'),
}) }
success = ctx['dl'].download(fragment_filename, fragment_info_dict)
if not success: if not success:
return False, None return False, None
if fragment_info_dict.get('filetime'):
ctx['fragment_filetime'] = fragment_info_dict.get('filetime')
down, frag_sanitized = sanitize_open(fragment_filename, 'rb') down, frag_sanitized = sanitize_open(fragment_filename, 'rb')
ctx['fragment_filename_sanitized'] = frag_sanitized ctx['fragment_filename_sanitized'] = frag_sanitized
frag_content = down.read() frag_content = down.read()
@ -258,6 +261,13 @@ class FragmentFD(FileDownloader):
downloaded_bytes = ctx['complete_frags_downloaded_bytes'] downloaded_bytes = ctx['complete_frags_downloaded_bytes']
else: else:
self.try_rename(ctx['tmpfilename'], ctx['filename']) self.try_rename(ctx['tmpfilename'], ctx['filename'])
if self.params.get('updatetime', True):
filetime = ctx.get('fragment_filetime')
if filetime:
try:
os.utime(ctx['filename'], (time.time(), filetime))
except Exception:
pass
downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename'])) downloaded_bytes = os.path.getsize(encodeFilename(ctx['filename']))
self._hook_progress({ self._hook_progress({

@ -981,7 +981,7 @@ class BBCIE(BBCCoUkIE):
group_id = self._search_regex( group_id = self._search_regex(
r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX, r'<div[^>]+\bclass=["\']video["\'][^>]+\bdata-pid=["\'](%s)' % self._ID_REGEX,
webpage, 'group id', default=None) webpage, 'group id', default=None)
if playlist_id: if group_id:
return self.url_result( return self.url_result(
'https://www.bbc.co.uk/programmes/%s' % group_id, 'https://www.bbc.co.uk/programmes/%s' % group_id,
ie=BBCCoUkIE.ie_key()) ie=BBCCoUkIE.ie_key())
@ -1092,10 +1092,26 @@ class BBCIE(BBCCoUkIE):
self._search_regex( self._search_regex(
r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage, r'(?s)bbcthreeConfig\s*=\s*({.+?})\s*;\s*<', webpage,
'bbcthree config', default='{}'), 'bbcthree config', default='{}'),
playlist_id, transform_source=js_to_json, fatal=False) playlist_id, transform_source=js_to_json, fatal=False) or {}
if bbc3_config: payload = bbc3_config.get('payload') or {}
if payload:
clip = payload.get('currentClip') or {}
clip_vpid = clip.get('vpid')
clip_title = clip.get('title')
if clip_vpid and clip_title:
formats, subtitles = self._download_media_selector(clip_vpid)
self._sort_formats(formats)
return {
'id': clip_vpid,
'title': clip_title,
'thumbnail': dict_get(clip, ('poster', 'imageUrl')),
'description': clip.get('description'),
'duration': parse_duration(clip.get('duration')),
'formats': formats,
'subtitles': subtitles,
}
bbc3_playlist = try_get( bbc3_playlist = try_get(
bbc3_config, lambda x: x['payload']['content']['bbcMedia']['playlist'], payload, lambda x: x['content']['bbcMedia']['playlist'],
dict) dict)
if bbc3_playlist: if bbc3_playlist:
playlist_title = bbc3_playlist.get('title') or playlist_title playlist_title = bbc3_playlist.get('title') or playlist_title
@ -1118,6 +1134,39 @@ class BBCIE(BBCCoUkIE):
return self.playlist_result( return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description) entries, playlist_id, playlist_title, playlist_description)
initial_data = self._parse_json(self._search_regex(
r'window\.__INITIAL_DATA__\s*=\s*({.+?});', webpage,
'preload state', default='{}'), playlist_id, fatal=False)
if initial_data:
def parse_media(media):
if not media:
return
for item in (try_get(media, lambda x: x['media']['items'], list) or []):
item_id = item.get('id')
item_title = item.get('title')
if not (item_id and item_title):
continue
formats, subtitles = self._download_media_selector(item_id)
self._sort_formats(formats)
entries.append({
'id': item_id,
'title': item_title,
'thumbnail': item.get('holdingImageUrl'),
'formats': formats,
'subtitles': subtitles,
})
for resp in (initial_data.get('data') or {}).values():
name = resp.get('name')
if name == 'media-experience':
parse_media(try_get(resp, lambda x: x['data']['initialItem']['mediaItem'], dict))
elif name == 'article':
for block in (try_get(resp, lambda x: x['data']['blocks'], list) or []):
if block.get('type') != 'media':
continue
parse_media(block.get('model'))
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
def extract_all(pattern): def extract_all(pattern):
return list(filter(None, map( return list(filter(None, map(
lambda s: self._parse_json(s, playlist_id, fatal=False), lambda s: self._parse_json(s, playlist_id, fatal=False),

@ -5,10 +5,16 @@ import codecs
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import (
compat_chr,
compat_ord,
compat_urllib_parse_unquote,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
float_or_none, float_or_none,
int_or_none, int_or_none,
merge_dicts,
multipart_encode, multipart_encode,
parse_duration, parse_duration,
random_birthday, random_birthday,
@ -107,8 +113,9 @@ class CDAIE(InfoExtractor):
r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage, r'Odsłony:(?:\s|&nbsp;)*([0-9]+)', webpage,
'view_count', default=None) 'view_count', default=None)
average_rating = self._search_regex( average_rating = self._search_regex(
r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)', (r'<(?:span|meta)[^>]+itemprop=(["\'])ratingValue\1[^>]*>(?P<rating_value>[0-9.]+)',
webpage, 'rating', fatal=False, group='rating_value') r'<span[^>]+\bclass=["\']rating["\'][^>]*>(?P<rating_value>[0-9.]+)'), webpage, 'rating', fatal=False,
group='rating_value')
info_dict = { info_dict = {
'id': video_id, 'id': video_id,
@ -123,6 +130,24 @@ class CDAIE(InfoExtractor):
'age_limit': 18 if need_confirm_age else 0, 'age_limit': 18 if need_confirm_age else 0,
} }
# Source: https://www.cda.pl/js/player.js?t=1606154898
def decrypt_file(a):
for p in ('_XDDD', '_CDA', '_ADC', '_CXD', '_QWE', '_Q5', '_IKSDE'):
a = a.replace(p, '')
a = compat_urllib_parse_unquote(a)
b = []
for c in a:
f = compat_ord(c)
b.append(compat_chr(33 + (f + 14) % 94) if 33 <= f and 126 >= f else compat_chr(f))
a = ''.join(b)
a = a.replace('.cda.mp4', '')
for p in ('.2cda.pl', '.3cda.pl'):
a = a.replace(p, '.cda.pl')
if '/upstream' in a:
a = a.replace('/upstream', '.mp4/upstream')
return 'https://' + a
return 'https://' + a + '.mp4'
def extract_format(page, version): def extract_format(page, version):
json_str = self._html_search_regex( json_str = self._html_search_regex(
r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page, r'player_data=(\\?["\'])(?P<player_data>.+?)\1', page,
@ -141,6 +166,8 @@ class CDAIE(InfoExtractor):
video['file'] = codecs.decode(video['file'], 'rot_13') video['file'] = codecs.decode(video['file'], 'rot_13')
if video['file'].endswith('adc.mp4'): if video['file'].endswith('adc.mp4'):
video['file'] = video['file'].replace('adc.mp4', '.mp4') video['file'] = video['file'].replace('adc.mp4', '.mp4')
elif not video['file'].startswith('http'):
video['file'] = decrypt_file(video['file'])
f = { f = {
'url': video['file'], 'url': video['file'],
} }
@ -179,4 +206,6 @@ class CDAIE(InfoExtractor):
self._sort_formats(formats) self._sort_formats(formats)
return info_dict info = self._search_json_ld(webpage, video_id, default={})
return merge_dicts(info_dict, info)

@ -620,6 +620,7 @@ from .markiza import (
from .massengeschmacktv import MassengeschmackTVIE from .massengeschmacktv import MassengeschmackTVIE
from .matchtv import MatchTVIE from .matchtv import MatchTVIE
from .mdr import MDRIE from .mdr import MDRIE
from .medaltv import MedalTVIE
from .mediaset import MediasetIE from .mediaset import MediasetIE
from .mediasite import ( from .mediasite import (
MediasiteIE, MediasiteIE,
@ -1102,6 +1103,12 @@ from .stitcher import StitcherIE
from .sport5 import Sport5IE from .sport5 import Sport5IE
from .sportbox import SportBoxIE from .sportbox import SportBoxIE
from .sportdeutschland import SportDeutschlandIE from .sportdeutschland import SportDeutschlandIE
from .spreaker import (
SpreakerIE,
SpreakerPageIE,
SpreakerShowIE,
SpreakerShowPageIE,
)
from .springboardplatform import SpringboardPlatformIE from .springboardplatform import SpringboardPlatformIE
from .sprout import SproutIE from .sprout import SproutIE
from .srgssr import ( from .srgssr import (
@ -1395,8 +1402,8 @@ from .vk import (
) )
from .vlive import ( from .vlive import (
VLiveIE, VLiveIE,
VLivePostIE,
VLiveChannelIE, VLiveChannelIE,
VLivePlaylistIE
) )
from .vodlocker import VodlockerIE from .vodlocker import VodlockerIE
from .vodpl import VODPlIE from .vodpl import VODPlIE

@ -0,0 +1,131 @@
# coding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
int_or_none,
str_or_none,
try_get,
)
class MedalTVIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?medal\.tv/clips/(?P<id>[0-9]+)'
_TESTS = [{
'url': 'https://medal.tv/clips/34934644/3Is9zyGMoBMr',
'md5': '7b07b064331b1cf9e8e5c52a06ae68fa',
'info_dict': {
'id': '34934644',
'ext': 'mp4',
'title': 'Quad Cold',
'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'MowgliSB',
'timestamp': 1603165266,
'upload_date': '20201020',
'uploader_id': 10619174,
}
}, {
'url': 'https://medal.tv/clips/36787208',
'md5': 'b6dc76b78195fff0b4f8bf4a33ec2148',
'info_dict': {
'id': '36787208',
'ext': 'mp4',
'title': 'u tk me i tk u bigger',
'description': 'Medal,https://medal.tv/desktop/',
'uploader': 'Mimicc',
'timestamp': 1605580939,
'upload_date': '20201117',
'uploader_id': 5156321,
}
}]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
hydration_data = self._parse_json(self._search_regex(
r'<script[^>]*>\s*(?:var\s*)?hydrationData\s*=\s*({.+?})\s*</script>',
webpage, 'hydration data', default='{}'), video_id)
clip = try_get(
hydration_data, lambda x: x['clips'][video_id], dict) or {}
if not clip:
raise ExtractorError(
'Could not find video information.', video_id=video_id)
title = clip['contentTitle']
source_width = int_or_none(clip.get('sourceWidth'))
source_height = int_or_none(clip.get('sourceHeight'))
aspect_ratio = source_width / source_height if source_width and source_height else 16 / 9
def add_item(container, item_url, height, id_key='format_id', item_id=None):
item_id = item_id or '%dp' % height
if item_id not in item_url:
return
width = int(round(aspect_ratio * height))
container.append({
'url': item_url,
id_key: item_id,
'width': width,
'height': height
})
formats = []
thumbnails = []
for k, v in clip.items():
if not (v and isinstance(v, compat_str)):
continue
mobj = re.match(r'(contentUrl|thumbnail)(?:(\d+)p)?$', k)
if not mobj:
continue
prefix = mobj.group(1)
height = int_or_none(mobj.group(2))
if prefix == 'contentUrl':
add_item(
formats, v, height or source_height,
item_id=None if height else 'source')
elif prefix == 'thumbnail':
add_item(thumbnails, v, height, 'id')
error = clip.get('error')
if not formats and error:
if error == 404:
raise ExtractorError(
'That clip does not exist.',
expected=True, video_id=video_id)
else:
raise ExtractorError(
'An unknown error occurred ({0}).'.format(error),
video_id=video_id)
self._sort_formats(formats)
# Necessary because the id of the author is not known in advance.
# Won't raise an issue if no profile can be found as this is optional.
author = try_get(
hydration_data, lambda x: list(x['profiles'].values())[0], dict) or {}
author_id = str_or_none(author.get('id'))
author_url = 'https://medal.tv/users/{0}'.format(author_id) if author_id else None
return {
'id': video_id,
'title': title,
'formats': formats,
'thumbnails': thumbnails,
'description': clip.get('contentDescription'),
'uploader': author.get('displayName'),
'timestamp': float_or_none(clip.get('created'), 1000),
'uploader_id': author_id,
'uploader_url': author_url,
'duration': int_or_none(clip.get('videoLengthSeconds')),
'view_count': int_or_none(clip.get('views')),
'like_count': int_or_none(clip.get('likes')),
'comment_count': int_or_none(clip.get('comments')),
}

@ -9,6 +9,7 @@ from ..compat import (
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
) )
from ..utils import ( from ..utils import (
determine_ext,
ExtractorError, ExtractorError,
int_or_none, int_or_none,
js_to_json, js_to_json,
@ -16,185 +17,13 @@ from ..utils import (
parse_age_limit, parse_age_limit,
parse_duration, parse_duration,
try_get, try_get,
url_or_none,
) )
class NRKBaseIE(InfoExtractor): class NRKBaseIE(InfoExtractor):
_GEO_COUNTRIES = ['NO'] _GEO_COUNTRIES = ['NO']
_api_host = None
def _real_extract(self, url):
video_id = self._match_id(url)
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
for api_host in api_hosts:
data = self._download_json(
'http://%s/mediaelement/%s' % (api_host, video_id),
video_id, 'Downloading mediaelement JSON',
fatal=api_host == api_hosts[-1])
if not data:
continue
self._api_host = api_host
break
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
entries = []
conviva = data.get('convivaStatistics') or {}
live = (data.get('mediaElementType') == 'Live'
or data.get('isLive') is True or conviva.get('isLive'))
def make_title(t):
return self._live_title(t) if live else t
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
return ((video_id, title) if len(media_assets) == 1
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url')
if not asset_url:
continue
formats = self._extract_akamai_formats(asset_url, video_id)
if not formats:
continue
self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
for subtitle in ('webVtt', 'timedText'):
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
if subtitle_url:
subtitles.setdefault('no', []).append({
'url': compat_urllib_parse_unquote(subtitle_url)
})
entries.append({
'id': asset.get('carrierId') or entry_id,
'title': make_title(entry_title),
'duration': duration,
'subtitles': subtitles,
'formats': formats,
})
if not entries:
media_url = data.get('mediaUrl')
if media_url:
formats = self._extract_akamai_formats(media_url, video_id)
self._sort_formats(formats)
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': make_title(title),
'duration': duration,
'formats': formats,
}]
if not entries:
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
season_number = None
episode_number = None
if data.get('mediaElementType') == 'Episode':
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
data.get('relativeOriginUrl', '')
EPISODENUM_RE = [
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
]
season_number = int_or_none(self._search_regex(
EPISODENUM_RE, _season_episode, 'season number',
default=None, group='season'))
episode_number = int_or_none(self._search_regex(
EPISODENUM_RE, _season_episode, 'episode number',
default=None, group='episode'))
thumbnails = None
images = data.get('images')
if images and isinstance(images, dict):
web_images = images.get('webImages')
if isinstance(web_images, list):
thumbnails = [{
'url': image['imageUrl'],
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
} for image in web_images if image.get('imageUrl')]
description = data.get('description')
category = data.get('mediaAnalytics', {}).get('category')
common_info = {
'description': description,
'series': series,
'episode': episode,
'season_number': season_number,
'episode_number': episode_number,
'categories': [category] if category else None,
'age_limit': parse_age_limit(data.get('legalAge')),
'thumbnails': thumbnails,
}
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
for entry in entries:
entry.update(common_info)
for f in entry['formats']:
f['vcodec'] = vcodec
points = data.get('shortIndexPoints')
if isinstance(points, list):
chapters = []
for next_num, point in enumerate(points, start=1):
if not isinstance(point, dict):
continue
start_time = parse_duration(point.get('startPoint'))
if start_time is None:
continue
end_time = parse_duration(
data.get('duration')
if next_num == len(points)
else points[next_num].get('startPoint'))
if end_time is None:
continue
chapters.append({
'start_time': start_time,
'end_time': end_time,
'title': point.get('title'),
})
if chapters and len(entries) == 1:
entries[0]['chapters'] = chapters
return self.playlist_result(entries, video_id, title, description)
class NRKIE(NRKBaseIE): class NRKIE(NRKBaseIE):
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
@ -202,13 +31,13 @@ class NRKIE(NRKBaseIE):
nrk:| nrk:|
https?:// https?://
(?: (?:
(?:www\.)?nrk\.no/video/PS\*| (?:www\.)?nrk\.no/video/(?:PS\*|[^_]+_)|
v8[-.]psapi\.nrk\.no/mediaelement/ v8[-.]psapi\.nrk\.no/mediaelement/
) )
) )
(?P<id>[^?#&]+) (?P<id>[^?\#&]+)
''' '''
_API_HOSTS = ('psapi.nrk.no', 'v8-psapi.nrk.no')
_TESTS = [{ _TESTS = [{
# video # video
'url': 'http://www.nrk.no/video/PS*150533', 'url': 'http://www.nrk.no/video/PS*150533',
@ -240,8 +69,76 @@ class NRKIE(NRKBaseIE):
}, { }, {
'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9', 'url': 'https://v8-psapi.nrk.no/mediaelement/ecc1b952-96dc-4a98-81b9-5296dc7a98d9',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://www.nrk.no/video/dompap-og-andre-fugler-i-piip-show_150533',
'only_matching': True,
}, {
'url': 'https://www.nrk.no/video/humor/kommentatorboksen-reiser-til-sjos_d1fda11f-a4ad-437a-a374-0398bc84e999',
'only_matching': True,
}] }]
def _extract_from_playback(self, video_id):
manifest = self._download_json(
'http://psapi.nrk.no/playback/manifest/%s' % video_id,
video_id, 'Downloading manifest JSON')
playable = manifest['playable']
formats = []
for asset in playable['assets']:
if not isinstance(asset, dict):
continue
if asset.get('encrypted'):
continue
format_url = url_or_none(asset.get('url'))
if not format_url:
continue
if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8':
formats.extend(self._extract_m3u8_formats(
format_url, video_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats)
data = self._download_json(
'http://psapi.nrk.no/playback/metadata/%s' % video_id,
video_id, 'Downloading metadata JSON')
preplay = data['preplay']
titles = preplay['titles']
title = titles['title']
alt_title = titles.get('subtitle')
description = preplay.get('description')
duration = parse_duration(playable.get('duration')) or parse_duration(data.get('duration'))
thumbnails = []
for image in try_get(
preplay, lambda x: x['poster']['images'], list) or []:
if not isinstance(image, dict):
continue
image_url = url_or_none(image.get('url'))
if not image_url:
continue
thumbnails.append({
'url': image_url,
'width': int_or_none(image.get('pixelWidth')),
'height': int_or_none(image.get('pixelHeight')),
})
return {
'id': video_id,
'title': title,
'alt_title': alt_title,
'description': description,
'duration': duration,
'thumbnails': thumbnails,
'formats': formats,
}
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_from_playback(video_id)
class NRKTVIE(NRKBaseIE): class NRKTVIE(NRKBaseIE):
IE_DESC = 'NRK TV and NRK Radio' IE_DESC = 'NRK TV and NRK Radio'
@ -380,6 +277,181 @@ class NRKTVIE(NRKBaseIE):
'only_matching': True, 'only_matching': True,
}] }]
_api_host = None
def _extract_from_mediaelement(self, video_id):
api_hosts = (self._api_host, ) if self._api_host else self._API_HOSTS
for api_host in api_hosts:
data = self._download_json(
'http://%s/mediaelement/%s' % (api_host, video_id),
video_id, 'Downloading mediaelement JSON',
fatal=api_host == api_hosts[-1])
if not data:
continue
self._api_host = api_host
break
title = data.get('fullTitle') or data.get('mainTitle') or data['title']
video_id = data.get('id') or video_id
entries = []
conviva = data.get('convivaStatistics') or {}
live = (data.get('mediaElementType') == 'Live'
or data.get('isLive') is True or conviva.get('isLive'))
def make_title(t):
return self._live_title(t) if live else t
media_assets = data.get('mediaAssets')
if media_assets and isinstance(media_assets, list):
def video_id_and_title(idx):
return ((video_id, title) if len(media_assets) == 1
else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx)))
for num, asset in enumerate(media_assets, 1):
asset_url = asset.get('url')
if not asset_url:
continue
formats = self._extract_akamai_formats(asset_url, video_id)
if not formats:
continue
self._sort_formats(formats)
# Some f4m streams may not work with hdcore in fragments' URLs
for f in formats:
extra_param = f.get('extra_param_to_segment_url')
if extra_param and 'hdcore' in extra_param:
del f['extra_param_to_segment_url']
entry_id, entry_title = video_id_and_title(num)
duration = parse_duration(asset.get('duration'))
subtitles = {}
for subtitle in ('webVtt', 'timedText'):
subtitle_url = asset.get('%sSubtitlesUrl' % subtitle)
if subtitle_url:
subtitles.setdefault('no', []).append({
'url': compat_urllib_parse_unquote(subtitle_url)
})
entries.append({
'id': asset.get('carrierId') or entry_id,
'title': make_title(entry_title),
'duration': duration,
'subtitles': subtitles,
'formats': formats,
})
if not entries:
media_url = data.get('mediaUrl')
if media_url:
formats = self._extract_akamai_formats(media_url, video_id)
self._sort_formats(formats)
duration = parse_duration(data.get('duration'))
entries = [{
'id': video_id,
'title': make_title(title),
'duration': duration,
'formats': formats,
}]
if not entries:
MESSAGES = {
'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet',
'ProgramRightsHasExpired': 'Programmet har gått ut',
'NoProgramRights': 'Ikke tilgjengelig',
'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge',
}
message_type = data.get('messageType', '')
# Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked*
if 'IsGeoBlocked' in message_type:
self.raise_geo_restricted(
msg=MESSAGES.get('ProgramIsGeoBlocked'),
countries=self._GEO_COUNTRIES)
raise ExtractorError(
'%s said: %s' % (self.IE_NAME, MESSAGES.get(
message_type, message_type)),
expected=True)
series = conviva.get('seriesName') or data.get('seriesTitle')
episode = conviva.get('episodeName') or data.get('episodeNumberOrDate')
season_number = None
episode_number = None
if data.get('mediaElementType') == 'Episode':
_season_episode = data.get('scoresStatistics', {}).get('springStreamStream') or \
data.get('relativeOriginUrl', '')
EPISODENUM_RE = [
r'/s(?P<season>\d{,2})e(?P<episode>\d{,2})\.',
r'/sesong-(?P<season>\d{,2})/episode-(?P<episode>\d{,2})',
]
season_number = int_or_none(self._search_regex(
EPISODENUM_RE, _season_episode, 'season number',
default=None, group='season'))
episode_number = int_or_none(self._search_regex(
EPISODENUM_RE, _season_episode, 'episode number',
default=None, group='episode'))
thumbnails = None
images = data.get('images')
if images and isinstance(images, dict):
web_images = images.get('webImages')
if isinstance(web_images, list):
thumbnails = [{
'url': image['imageUrl'],
'width': int_or_none(image.get('width')),
'height': int_or_none(image.get('height')),
} for image in web_images if image.get('imageUrl')]
description = data.get('description')
category = data.get('mediaAnalytics', {}).get('category')
common_info = {
'description': description,
'series': series,
'episode': episode,
'season_number': season_number,
'episode_number': episode_number,
'categories': [category] if category else None,
'age_limit': parse_age_limit(data.get('legalAge')),
'thumbnails': thumbnails,
}
vcodec = 'none' if data.get('mediaType') == 'Audio' else None
for entry in entries:
entry.update(common_info)
for f in entry['formats']:
f['vcodec'] = vcodec
points = data.get('shortIndexPoints')
if isinstance(points, list):
chapters = []
for next_num, point in enumerate(points, start=1):
if not isinstance(point, dict):
continue
start_time = parse_duration(point.get('startPoint'))
if start_time is None:
continue
end_time = parse_duration(
data.get('duration')
if next_num == len(points)
else points[next_num].get('startPoint'))
if end_time is None:
continue
chapters.append({
'start_time': start_time,
'end_time': end_time,
'title': point.get('title'),
})
if chapters and len(entries) == 1:
entries[0]['chapters'] = chapters
return self.playlist_result(entries, video_id, title, description)
def _real_extract(self, url):
video_id = self._match_id(url)
return self._extract_from_mediaelement(video_id)
class NRKTVEpisodeIE(InfoExtractor): class NRKTVEpisodeIE(InfoExtractor):
_VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)' _VALID_URL = r'https?://tv\.nrk\.no/serie/(?P<id>[^/]+/sesong/\d+/episode/\d+)'

@ -0,0 +1,176 @@
# coding: utf-8
from __future__ import unicode_literals
import itertools
from .common import InfoExtractor
from ..compat import compat_str
from ..utils import (
float_or_none,
int_or_none,
str_or_none,
try_get,
unified_timestamp,
url_or_none,
)
def _extract_episode(data, episode_id=None):
title = data['title']
download_url = data['download_url']
series = try_get(data, lambda x: x['show']['title'], compat_str)
uploader = try_get(data, lambda x: x['author']['fullname'], compat_str)
thumbnails = []
for image in ('image_original', 'image_medium', 'image'):
image_url = url_or_none(data.get('%s_url' % image))
if image_url:
thumbnails.append({'url': image_url})
def stats(key):
return int_or_none(try_get(
data,
(lambda x: x['%ss_count' % key],
lambda x: x['stats']['%ss' % key])))
def duration(key):
return float_or_none(data.get(key), scale=1000)
return {
'id': compat_str(episode_id or data['episode_id']),
'url': download_url,
'display_id': data.get('permalink'),
'title': title,
'description': data.get('description'),
'timestamp': unified_timestamp(data.get('published_at')),
'uploader': uploader,
'uploader_id': str_or_none(data.get('author_id')),
'creator': uploader,
'duration': duration('duration') or duration('length'),
'view_count': stats('play'),
'like_count': stats('like'),
'comment_count': stats('message'),
'format': 'MPEG Layer 3',
'format_id': 'mp3',
'container': 'mp3',
'ext': 'mp3',
'thumbnails': thumbnails,
'series': series,
'extractor_key': SpreakerIE.ie_key(),
}
class SpreakerIE(InfoExtractor):
_VALID_URL = r'''(?x)
https?://
api\.spreaker\.com/
(?:
(?:download/)?episode|
v2/episodes
)/
(?P<id>\d+)
'''
_TESTS = [{
'url': 'https://api.spreaker.com/episode/12534508',
'info_dict': {
'id': '12534508',
'display_id': 'swm-ep15-how-to-market-your-music-part-2',
'ext': 'mp3',
'title': 'EP:15 | Music Marketing (Likes) - Part 2',
'description': 'md5:0588c43e27be46423e183076fa071177',
'timestamp': 1502250336,
'upload_date': '20170809',
'uploader': 'SWM',
'uploader_id': '9780658',
'duration': 1063.42,
'view_count': int,
'like_count': int,
'comment_count': int,
'series': 'Success With Music (SWM)',
},
}, {
'url': 'https://api.spreaker.com/download/episode/12534508/swm_ep15_how_to_market_your_music_part_2.mp3',
'only_matching': True,
}, {
'url': 'https://api.spreaker.com/v2/episodes/12534508?export=episode_segments',
'only_matching': True,
}]
def _real_extract(self, url):
episode_id = self._match_id(url)
data = self._download_json(
'https://api.spreaker.com/v2/episodes/%s' % episode_id,
episode_id)['response']['episode']
return _extract_episode(data, episode_id)
class SpreakerPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/user/[^/]+/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/user/9780658/swm-ep15-how-to-market-your-music-part-2',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
episode_id = self._search_regex(
(r'data-episode_id=["\'](?P<id>\d+)',
r'episode_id\s*:\s*(?P<id>\d+)'), webpage, 'episode id')
return self.url_result(
'https://api.spreaker.com/episode/%s' % episode_id,
ie=SpreakerIE.ie_key(), video_id=episode_id)
class SpreakerShowIE(InfoExtractor):
_VALID_URL = r'https?://api\.spreaker\.com/show/(?P<id>\d+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/3-ninjas-podcast',
'info_dict': {
'id': '4652058',
},
'playlist_mincount': 118,
}]
def _entries(self, show_id):
for page_num in itertools.count(1):
episodes = self._download_json(
'https://api.spreaker.com/show/%s/episodes' % show_id,
show_id, note='Downloading JSON page %d' % page_num, query={
'page': page_num,
'max_per_page': 100,
})
pager = try_get(episodes, lambda x: x['response']['pager'], dict)
if not pager:
break
results = pager.get('results')
if not results or not isinstance(results, list):
break
for result in results:
if not isinstance(result, dict):
continue
yield _extract_episode(result)
if page_num == pager.get('last_page'):
break
def _real_extract(self, url):
show_id = self._match_id(url)
return self.playlist_result(self._entries(show_id), playlist_id=show_id)
class SpreakerShowPageIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?spreaker\.com/show/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'https://www.spreaker.com/show/success-with-music',
'only_matching': True,
}]
def _real_extract(self, url):
display_id = self._match_id(url)
webpage = self._download_webpage(url, display_id)
show_id = self._search_regex(
r'show_id\s*:\s*(?P<id>\d+)', webpage, 'show id')
return self.url_result(
'https://api.spreaker.com/show/%s' % show_id,
ie=SpreakerShowIE.ie_key(), video_id=show_id)

@ -21,6 +21,7 @@ from ..utils import (
parse_age_limit, parse_age_limit,
parse_iso8601, parse_iso8601,
sanitized_Request, sanitized_Request,
std_headers,
) )
@ -227,8 +228,10 @@ class VikiIE(VikiBaseIE):
resp = self._download_json( resp = self._download_json(
'https://www.viki.com/api/videos/' + video_id, 'https://www.viki.com/api/videos/' + video_id,
video_id, 'Downloading video JSON', video_id, 'Downloading video JSON', headers={
headers={'x-viki-app-ver': '4.0.57'}) 'x-client-user-agent': std_headers['User-Agent'],
'x-viki-app-ver': '4.0.57',
})
video = resp['video'] video = resp['video']
self._check_errors(video) self._check_errors(video)

@ -1,55 +1,50 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
import re
import time
import itertools import itertools
import json
from .common import InfoExtractor
from .naver import NaverBaseIE from .naver import NaverBaseIE
from ..compat import compat_str from ..compat import (
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none,
merge_dicts, merge_dicts,
str_or_none,
strip_or_none,
try_get, try_get,
urlencode_postdata, urlencode_postdata,
) )
class VLiveIE(NaverBaseIE): class VLiveBaseIE(NaverBaseIE):
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
class VLiveIE(VLiveBaseIE):
IE_NAME = 'vlive' IE_NAME = 'vlive'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|post)/(?P<id>(?:\d-)?[0-9]+)' _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/(?:video|embed)/(?P<id>[0-9]+)'
_NETRC_MACHINE = 'vlive' _NETRC_MACHINE = 'vlive'
_TESTS = [{ _TESTS = [{
'url': 'https://www.vlive.tv/video/1326', 'url': 'http://www.vlive.tv/video/1326',
'md5': 'cc7314812855ce56de70a06a27314983', 'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': { 'info_dict': {
'id': '1326', 'id': '1326',
'ext': 'mp4', 'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast", 'title': "Girl's Day's Broadcast",
'creator': "Girl's Day", 'creator': "Girl's Day",
'view_count': int, 'view_count': int,
'uploader_id': 'muploader_a', 'uploader_id': 'muploader_a',
}, },
}, }, {
{ 'url': 'http://www.vlive.tv/video/16937',
'url': 'https://vlive.tv/post/1-18244258',
'md5': 'cc7314812855ce56de70a06a27314983',
'info_dict': {
'id': '1326',
'ext': 'mp4',
'title': "[V LIVE] Girl's Day's Broadcast",
'creator': "Girl's Day",
'view_count': int,
'uploader_id': 'muploader_a',
},
},
{
'url': 'https://www.vlive.tv/video/16937',
'info_dict': { 'info_dict': {
'id': '16937', 'id': '16937',
'ext': 'mp4', 'ext': 'mp4',
'title': '[V LIVE] 첸백시 걍방', 'title': '첸백시 걍방',
'creator': 'EXO', 'creator': 'EXO',
'view_count': int, 'view_count': int,
'subtitles': 'mincount:12', 'subtitles': 'mincount:12',
@ -70,12 +65,15 @@ class VLiveIE(NaverBaseIE):
'subtitles': 'mincount:10', 'subtitles': 'mincount:10',
}, },
'skip': 'This video is only available for CH+ subscribers', 'skip': 'This video is only available for CH+ subscribers',
}, {
'url': 'https://www.vlive.tv/embed/1326',
'only_matching': True,
}, {
# works only with gcc=KR
'url': 'https://www.vlive.tv/video/225019',
'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)
def _real_initialize(self): def _real_initialize(self):
self._login() self._login()
@ -107,118 +105,159 @@ class VLiveIE(NaverBaseIE):
if not is_logged_in(): if not is_logged_in():
raise ExtractorError('Unable to log in', expected=True) raise ExtractorError('Unable to log in', expected=True)
def _real_extract(self, url): def _call_api(self, path_template, video_id, fields=None):
# url may match on a post or a video url with a post_id potentially matching a video_id query = {'appId': self._APP_ID, 'gcc': 'KR'}
working_id = self._match_id(url) if fields:
webpage = self._download_webpage(url, working_id) query['fields'] = fields
try:
PARAMS_RE = r'window\.__PRELOADED_STATE__\s*=\s*({.*});?\s*</script>' return self._download_json(
PARAMS_FIELD = 'params' 'https://www.vlive.tv/globalv-web/vam-web/' + path_template % video_id, video_id,
'Downloading %s JSON metadata' % path_template.split('/')[-1].split('-')[0],
params = self._search_regex( headers={'Referer': 'https://www.vlive.tv/'}, query=query)
PARAMS_RE, webpage, PARAMS_FIELD, default='', flags=re.DOTALL) except ExtractorError as e:
params = self._parse_json(params, working_id, fatal=False) if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
self.raise_login_required(json.loads(e.cause.read().decode())['message'])
video_params = try_get(params, lambda x: x["postDetail"]["post"]["officialVideo"], dict) raise
if video_params is None:
error = try_get(params, lambda x: x["postDetail"]["error"], dict)
error_data = try_get(error, lambda x: x["data"], dict)
error_video = try_get(error_data, lambda x: x["officialVideo"], dict)
error_msg = try_get(error, lambda x: x["message"], compat_str)
product_type = try_get(error_data,
[lambda x: x["officialVideo"]["productType"],
lambda x: x["board"]["boardType"]],
compat_str)
if error_video is not None:
if product_type in ('VLIVE_PLUS', 'VLIVE+'):
self.raise_login_required('This video is only available with V LIVE+.')
elif error_msg is not None:
raise ExtractorError('V LIVE reported the following error: %s' % error_msg)
else:
raise ExtractorError('Failed to extract video parameters.')
elif 'post' in url:
raise ExtractorError('Url does not appear to be a video post.', expected=True)
else:
raise ExtractorError('Failed to extract video parameters.')
video_id = working_id if 'video' in url else str(video_params["videoSeq"])
video_type = video_params["type"] def _real_extract(self, url):
if video_type in ('VOD'): video_id = self._match_id(url)
encoding_status = video_params["encodingStatus"]
if encoding_status == 'COMPLETE': post = self._call_api(
return self._replay(video_id, webpage, params, video_params) 'post/v1.0/officialVideoPost-%s', video_id,
else: 'author{nickname},channel{channelCode,channelName},officialVideo{commentCount,exposeStatus,likeCount,playCount,playTime,status,title,type,vodId}')
raise ExtractorError('VOD encoding not yet complete. Please try again later.',
expected=True) video = post['officialVideo']
elif video_type in ('LIVE'):
video_status = video_params["status"] def get_common_fields():
if video_status in ('RESERVED'): channel = post.get('channel') or {}
return {
'title': video.get('title'),
'creator': post.get('author', {}).get('nickname'),
'channel': channel.get('channelName'),
'channel_id': channel.get('channelCode'),
'duration': int_or_none(video.get('playTime')),
'view_count': int_or_none(video.get('playCount')),
'like_count': int_or_none(video.get('likeCount')),
'comment_count': int_or_none(video.get('commentCount')),
}
video_type = video.get('type')
if video_type == 'VOD':
inkey = self._call_api('video/v1.0/vod/%s/inkey', video_id)['inkey']
vod_id = video['vodId']
return merge_dicts(
get_common_fields(),
self._extract_video_info(video_id, vod_id, inkey))
elif video_type == 'LIVE':
status = video.get('status')
if status == 'ON_AIR':
stream_url = self._call_api(
'old/v3/live/%s/playInfo',
video_id)['result']['adaptiveStreamUrl']
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
info = get_common_fields()
info.update({
'title': self._live_title(video['title']),
'id': video_id,
'formats': formats,
'is_live': True,
})
return info
elif status == 'ENDED':
raise ExtractorError(
'Uploading for replay. Please wait...', expected=True)
elif status == 'RESERVED':
raise ExtractorError('Coming soon!', expected=True) raise ExtractorError('Coming soon!', expected=True)
elif video_status in ('ENDED', 'END'): elif video.get('exposeStatus') == 'CANCEL':
raise ExtractorError('Uploading for replay. Please wait...', expected=True) raise ExtractorError(
'We are sorry, but the live broadcast has been canceled.',
expected=True)
else: else:
return self._live(video_id, webpage, params) raise ExtractorError('Unknown status ' + status)
else:
raise ExtractorError('Unknown video type %s' % video_type)
def _get_common_fields(self, webpage, params):
title = self._og_search_title(webpage)
description = self._html_search_meta(
['og:description', 'description', 'twitter:description'],
webpage, 'description', default=None)
creator = (try_get(params, lambda x: x["channel"]["channel"]["channelName"], compat_str)
or self._search_regex(r'on (.*) channel', description or '', 'creator', fatal=False))
thumbnail = self._og_search_thumbnail(webpage)
return {
'title': title,
'creator': creator,
'thumbnail': thumbnail,
}
def _live(self, video_id, webpage, params):
LIVE_INFO_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/old/v3/live/%s/playInfo' % video_id
play_info = self._download_json(LIVE_INFO_ENDPOINT, video_id,
headers={"referer": "https://www.vlive.tv"})
streams = try_get(play_info, lambda x: x["result"]["streamList"], list) or []
formats = [] class VLivePostIE(VLiveIE):
for stream in streams: IE_NAME = 'vlive:post'
formats.extend(self._extract_m3u8_formats( _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/post/(?P<id>\d-\d+)'
stream['serviceUrl'], video_id, 'mp4', _TESTS = [{
fatal=False, live=True)) # uploadType = SOS
self._sort_formats(formats) 'url': 'https://www.vlive.tv/post/1-20088044',
'info_dict': {
'id': '1-20088044',
'title': 'Hola estrellitas la tierra les dice hola (si era así no?) Ha...',
'description': 'md5:fab8a1e50e6e51608907f46c7fa4b407',
},
'playlist_count': 3,
}, {
# uploadType = V
'url': 'https://www.vlive.tv/post/1-20087926',
'info_dict': {
'id': '1-20087926',
'title': 'James Corden: And so, the baby becamos the Papa💜😭💪😭',
},
'playlist_count': 1,
}]
_FVIDEO_TMPL = 'fvideo/v1.0/fvideo-%%s/%s'
_SOS_TMPL = _FVIDEO_TMPL % 'sosPlayInfo'
_INKEY_TMPL = _FVIDEO_TMPL % 'inKey'
info = self._get_common_fields(webpage, params) def _real_extract(self, url):
info.update({ post_id = self._match_id(url)
'title': self._live_title(info['title']),
'id': video_id,
'formats': formats,
'is_live': True,
})
return info
def _replay(self, video_id, webpage, params, video_params): post = self._call_api(
long_video_id = video_params["vodId"] 'post/v1.0/post-%s', post_id,
'attachments{video},officialVideo{videoSeq},plainBody,title')
VOD_KEY_ENDPOINT = 'https://www.vlive.tv/globalv-web/vam-web/video/v1.0/vod/%s/inkey' % video_id video_seq = str_or_none(try_get(
key_json = self._download_json(VOD_KEY_ENDPOINT, video_id, post, lambda x: x['officialVideo']['videoSeq']))
headers={"referer": "https://www.vlive.tv"}) if video_seq:
key = key_json["inkey"] return self.url_result(
'http://www.vlive.tv/video/' + video_seq,
VLiveIE.ie_key(), video_seq)
return merge_dicts( title = post['title']
self._get_common_fields(webpage, params), entries = []
self._extract_video_info(video_id, long_video_id, key)) for idx, video in enumerate(post['attachments']['video'].values()):
video_id = video.get('videoId')
if not video_id:
continue
upload_type = video.get('uploadType')
upload_info = video.get('uploadInfo') or {}
entry = None
if upload_type == 'SOS':
download = self._call_api(
self._SOS_TMPL, video_id)['videoUrl']['download']
formats = []
for f_id, f_url in download.items():
formats.append({
'format_id': f_id,
'url': f_url,
'height': int_or_none(f_id[:-1]),
})
self._sort_formats(formats)
entry = {
'formats': formats,
'id': video_id,
'thumbnail': upload_info.get('imageUrl'),
}
elif upload_type == 'V':
vod_id = upload_info.get('videoId')
if not vod_id:
continue
inkey = self._call_api(self._INKEY_TMPL, video_id)['inKey']
entry = self._extract_video_info(video_id, vod_id, inkey)
if entry:
entry['title'] = '%s_part%s' % (title, idx)
entries.append(entry)
return self.playlist_result(
entries, post_id, title, strip_or_none(post.get('plainBody')))
class VLiveChannelIE(InfoExtractor): class VLiveChannelIE(VLiveBaseIE):
IE_NAME = 'vlive:channel' IE_NAME = 'vlive:channel'
_VALID_URL = r'https?://(?:(?:www|m)\.)?(?:channels\.vlive\.tv/|vlive\.tv/channels?/)(?P<id>[0-9A-Z]+)' _VALID_URL = r'https?://(?:channels\.vlive\.tv|(?:(?:www|m)\.)?vlive\.tv/channel)/(?P<id>[0-9A-Z]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://channels.vlive.tv/FCD4B', 'url': 'http://channels.vlive.tv/FCD4B',
'info_dict': { 'info_dict': {
'id': 'FCD4B', 'id': 'FCD4B',
'title': 'MAMAMOO', 'title': 'MAMAMOO',
@ -226,63 +265,39 @@ class VLiveChannelIE(InfoExtractor):
'playlist_mincount': 110 'playlist_mincount': 110
}, { }, {
'url': 'https://www.vlive.tv/channel/FCD4B', 'url': 'https://www.vlive.tv/channel/FCD4B',
'info_dict': { 'only_matching': True,
'id': 'FCD4B',
'title': 'MAMAMOO',
},
'playlist_mincount': 110
}] }]
_APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'
def _call_api(self, path, channel_key_suffix, channel_value, note, query):
q = {
'app_id': self._APP_ID,
'channel' + channel_key_suffix: channel_value,
}
q.update(query)
return self._download_json(
'http://api.vfan.vlive.tv/vproxy/channelplus/' + path,
channel_value, note='Downloading ' + note, query=q)['result']
def _real_extract(self, url): def _real_extract(self, url):
channel_code = self._match_id(url) channel_code = self._match_id(url)
webpage = self._download_webpage( channel_seq = self._call_api(
'http://channels.vlive.tv/%s/video' % channel_code, channel_code) 'decodeChannelCode', 'Code', channel_code,
'decode channel code', {})['channelSeq']
app_id = None
app_js_url = self._search_regex(
r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
webpage, 'app js', default=None, group='url')
if app_js_url:
app_js = self._download_webpage(
app_js_url, channel_code, 'Downloading app JS', fatal=False)
if app_js:
app_id = self._search_regex(
r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
app_js, 'app id', default=None)
app_id = app_id or self._APP_ID
channel_info = self._download_json(
'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
channel_code, note='Downloading decode channel code',
query={
'app_id': app_id,
'channelCode': channel_code,
'_': int(time.time())
})
channel_seq = channel_info['result']['channelSeq']
channel_name = None channel_name = None
entries = [] entries = []
for page_num in itertools.count(1): for page_num in itertools.count(1):
video_list = self._download_json( video_list = self._call_api(
'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList', 'getChannelVideoList', 'Seq', channel_seq,
channel_code, note='Downloading channel list page #%d' % page_num, 'channel list page #%d' % page_num, {
query={
'app_id': app_id,
'channelSeq': channel_seq,
# Large values of maxNumOfRows (~300 or above) may cause # Large values of maxNumOfRows (~300 or above) may cause
# empty responses (see [1]), e.g. this happens for [2] that # empty responses (see [1]), e.g. this happens for [2] that
# has more than 300 videos. # has more than 300 videos.
# 1. https://github.com/ytdl-org/youtube-dl/issues/13830 # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
# 2. http://channels.vlive.tv/EDBF. # 2. http://channels.vlive.tv/EDBF.
'maxNumOfRows': 100, 'maxNumOfRows': 100,
'_': int(time.time()),
'pageNo': page_num 'pageNo': page_num
} }
) )
@ -290,11 +305,11 @@ class VLiveChannelIE(InfoExtractor):
if not channel_name: if not channel_name:
channel_name = try_get( channel_name = try_get(
video_list, video_list,
lambda x: x['result']['channelInfo']['channelName'], lambda x: x['channelInfo']['channelName'],
compat_str) compat_str)
videos = try_get( videos = try_get(
video_list, lambda x: x['result']['videoList'], list) video_list, lambda x: x['videoList'], list)
if not videos: if not videos:
break break
@ -310,79 +325,3 @@ class VLiveChannelIE(InfoExtractor):
return self.playlist_result( return self.playlist_result(
entries, channel_code, channel_name) entries, channel_code, channel_name)
class VLivePlaylistIE(InfoExtractor):
IE_NAME = 'vlive:playlist'
_VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
_VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
_TESTS = [{
# regular working playlist
'url': 'https://www.vlive.tv/video/117956/playlist/117963',
'info_dict': {
'id': '117963',
'title': '아이돌룸(IDOL ROOM) 41회 - (여자)아이들'
},
'playlist_mincount': 10
}, {
# playlist with no playlistVideoSeqs
'url': 'http://www.vlive.tv/video/22867/playlist/22912',
'info_dict': {
'id': '22867',
'ext': 'mp4',
'title': '[V LIVE] Valentine Day Message from MINA',
'creator': 'TWICE',
'view_count': int
},
'params': {
'skip_download': True,
}
}]
def _build_video_result(self, video_id, message):
self.to_screen(message)
return self.url_result(
self._VIDEO_URL_TEMPLATE % video_id,
ie=VLiveIE.ie_key(), video_id=video_id)
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
video_id, playlist_id = mobj.group('video_id', 'id')
if self._downloader.params.get('noplaylist'):
return self._build_video_result(
video_id,
'Downloading just video %s because of --no-playlist'
% video_id)
self.to_screen(
'Downloading playlist %s - add --no-playlist to just download video'
% playlist_id)
webpage = self._download_webpage(
'http://www.vlive.tv/video/%s/playlist/%s'
% (video_id, playlist_id), playlist_id)
raw_item_ids = self._search_regex(
r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
'playlist video seqs', default=None, fatal=False)
if not raw_item_ids:
return self._build_video_result(
video_id,
'Downloading just video %s because no playlist was found'
% video_id)
item_ids = self._parse_json(raw_item_ids, playlist_id)
entries = [
self.url_result(
self._VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
video_id=compat_str(item_id))
for item_id in item_ids]
playlist_name = self._html_search_regex(
r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
webpage, 'playlist title', fatal=False)
return self.playlist_result(entries, playlist_id, playlist_name)

@ -1335,44 +1335,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
return self._parse_json( return self._parse_json(
uppercase_escape(config), video_id, fatal=False) uppercase_escape(config), video_id, fatal=False)
def _get_music_metadata_from_yt_initial(self, yt_initial):
music_metadata = []
key_map = {
'Album': 'album',
'Artist': 'artist',
'Song': 'track'
}
contents = try_get(yt_initial, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'])
if type(contents) is list:
for content in contents:
music_track = {}
if type(content) is not dict:
continue
videoSecondaryInfoRenderer = try_get(content, lambda x: x['videoSecondaryInfoRenderer'])
if type(videoSecondaryInfoRenderer) is not dict:
continue
rows = try_get(videoSecondaryInfoRenderer, lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'])
if type(rows) is not list:
continue
for row in rows:
metadataRowRenderer = try_get(row, lambda x: x['metadataRowRenderer'])
if type(metadataRowRenderer) is not dict:
continue
key = try_get(metadataRowRenderer, lambda x: x['title']['simpleText'])
value = try_get(metadataRowRenderer, lambda x: x['contents'][0]['simpleText']) or \
try_get(metadataRowRenderer, lambda x: x['contents'][0]['runs'][0]['text'])
if type(key) is not str or type(value) is not str:
continue
if key in key_map:
if key_map[key] in music_track:
# we've started on a new track
music_metadata.append(music_track)
music_track = {}
music_track[key_map[key]] = value
if len(music_track.keys()):
music_metadata.append(music_track)
return music_metadata
def _get_automatic_captions(self, video_id, webpage): def _get_automatic_captions(self, video_id, webpage):
"""We need the webpage for getting the captions url, pass it as an """We need the webpage for getting the captions url, pass it as an
argument to speed up the process.""" argument to speed up the process."""
@ -2295,7 +2257,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Youtube Music Auto-generated description # Youtube Music Auto-generated description
release_date = release_year = None release_date = release_year = None
if video_description: if video_description:
mobj = re.search(r'(?s)Provided to YouTube by [^\n]+\n+(?P<track>[^·]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?', video_description) mobj = re.search(r'(?s)(?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+(?P<album>[^\n]+)(?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description)
if mobj: if mobj:
if not track: if not track:
track = mobj.group('track').strip() track = mobj.group('track').strip()
@ -2312,13 +2274,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if release_year: if release_year:
release_year = int(release_year) release_year = int(release_year)
yt_initial = self._get_yt_initial_data(video_id, video_webpage) yt_initial_data = self._extract_yt_initial_data(video_id, video_webpage)
if yt_initial: contents = try_get(yt_initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
music_metadata = self._get_music_metadata_from_yt_initial(yt_initial) for content in contents:
if len(music_metadata): rows = try_get(content, lambda x: x['videoSecondaryInfoRenderer']['metadataRowContainer']['metadataRowContainerRenderer']['rows'], list) or []
album = music_metadata[0].get('album') multiple_songs = False
artist = music_metadata[0].get('artist') for row in rows:
track = music_metadata[0].get('track') if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True:
multiple_songs = True
break
for row in rows:
mrr = row.get('metadataRowRenderer') or {}
mrr_title = try_get(
mrr, lambda x: x['title']['simpleText'], compat_str)
mrr_contents = try_get(
mrr, lambda x: x['contents'][0], dict) or {}
mrr_contents_text = try_get(mrr_contents, [lambda x: x['simpleText'], lambda x: x['runs'][0]['text']], compat_str)
if not (mrr_title and mrr_contents_text):
continue
if mrr_title == 'License':
video_license = mrr_contents_text
elif not multiple_songs:
if mrr_title == 'Album':
album = mrr_contents_text
elif mrr_title == 'Artist':
artist = mrr_contents_text
elif mrr_title == 'Song':
track = mrr_contents_text
m_episode = re.search( m_episode = re.search(
r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>', r'<div[^>]+id="watch7-headline"[^>]*>\s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b>\s*S(?P<season>\d+)\s*•\s*E(?P<episode>\d+)</span>',

Loading…
Cancel
Save