mirror of https://github.com/yt-dlp/yt-dlp
Update to ytdl-2021.01.24.1
parent
f74980cbae
commit
a820dc722e
@ -0,0 +1,196 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..compat import compat_str
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
)
|
||||
|
||||
|
||||
class MindsBaseIE(InfoExtractor):
    """Common base for the minds.com extractors: URL prefix and API access."""

    _VALID_URL_BASE = r'https?://(?:www\.)?minds\.com/'

    def _call_api(self, path, video_id, resource, query=None):
        """Download ``path`` below https://www.minds.com/api/ as JSON.

        ``resource`` only labels the download note; ``query`` is forwarded
        unchanged as the request's query parameters.
        """
        endpoint = 'https://www.minds.com/api/' + path
        # Echo the stored XSRF cookie (if any) back as a request header;
        # an empty header value is sent when no such cookie exists.
        xsrf_cookie = self._get_cookies(endpoint).get('XSRF-TOKEN')
        if xsrf_cookie:
            xsrf_value = xsrf_cookie.value
        else:
            xsrf_value = ''
        request_headers = {
            'Referer': 'https://www.minds.com/',
            'X-XSRF-TOKEN': xsrf_value,
        }
        note = 'Downloading %s JSON metadata' % resource
        return self._download_json(
            endpoint, video_id, note, headers=request_headers, query=query)
|
||||
|
||||
|
||||
class MindsIE(MindsBaseIE):
    """Extractor for a single minds.com media, newsfeed or archive entry."""

    IE_NAME = 'minds'
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?:media|newsfeed|archive/view)/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.minds.com/media/100000000000086822',
        'md5': '215a658184a419764852239d4970b045',
        'info_dict': {
            'id': '100000000000086822',
            'ext': 'mp4',
            'title': 'Minds intro sequence',
            'thumbnail': r're:https?://.+\.png',
            'uploader_id': 'ottman',
            'upload_date': '20130524',
            'timestamp': 1369404826,
            'uploader': 'Bill Ottman',
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['animation'],
            'comment_count': int,
            'license': 'attribution-cc',
        },
    }, {
        # entity.type == 'activity' and empty title
        'url': 'https://www.minds.com/newsfeed/798025111988506624',
        'md5': 'b2733a74af78d7fd3f541c4cbbaa5950',
        'info_dict': {
            'id': '798022190320226304',
            'ext': 'mp4',
            'title': '798022190320226304',
            'uploader': 'ColinFlaherty',
            'upload_date': '20180111',
            'timestamp': 1515639316,
            'uploader_id': 'ColinFlaherty',
        },
    }, {
        'url': 'https://www.minds.com/archive/view/715172106794442752',
        'only_matching': True,
    }, {
        # youtube perma_url
        'url': 'https://www.minds.com/newsfeed/1197131838022602752',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the entity behind ``url`` and return its info dict.

        Activities that do not wrap a video are delegated to whatever
        extractor handles their ``perma_url``.
        """
        entity_id = self._match_id(url)
        entity = self._call_api(
            'v1/entities/entity/' + entity_id, entity_id, 'entity')['entity']

        if entity.get('type') != 'activity':
            assert(entity['subtype'] == 'video')
            video_id = entity_id
        elif entity.get('custom_type') == 'video':
            # The activity only wraps the actual video entity.
            video_id = entity['entity_guid']
        else:
            return self.url_result(entity['perma_url'])

        # 1080p and webm formats available only on the sources array
        video = self._call_api(
            'v2/media/video/' + video_id, video_id, 'video')

        formats = [{
            'format_id': source.get('label'),
            'height': int_or_none(source.get('size')),
            'url': source['src'],
        } for source in (video.get('sources') or []) if source.get('src')]
        self._sort_formats(formats)

        # Prefer the entity embedded in the video response over the first one.
        entity = video.get('entity') or entity
        owner = entity.get('ownerObj') or {}
        uploader_id = owner.get('username')
        if uploader_id:
            uploader_url = 'https://www.minds.com/' + uploader_id
        else:
            uploader_url = None

        tags = entity.get('tags')
        if isinstance(tags, compat_str) and tags:
            # A lone tag arrives as a plain string; normalise it to a list.
            tags = [tags]

        thumbnail = None
        poster = video.get('poster') or entity.get('thumbnail_src')
        # Request the poster (non-fatally) and report the URL that the
        # response handle ends up with.
        poster_handle = self._request_webpage(
            poster, video_id, fatal=False) if poster else None
        if poster_handle:
            thumbnail = poster_handle.geturl()

        info = {
            'id': video_id,
            'title': entity.get('title') or video_id,
            'formats': formats,
            'description': clean_html(entity.get('description')) or None,
            'license': str_or_none(entity.get('license')),
            'timestamp': int_or_none(entity.get('time_created')),
            'uploader': strip_or_none(owner.get('name')),
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'tags': tags,
            'thumbnail': thumbnail,
        }
        # The statistics live under colon-separated keys on the entity.
        info.update({
            'view_count': int_or_none(entity.get('play:count')),
            'like_count': int_or_none(entity.get('thumbs:up:count')),
            'dislike_count': int_or_none(entity.get('thumbs:down:count')),
            'comment_count': int_or_none(entity.get('comments:count')),
        })
        return info
|
||||
|
||||
|
||||
class MindsFeedBaseIE(MindsBaseIE):
    """Base for minds.com playlist extractors backed by a container feed.

    Subclasses provide ``_FEED_PATH`` (the API path segment below ``v1/``)
    and ``_FEED_TYPE`` (the key of the feed object in the API response,
    also used to label the download note).
    """

    _PAGE_SIZE = 150

    def _entries(self, feed_id):
        """Yield one ``url_result`` per video in the feed, page by page."""
        query = {'limit': self._PAGE_SIZE, 'sync': 1}
        i = 1
        while True:
            data = self._call_api(
                'v2/feeds/container/%s/videos' % feed_id,
                feed_id, 'page %s' % i, query)
            entities = data.get('entities') or []
            for entity in entities:
                guid = entity.get('guid')
                if not guid:
                    continue
                yield self.url_result(
                    'https://www.minds.com/newsfeed/' + guid,
                    MindsIE.ie_key(), guid)
            # Read the cursor defensively: a response without 'load-next'
            # must end pagination, not raise KeyError after the page's
            # entries were already yielded.
            next_from = data.get('load-next')
            # A short page or an empty cursor means there is nothing left.
            if not (next_from and len(entities) == self._PAGE_SIZE):
                break
            query['from_timestamp'] = next_from
            i += 1

    def _real_extract(self, url):
        """Look up the feed object for ``url`` and return it as a playlist."""
        feed_id = self._match_id(url)
        feed = self._call_api(
            'v1/%s/%s' % (self._FEED_PATH, feed_id),
            feed_id, self._FEED_TYPE)[self._FEED_TYPE]

        return self.playlist_result(
            self._entries(feed['guid']), feed_id,
            strip_or_none(feed.get('name')),
            feed.get('briefdescription'))
|
||||
|
||||
|
||||
class MindsChannelIE(MindsFeedBaseIE):
    """Playlist extractor for the videos of a minds.com channel page."""

    # Key of the feed object in the API response; also part of IE_NAME.
    _FEED_TYPE = 'channel'
    # API path segment below v1/ that is used to look the feed up.
    _FEED_PATH = 'channel'
    IE_NAME = 'minds:%s' % _FEED_TYPE
    # Any top-level path matches except the prefixes excluded by the lookahead.
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'(?!(?:newsfeed|media|api|archive|groups)/)(?P<id>[^/?&#]+)'
    _TEST = {
        'url': 'https://www.minds.com/ottman',
        'info_dict': {
            'id': 'ottman',
            'title': 'Bill Ottman',
            'description': 'Co-creator & CEO @minds',
        },
        'playlist_mincount': 54,
    }
|
||||
|
||||
|
||||
class MindsGroupIE(MindsFeedBaseIE):
    """Playlist extractor for the videos of a minds.com group profile."""

    # Key of the feed object in the API response; also part of IE_NAME.
    _FEED_TYPE = 'group'
    # API path segment below v1/ that is used to look the feed up.
    _FEED_PATH = 'groups/group'
    IE_NAME = 'minds:%s' % _FEED_TYPE
    _VALID_URL = MindsBaseIE._VALID_URL_BASE + r'groups/profile/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'https://www.minds.com/groups/profile/785582576369672204/feed/videos',
        'info_dict': {
            'id': '785582576369672204',
            'title': 'Cooking Videos',
        },
        'playlist_mincount': 1,
    }
|
@ -1,104 +1,125 @@
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import str_to_int
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
try_get,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class NineGagIE(InfoExtractor):
    """Extractor for animated posts on 9gag.com (``/gag/<id>`` URLs).

    All metadata comes from the ``https://9gag.com/v1/post`` JSON endpoint;
    posts whose type is not ``Animated`` are rejected as not being videos.
    """

    IE_NAME = '9gag'
    _VALID_URL = r'https?://(?:www\.)?9gag\.com/gag/(?P<id>[^/?&#]+)'

    _TEST = {
        'url': 'https://9gag.com/gag/ae5Ag7B',
        'info_dict': {
            'id': 'ae5Ag7B',
            'ext': 'mp4',
            'title': 'Capybara Agility Training',
            'upload_date': '20191108',
            'timestamp': 1573237208,
            'categories': ['Awesome'],
            'tags': ['Weimaraner', 'American Pit Bull Terrier'],
            'duration': 44,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the post behind ``url``.

        Raises ExtractorError (expected) when the post is not animated.
        """
        post_id = self._match_id(url)
        post = self._download_json(
            'https://9gag.com/v1/post', post_id, query={
                'id': post_id
            })['data']['post']

        if post.get('type') != 'Animated':
            raise ExtractorError(
                'The given url does not contain a video',
                expected=True)

        title = post['title']

        duration = None
        formats = []
        thumbnails = []
        # Entries of post['images'] are told apart by the extension of their
        # URL: still images become thumbnails, webm/mp4 files become formats.
        for key, image in (post.get('images') or {}).items():
            image_url = url_or_none(image.get('url'))
            if not image_url:
                continue
            ext = determine_ext(image_url)
            # Drop the literal 'image' prefix only.  str.strip('image')
            # removes any of the characters i/m/a/g/e from BOTH ends and
            # would turn e.g. 'image460svwm' into '460svw'.
            image_id = key[len('image'):] if key.startswith('image') else key
            common = {
                'url': image_url,
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            }
            if ext in ('jpg', 'png'):
                webp_url = image.get('webpUrl')
                if webp_url:
                    # Same dimensions, alternative webp encoding.
                    t = common.copy()
                    t.update({
                        'id': image_id + '-webp',
                        'url': webp_url,
                    })
                    thumbnails.append(t)
                common.update({
                    'id': image_id,
                    'ext': ext,
                })
                thumbnails.append(common)
            elif ext in ('webm', 'mp4'):
                # The first non-empty duration found is used for the post.
                if not duration:
                    duration = int_or_none(image.get('duration'))
                common['acodec'] = 'none' if image.get('hasAudio') == 0 else None
                # Optional per-codec variants of the same rendition.
                for vcodec in ('vp8', 'vp9', 'h265'):
                    c_url = image.get(vcodec + 'Url')
                    if not c_url:
                        continue
                    c_f = common.copy()
                    c_f.update({
                        'format_id': image_id + '-' + vcodec,
                        'url': c_url,
                        'vcodec': vcodec,
                    })
                    formats.append(c_f)
                common.update({
                    'ext': ext,
                    'format_id': image_id,
                })
                formats.append(common)
        self._sort_formats(formats)

        section = try_get(post, lambda x: x['postSection']['name'])

        # tags stays None when the post carries no tag list at all.
        tags = None
        post_tags = post.get('tags')
        if post_tags:
            tags = [tag['key'] for tag in post_tags if tag.get('key')]

        get_count = lambda x: int_or_none(post.get(x + 'Count'))

        return {
            'id': post_id,
            'title': title,
            'timestamp': int_or_none(post.get('creationTs')),
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
            'like_count': get_count('upVote'),
            'dislike_count': get_count('downVote'),
            'comment_count': get_count('comments'),
            'age_limit': 18 if post.get('nsfw') == 1 else None,
            'categories': [section] if section else None,
            'tags': tags,
        }
|
||||
|
@ -0,0 +1,156 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
import re
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
clean_podcast_url,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
strip_or_none,
|
||||
try_get,
|
||||
unified_strdate,
|
||||
)
|
||||
|
||||
|
||||
class SpotifyBaseIE(InfoExtractor):
    """Common base for the Spotify podcast extractors.

    Holds the access token, the persisted-query hashes and the shared
    episode-to-info-dict conversion.
    """

    _ACCESS_TOKEN = None
    _OPERATION_HASHES = {
        'Episode': '8276d4423d709ae9b68ec1b74cc047ba0f7479059a37820be730f125189ac2bf',
        'MinimalShow': '13ee079672fad3f858ea45a55eb109553b4fb0969ed793185b2e34cbb6ee7cc0',
        'ShowEpisodes': 'e0e5ce27bd7748d2c59b4d44ba245a8992a05be75d6fabc3b20753fc8857444d',
    }
    _VALID_URL_TEMPL = r'https?://open\.spotify\.com/%s/(?P<id>[^/?&#]+)'

    def _real_initialize(self):
        """Fetch an access token and store it on the instance."""
        token_response = self._download_json(
            'https://open.spotify.com/get_access_token', None)
        self._ACCESS_TOKEN = token_response['accessToken']

    def _call_api(self, operation, video_id, variables):
        """Run the persisted query ``operation`` and return its ``data`` member.

        ``operation`` must be a key of ``_OPERATION_HASHES``; ``variables``
        is JSON-encoded into the request's query string.
        """
        params = {
            'operationName': 'query' + operation,
            'variables': json.dumps(variables),
            'extensions': json.dumps({
                'persistedQuery': {
                    'sha256Hash': self._OPERATION_HASHES[operation],
                },
            })
        }
        auth_headers = {'authorization': 'Bearer ' + self._ACCESS_TOKEN}
        response = self._download_json(
            'https://api-partner.spotify.com/pathfinder/v1/query', video_id,
            query=params, headers=auth_headers)
        return response['data']

    def _extract_episode(self, episode, series):
        """Convert an API ``episode`` object into an info dict.

        ``series`` is stored verbatim under the ``series`` key.
        """
        episode_id = episode['id']
        title = episode['name'].strip()

        formats = []
        preview = episode.get('audioPreview') or {}
        preview_url = preview.get('url')
        if preview_url:
            preview_format = {
                # Swap the preview host/path prefix for the anon-podcast one.
                'url': preview_url.replace('://p.scdn.co/mp3-preview/', '://anon-podcast.scdn.co/'),
                'vcodec': 'none',
            }
            format_name = preview.get('format')
            if format_name:
                preview_format['format_id'] = format_name
                # First group: three-character container code; last group:
                # bitrate digits, after an optional upper-case middle part.
                match = re.match(r'([0-9A-Z]{3})_(?:[A-Z]+_)?(\d+)', format_name)
                if match:
                    container, bitrate = match.groups()
                    preview_format['abr'] = int(bitrate)
                    preview_format['ext'] = container.lower()
            formats.append(preview_format)

        # Only items flagged as externally hosted are added as formats.
        for item in (try_get(episode, lambda x: x['audio']['items']) or []):
            item_url = item.get('url')
            if item_url and item.get('externallyHosted'):
                formats.append({
                    'url': clean_podcast_url(item_url),
                    'vcodec': 'none',
                })

        cover_sources = try_get(episode, lambda x: x['coverArt']['sources']) or []
        thumbnails = [{
            'url': source['url'],
            'width': int_or_none(source.get('width')),
            'height': int_or_none(source.get('height')),
        } for source in cover_sources if source.get('url')]

        duration_ms = try_get(
            episode, lambda x: x['duration']['totalMilliseconds'])
        release_iso = try_get(
            episode, lambda x: x['releaseDate']['isoString'])

        return {
            'id': episode_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': strip_or_none(episode.get('description')),
            'duration': float_or_none(duration_ms, 1000),
            'release_date': unified_strdate(release_iso),
            'series': series,
        }
|
||||
|
||||
|
||||
class SpotifyIE(SpotifyBaseIE):
    """Extractor for a single podcast episode on open.spotify.com."""

    IE_NAME = 'spotify'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'episode'
    _TEST = {
        'url': 'https://open.spotify.com/episode/4Z7GAJ50bgctf6uclHlWKo',
        'md5': '74010a1e3fa4d9e1ab3aa7ad14e42d3b',
        'info_dict': {
            'id': '4Z7GAJ50bgctf6uclHlWKo',
            'ext': 'mp3',
            'title': 'From the archive: Why time management is ruining our lives',
            'description': 'md5:b120d9c4ff4135b42aa9b6d9cde86935',
            'duration': 2083.605,
            'release_date': '20201217',
            'series': "The Guardian's Audio Long Reads",
        }
    }

    def _real_extract(self, url):
        """Query the ``Episode`` operation and convert its result."""
        episode_id = self._match_id(url)
        variables = {
            'uri': 'spotify:episode:' + episode_id
        }
        episode = self._call_api('Episode', episode_id, variables)['episode']
        # The series name is taken from the episode's podcast, when present.
        series = try_get(episode, lambda x: x['podcast']['name'])
        return self._extract_episode(episode, series)
|
||||
|
||||
|
||||
class SpotifyShowIE(SpotifyBaseIE):
    """Playlist extractor for a whole podcast show on open.spotify.com."""

    IE_NAME = 'spotify:show'
    _VALID_URL = SpotifyBaseIE._VALID_URL_TEMPL % 'show'
    _TEST = {
        'url': 'https://open.spotify.com/show/4PM9Ke6l66IRNpottHKV9M',
        'info_dict': {
            'id': '4PM9Ke6l66IRNpottHKV9M',
            'title': 'The Story from the Guardian',
            'description': 'The Story podcast is dedicated to our finest audio documentaries, investigations and long form stories',
        },
        'playlist_mincount': 36,
    }

    def _real_extract(self, url):
        """Fetch the show's episodes in one request and build a playlist."""
        show_id = self._match_id(url)
        # A single request with a very large limit stands in for pagination.
        variables = {
            'limit': 1000000000,
            'offset': 0,
            'uri': 'spotify:show:' + show_id,
        }
        podcast = self._call_api('ShowEpisodes', show_id, variables)['podcast']
        podcast_name = podcast.get('name')

        items = try_get(podcast, lambda x: x['episodes']['items']) or []
        # Items without an episode object are skipped.
        entries = [
            self._extract_episode(item['episode'], podcast_name)
            for item in items if item.get('episode')]

        return self.playlist_result(
            entries, show_id, podcast_name, podcast.get('description'))
|
@ -0,0 +1,193 @@
|
||||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import json
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_get,
|
||||
)
|
||||
|
||||
|
||||
class TrovoBaseIE(InfoExtractor):
    """Common base for the trovo.live extractors."""

    _VALID_URL_BASE = r'https?://(?:www\.)?trovo\.live/'

    def _extract_streamer_info(self, data):
        """Build the uploader fields from ``data['streamerInfo']``.

        Every field is None when the corresponding streamer value is absent.
        """
        streamer = data.get('streamerInfo') or {}
        user_name = streamer.get('userName')
        if user_name:
            channel_url = 'https://trovo.live/' + user_name
        else:
            channel_url = None
        return {
            'uploader': streamer.get('nickName'),
            'uploader_id': str_or_none(streamer.get('uid')),
            'uploader_url': channel_url,
        }
|
||||
|
||||
|
||||
class TrovoIE(TrovoBaseIE):
    """Extractor for live streams on trovo.live (``/<username>`` URLs)."""

    # Everything below the site root except the clip/ and video/ prefixes.
    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?!(?:clip|video)/)(?P<id>[^/?&#]+)'

    def _real_extract(self, url):
        """Return the live-stream info dict for the channel in ``url``.

        Raises ExtractorError (expected) when the API reports ``isLive == 0``.
        """
        username = self._match_id(url)
        live_query = '''{
  getLiveInfo(params: {userName: "%s"}) {
    isLive
    programInfo {
      coverUrl
      id
      streamInfo {
        desc
        playUrl
      }
      title
    }
    streamerInfo {
      nickName
      uid
      userName
    }
  }
}''' % username
        response = self._download_json(
            'https://gql.trovo.live/', username, query={
                'query': live_query,
            })
        live_info = response['data']['getLiveInfo']
        if live_info.get('isLive') == 0:
            raise ExtractorError('%s is offline' % username, expected=True)
        program_info = live_info['programInfo']
        program_id = program_info['id']
        title = self._live_title(program_info['title'])

        formats = []
        for stream_info in (program_info.get('streamInfo') or []):
            play_url = stream_info.get('playUrl')
            if play_url:
                quality = stream_info.get('desc')
                # The height is parsed from 'desc' minus its last character.
                if quality:
                    height = int_or_none(quality[:-1])
                else:
                    height = None
                formats.append({
                    'format_id': quality,
                    'height': height,
                    'url': play_url,
                })
        self._sort_formats(formats)

        info = dict(self._extract_streamer_info(live_info))
        info.update({
            'id': program_id,
            'title': title,
            'formats': formats,
            'thumbnail': program_info.get('coverUrl'),
            'is_live': True,
        })
        return info
|
||||
|
||||
|
||||
class TrovoVodIE(TrovoBaseIE):
    """Extractor for VODs and clips on trovo.live (``/video/`` and ``/clip/``)."""

    _VALID_URL = TrovoBaseIE._VALID_URL_BASE + r'(?:clip|video)/(?P<id>[^/?&#]+)'
    _TESTS = [{
        'url': 'https://trovo.live/video/ltv-100095501_100095501_1609596043',
        'info_dict': {
            'id': 'ltv-100095501_100095501_1609596043',
            'ext': 'mp4',
            'title': 'Spontaner 12 Stunden Stream! - Ok Boomer!',
            'uploader': 'Exsl',
            'timestamp': 1609640305,
            'upload_date': '20210103',
            'uploader_id': '100095501',
            'duration': 43977,
            'view_count': int,
            'like_count': int,
            'comment_count': int,
            'comments': 'mincount:8',
            'categories': ['Grand Theft Auto V'],
        },
    }, {
        'url': 'https://trovo.live/clip/lc-5285890810184026005',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Fetch VOD details and comments in one batched request.

        The request body is a JSON list of two queries; the response is
        indexed the same way (details first, comments second).
        """
        vid = self._match_id(url)
        detail_query = '''{
  batchGetVodDetailInfo(params: {vids: ["%s"]}) {
    VodDetailInfos
  }
}''' % vid
        comments_query = '''{
  getCommentList(params: {appInfo: {postID: "%s"}, pageSize: 1000000000, preview: {}}) {
    commentList {
      author {
        nickName
        uid
      }
      commentID
      content
      createdAt
      parentID
    }
  }
}''' % vid
        payload = json.dumps([{
            'query': detail_query,
        }, {
            'query': comments_query,
        }]).encode()
        resp = self._download_json(
            'https://gql.trovo.live/', vid, data=payload, headers={
                'Content-Type': 'application/json',
            })
        vod_detail_info = resp[0]['data']['batchGetVodDetailInfo']['VodDetailInfos'][vid]
        vod_info = vod_detail_info['vodInfo']
        title = vod_info['title']

        language = vod_info.get('languageName')
        formats = []
        for play_info in (vod_info.get('playInfos') or []):
            play_url = play_info.get('playUrl')
            if play_url:
                quality = play_info.get('desc')
                # The height is parsed from 'desc' minus its last character.
                if quality:
                    height = int_or_none(quality[:-1])
                else:
                    height = None
                formats.append({
                    'ext': 'mp4',
                    'filesize': int_or_none(play_info.get('fileSize')),
                    'format_id': quality,
                    'height': height,
                    'language': language,
                    'protocol': 'm3u8_native',
                    'tbr': int_or_none(play_info.get('bitrate')),
                    'url': play_url,
                })
        self._sort_formats(formats)

        category = vod_info.get('categoryName')

        def get_count(kind):
            # Counters are stored on the VOD as '<kind>Num'.
            return int_or_none(vod_info.get(kind + 'Num'))

        # The comment part is optional: anything but a list yields no comments.
        comment_list = try_get(
            resp, lambda x: x[1]['data']['getCommentList']['commentList'],
            list) or []
        comments = []
        for comment in comment_list:
            content = comment.get('content')
            if content:
                author = comment.get('author') or {}
                parent = comment.get('parentID')
                # A parentID of 0 is reported as the 'root' parent.
                if parent == 0:
                    parent_id = 'root'
                else:
                    parent_id = str_or_none(parent)
                comments.append({
                    'author': author.get('nickName'),
                    'author_id': str_or_none(author.get('uid')),
                    'id': str_or_none(comment.get('commentID')),
                    'text': content,
                    'timestamp': int_or_none(comment.get('createdAt')),
                    'parent': parent_id,
                })

        info = {
            'id': vid,
            'title': title,
            'formats': formats,
            'thumbnail': vod_info.get('coverUrl'),
            'timestamp': int_or_none(vod_info.get('publishTs')),
            'duration': int_or_none(vod_info.get('duration')),
            'view_count': get_count('watch'),
            'like_count': get_count('like'),
            'comment_count': get_count('comment'),
            'comments': comments,
            'categories': [category] if category else None,
        }
        info.update(self._extract_streamer_info(vod_detail_info))
        return info
|
Loading…
Reference in New Issue