[roosterteeth] Add series extractor

pull/1572/head
pukkandan 3 years ago
parent 34921b4345
commit 244644c02c
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698

@ -1200,7 +1200,7 @@ from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE from .rmcdecouverte import RMCDecouverteIE
from .ro220 import Ro220IE from .ro220 import Ro220IE
from .rockstargames import RockstarGamesIE from .rockstargames import RockstarGamesIE
from .roosterteeth import RoosterTeethIE from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rozhlas import RozhlasIE from .rozhlas import RozhlasIE

@ -1,25 +1,93 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import ( from ..compat import compat_HTTPError
compat_HTTPError,
compat_str,
)
from ..utils import ( from ..utils import (
ExtractorError, ExtractorError,
int_or_none, int_or_none,
join_nonempty,
LazyList,
parse_qs,
str_or_none, str_or_none,
traverse_obj,
url_or_none,
urlencode_postdata, urlencode_postdata,
urljoin,
) )
class RoosterTeethIE(InfoExtractor): class RoosterTeethBaseIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_NETRC_MACHINE = 'roosterteeth' _NETRC_MACHINE = 'roosterteeth'
_API_BASE = 'https://svod-be.roosterteeth.com'
_API_BASE_URL = f'{_API_BASE}/api/v1'
def _login(self):
    """Log in with the configured credentials if not already logged in.

    Nothing is done when no username is configured, or when a cookie
    named ``rt_access_token`` is already stored for the API URL.
    An ExtractorError raised by the token request is not propagated;
    it is reported as a warning instead.
    """
    username, password = self._get_login_info()
    if username is None or self._get_cookies(self._API_BASE_URL).get('rt_access_token'):
        return

    credentials = {
        'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
        'grant_type': 'password',
        'username': username,
        'password': password,
    }
    try:
        self._download_json(
            'https://auth.roosterteeth.com/oauth/token',
            None, 'Logging in', data=urlencode_postdata(credentials))
    except ExtractorError as e:
        msg = 'Unable to login'
        cause = e.cause
        # Only a 401 response body is inspected for a server-side explanation
        if isinstance(cause, compat_HTTPError) and cause.code == 401:
            resp = self._parse_json(cause.read().decode(), None, fatal=False) or {}
            error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
            if error:
                msg = msg + ': ' + error
        self.report_warning(msg)
def _real_initialize(self):
    """Initialization hook: delegate to ``_login()``."""
    self._login()
def _extract_video_info(self, data):
    """Build the metadata part of an info dict from one API item.

    @param data  A dict describing a single episode/bonus feature. The keys
                 read here are 'id', 'uuid', 'attributes' and
                 'included' -> 'images'; all of them are optional.
    @returns     A dict of metadata fields (id, title, thumbnails, ...).
                 It contains no formats or subtitles; callers add those.
    """
    attributes = data.get('attributes') or {}
    title = traverse_obj(attributes, 'title', 'display_title')
    sub_only = attributes.get('is_sponsors_only')

    thumbnails = []
    # traverse_obj yields None when the item carries no images; fall back to
    # an empty list so such items do not raise TypeError on iteration
    for image in traverse_obj(data, ('included', 'images')) or []:
        if image.get('type') not in ('episode_image', 'bonus_feature_image'):
            continue
        # Every attribute holding a valid URL becomes a thumbnail,
        # identified by the attribute name
        thumbnails.extend(
            {'id': name, 'url': url}
            for name, url in (image.get('attributes') or {}).items()
            if url_or_none(url))

    return {
        # str_or_none keeps a missing id as None rather than the string 'None'
        'id': str_or_none(data.get('id')),
        'display_id': attributes.get('slug'),
        'title': title,
        'description': traverse_obj(attributes, 'description', 'caption'),
        'series': attributes.get('show_title'),
        'season_number': int_or_none(attributes.get('season_number')),
        'season_id': attributes.get('season_id'),
        'episode': title,
        'episode_number': int_or_none(attributes.get('number')),
        'episode_id': str_or_none(data.get('uuid')),
        'channel_id': attributes.get('channel_id'),
        'duration': int_or_none(attributes.get('length')),
        'thumbnails': thumbnails,
        # The single 'is_sponsors_only' flag drives all three access requirements
        'availability': self._availability(
            needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only,
            is_private=False, is_unlisted=False),
        'tags': attributes.get('genres')
    }
class RoosterTeethIE(RoosterTeethBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'md5': 'e2bd7764732d785ef797700a2489f212',
'info_dict': { 'info_dict': {
'id': '9156', 'id': '9156',
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
@ -30,19 +98,20 @@ class RoosterTeethIE(InfoExtractor):
'series': 'Million Dollars, But...', 'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement', 'episode': 'Million Dollars, But... The Game Announcement',
}, },
'skip_download': 'm3u8',
}, { }, {
'url': 'https://roosterteeth.com/watch/rwby-bonus-25', 'url': 'https://roosterteeth.com/watch/rwby-bonus-25',
'md5': 'fe8d9d976b272c18a24fe7f1f5830084',
'info_dict': { 'info_dict': {
'id': '31', 'id': '40432',
'display_id': 'rwby-bonus-25', 'display_id': 'rwby-bonus-25',
'title': 'Volume 2, World of Remnant 3', 'title': 'Grimm',
'description': 'md5:8d58d3270292ea11da00ea712bbfb009', 'description': 'md5:f30ff570741213418a8d2c19868b93ab',
'episode': 'Volume 2, World of Remnant 3', 'episode': 'Grimm',
'channel_id': 'fab60c1c-29cb-43bc-9383-5c3538d9e246', 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
'thumbnail': r're:^https?://.*\.(png|jpe?g)$', 'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
'ext': 'mp4', 'ext': 'mp4',
}, },
'skip_download': 'm3u8',
}, { }, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31', 'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
'only_matching': True, 'only_matching': True,
@ -63,40 +132,10 @@ class RoosterTeethIE(InfoExtractor):
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True, 'only_matching': True,
}] }]
_EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/watch/'
def _login(self):
    """Request an OAuth token using the configured username and password.

    Returns immediately when no username is configured. An ExtractorError
    from the request is turned into a warning rather than being raised.
    """
    username, password = self._get_login_info()
    if username is None:
        return

    form_fields = {
        'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
        'grant_type': 'password',
        'username': username,
        'password': password,
    }
    try:
        self._download_json(
            'https://auth.roosterteeth.com/oauth/token',
            None, 'Logging in', data=urlencode_postdata(form_fields))
    except ExtractorError as e:
        details = None
        # A 401 body may carry a reason; use the first non-empty field
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
            resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
            if resp:
                details = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
        msg = 'Unable to login'
        if details:
            msg += ': ' + details
        self.report_warning(msg)
def _real_initialize(self):
    """Log in unless an ``rt_access_token`` cookie already exists for the episode API URL."""
    has_token = self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token')
    if not has_token:
        self._login()
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) display_id = self._match_id(url)
api_episode_url = self._EPISODE_BASE_URL + display_id api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}'
try: try:
video_data = self._download_json( video_data = self._download_json(
@ -118,36 +157,55 @@ class RoosterTeethIE(InfoExtractor):
episode = self._download_json( episode = self._download_json(
api_episode_url, display_id, api_episode_url, display_id,
'Downloading episode JSON metadata')['data'][0] 'Downloading episode JSON metadata')['data'][0]
attributes = episode['attributes']
title = attributes.get('title') or attributes['display_title']
video_id = compat_str(episode['id'])
thumbnails = []
for image in episode.get('included', {}).get('images', []):
if image.get('type') in ('episode_image', 'bonus_feature_image'):
img_attributes = image.get('attributes') or {}
for k in ('thumb', 'small', 'medium', 'large'):
img_url = img_attributes.get(k)
if img_url:
thumbnails.append({
'id': k,
'url': img_url,
})
return { return {
'id': video_id,
'display_id': display_id, 'display_id': display_id,
'title': title,
'description': attributes.get('description') or attributes.get('caption'),
'thumbnails': thumbnails,
'series': attributes.get('show_title'),
'season_number': int_or_none(attributes.get('season_number')),
'season_id': attributes.get('season_id'),
'episode': title,
'episode_number': int_or_none(attributes.get('number')),
'episode_id': str_or_none(episode.get('uuid')),
'formats': formats, 'formats': formats,
'channel_id': attributes.get('channel_id'), 'subtitles': subtitles,
'duration': int_or_none(attributes.get('length')), **self._extract_video_info(episode)
'subtitles': subtitles
} }
class RoosterTeethSeriesIE(RoosterTeethBaseIE):
    """Playlist extractor for ``/series/<id>`` pages.

    An optional ``season`` query parameter restricts the playlist to the
    season with that number.
    """
    _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://roosterteeth.com/series/rwby?season=7',
        'playlist_count': 13,
        'info_dict': {
            'id': 'rwby-7',
            'title': 'RWBY - Season 7',
        }
    }, {
        'url': 'https://roosterteeth.com/series/role-initiative',
        'playlist_mincount': 16,
        'info_dict': {
            'id': 'role-initiative',
            'title': 'Role Initiative',
        }
    }]

    def _entries(self, series_id, season_number):
        """Yield one url_result per episode of the series.

        @param series_id      Series slug taken from the URL
        @param season_number  Season to keep, or a falsy value for all seasons
        """
        display_id = join_nonempty(series_id, season_number)
        # TODO: extract bonus material
        seasons = self._download_json(
            f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)['data']
        for data in seasons:
            idx = traverse_obj(data, ('attributes', 'number'))
            if season_number and idx != season_number:
                continue
            # The episodes link is joined onto the API host to get the full URL
            season_url = urljoin(self._API_BASE, data['links']['episodes'])
            season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data']
            for episode in season:
                yield self.url_result(
                    f'https://www.roosterteeth.com{episode["canonical_links"]["self"]}',
                    RoosterTeethIE.ie_key(),
                    **self._extract_video_info(episode))

    def _real_extract(self, url):
        """Return a playlist of the episodes of a series (or of one season)."""
        series_id = self._match_id(url)
        season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none)

        entries = LazyList(self._entries(series_id, season_number))
        # The series title comes from the first entry. The playlist can be
        # empty (e.g. the requested season does not exist); in that case fall
        # back to no title instead of failing with IndexError.
        try:
            series_title = entries[0].get('series')
        except IndexError:
            series_title = None
        return self.playlist_result(
            entries,
            join_nonempty(series_id, season_number),
            join_nonempty(series_title, season_number, delim=' - Season '))

Loading…
Cancel
Save