[roosterteeth] Add series extractor

pull/1572/head
pukkandan 3 years ago
parent 34921b4345
commit 244644c02c
No known key found for this signature in database
GPG Key ID: 0F00D95A001F4698

@ -1200,7 +1200,7 @@ from .rice import RICEIE
from .rmcdecouverte import RMCDecouverteIE
from .ro220 import Ro220IE
from .rockstargames import RockstarGamesIE
from .roosterteeth import RoosterTeethIE
from .roosterteeth import RoosterTeethIE, RoosterTeethSeriesIE
from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE
from .rozhlas import RozhlasIE

@ -1,25 +1,93 @@
# coding: utf-8
from __future__ import unicode_literals
from .common import InfoExtractor
from ..compat import (
compat_HTTPError,
compat_str,
)
from ..compat import compat_HTTPError
from ..utils import (
ExtractorError,
int_or_none,
join_nonempty,
LazyList,
parse_qs,
str_or_none,
traverse_obj,
url_or_none,
urlencode_postdata,
urljoin,
)
class RoosterTeethIE(InfoExtractor):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
class RoosterTeethBaseIE(InfoExtractor):
    """Shared login and metadata helpers for the Rooster Teeth extractors."""

    _NETRC_MACHINE = 'roosterteeth'
    _API_BASE = 'https://svod-be.roosterteeth.com'
    _API_BASE_URL = f'{_API_BASE}/api/v1'

    def _login(self):
        """Log in with the configured credentials, if there are any.

        Returns early when no username is available or when an
        ``rt_access_token`` cookie is already present for the API URL.
        A failed login is reported as a warning instead of being raised.
        """
        username, password = self._get_login_info()
        if username is None:
            return
        if self._get_cookies(self._API_BASE_URL).get('rt_access_token'):
            return

        try:
            self._download_json(
                'https://auth.roosterteeth.com/oauth/token',
                None, 'Logging in', data=urlencode_postdata({
                    'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
                    'grant_type': 'password',
                    'username': username,
                    'password': password,
                }))
        except ExtractorError as e:
            msg = 'Unable to login'
            # On HTTP 401, append whichever error text the response body carries
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
                if resp:
                    error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
                    if error:
                        msg += ': ' + error
            self.report_warning(msg)

    def _real_initialize(self):
        self._login()

    def _extract_video_info(self, data):
        """Build the common info-dict fields from one API item.

        @param data  Mapping for a single item; the code reads its ``id``,
                     ``uuid``, ``attributes`` and ``included.images`` keys.
        @returns     Dict of metadata fields (no formats or subtitles).
        """
        thumbnails = []
        # `or []`: the lookup yields None when the item has no images,
        # which must not abort extraction of the remaining metadata
        for image in traverse_obj(data, ('included', 'images')) or []:
            if image.get('type') not in ('episode_image', 'bonus_feature_image'):
                continue
            thumbnails.extend([{
                'id': name,
                'url': url,
            } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)])

        attributes = data.get('attributes') or {}
        title = traverse_obj(attributes, 'title', 'display_title')
        # The same flag drives all three access restrictions below
        sub_only = attributes.get('is_sponsors_only')

        return {
            'id': str(data.get('id')),
            'display_id': attributes.get('slug'),
            'title': title,
            'description': traverse_obj(attributes, 'description', 'caption'),
            'series': attributes.get('show_title'),
            'season_number': int_or_none(attributes.get('season_number')),
            'season_id': attributes.get('season_id'),
            'episode': title,
            'episode_number': int_or_none(attributes.get('number')),
            'episode_id': str_or_none(data.get('uuid')),
            'channel_id': attributes.get('channel_id'),
            'duration': int_or_none(attributes.get('length')),
            'thumbnails': thumbnails,
            'availability': self._availability(
                needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only,
                is_private=False, is_unlisted=False),
            'tags': attributes.get('genres')
        }
class RoosterTeethIE(RoosterTeethBaseIE):
_VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
_TESTS = [{
'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'md5': 'e2bd7764732d785ef797700a2489f212',
'info_dict': {
'id': '9156',
'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement',
@ -30,19 +98,20 @@ class RoosterTeethIE(InfoExtractor):
'series': 'Million Dollars, But...',
'episode': 'Million Dollars, But... The Game Announcement',
},
'skip_download': 'm3u8',
}, {
'url': 'https://roosterteeth.com/watch/rwby-bonus-25',
'md5': 'fe8d9d976b272c18a24fe7f1f5830084',
'info_dict': {
'id': '31',
'id': '40432',
'display_id': 'rwby-bonus-25',
'title': 'Volume 2, World of Remnant 3',
'description': 'md5:8d58d3270292ea11da00ea712bbfb009',
'episode': 'Volume 2, World of Remnant 3',
'channel_id': 'fab60c1c-29cb-43bc-9383-5c3538d9e246',
'title': 'Grimm',
'description': 'md5:f30ff570741213418a8d2c19868b93ab',
'episode': 'Grimm',
'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1',
'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
'ext': 'mp4',
},
'skip_download': 'm3u8',
}, {
'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
'only_matching': True,
@ -63,40 +132,10 @@ class RoosterTeethIE(InfoExtractor):
'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
'only_matching': True,
}]
_EPISODE_BASE_URL = 'https://svod-be.roosterteeth.com/api/v1/watch/'
def _login(self):
username, password = self._get_login_info()
if username is None:
return
try:
self._download_json(
'https://auth.roosterteeth.com/oauth/token',
None, 'Logging in', data=urlencode_postdata({
'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5',
'grant_type': 'password',
'username': username,
'password': password,
}))
except ExtractorError as e:
msg = 'Unable to login'
if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
resp = self._parse_json(e.cause.read().decode(), None, fatal=False)
if resp:
error = resp.get('extra_info') or resp.get('error_description') or resp.get('error')
if error:
msg += ': ' + error
self.report_warning(msg)
def _real_initialize(self):
if self._get_cookies(self._EPISODE_BASE_URL).get('rt_access_token'):
return
self._login()
def _real_extract(self, url):
display_id = self._match_id(url)
api_episode_url = self._EPISODE_BASE_URL + display_id
api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}'
try:
video_data = self._download_json(
@ -118,36 +157,55 @@ class RoosterTeethIE(InfoExtractor):
episode = self._download_json(
api_episode_url, display_id,
'Downloading episode JSON metadata')['data'][0]
attributes = episode['attributes']
title = attributes.get('title') or attributes['display_title']
video_id = compat_str(episode['id'])
thumbnails = []
for image in episode.get('included', {}).get('images', []):
if image.get('type') in ('episode_image', 'bonus_feature_image'):
img_attributes = image.get('attributes') or {}
for k in ('thumb', 'small', 'medium', 'large'):
img_url = img_attributes.get(k)
if img_url:
thumbnails.append({
'id': k,
'url': img_url,
})
return {
'id': video_id,
'display_id': display_id,
'title': title,
'description': attributes.get('description') or attributes.get('caption'),
'thumbnails': thumbnails,
'series': attributes.get('show_title'),
'season_number': int_or_none(attributes.get('season_number')),
'season_id': attributes.get('season_id'),
'episode': title,
'episode_number': int_or_none(attributes.get('number')),
'episode_id': str_or_none(episode.get('uuid')),
'formats': formats,
'channel_id': attributes.get('channel_id'),
'duration': int_or_none(attributes.get('length')),
'subtitles': subtitles
'subtitles': subtitles,
**self._extract_video_info(episode)
}
class RoosterTeethSeriesIE(RoosterTeethBaseIE):
    """Playlist extractor for ``/series/<id>`` pages, optionally limited by ``?season=N``."""

    _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://roosterteeth.com/series/rwby?season=7',
        'playlist_count': 13,
        'info_dict': {
            'id': 'rwby-7',
            'title': 'RWBY - Season 7',
        }
    }, {
        'url': 'https://roosterteeth.com/series/role-initiative',
        'playlist_mincount': 16,
        'info_dict': {
            'id': 'role-initiative',
            'title': 'Role Initiative',
        }
    }]

    def _entries(self, series_id, season_number):
        """Yield one url_result per episode of the series.

        @param series_id      Series slug taken from the URL.
        @param season_number  When truthy, only the season whose ``number``
                              attribute equals it is listed.
        """
        display_id = join_nonempty(series_id, season_number)
        # TODO: extract bonus material
        for data in self._download_json(
                f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)['data']:
            idx = traverse_obj(data, ('attributes', 'number'))
            if season_number and idx != season_number:
                continue
            season_url = urljoin(self._API_BASE, data['links']['episodes'])
            season = self._download_json(season_url, display_id, f'Downloading season {idx} JSON metadata')['data']
            for episode in season:
                yield self.url_result(
                    f'https://www.roosterteeth.com{episode["canonical_links"]["self"]}',
                    RoosterTeethIE.ie_key(),
                    **self._extract_video_info(episode))

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none)
        playlist_id = join_nonempty(series_id, season_number)

        entries = LazyList(self._entries(series_id, season_number))
        try:
            # The playlist title comes from the first episode's series name
            series_title = entries[0].get('series')
        except IndexError:
            # No episodes at all (e.g. the requested season does not exist):
            # return an empty, untitled playlist instead of failing on the lookup
            return self.playlist_result(entries, playlist_id)

        return self.playlist_result(
            entries,
            playlist_id,
            join_nonempty(series_title, season_number, delim=' - Season '))

Loading…
Cancel
Save