Add support for TVMaze extraction
parent
9ffb34b972
commit
1139219e23
@ -0,0 +1,119 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Literal, Optional, TypeVar, TypedDict, Union
|
||||
|
||||
|
||||
# Closed set of weekday names allowed in a schedule's ``days`` list
# (see ``TvmazeSchedule``). A single multi-value ``Literal`` is the
# equivalent, more compact spelling of a union of one-value literals.
Weekdays = Literal[
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
|
||||
|
||||
|
||||
class TvmazeCountry(TypedDict):
    """Country entry attached to a network (see ``TvmazeNetwork.country``)."""

    # Display name of the country.
    name: str
    # Short country code — presumably ISO 3166-1 alpha-2; TODO confirm.
    code: str
    # Timezone identifier — presumably an IANA name; TODO confirm.
    timezone: str
|
||||
|
||||
class TvmazeEmbeddings(TypedDict, total=False):
    """Contents of an ``_embedded`` mapping on a show or episode object.

    Declared with ``total=False``: every key is optional and is only present
    when the corresponding embedding was requested or injected.
    """

    # Parent show of an episode.
    show: TvmazeShow
    # Seasons of a show.
    seasons: List[TvmazeSeason]
    # Episodes of a show.
    episodes: List[TvmazeEpisode]
|
||||
|
||||
class TvmazeEpisode(TypedDict):
    """Episode object as consumed by the TVmaze extractors.

    ``airstamp``, ``runtime`` and ``image`` are declared ``Optional`` because
    the extractors already treat them as possibly null: episodes without an
    ``airstamp`` are skipped or rejected, a missing ``runtime`` falls back to
    the show's runtime, and a missing ``image`` falls back to the show's image.
    """

    id: int
    url: str
    name: str
    season: int
    number: int
    type: str
    airdate: str
    airtime: str
    # Full air timestamp; null for episodes that cannot be extracted yet.
    airstamp: Optional[str]
    # Episode length; may be null, in which case the show's runtime is used.
    runtime: Optional[int]
    rating: TvmazeRating
    # Episode artwork; may be null, in which case the show's image is used.
    image: Optional[TvmazeImage]
    summary: str
|
||||
|
||||
class TvmazeEpisodeEmbedded(TvmazeEpisode):
    """Episode object that additionally carries an ``_embedded`` mapping."""

    # Embedded related objects; the media extractor reads the "show" entry.
    _embedded: TvmazeEmbeddings
|
||||
|
||||
class TvmazeExternalIds(TypedDict):
    """Identifiers of a show in other databases (see ``TvmazeShow.externals``).

    Every key is always present but its value may be ``None``.
    """

    # Numeric TVRage id, if any.
    tvrage: Optional[int]
    # Numeric TheTVDB id, if any.
    thetvdb: Optional[int]
    # IMDb id string, if any.
    imdb: Optional[str]
|
||||
|
||||
class TvmazeImage(TypedDict):
    """Image URLs attached to a show, season or episode."""

    # URL of the medium-sized variant; may be None.
    medium: Optional[str]
    # URL of the original-sized variant; preferred by ``select_best_image``.
    original: str
|
||||
|
||||
def select_best_image(*image_list: Optional[TvmazeImage]) -> Optional[str]:
    """Return the first usable image URL among the given candidates.

    Candidates are tried in argument order, so callers pass the most specific
    image first (e.g. episode image, then show image). Within a candidate the
    ``"original"`` URL is preferred over ``"medium"``.

    Args:
        *image_list: Image mappings to inspect. ``None`` entries are allowed
            and skipped, which lets callers pass ``data.get("image")`` directly.

    Returns:
        The first non-empty URL found, or ``None`` if no candidate has one.
    """
    for image in image_list:
        if image is None:
            continue
        found = image.get("original") or image.get("medium")
        if found:
            return found
    return None
|
||||
|
||||
class TvmazeNetwork(TypedDict):
    """Network a season is attached to (see ``TvmazeSeason.network``)."""

    id: int
    name: str
    country: TvmazeCountry
    # Untyped payload (``Any``); may be None.
    # NOTE(review): confirm this key really appears on network objects.
    webChannel: Optional[Any]
    # May be None.
    # NOTE(review): confirm this key really appears on network objects.
    dvdCountry: Optional[TvmazeCountry]
|
||||
|
||||
class TvmazeRating(TypedDict):
    """Rating block attached to shows and episodes."""

    # Average user rating: a decimal value (e.g. 6.5), or null when the item
    # has not been rated yet — hence not a plain ``int``.
    average: Optional[float]
|
||||
|
||||
class TvmazeSchedule(TypedDict):
    """Airing schedule of a show (see ``TvmazeShow.schedule``)."""

    # Time of day as a string — presumably "HH:MM"; TODO confirm.
    time: str
    # Weekdays on which the show airs, restricted to the ``Weekdays`` names.
    days: List[Weekdays]
|
||||
|
||||
class TvmazeSeason(TypedDict):
    """Season object (see ``TvmazeEmbeddings.seasons``).

    NOTE(review): several of these values may be null for unannounced or
    incomplete seasons — verify against real responses before relying on the
    non-Optional annotations.
    """

    id: int
    url: str
    # Season number within the show.
    number: int
    name: str
    # Presumably the number of episodes planned for the season; TODO confirm.
    episodeOrder: int
    # Dates as strings — presumably "YYYY-MM-DD"; TODO confirm.
    premiereDate: str
    endDate: str
    network: TvmazeNetwork
    # Untyped payload (``Any``); may be None.
    webChannel: Optional[Any]
    image: TvmazeImage
    summary: str
|
||||
|
||||
class TvmazeShow(TypedDict):
    """Show object as consumed by the TVmaze extractors.

    ``runtime``, ``averageRuntime`` and ``image`` are declared ``Optional``
    because the media extractor already treats them as possibly null (it chains
    runtime fallbacks and passes the image to ``select_best_image``).
    ``ended`` is ``Optional`` because it is null while a show is still running.
    """

    id: int
    url: str
    name: str
    type: str
    language: str
    genres: List[str]
    status: str
    # Nominal episode length; may be null.
    runtime: Optional[int]
    # Average episode length; used as a fallback for ``runtime``, may be null.
    averageRuntime: Optional[int]
    # Premiere date, parsed by the collection extractor as "%Y-%m-%d".
    premiered: str
    # End date; null for shows that have not ended.
    ended: Optional[str]
    officialSite: str
    schedule: TvmazeSchedule
    rating: TvmazeRating
    weight: int
    externals: TvmazeExternalIds
    # Show artwork; may be null.
    image: Optional[TvmazeImage]
    summary: str
    updated: int
|
||||
|
||||
class TvmazeShowEmbedded(TvmazeShow):
    """Show object that additionally carries an ``_embedded`` mapping."""

    # Embedded related objects; the collection extractor reads "episodes".
    _embedded: TvmazeEmbeddings
|
||||
|
||||
|
||||
# Any dict-like payload; the same object that is passed in is handed back.
T = TypeVar("T", bound=Dict)


def add_embedding(object: T, key: str, value: Any, parent_key: str = "_embedded") -> T:
    """Store ``value`` under ``object[parent_key][key]`` and return ``object``.

    The nested mapping at ``parent_key`` is created when it does not exist yet.
    ``object`` is modified in place; it is returned only for convenience.

    Args:
        object: Mapping to attach the embedding to.
        key: Name of the embedding inside the nested mapping.
        value: Payload to store.
        parent_key: Key of the nested mapping that holds all embeddings.

    Returns:
        The same ``object`` instance that was passed in.
    """
    embeddings = object.setdefault(parent_key, {})
    embeddings[key] = value
    return object
|
@ -0,0 +1,136 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
import itertools
|
||||
import re
|
||||
from typing import List, Optional
|
||||
|
||||
from pony import orm # TODO remove
|
||||
import requests
|
||||
|
||||
from ...models import MediaCollection, Tag
|
||||
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
|
||||
from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
|
||||
from .base import CollectionExtractor
|
||||
|
||||
|
||||
class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
    """Collection extractor that turns a TVmaze show into a ``MediaCollection``.

    Accepts web (``tvmaze.com``), API (``api.tvmaze.com``) and custom
    ``tvmaze://`` show URIs, fetches the show with its episodes embedded and
    injects every episode that has an air timestamp into the collection.
    """

    # Matches any supported show URI and captures the numeric id as "show_id".
    SUPPORTED_PATTERN = re.compile(
        r"""^
        (
            https?://((api|www)\.)?tvmaze\.com
        |
            tvmaze://
        )/shows/
        (?P<show_id>\d+)
        (/.*)?
        $""",
        re.VERBOSE,
    )

    @classmethod
    def __get_show_id(cls, uri: str) -> Optional[int]:
        """Return the show id encoded in ``uri``, or ``None`` if unsupported."""
        m = cls.SUPPORTED_PATTERN.search(uri)
        return int(m.group("show_id")) if m else None

    @classmethod
    def __require_show_id(cls, uri: str) -> int:
        """Return the show id encoded in ``uri``.

        Raises:
            Exception: If ``uri`` does not match ``SUPPORTED_PATTERN``.
        """
        show_id = cls.__get_show_id(uri)
        if show_id is None:
            raise Exception(
                f"Expected uri to be extractable for TvmazeCollectionExtractor: {uri}"
            )
        return show_id

    @classmethod
    def __get_show_uri(cls, show_id: str | int) -> str:
        """Build the public web URI of a show."""
        return f"https://www.tvmaze.com/shows/{show_id}"

    @classmethod
    def __get_show_api_uri(cls, show_id: str | int) -> str:
        """Build the API URI of a show."""
        return f"https://api.tvmaze.com/shows/{show_id}"

    @classmethod
    def __get_show_custom_uri(cls, show_id: str | int) -> str:
        """Build the custom ``tvmaze://`` URI of a show."""
        return f"tvmaze:///shows/{show_id}"

    def __init__(self) -> None:
        super().__init__("tvmaze")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether ``uri`` is a show URI this extractor understands."""
        show_id = self.__get_show_id(uri)
        # Compare against None (not truthiness) so the answer always agrees
        # with __require_show_id, which accepts every id the pattern matches.
        return SuitableLevel.always_or_no(show_id is not None)

    def can_extract_offline(self, uri: str) -> bool:
        """Always true: key and canonical URI are derived from ``uri`` alone."""
        return True

    def _cache_expired(self, object: MediaCollection) -> bool:
        """Decide whether the stored collection should be refreshed.

        Compares the time since ``object.last_updated`` with the wait period
        that ``_calculate_wait_hours`` (defined outside this class) derives
        from the newest release date among the linked elements.
        """
        last_release_date = orm.max(
            link.element.release_date for link in object.media_links
        )
        return (datetime.now() - object.last_updated) > self._calculate_wait_hours(
            last_release_date
        )

    def _extract_offline(self, uri: str) -> ExtractedDataLight:
        """Derive key and canonical web URI from ``uri`` without network access.

        Raises:
            Exception: If ``uri`` is not a supported show URI.
        """
        show_id = self.__require_show_id(uri)
        return ExtractedDataLight(
            extractor_name=self.name,
            object_key=str(show_id),
            object_uri=self.__get_show_uri(show_id),
        )

    def _extract_online(self, uri: str) -> ExtractedData[TvmazeShowEmbedded]:
        """Fetch the show, with its episodes embedded, from the TVmaze API.

        Raises:
            Exception: If ``uri`` is not a supported show URI.
            requests.RequestException: If the request times out, fails or the
                API answers with an HTTP error status.
        """
        show_id = self.__require_show_id(uri)
        api_uri = self.__get_show_api_uri(show_id)
        res = requests.get(
            url=api_uri,
            params={
                "embed[]": [
                    "episodes",
                ]
            },
            # Without a timeout requests may block forever on a stalled server.
            timeout=30,
        )
        # Fail here instead of handing an API error payload (e.g. a 404 body)
        # on as if it were show data.
        res.raise_for_status()
        data = res.json()
        return ExtractedData(
            extractor_name=self.name,
            object_key=str(show_id),
            object_uri=self.__get_show_uri(show_id),
            data=data,
        )

    def _update_object_raw(
        self,
        object: MediaCollection,
        data: TvmazeShowEmbedded,
    ) -> None:
        """Copy show metadata onto ``object`` and inject its aired episodes.

        Args:
            object: Collection to update.
            data: Show payload including ``_embedded["episodes"]``.
        """
        object.title = f"[tvmaze] {data['name']}"
        # A key that is present with a null value bypasses .get()'s default,
        # so fall back explicitly to keep the description a string.
        object.description = data.get("summary") or ""
        # NOTE(review): this raises if "premiered" is null — confirm whether
        # shows without a premiere date can reach this point.
        object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d")
        object.set_watch_in_order_auto(True)
        object.add_uris(
            (
                self.__get_show_uri(data["id"]),
                self.__get_show_api_uri(data["id"]),
                self.__get_show_custom_uri(data["id"]),
            )
        )
        # Attach existing tags whose title equals "Video", the show type or one
        # of its genres; titles with zero or several matching tags are skipped.
        for genre in itertools.chain(["Video", data["type"]], data["genres"]):
            tag_list: List[Tag] = list(
                orm.select(tag for tag in Tag if tag.title == genre)
            )
            if len(tag_list) == 1:
                object.tag_list.add(tag_list[0])
        for episode in data["_embedded"]["episodes"]:
            if episode["airstamp"] is None:
                # The media extractor rejects episodes without an air timestamp.
                continue
            # Give the episode the same shape the media extractor receives when
            # it asks the API to embed the show.
            # NOTE(review): this makes show and episode reference each other;
            # verify the payload is never serialised in a way that cannot
            # handle reference cycles.
            add_embedding(episode, "show", data)
            self._inject_episode(
                collection=object,
                data=ExtractedData[TvmazeEpisodeEmbedded](
                    extractor_name="tvmaze",
                    object_key=str(episode["id"]),
                    object_uri=f"tvmaze:///episodes/{episode['id']}",
                    data=episode,
                ),
                season=episode["season"],
                episode=episode["number"],
            )
|
@ -0,0 +1,119 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
from ...models import MediaElement, MediaThumbnail
|
||||
from ..all.tvmaze import TvmazeEpisodeEmbedded, select_best_image
|
||||
from ..generic import ExtractedData, ExtractedDataLight, ExtractionError, SuitableLevel
|
||||
from .base import MediaExtractor
|
||||
|
||||
|
||||
class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
    """Media extractor that turns a TVmaze episode into a ``MediaElement``.

    Accepts web (``tvmaze.com``), API (``api.tvmaze.com``) and custom
    ``tvmaze://`` episode URIs and fetches the episode with its show embedded.
    """

    # Matches any supported episode URI and captures the id as "episode_id".
    SUPPORTED_PATTERN = re.compile(
        r"""^
        (
            https?://((api|www)\.)?tvmaze\.com
        |
            tvmaze://
        )/episodes/
        (?P<episode_id>\d+)
        (/.*)?
        $""",
        re.VERBOSE,
    )

    @classmethod
    def __get_episode_id(cls, uri: str) -> Optional[int]:
        """Return the episode id encoded in ``uri``, or ``None`` if unsupported."""
        m = cls.SUPPORTED_PATTERN.search(uri)
        return int(m.group("episode_id")) if m else None

    @classmethod
    def __require_episode_id(cls, uri: str) -> int:
        """Return the episode id encoded in ``uri``.

        Raises:
            Exception: If ``uri`` does not match ``SUPPORTED_PATTERN``.
        """
        episode_id = cls.__get_episode_id(uri)
        if episode_id is None:
            raise Exception(f"Expected {uri!r} to be extractable")
        return episode_id

    @classmethod
    def __get_episode_uri(cls, episode_id: str | int) -> str:
        """Build the public web URI of an episode."""
        return f"https://www.tvmaze.com/episodes/{episode_id}"

    @classmethod
    def __get_episode_api_uri(cls, episode_id: str | int) -> str:
        """Build the API URI of an episode."""
        return f"https://api.tvmaze.com/episodes/{episode_id}"

    @classmethod
    def __get_episode_custom_uri(cls, episode_id: str | int) -> str:
        """Build the custom ``tvmaze://`` URI of an episode."""
        return f"tvmaze:///episodes/{episode_id}"

    def __init__(self) -> None:
        super().__init__("tvmaze")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether ``uri`` is an episode URI this extractor understands."""
        episode_id = self.__get_episode_id(uri)
        # Compare against None (not truthiness) so the answer always agrees
        # with __require_episode_id, which accepts every id the pattern matches.
        return SuitableLevel.always_or_no(episode_id is not None)

    def can_extract_offline(self, uri: str) -> bool:
        """Always true: the object key is derived from ``uri`` alone."""
        return True

    def _extract_offline(self, uri: str) -> ExtractedDataLight:
        """Derive the object key from ``uri`` without network access.

        Raises:
            Exception: If ``uri`` is not a supported episode URI. Without this
                check an unsupported URI would produce the key ``"None"``.
        """
        episode_id = self.__require_episode_id(uri)
        return ExtractedDataLight(
            extractor_name=self.name,
            object_key=str(episode_id),
            object_uri=uri,
        )

    def _extract_online(self, uri: str) -> ExtractedData[TvmazeEpisodeEmbedded]:
        """Fetch the episode, with its show embedded, from the TVmaze API.

        Raises:
            Exception: If ``uri`` is not a supported episode URI.
            requests.RequestException: If the request times out, fails or the
                API answers with an HTTP error status.
        """
        episode_id = self.__require_episode_id(uri)
        api_uri = self.__get_episode_api_uri(episode_id)
        res = requests.get(
            url=api_uri,
            params={
                "embed[]": [
                    "show",
                ]
            },
            # Without a timeout requests may block forever on a stalled server.
            timeout=30,
        )
        # Fail here instead of handing an API error payload (e.g. a 404 body)
        # on as if it were episode data.
        res.raise_for_status()
        data = res.json()
        return ExtractedData(
            extractor_name=self.name,
            object_key=str(episode_id),
            object_uri=uri,
            data=data,
        )

    def _update_object_raw(
        self, object: MediaElement, data: TvmazeEpisodeEmbedded
    ) -> None:
        """Copy episode metadata onto ``object``.

        Args:
            object: Media element to update.
            data: Episode payload including ``_embedded["show"]``.

        Raises:
            ExtractionError: If the episode has no ``airstamp``.
        """
        # sanity check
        airstamp = data.get("airstamp")
        if airstamp is None:  # not released yet
            raise ExtractionError(
                f"Could not extract {object.uri!r} because of missing data probably due to not being released yet"
            )
        # extract data
        show = data["_embedded"]["show"]
        title = data.get("name")
        if not title:
            # Unnamed episodes get a title built from their position.
            title = f"Season {data['season']} - Episode {data['number']}"
        object.title = f"{title} - {show['name']}"
        object.description = data.get("summary")
        # Prefer the episode's own artwork and fall back to the show's.
        thumbnail_uri = select_best_image(data.get("image"), show.get("image"))
        object.thumbnail = (
            MediaThumbnail.from_uri(thumbnail_uri) if thumbnail_uri else None
        )
        # %z keeps the UTC offset, so the resulting datetime is timezone-aware.
        object.release_date = datetime.strptime(airstamp, "%Y-%m-%dT%H:%M:%S%z")
        # First known runtime wins: episode, then show, then show average.
        # Presumably minutes converted to seconds — TODO confirm the unit.
        object.length = (
            data.get("runtime")
            or show.get("runtime")
            or show.get("averageRuntime")
            or 0
        ) * 60
        object.add_uris(
            (
                self.__get_episode_uri(data["id"]),
                self.__get_episode_api_uri(data["id"]),
                self.__get_episode_custom_uri(data["id"]),
            )
        )
|
Loading…
Reference in New Issue