diff --git a/server/entertainment_decider/extractors/all/tvmaze.py b/server/entertainment_decider/extractors/all/tvmaze.py
new file mode 100644
index 0000000..3c6fb01
--- /dev/null
+++ b/server/entertainment_decider/extractors/all/tvmaze.py
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+from typing import Any, Dict, List, Literal, Optional, TypeVar, TypedDict, Union
+
+
+Weekdays = Union[
+    Literal["Monday"],
+    Literal["Tuesday"],
+    Literal["Wednesday"],
+    Literal["Thursday"],
+    Literal["Friday"],
+    Literal["Saturday"],
+    Literal["Sunday"],
+]
+
+
+class TvmazeCountry(TypedDict):
+    name: str
+    code: str
+    timezone: str
+
+class TvmazeEmbeddings(TypedDict, total=False):
+    # total=False: each embedding is optional
+    show: TvmazeShow
+    seasons: List[TvmazeSeason]
+    episodes: List[TvmazeEpisode]
+
+class TvmazeEpisode(TypedDict):
+    id: int
+    url: str
+    name: str
+    season: int
+    number: int
+    type: str
+    airdate: str
+    airtime: str
+    airstamp: str
+    runtime: int
+    rating: TvmazeRating
+    image: TvmazeImage
+    summary: str
+
+class TvmazeEpisodeEmbedded(TvmazeEpisode):
+    _embedded: TvmazeEmbeddings
+
+class TvmazeExternalIds(TypedDict):
+    tvrage: Optional[int]
+    thetvdb: Optional[int]
+    imdb: Optional[str]
+
+class TvmazeImage(TypedDict):
+    medium: Optional[str]
+    original: str
+
+def select_best_image(*image_list: Optional[TvmazeImage]) -> Optional[str]:
+    """Return the first non-empty image URI of the given images.
+
+    The images are checked in the given order and ``None`` entries are
+    skipped. For each image, "original" is preferred over "medium".
+    Returns ``None`` if no URI was found.
+    """
+    for image in image_list:
+        if image is not None:
+            found = image.get("original") or image.get("medium")
+            if found:
+                return found
+    return None
+
+class TvmazeNetwork(TypedDict):
+    id: int
+    name: str
+    country: TvmazeCountry
+    webChannel: Optional[Any]
+    dvdCountry: Optional[TvmazeCountry]
+
+class TvmazeRating(TypedDict):
+    average: int
+
+class TvmazeSchedule(TypedDict):
+    time: str
+    days: List[Weekdays]
+
+class TvmazeSeason(TypedDict):
+    id: int
+    url: str
+    number: int
+    name: str
+    episodeOrder: int
+    premiereDate: str
+    endDate: str
+    network: TvmazeNetwork
+    webChannel: Optional[Any]
+    image: TvmazeImage
+    summary: str
+
+class TvmazeShow(TypedDict):
+    id: int
+    url: str
+    name: str
+    type: str
+    language: str
+    genres: List[str]
+    status: str
+    runtime: int
+    averageRuntime: int
+    premiered: str
+    ended: str
+    officialSite: str
+    schedule: TvmazeSchedule
+    rating: TvmazeRating
+    weight: int
+    externals: TvmazeExternalIds
+    image: TvmazeImage
+    summary: str
+    updated: int
+
+class TvmazeShowEmbedded(TvmazeShow):
+    _embedded: TvmazeEmbeddings
+
+
+T = TypeVar("T", bound=Dict)
+def add_embedding(object: T, key: str, value: Any, parent_key: str = "_embedded") -> T:
+    """Store ``value`` at ``object[parent_key][key]`` and return ``object``.
+
+    ``object`` is modified in place, the mapping at ``parent_key`` is
+    created if missing.
+    """
+    object.setdefault(parent_key, {})[key] = value
+    return object
diff --git a/server/entertainment_decider/extractors/collection/__init__.py b/server/entertainment_decider/extractors/collection/__init__.py
index b07e0bd..b83adf1 100644
--- a/server/entertainment_decider/extractors/collection/__init__.py
+++ b/server/entertainment_decider/extractors/collection/__init__.py
@@ -8,6 +8,7 @@ from ...models import MediaCollection
 from ..helpers import expect_suitable_extractor
 from .base import CollectionExtractor
 from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
+from .tvmaze import TvmazeCollectionExtractor
 from .youtube import YouTubeCollectionExtractor
 
 
@@ -18,6 +19,7 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
         label_filter=-1033,
         mark_as_read=True,
     ),
+    "tvmaze": TvmazeCollectionExtractor(),
     "youtube": YouTubeCollectionExtractor(),
 }
 
diff --git a/server/entertainment_decider/extractors/collection/tvmaze.py b/server/entertainment_decider/extractors/collection/tvmaze.py
new file mode 100644
index 0000000..cc5e51b
--- /dev/null
+++ b/server/entertainment_decider/extractors/collection/tvmaze.py
@@ -0,0 +1,151 @@
+from __future__ import annotations
+
+from datetime import datetime
+import itertools
+import re
+from typing import List, Optional
+
+from pony import orm  # TODO remove
+import requests
+
+from ...models import MediaCollection, Tag
+from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
+from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
+from .base import CollectionExtractor
+
+
+class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
+    """Extracts TVmaze shows as collections of their episodes."""
+
+    # matches the web, API and custom ("tvmaze://") URIs of a show
+    SUPPORTED_PATTERN = re.compile(
+        r"""^
+        (
+            https?://((api|www)\.)?tvmaze\.com
+        |
+            tvmaze://
+        )/shows/
+        (?P<show_id>\d+)
+        (/.*)?
+        $""",
+        re.VERBOSE,
+    )
+
+    @classmethod
+    def __get_show_id(cls, uri: str) -> Optional[int]:
+        """Return the show id found in ``uri`` or ``None`` if it does not match."""
+        m = cls.SUPPORTED_PATTERN.search(uri)
+        return int(m.group("show_id")) if m else None
+
+    @classmethod
+    def __require_show_id(cls, uri: str) -> int:
+        """Return the show id found in ``uri``, raise if it does not match."""
+        show_id = cls.__get_show_id(uri)
+        if show_id is None:
+            raise Exception(
+                f"Expected uri to be extractable for TvmazeCollectionExtractor: {uri}"
+            )
+        return show_id
+
+    @classmethod
+    def __get_show_uri(cls, show_id: str | int) -> str:
+        return f"https://www.tvmaze.com/shows/{show_id}"
+
+    @classmethod
+    def __get_show_api_uri(cls, show_id: str | int) -> str:
+        return f"https://api.tvmaze.com/shows/{show_id}"
+
+    @classmethod
+    def __get_show_custom_uri(cls, show_id: str | int) -> str:
+        return f"tvmaze:///shows/{show_id}"
+
+    def __init__(self) -> None:
+        super().__init__("tvmaze")
+
+    def uri_suitable(self, uri: str) -> SuitableLevel:
+        show_id = self.__get_show_id(uri)
+        # test for None explicitly so an id of 0 is not mistaken for "no match"
+        return SuitableLevel.always_or_no(show_id is not None)
+
+    def can_extract_offline(self, uri: str) -> bool:
+        return True
+
+    def _cache_expired(self, object: MediaCollection) -> bool:
+        # wait time depends on the latest release date of the linked elements
+        last_release_date = orm.max(l.element.release_date for l in object.media_links)
+        return (datetime.now() - object.last_updated) > self._calculate_wait_hours(
+            last_release_date
+        )
+
+    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+        """Derive key and canonical web URI from ``uri`` alone (no request)."""
+        show_id = self.__require_show_id(uri)
+        return ExtractedDataLight(
+            extractor_name=self.name,
+            object_key=str(show_id),
+            object_uri=self.__get_show_uri(show_id),
+        )
+
+    def _extract_online(self, uri: str) -> ExtractedData[TvmazeShowEmbedded]:
+        """Fetch the show including its embedded episodes from the TVmaze API."""
+        show_id = self.__require_show_id(uri)
+        api_uri = self.__get_show_api_uri(show_id)
+        res = requests.get(
+            url=api_uri,
+            params={
+                "embed[]": [
+                    "episodes",
+                ]
+            },
+            timeout=30,  # seconds, requests would wait indefinitely otherwise
+        )
+        # raise on HTTP errors instead of processing an error response's body
+        res.raise_for_status()
+        data = res.json()
+        return ExtractedData(
+            extractor_name=self.name,
+            object_key=str(show_id),
+            object_uri=self.__get_show_uri(show_id),
+            data=data,
+        )
+
+    def _update_object_raw(
+        self,
+        object: MediaCollection,
+        data: TvmazeShowEmbedded,
+    ) -> None:
+        """Apply the show ``data`` to ``object`` and inject its episodes."""
+        object.title = f"[tvmaze] {data['name']}"
+        # "or" also covers a summary which is present but None
+        object.description = data.get("summary") or ""
+        object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d")
+        object.set_watch_in_order_auto(True)
+        object.add_uris(
+            (
+                self.__get_show_uri(data["id"]),
+                self.__get_show_api_uri(data["id"]),
+                self.__get_show_custom_uri(data["id"]),
+            )
+        )
+        for genre in itertools.chain(["Video", data["type"]], data["genres"]):
+            tag_list: List[Tag] = list(
+                orm.select(tag for tag in Tag if tag.title == genre)
+            )
+            # only add tags which can be identified unambiguously by their title
+            if len(tag_list) == 1:
+                object.tag_list.add(tag_list[0])
+        for episode in data["_embedded"]["episodes"]:
+            # episodes without airstamp are skipped
+            if episode["airstamp"] is not None:
+                add_embedding(episode, "show", data)
+                self._inject_episode(
+                    collection=object,
+                    data=ExtractedData[TvmazeEpisodeEmbedded](
+                        extractor_name="tvmaze",
+                        object_key=str(episode["id"]),
+                        object_uri=f"tvmaze:///episodes/{episode['id']}",
+                        data=episode,
+                    ),
+                    season=episode["season"],
+                    episode=episode["number"],
+                )
diff --git a/server/entertainment_decider/extractors/media/__init__.py b/server/entertainment_decider/extractors/media/__init__.py
index 6e25c45..83e45df 100644
--- a/server/entertainment_decider/extractors/media/__init__.py
+++ b/server/entertainment_decider/extractors/media/__init__.py
@@ -6,11 +6,13 @@ from typing import Dict, Tuple
 from ...models import MediaElement
 from ..helpers import expect_suitable_extractor
 from .base import MediaExtractor
+from .tvmaze import TvmazeMediaExtractor
 from .youtube import YoutubeMediaExtractor
 from .ytdl import YtdlMediaExtractor
 
 
 MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
+    "tvmaze": TvmazeMediaExtractor(),
     "youtube": YoutubeMediaExtractor(),
     "ytdl": YtdlMediaExtractor(),
 }
diff --git a/server/entertainment_decider/extractors/media/tvmaze.py b/server/entertainment_decider/extractors/media/tvmaze.py
new file mode 100644
index 0000000..01486db
--- /dev/null
+++ b/server/entertainment_decider/extractors/media/tvmaze.py
@@ -0,0 +1,140 @@
+from __future__ import annotations
+
+from datetime import datetime
+import re
+from typing import Optional
+
+import requests
+
+from ...models import MediaElement, MediaThumbnail
+from ..all.tvmaze import TvmazeEpisodeEmbedded, select_best_image
+from ..generic import ExtractedData, ExtractedDataLight, ExtractionError, SuitableLevel
+from .base import MediaExtractor
+
+
+class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
+    """Extracts TVmaze episodes as media elements."""
+
+    # matches the web, API and custom ("tvmaze://") URIs of an episode
+    SUPPORTED_PATTERN = re.compile(
+        r"""^
+        (
+            https?://((api|www)\.)?tvmaze\.com
+        |
+            tvmaze://
+        )/episodes/
+        (?P<episode_id>\d+)
+        (/.*)?
+        $""",
+        re.VERBOSE,
+    )
+
+    @classmethod
+    def __get_episode_id(cls, uri: str) -> Optional[int]:
+        """Return the episode id found in ``uri`` or ``None`` if it does not match."""
+        m = cls.SUPPORTED_PATTERN.search(uri)
+        return int(m.group("episode_id")) if m else None
+
+    @classmethod
+    def __require_episode_id(cls, uri: str) -> int:
+        """Return the episode id found in ``uri``, raise if it does not match."""
+        episode_id = cls.__get_episode_id(uri)
+        if episode_id is None:
+            raise Exception(f"Expected {uri!r} to be extractable")
+        return episode_id
+
+    @classmethod
+    def __get_episode_uri(cls, episode_id: str | int) -> str:
+        return f"https://www.tvmaze.com/episodes/{episode_id}"
+
+    @classmethod
+    def __get_episode_api_uri(cls, episode_id: str | int) -> str:
+        return f"https://api.tvmaze.com/episodes/{episode_id}"
+
+    @classmethod
+    def __get_episode_custom_uri(cls, episode_id: str | int) -> str:
+        return f"tvmaze:///episodes/{episode_id}"
+
+    def __init__(self) -> None:
+        super().__init__("tvmaze")
+
+    def uri_suitable(self, uri: str) -> SuitableLevel:
+        episode_id = self.__get_episode_id(uri)
+        # test for None explicitly so an id of 0 is not mistaken for "no match"
+        return SuitableLevel.always_or_no(episode_id is not None)
+
+    def can_extract_offline(self, uri: str) -> bool:
+        return True
+
+    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+        """Derive the object key from ``uri`` alone (no request)."""
+        # raises on unsupported URIs instead of returning the key "None"
+        episode_id = self.__require_episode_id(uri)
+        return ExtractedDataLight(
+            extractor_name=self.name,
+            object_key=str(episode_id),
+            object_uri=uri,
+        )
+
+    def _extract_online(self, uri: str) -> ExtractedData[TvmazeEpisodeEmbedded]:
+        """Fetch the episode including its embedded show from the TVmaze API."""
+        episode_id = self.__require_episode_id(uri)
+        api_uri = self.__get_episode_api_uri(episode_id)
+        res = requests.get(
+            url=api_uri,
+            params={
+                "embed[]": [
+                    "show",
+                ]
+            },
+            timeout=30,  # seconds, requests would wait indefinitely otherwise
+        )
+        # raise on HTTP errors instead of processing an error response's body
+        res.raise_for_status()
+        data = res.json()
+        return ExtractedData(
+            extractor_name=self.name,
+            object_key=str(episode_id),
+            object_uri=uri,
+            data=data,
+        )
+
+    def _update_object_raw(
+        self, object: MediaElement, data: TvmazeEpisodeEmbedded
+    ) -> None:
+        """Apply the episode ``data`` (with embedded show) to ``object``.
+
+        Raises ExtractionError if the episode has no airstamp.
+        """
+        # sanity check
+        airstamp = data.get("airstamp")
+        if airstamp is None:  # not released yet
+            raise ExtractionError(
+                f"Could not extract {object.uri!r} because of missing data probably due to not being released yet"
+            )
+        # extract data
+        show = data["_embedded"]["show"]
+        title = data.get("name")
+        if not title:
+            title = f"Season {data['season']} - Episode {data['number']}"
+        object.title = f"{title} - {show['name']}"
+        object.description = data.get("summary")
+        thumbnail_uri = select_best_image(data.get("image"), show.get("image"))
+        object.thumbnail = (
+            MediaThumbnail.from_uri(thumbnail_uri) if thumbnail_uri else None
+        )
+        object.release_date = datetime.strptime(airstamp, "%Y-%m-%dT%H:%M:%S%z")
+        # runtimes are multiplied by 60 as they are given in minutes (presumably)
+        object.length = (
+            data.get("runtime")
+            or show.get("runtime")
+            or show.get("averageRuntime")
+            or 0
+        ) * 60
+        object.add_uris(
+            (
+                self.__get_episode_uri(data["id"]),
+                self.__get_episode_api_uri(data["id"]),
+                self.__get_episode_custom_uri(data["id"]),
+            )
+        )