Add support for TVMaze extraction
parent
9ffb34b972
commit
1139219e23
@ -0,0 +1,119 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from typing import Any, Dict, List, Literal, Optional, TypeVar, TypedDict, Union
|
||||||
|
|
||||||
|
|
||||||
|
# Day names as they are listed in a schedule's ``days`` field (see
# ``TvmazeSchedule``). A single multi-valued ``Literal`` is equivalent to a
# ``Union`` of one-valued literals and is the canonical spelling.
Weekdays = Literal[
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
|
||||||
|
|
||||||
|
|
||||||
|
class TvmazeCountry(TypedDict):
    """Country record as referenced from TVmaze network data.

    All three keys are required and hold plain strings.
    """

    name: str
    code: str
    timezone: str
|
||||||
|
|
||||||
|
class TvmazeEmbeddings(TypedDict, total=False):
    """Content of an ``_embedded`` mapping on a TVmaze object.

    Declared with ``total=False``: every key is optional, so only the
    embeddings that were actually requested or injected need to be present.
    """

    show: TvmazeShow
    seasons: List[TvmazeSeason]
    episodes: List[TvmazeEpisode]
|
||||||
|
|
||||||
|
class TvmazeEpisode(TypedDict):
    """Episode record as delivered by the TVmaze API.

    Fields that the extractors treat as possibly missing (they test for
    ``None`` or fall back to show-level values) are declared ``Optional`` so
    that type checkers agree with that handling.
    """

    id: int
    url: str
    name: str
    season: int
    number: int
    type: str
    airdate: str
    airtime: str
    # None while the episode has no known air timestamp (not released yet).
    airstamp: Optional[str]
    # Minutes; consumers fall back to the show's runtime when this is unset.
    runtime: Optional[int]
    rating: TvmazeRating
    # Consumers fall back to the show's image when this is unset.
    image: Optional[TvmazeImage]
    summary: Optional[str]
|
||||||
|
|
||||||
|
class TvmazeEpisodeEmbedded(TvmazeEpisode):
    """A ``TvmazeEpisode`` that additionally carries an ``_embedded`` mapping."""

    _embedded: TvmazeEmbeddings
|
||||||
|
|
||||||
|
class TvmazeExternalIds(TypedDict):
    """Identifiers of a show in other databases.

    Every key is required, but each value may be ``None`` when no identifier
    is known for that database.
    """

    tvrage: Optional[int]
    thetvdb: Optional[int]
    imdb: Optional[str]
|
||||||
|
|
||||||
|
class TvmazeImage(TypedDict):
    """Image URLs attached to a TVmaze object.

    ``select_best_image`` prefers ``original`` and falls back to ``medium``.
    """

    medium: Optional[str]
    original: str
|
||||||
|
|
||||||
|
def select_best_image(*image_list: Optional[TvmazeImage]) -> Optional[str]:
    """Return the first usable image URL from the given image records.

    The records are examined in the order given, so callers list the most
    specific image first. ``None`` entries are skipped, which lets callers
    pass the result of ``dict.get("image")`` directly.

    Args:
        *image_list: Image records, each possibly ``None``.

    Returns:
        The ``original`` URL of the first record that has one, otherwise that
        record's ``medium`` URL; ``None`` when no record offers a non-empty
        URL.
    """
    for image in image_list:
        if image is None:
            continue
        # Prefer the full-size image, fall back to the medium-sized one.
        found = image.get("original") or image.get("medium")
        if found:
            return found
    return None
|
||||||
|
|
||||||
|
class TvmazeNetwork(TypedDict):
    """Network record as referenced from TVmaze season data."""

    id: int
    name: str
    country: TvmazeCountry
    # NOTE(review): the two keys below look like show-level fields of the
    # TVmaze API rather than network fields -- confirm against real payloads.
    webChannel: Optional[Any]
    dvdCountry: Optional[TvmazeCountry]
|
||||||
|
|
||||||
|
class TvmazeRating(TypedDict):
    """Rating block attached to TVmaze shows and episodes."""

    # TVmaze reports the average as a decimal number (for example 6.5) and as
    # null when there is no rating yet, so ``int`` was too narrow.
    average: Optional[float]
|
||||||
|
|
||||||
|
class TvmazeSchedule(TypedDict):
    """Schedule of a show: a time string plus a list of weekday names."""

    time: str
    days: List[Weekdays]
|
||||||
|
|
||||||
|
class TvmazeSeason(TypedDict):
    """Season record of a TVmaze show.

    Used as the element type of ``TvmazeEmbeddings["seasons"]``.
    """

    id: int
    url: str
    number: int
    name: str
    episodeOrder: int
    premiereDate: str
    endDate: str
    network: TvmazeNetwork
    webChannel: Optional[Any]
    image: TvmazeImage
    summary: str
|
||||||
|
|
||||||
|
class TvmazeShow(TypedDict):
    """Show record as delivered by the TVmaze API.

    Fields for which the API may send ``null`` and which consumers already
    read defensively (runtime fallbacks, image selection, summary) are
    declared ``Optional``.
    """

    id: int
    url: str
    name: str
    type: str
    language: str
    genres: List[str]
    status: str
    # Minutes; consumers fall back to ``averageRuntime`` when this is unset.
    runtime: Optional[int]
    averageRuntime: Optional[int]
    # ISO date string ("%Y-%m-%d"), parsed by the collection extractor.
    premiered: str
    # Unset while the show is still running.
    ended: Optional[str]
    officialSite: Optional[str]
    schedule: TvmazeSchedule
    rating: TvmazeRating
    weight: int
    externals: TvmazeExternalIds
    image: Optional[TvmazeImage]
    summary: Optional[str]
    updated: int
|
||||||
|
|
||||||
|
class TvmazeShowEmbedded(TvmazeShow):
    """A ``TvmazeShow`` that additionally carries an ``_embedded`` mapping."""

    _embedded: TvmazeEmbeddings
|
||||||
|
|
||||||
|
|
||||||
|
T = TypeVar("T", bound=Dict)


def add_embedding(object: T, key: str, value: Any, parent_key: str = "_embedded") -> T:
    """Store ``value`` under ``object[parent_key][key]`` and return ``object``.

    The mapping at ``parent_key`` is created when it does not exist yet.
    ``object`` is modified in place; it is returned only for convenience.

    Args:
        object: Dictionary that receives the embedding.
        key: Name of the embedding inside the parent mapping.
        value: Data to embed.
        parent_key: Key of the mapping that holds all embeddings.

    Returns:
        The same ``object`` that was passed in.
    """
    embeddings = object.setdefault(parent_key, {})
    embeddings[key] = value
    return object
|
@ -0,0 +1,136 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import itertools
|
||||||
|
import re
|
||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pony import orm # TODO remove
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ...models import MediaCollection, Tag
|
||||||
|
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
|
||||||
|
from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
|
||||||
|
from .base import CollectionExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
    """Collection extractor that maps a TVmaze show onto a media collection.

    Website, API and custom ``tvmaze:///shows/<id>`` URIs are recognised. The
    online extraction fetches the show with its episodes embedded, and every
    episode that already has an air timestamp is injected into the
    collection.
    """

    # Matches http(s)://[api.|www.]tvmaze.com/shows/<id>[/...] as well as the
    # custom tvmaze:///shows/<id>[/...] form; the numeric id is captured in
    # the group "show_id".
    SUPPORTED_PATTERN = re.compile(
        r"""^
        (
            https?://((api|www)\.)?tvmaze\.com
        |
            tvmaze://
        )/shows/
        (?P<show_id>\d+)
        (/.*)?
        $""",
        re.VERBOSE,
    )

    @classmethod
    def __get_show_id(cls, uri: str) -> Optional[int]:
        """Return the show id contained in ``uri`` or None if it does not match."""
        m = cls.SUPPORTED_PATTERN.search(uri)
        return int(m.group("show_id")) if m else None

    @classmethod
    def __require_show_id(cls, uri: str) -> int:
        """Return the show id contained in ``uri``.

        Raises:
            Exception: If ``uri`` is not a supported show URI.
        """
        show_id = cls.__get_show_id(uri)
        if show_id is None:
            raise Exception(
                f"Expected uri to be extractable for TvmazeCollectionExtractor: {uri}"
            )
        return show_id

    @classmethod
    def __get_show_uri(cls, show_id: str | int) -> str:
        """Build the website URI of a show."""
        return f"https://www.tvmaze.com/shows/{show_id}"

    @classmethod
    def __get_show_api_uri(cls, show_id: str | int) -> str:
        """Build the API URI of a show."""
        return f"https://api.tvmaze.com/shows/{show_id}"

    @classmethod
    def __get_show_custom_uri(cls, show_id: str | int) -> str:
        """Build the custom-scheme URI of a show."""
        return f"tvmaze:///shows/{show_id}"

    def __init__(self) -> None:
        """Register this extractor under the name ``tvmaze``."""
        super().__init__("tvmaze")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether ``uri`` is a show URI this extractor understands."""
        # Compare against None instead of relying on truthiness so that a
        # syntactically valid id of 0 is not mistaken for "no match".
        return SuitableLevel.always_or_no(self.__get_show_id(uri) is not None)

    def can_extract_offline(self, uri: str) -> bool:
        """Always True: key and canonical URI are derived from ``uri`` itself."""
        return True

    def _cache_expired(self, object: MediaCollection) -> bool:
        """Tell whether the cached data of ``object`` should be refreshed.

        The time since ``object.last_updated`` is compared with the value
        ``self._calculate_wait_hours`` returns for the latest release date
        among the linked elements.
        """
        last_release_date = orm.max(l.element.release_date for l in object.media_links)
        return (datetime.now() - object.last_updated) > self._calculate_wait_hours(
            last_release_date
        )

    def _extract_offline(self, uri: str) -> ExtractedDataLight:
        """Derive key and canonical website URI from ``uri`` without network access.

        Raises:
            Exception: If ``uri`` is not a supported show URI.
        """
        show_id = self.__require_show_id(uri)
        return ExtractedDataLight(
            extractor_name=self.name,
            object_key=str(show_id),
            object_uri=self.__get_show_uri(show_id),
        )

    def _extract_online(self, uri: str) -> ExtractedData[TvmazeShowEmbedded]:
        """Fetch the show, with its episodes embedded, from the TVmaze API.

        Raises:
            Exception: If ``uri`` is not a supported show URI.
            requests.HTTPError: If the API answers with an error status.
        """
        show_id = self.__require_show_id(uri)
        api_uri = self.__get_show_api_uri(show_id)
        res = requests.get(
            url=api_uri,
            params={
                "embed[]": [
                    "episodes",
                ]
            },
        )
        # Fail here instead of handing an error payload (for example a 404
        # body) to the update step, where it would surface as a KeyError.
        res.raise_for_status()
        data = res.json()
        return ExtractedData(
            extractor_name=self.name,
            object_key=str(show_id),
            object_uri=self.__get_show_uri(show_id),
            data=data,
        )

    def _update_object_raw(
        self,
        object: MediaCollection,
        data: TvmazeShowEmbedded,
    ) -> None:
        """Copy show metadata onto ``object`` and inject the show's episodes.

        Args:
            object: Collection to update.
            data: Show record including ``_embedded["episodes"]``.
        """
        object.title = f"[tvmaze] {data['name']}"
        # The API may send an explicit null summary, which a plain
        # ``.get(key, "")`` would pass through as None.
        object.description = data.get("summary") or ""
        object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d")
        object.set_watch_in_order_auto(True)
        object.add_uris(
            (
                self.__get_show_uri(data["id"]),
                self.__get_show_api_uri(data["id"]),
                self.__get_show_custom_uri(data["id"]),
            )
        )
        # Attach only tags that already exist with exactly this title.
        for genre in itertools.chain(["Video", data["type"]], data["genres"]):
            tag_list: List[Tag] = list(
                orm.select(tag for tag in Tag if tag.title == genre)
            )
            if len(tag_list) == 1:
                object.tag_list.add(tag_list[0])
        # Embed the show without its own embeddings: embedding ``data`` itself
        # would create a reference cycle (show -> episodes -> show), which for
        # instance JSON serialisation rejects.
        show_without_embedded = {
            key: value for key, value in data.items() if key != "_embedded"
        }
        for episode in data["_embedded"]["episodes"]:
            # Episodes without an air timestamp are skipped.
            if episode["airstamp"] is not None:
                add_embedding(episode, "show", show_without_embedded)
                self._inject_episode(
                    collection=object,
                    data=ExtractedData[TvmazeEpisodeEmbedded](
                        extractor_name="tvmaze",
                        object_key=str(episode["id"]),
                        object_uri=f"tvmaze:///episodes/{episode['id']}",
                        data=episode,
                    ),
                    season=episode["season"],
                    episode=episode["number"],
                )
|
@ -0,0 +1,119 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import re
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from ...models import MediaElement, MediaThumbnail
|
||||||
|
from ..all.tvmaze import TvmazeEpisodeEmbedded, select_best_image
|
||||||
|
from ..generic import ExtractedData, ExtractedDataLight, ExtractionError, SuitableLevel
|
||||||
|
from .base import MediaExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
    """Media extractor that maps a TVmaze episode onto a media element.

    Website, API and custom ``tvmaze:///episodes/<id>`` URIs are recognised.
    """

    # Matches http(s)://[api.|www.]tvmaze.com/episodes/<id>[/...] as well as
    # the custom tvmaze:///episodes/<id>[/...] form; the numeric id is
    # captured in the group "episode_id".
    SUPPORTED_PATTERN = re.compile(
        r"""^
        (
            https?://((api|www)\.)?tvmaze\.com
        |
            tvmaze://
        )/episodes/
        (?P<episode_id>\d+)
        (/.*)?
        $""",
        re.VERBOSE,
    )

    @classmethod
    def __get_episode_id(cls, uri: str) -> Optional[int]:
        """Return the episode id contained in ``uri`` or None if it does not match."""
        m = cls.SUPPORTED_PATTERN.search(uri)
        return int(m.group("episode_id")) if m else None

    @classmethod
    def __require_episode_id(cls, uri: str) -> int:
        """Return the episode id contained in ``uri``.

        Raises:
            Exception: If ``uri`` is not a supported episode URI.
        """
        episode_id = cls.__get_episode_id(uri)
        if episode_id is None:
            raise Exception(f"Expected {uri!r} to be extractable")
        return episode_id

    @classmethod
    def __get_episode_uri(cls, episode_id: str | int) -> str:
        """Build the website URI of an episode."""
        return f"https://www.tvmaze.com/episodes/{episode_id}"

    @classmethod
    def __get_episode_api_uri(cls, episode_id: str | int) -> str:
        """Build the API URI of an episode."""
        return f"https://api.tvmaze.com/episodes/{episode_id}"

    @classmethod
    def __get_episode_custom_uri(cls, episode_id: str | int) -> str:
        """Build the custom-scheme URI of an episode."""
        return f"tvmaze:///episodes/{episode_id}"

    def __init__(self) -> None:
        """Register this extractor under the name ``tvmaze``."""
        super().__init__("tvmaze")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether ``uri`` is an episode URI this extractor understands."""
        # Compare against None instead of relying on truthiness so that a
        # syntactically valid id of 0 is not mistaken for "no match".
        return SuitableLevel.always_or_no(self.__get_episode_id(uri) is not None)

    def can_extract_offline(self, uri: str) -> bool:
        """Always True: the object key is derived from ``uri`` itself."""
        return True

    def _extract_offline(self, uri: str) -> ExtractedDataLight:
        """Derive the object key from ``uri`` without network access.

        Raises:
            Exception: If ``uri`` is not a supported episode URI. Without this
                check the key would silently become the string ``"None"``.
        """
        episode_id = self.__require_episode_id(uri)
        return ExtractedDataLight(
            extractor_name=self.name,
            object_key=str(episode_id),
            object_uri=uri,
        )

    def _extract_online(self, uri: str) -> ExtractedData[TvmazeEpisodeEmbedded]:
        """Fetch the episode, with its show embedded, from the TVmaze API.

        Raises:
            Exception: If ``uri`` is not a supported episode URI.
            requests.HTTPError: If the API answers with an error status.
        """
        episode_id = self.__require_episode_id(uri)
        api_uri = self.__get_episode_api_uri(episode_id)
        res = requests.get(
            url=api_uri,
            params={
                "embed[]": [
                    "show",
                ]
            },
        )
        # Fail here instead of handing an error payload (for example a 404
        # body) to the update step, where it would surface as a KeyError.
        res.raise_for_status()
        data = res.json()
        return ExtractedData(
            extractor_name=self.name,
            object_key=str(episode_id),
            object_uri=uri,
            data=data,
        )

    def _update_object_raw(
        self, object: MediaElement, data: TvmazeEpisodeEmbedded
    ) -> None:
        """Copy episode metadata onto ``object``.

        Args:
            object: Media element to update.
            data: Episode record including ``_embedded["show"]``.

        Raises:
            ExtractionError: If the episode has no air timestamp.
        """
        # sanity check
        airstamp = data.get("airstamp")
        if airstamp is None:  # not released yet
            raise ExtractionError(
                f"Could not extract {object.uri!r} because of missing data probably due to not being released yet"
            )
        # extract data
        show = data["_embedded"]["show"]
        title = data.get("name")
        if not title:
            # Unnamed episodes get a generated title.
            title = f"Season {data['season']} - Episode {data['number']}"
        object.title = f"{title} - {show['name']}"
        object.description = data.get("summary")
        # Prefer the episode's own image, fall back to the show's image.
        thumbnail_uri = select_best_image(data.get("image"), show.get("image"))
        object.thumbnail = (
            MediaThumbnail.from_uri(thumbnail_uri) if thumbnail_uri else None
        )
        object.release_date = datetime.strptime(airstamp, "%Y-%m-%dT%H:%M:%S%z")
        # The first non-zero value among the runtime fields is multiplied by
        # 60 (presumably minutes to seconds); the length is 0 when none is set.
        object.length = (
            data.get("runtime")
            or show.get("runtime")
            or show.get("averageRuntime")
            or 0
        ) * 60
        object.add_uris(
            (
                self.__get_episode_uri(data["id"]),
                self.__get_episode_api_uri(data["id"]),
                self.__get_episode_custom_uri(data["id"]),
            )
        )
|
Loading…
Reference in New Issue