Add support for TVMaze extraction

master
Felix Stupp 2 years ago
parent 9ffb34b972
commit 1139219e23
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

@ -0,0 +1,119 @@
from __future__ import annotations
from typing import Any, Dict, List, Literal, Optional, TypeVar, TypedDict, Union
# English weekday names, used as the element type of ``TvmazeSchedule.days``.
Weekdays = Literal[
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Saturday",
    "Sunday",
]
class TvmazeCountry(TypedDict):
    """Typed shape of a country entry: a name, a code and a timezone."""

    name: str
    # presumably an ISO country code — TODO confirm against the API
    code: str
    # presumably an IANA timezone name — TODO confirm against the API
    timezone: str
class TvmazeEmbeddings(TypedDict, total=False):
    """Typed shape of an ``_embedded`` mapping.

    ``total=False`` makes every key optional, so a value only needs to carry
    the resources that were actually embedded.
    """

    show: TvmazeShow
    seasons: List[TvmazeSeason]
    episodes: List[TvmazeEpisode]
class TvmazeEpisode(TypedDict):
    """Typed shape of a single TVMaze episode object.

    ``airstamp``, ``runtime`` and ``image`` are declared optional because the
    extractors consuming this type explicitly handle them being ``None``
    (episodes without an air date are skipped, a missing runtime falls back
    to the show's runtime, a missing image falls back to the show's image).
    """

    id: int
    url: str
    name: str
    season: int
    number: int
    type: str
    airdate: str
    airtime: str
    # None while no air date is known; consumers check for this before parsing
    airstamp: Optional[str]
    # in minutes (consumers multiply by 60); None when unknown
    runtime: Optional[int]
    rating: TvmazeRating
    image: Optional[TvmazeImage]
    summary: str
class TvmazeEpisodeEmbedded(TvmazeEpisode):
    """A ``TvmazeEpisode`` that additionally carries an ``_embedded`` mapping."""

    _embedded: TvmazeEmbeddings
class TvmazeExternalIds(TypedDict):
    """Typed shape of the identifiers of an entry on other sites.

    Every identifier may be ``None``.
    """

    tvrage: Optional[int]
    thetvdb: Optional[int]
    imdb: Optional[str]
class TvmazeImage(TypedDict):
    """Typed shape of an image entry offering two variants of one picture.

    ``select_best_image`` prefers ``original`` over ``medium``.
    """

    medium: Optional[str]
    original: str
def select_best_image(*image_list: Optional[TvmazeImage]) -> Optional[str]:
    """Return the first usable image reference from the given candidates.

    Candidates are checked in the order given. For each one the ``original``
    variant is preferred and ``medium`` is used as a fallback.

    Args:
        *image_list: Image objects in order of preference. ``None`` entries
            are allowed and skipped, so callers can pass the result of
            ``dict.get("image")`` directly.

    Returns:
        The first non-empty ``original`` or ``medium`` value found, or
        ``None`` if no candidate provides one.
    """
    for image in image_list:
        if image is None:
            continue
        # empty strings and explicit nulls are both treated as "no image"
        found = image.get("original") or image.get("medium")
        if found:
            return found
    return None
class TvmazeNetwork(TypedDict):
    """Typed shape of a network entry (id, name and country)."""

    id: int
    name: str
    country: TvmazeCountry
    # NOTE(review): the shape of a web channel is not modelled here (``Any``)
    webChannel: Optional[Any]
    dvdCountry: Optional[TvmazeCountry]
class TvmazeRating(TypedDict):
    """Typed shape of a rating object.

    The TVMaze API reports the average as a fractional number (e.g. ``6.5``)
    and as ``null`` for entries without votes, so ``int`` is too narrow.
    """

    average: Optional[float]
class TvmazeSchedule(TypedDict):
    """Typed shape of a schedule: a time plus a list of weekday names."""

    # presumably a "HH:MM" string — TODO confirm against the API
    time: str
    days: List[Weekdays]
class TvmazeSeason(TypedDict):
    """Typed shape of a single season object."""

    id: int
    url: str
    number: int
    name: str
    episodeOrder: int
    # presumably "YYYY-MM-DD" strings like the show's ``premiered`` — TODO confirm
    premiereDate: str
    endDate: str
    network: TvmazeNetwork
    # NOTE(review): the shape of a web channel is not modelled here (``Any``)
    webChannel: Optional[Any]
    image: TvmazeImage
    summary: str
class TvmazeShow(TypedDict):
    """Typed shape of a TVMaze show object.

    Fields the consuming extractors treat as possibly missing are declared
    optional: ``runtime`` / ``averageRuntime`` (the media extractor falls back
    from one to the other and finally to ``0``), ``image`` (passed to
    ``select_best_image``, which skips ``None``), ``ended`` (``null`` in the
    API while a show is still running) and ``summary``.
    """

    id: int
    url: str
    name: str
    type: str
    language: str
    genres: List[str]
    status: str
    # in minutes (the media extractor multiplies by 60); None when unknown
    runtime: Optional[int]
    averageRuntime: Optional[int]
    # parsed by the collection extractor with the format "%Y-%m-%d"
    premiered: str
    ended: Optional[str]
    officialSite: str
    schedule: TvmazeSchedule
    rating: TvmazeRating
    weight: int
    externals: TvmazeExternalIds
    image: Optional[TvmazeImage]
    summary: Optional[str]
    updated: int
class TvmazeShowEmbedded(TvmazeShow):
    """A ``TvmazeShow`` that additionally carries an ``_embedded`` mapping."""

    _embedded: TvmazeEmbeddings
T = TypeVar("T", bound=Dict)


def add_embedding(object: T, key: str, value: Any, parent_key: str = "_embedded") -> T:
    """Store ``value`` under ``object[parent_key][key]`` and return ``object``.

    The nested mapping at ``parent_key`` is created on demand. ``object`` is
    modified in place; the same instance is returned for convenience.
    """
    embedded = object.setdefault(parent_key, {})
    embedded[key] = value
    return object

@ -8,6 +8,7 @@ from ...models import MediaCollection
from ..helpers import expect_suitable_extractor
from .base import CollectionExtractor
from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
from .tvmaze import TvmazeCollectionExtractor
from .youtube import YouTubeCollectionExtractor
@ -18,6 +19,7 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
label_filter=-1033,
mark_as_read=True,
),
"tvmaze": TvmazeCollectionExtractor(),
"youtube": YouTubeCollectionExtractor(),
}

@ -0,0 +1,136 @@
from __future__ import annotations
from datetime import datetime
import itertools
import re
from typing import List, Optional
from pony import orm # TODO remove
import requests
from ...models import MediaCollection, Tag
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
from .base import CollectionExtractor
class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
    """Collection extractor that maps one TVMaze show onto a ``MediaCollection``.

    Accepts ``http(s)://[api.|www.]tvmaze.com/shows/<id>`` URLs as well as the
    custom ``tvmaze:///shows/<id>`` form; anything after the id is ignored.
    """

    SUPPORTED_PATTERN = re.compile(
        r"""^
        (
            https?://((api|www)\.)?tvmaze\.com
        |
            tvmaze://
        )/shows/
        (?P<show_id>\d+)
        (/.*)?
        $""",
        re.VERBOSE,
    )

    # Seconds to wait for the TVMaze API before giving up; without a timeout
    # ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    @classmethod
    def __get_show_id(cls, uri: str) -> Optional[int]:
        """Return the show id contained in ``uri`` or None if it does not match."""
        m = cls.SUPPORTED_PATTERN.search(uri)
        return int(m.group("show_id")) if m else None

    @classmethod
    def __require_show_id(cls, uri: str) -> int:
        """Return the show id contained in ``uri``.

        Raises:
            Exception: if ``uri`` does not match ``SUPPORTED_PATTERN``.
        """
        show_id = cls.__get_show_id(uri)
        if show_id is None:
            raise Exception(
                f"Expected uri to be extractable for TvmazeCollectionExtractor: {uri}"
            )
        return show_id

    @classmethod
    def __get_show_uri(cls, show_id: str | int) -> str:
        """Build the website URL of a show."""
        return f"https://www.tvmaze.com/shows/{show_id}"

    @classmethod
    def __get_show_api_uri(cls, show_id: str | int) -> str:
        """Build the API URL of a show."""
        return f"https://api.tvmaze.com/shows/{show_id}"

    @classmethod
    def __get_show_custom_uri(cls, show_id: str | int) -> str:
        """Build the custom ``tvmaze://`` URI of a show."""
        return f"tvmaze:///shows/{show_id}"

    def __init__(self) -> None:
        super().__init__("tvmaze")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether ``uri`` points to a TVMaze show."""
        show_id = self.__get_show_id(uri)
        return SuitableLevel.always_or_no(bool(show_id))

    def can_extract_offline(self, uri: str) -> bool:
        """Always True: key and canonical URI are derived from ``uri`` alone."""
        return True

    def _cache_expired(self, object: MediaCollection) -> bool:
        """Tell whether ``object`` should be refreshed.

        Compares the time since ``object.last_updated`` with the waiting
        period ``_calculate_wait_hours`` derives from the latest release date
        among the collection's linked elements.
        """
        last_release_date = orm.max(l.element.release_date for l in object.media_links)
        return (datetime.now() - object.last_updated) > self._calculate_wait_hours(
            last_release_date
        )

    def _extract_offline(self, uri: str) -> ExtractedDataLight:
        """Derive key and canonical URI from ``uri`` without network access.

        Raises:
            Exception: if ``uri`` does not match ``SUPPORTED_PATTERN``.
        """
        show_id = self.__require_show_id(uri)
        return ExtractedDataLight(
            extractor_name=self.name,
            object_key=str(show_id),
            object_uri=self.__get_show_uri(show_id),
        )

    def _extract_online(self, uri: str) -> ExtractedData[TvmazeShowEmbedded]:
        """Fetch the show including its embedded episodes from the TVMaze API.

        Raises:
            Exception: if ``uri`` does not match ``SUPPORTED_PATTERN``.
            requests.RequestException: if the request times out or the API
                answers with an error status.
        """
        show_id = self.__require_show_id(uri)
        api_uri = self.__get_show_api_uri(show_id)
        res = requests.get(
            url=api_uri,
            params={
                "embed[]": [
                    "episodes",
                ]
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here instead of handing an error payload (e.g. 404 or rate
        # limiting) to _update_object_raw as if it were a show.
        res.raise_for_status()
        data = res.json()
        return ExtractedData(
            extractor_name=self.name,
            object_key=str(show_id),
            object_uri=self.__get_show_uri(show_id),
            data=data,
        )

    def _update_object_raw(
        self,
        object: MediaCollection,
        data: TvmazeShowEmbedded,
    ) -> None:
        """Copy the show's metadata onto ``object`` and inject its episodes."""
        object.title = f"[tvmaze] {data['name']}"
        # ``.get(key, "")`` alone would still yield None for an explicit JSON null
        object.description = data.get("summary") or ""
        # NOTE(review): raises TypeError if ``premiered`` is null — confirm
        # whether the API can return shows without a premiere date.
        object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d")
        object.set_watch_in_order_auto(True)
        object.add_uris(
            (
                self.__get_show_uri(data["id"]),
                self.__get_show_api_uri(data["id"]),
                self.__get_show_custom_uri(data["id"]),
            )
        )
        for genre in itertools.chain(["Video", data["type"]], data["genres"]):
            tag_list: List[Tag] = list(
                orm.select(tag for tag in Tag if tag.title == genre)
            )
            # only attach tags that can be identified unambiguously by title
            if len(tag_list) == 1:
                object.tag_list.add(tag_list[0])
        for episode in data["_embedded"]["episodes"]:
            # episodes without an airstamp are skipped
            if episode["airstamp"] is not None:
                # attach the show so the episode data is self-contained
                add_embedding(episode, "show", data)
                self._inject_episode(
                    collection=object,
                    data=ExtractedData[TvmazeEpisodeEmbedded](
                        extractor_name="tvmaze",
                        object_key=str(episode["id"]),
                        object_uri=f"tvmaze:///episodes/{episode['id']}",
                        data=episode,
                    ),
                    season=episode["season"],
                    episode=episode["number"],
                )

@ -6,11 +6,13 @@ from typing import Dict, Tuple
from ...models import MediaElement
from ..helpers import expect_suitable_extractor
from .base import MediaExtractor
from .tvmaze import TvmazeMediaExtractor
from .youtube import YoutubeMediaExtractor
from .ytdl import YtdlMediaExtractor
# Registry of the available media extractor instances, keyed by a short
# string identifier.
MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
    "tvmaze": TvmazeMediaExtractor(),
    "youtube": YoutubeMediaExtractor(),
    "ytdl": YtdlMediaExtractor(),
}

@ -0,0 +1,119 @@
from __future__ import annotations
from datetime import datetime
import re
from typing import Optional
import requests
from ...models import MediaElement, MediaThumbnail
from ..all.tvmaze import TvmazeEpisodeEmbedded, select_best_image
from ..generic import ExtractedData, ExtractedDataLight, ExtractionError, SuitableLevel
from .base import MediaExtractor
class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
    """Media extractor that maps one TVMaze episode onto a ``MediaElement``.

    Accepts ``http(s)://[api.|www.]tvmaze.com/episodes/<id>`` URLs as well as
    the custom ``tvmaze:///episodes/<id>`` form; anything after the id is
    ignored.
    """

    SUPPORTED_PATTERN = re.compile(
        r"""^
        (
            https?://((api|www)\.)?tvmaze\.com
        |
            tvmaze://
        )/episodes/
        (?P<episode_id>\d+)
        (/.*)?
        $""",
        re.VERBOSE,
    )

    # Seconds to wait for the TVMaze API before giving up; without a timeout
    # ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 30

    @classmethod
    def __get_episode_id(cls, uri: str) -> Optional[int]:
        """Return the episode id contained in ``uri`` or None if it does not match."""
        m = cls.SUPPORTED_PATTERN.search(uri)
        return int(m.group("episode_id")) if m else None

    @classmethod
    def __require_episode_id(cls, uri: str) -> int:
        """Return the episode id contained in ``uri``.

        Raises:
            Exception: if ``uri`` does not match ``SUPPORTED_PATTERN``.
        """
        episode_id = cls.__get_episode_id(uri)
        if episode_id is None:
            raise Exception(f"Expected {uri!r} to be extractable")
        return episode_id

    @classmethod
    def __get_episode_uri(cls, episode_id: str | int) -> str:
        """Build the website URL of an episode."""
        return f"https://www.tvmaze.com/episodes/{episode_id}"

    @classmethod
    def __get_episode_api_uri(cls, episode_id: str | int) -> str:
        """Build the API URL of an episode."""
        return f"https://api.tvmaze.com/episodes/{episode_id}"

    @classmethod
    def __get_episode_custom_uri(cls, episode_id: str | int) -> str:
        """Build the custom ``tvmaze://`` URI of an episode."""
        return f"tvmaze:///episodes/{episode_id}"

    def __init__(self) -> None:
        super().__init__("tvmaze")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether ``uri`` points to a TVMaze episode."""
        episode_id = self.__get_episode_id(uri)
        return SuitableLevel.always_or_no(bool(episode_id))

    def can_extract_offline(self, uri: str) -> bool:
        """Always True: the object key is derived from ``uri`` alone."""
        return True

    def _extract_offline(self, uri: str) -> ExtractedDataLight:
        """Derive the object key from ``uri`` without network access.

        Raises:
            Exception: if ``uri`` does not match ``SUPPORTED_PATTERN``;
                previously such a URI produced the bogus key ``"None"``.
        """
        episode_id = self.__require_episode_id(uri)
        return ExtractedDataLight(
            extractor_name=self.name,
            object_key=str(episode_id),
            object_uri=uri,
        )

    def _extract_online(self, uri: str) -> ExtractedData[TvmazeEpisodeEmbedded]:
        """Fetch the episode including its embedded show from the TVMaze API.

        Raises:
            Exception: if ``uri`` does not match ``SUPPORTED_PATTERN``.
            requests.RequestException: if the request times out or the API
                answers with an error status.
        """
        episode_id = self.__require_episode_id(uri)
        api_uri = self.__get_episode_api_uri(episode_id)
        res = requests.get(
            url=api_uri,
            params={
                "embed[]": [
                    "show",
                ]
            },
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here instead of handing an error payload (e.g. 404 or rate
        # limiting) to _update_object_raw as if it were an episode.
        res.raise_for_status()
        data = res.json()
        return ExtractedData(
            extractor_name=self.name,
            object_key=str(episode_id),
            object_uri=uri,
            data=data,
        )

    def _update_object_raw(
        self, object: MediaElement, data: TvmazeEpisodeEmbedded
    ) -> None:
        """Copy the episode's metadata onto ``object``.

        Raises:
            ExtractionError: if the episode has no ``airstamp``.
        """
        # sanity check
        airstamp = data.get("airstamp")
        if airstamp is None:  # not released yet
            raise ExtractionError(
                f"Could not extract {object.uri!r} because of missing data probably due to not being released yet"
            )
        # extract data
        show = data["_embedded"]["show"]
        title = data.get("name")
        if not title:
            # unnamed episodes get a generic title built from their position
            title = f"Season {data['season']} - Episode {data['number']}"
        object.title = f"{title} - {show['name']}"
        object.description = data.get("summary")
        # prefer the episode's own image, fall back to the show's image
        thumbnail_uri = select_best_image(data.get("image"), show.get("image"))
        object.thumbnail = (
            MediaThumbnail.from_uri(thumbnail_uri) if thumbnail_uri else None
        )
        object.release_date = datetime.strptime(airstamp, "%Y-%m-%dT%H:%M:%S%z")
        # first available runtime, multiplied by 60 (presumably minutes to
        # seconds — confirm); 0 if none is known
        object.length = (
            data.get("runtime")
            or show.get("runtime")
            or show.get("averageRuntime")
            or 0
        ) * 60
        object.add_uris(
            (
                self.__get_episode_uri(data["id"]),
                self.__get_episode_api_uri(data["id"]),
                self.__get_episode_custom_uri(data["id"]),
            )
        )
Loading…
Cancel
Save