Reworked ExtractedData classes, split into Offline & Online

master
Felix Stupp 2 years ago
parent bba57e82d8
commit c97559ed62
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7
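
The diff below splits the former ExtractedData class into two variants: ExtractedDataOffline[T], whose data payload is optional, and ExtractedDataOnline[T], which guarantees a payload and can be produced from an offline instance via its online_type property. As a reading aid, here is a minimal, self-contained sketch of the resulting contract; it is an illustration only, not part of the commit, and it inlines the identity fields of ExtractedDataLight for brevity (the "demo" values are made up):

from __future__ import annotations

import dataclasses
from dataclasses import dataclass
from typing import Generic, Optional, TypeVar

T = TypeVar("T")


@dataclass
class ExtractedDataOffline(Generic[T]):
    # identity is always known after a cheap offline extraction,
    # but the payload may still be missing
    extractor_name: str
    object_key: str
    object_uri: str
    data: Optional[T] = dataclasses.field(default=None, repr=False, compare=False)

    @property
    def has_data(self) -> bool:
        return self.data is not None

    @property
    def online_type(self) -> ExtractedDataOnline[T]:
        # promote to the online variant without refetching
        if self.data is None:
            raise Exception("Explicit type requires data to be set")
        return ExtractedDataOnline[T](
            extractor_name=self.extractor_name,
            object_key=self.object_key,
            object_uri=self.object_uri,
            data=self.data,
        )


@dataclass
class ExtractedDataOnline(ExtractedDataOffline[T]):
    # the payload is guaranteed to be present
    data: T = dataclasses.field(repr=False, compare=False)

    @property
    def has_data(self) -> bool:
        return True

    @property
    def online_type(self) -> ExtractedDataOnline[T]:
        return self


offline = ExtractedDataOffline[dict](
    extractor_name="demo",
    object_key="42",
    object_uri="demo:///42",
)
assert not offline.has_data
online = dataclasses.replace(offline, data={"title": "Demo"}).online_type
assert online.has_data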

@@ -1,16 +1,19 @@
 from __future__ import annotations
 import re
-from typing import List, Set
+from typing import List, Set, TypeAlias
 from pony import orm
 from ...models import MediaCollection, MediaCollectionLink, MediaElement
-from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
+from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
 from .base import CollectionExtractor
-class AggregatedCollectionExtractor(CollectionExtractor[List[List[MediaElement]]]):
+DataType: TypeAlias = List[List[MediaElement]]
+class AggregatedCollectionExtractor(CollectionExtractor[DataType]):
     __uri_regex = re.compile(r"^aggregated:///(?P<id>\d+(,\d+)*)")
@@ -43,18 +46,18 @@ class AggregatedCollectionExtractor(CollectionExtractor[List[List[MediaElement]]
             return True
         return False
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[DataType]:
         coll_id = ",".join(str(i) for i in self.__get_id(uri))
-        return ExtractedDataLight(
+        return ExtractedDataOffline[DataType](
             extractor_name=self.name,
             object_key=coll_id,
             object_uri=uri,
         )
-    def _extract_online(self, uri: str) -> ExtractedData[List[List[MediaElement]]]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[DataType]:
         colls = self.__get_collections(uri)
         coll_id = ",".join(str(c.id) for c in colls)
-        return ExtractedData(
+        return ExtractedDataOnline[DataType](
             extractor_name=self.name,
             object_key=coll_id,
             object_uri=uri,
@@ -69,9 +72,7 @@ class AggregatedCollectionExtractor(CollectionExtractor[List[List[MediaElement]]
             ],
         )
-    def _update_object_raw(
-        self, object: MediaCollection, data: List[List[MediaElement]]
-    ) -> None:
+    def _update_object_raw(self, object: MediaCollection, data: DataType) -> None:
         if object.title is None or "[aggregated]" not in object.title:
             object.title = f"[aggregated] {object.uri}"
         object.creator = None

@@ -13,7 +13,12 @@ from ...models import (
     MediaCollectionLink,
     MediaElement,
 )
-from ..generic import ExtractedData, ExtractionError, GeneralExtractor
+from ..generic import (
+    ExtractedDataOnline,
+    ExtractedDataOffline,
+    ExtractionError,
+    GeneralExtractor,
+)
 T = TypeVar("T")
@@ -48,12 +53,12 @@ class CollectionExtractor(GeneralExtractor[MediaCollection, T]):
     def __configure_collection(self, collection: MediaCollection) -> None:
         collection.keep_updated = True
-    def _create_object(self, data: ExtractedData[T]) -> MediaCollection:
+    def _create_object(self, data: ExtractedDataOffline[T]) -> MediaCollection:
         collection = data.create_collection()
         self.__configure_collection(collection)
         return collection
-    def _load_object(self, data: ExtractedData[T]) -> Optional[MediaCollection]:
+    def _load_object(self, data: ExtractedDataOffline[T]) -> Optional[MediaCollection]:
         collection = data.load_collection()
         if collection is not None:
             self.__configure_collection(collection)
@@ -89,7 +94,7 @@ class CollectionExtractor(GeneralExtractor[MediaCollection, T]):
     def _inject_episode(
         self,
         collection: MediaCollection,
-        data: ExtractedData[Any],
+        data: ExtractedDataOnline[Any],
         season: int = 0,
         episode: int = 0,
     ) -> Optional[MediaElement]:
@@ -133,5 +138,7 @@ class CollectionExtractor(GeneralExtractor[MediaCollection, T]):
             link.season = 0
             link.episode = index + 1
-    def _update_hook(self, object: MediaCollection, data: ExtractedData[T]) -> None:
+    def _update_hook(
+        self, object: MediaCollection, data: ExtractedDataOnline[T]
+    ) -> None:
         self._sort_episodes(object)

@@ -11,9 +11,8 @@ from ..all.tmdb import (
     TmdbCollectionData,
     TMDB_REGEX_URI,
     TmdbKeywordData,
-    TmdbMovieEntryData,
 )
-from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
+from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
 from .base import CollectionExtractor
@@ -58,9 +57,9 @@ class TmdbBaseExtractor(CollectionExtractor[T]):
             self._calculate_wait_hours(last_release_date) * 7 * 24
         )
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[T]:
         id = self._get_id(uri)
-        return ExtractedDataLight(
+        return ExtractedDataOffline[T](
             extractor_name=self.name,
             object_key=f"{self.TMDB_CLASS}:{id}",
             object_uri=uri,
@@ -71,10 +70,10 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]):
     TMDB_CLASS = "collection"
-    def _extract_online(self, uri: str) -> ExtractedData[TmdbCollectionData]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[TmdbCollectionData]:
         id = self._get_id(uri)
         data = TmdbCollectionData.from_id(id)
-        return ExtractedData(
+        return ExtractedDataOnline(
             extractor_name=self.name,
             object_key=f"{self.TMDB_CLASS}:{id}",
             object_uri=uri,
@@ -110,10 +109,10 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
     TMDB_CLASS = "keyword"
-    def _extract_online(self, uri: str) -> ExtractedData[TmdbKeywordData]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[TmdbKeywordData]:
         id = self._get_id(uri)
         data = TmdbKeywordData.from_id(id)
-        return ExtractedData(
+        return ExtractedDataOnline(
             extractor_name=self.name,
             object_key=f"{self.TMDB_CLASS}:{id}",
             object_uri=uri,

@@ -8,7 +8,7 @@ from pony import orm  # TODO remove
 from ...models import MediaCollection
 from ..all.tt_rss import HeadlineList, TtRssConnectionParameter, TtRssUri
-from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
+from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
 from .base import CollectionExtractor
@@ -41,14 +41,14 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
     def _cache_expired(self, object: MediaCollection) -> bool:
         return (datetime.now() - object.last_updated) > timedelta(minutes=15)
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
-        return ExtractedDataLight(
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[HeadlineList]:
+        return ExtractedDataOffline[HeadlineList](
             extractor_name=self.name,
             object_key=uri,
             object_uri=uri,
         )
-    def _extract_online(self, uri: str) -> ExtractedData[HeadlineList]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[HeadlineList]:
         rss_uri = self.__decode_uri(uri)
         logging.info(f"Extract collection from tt-rss: {uri!r}")
         data = rss_uri.request(self.__params, order_by="feed_dates", view_mode="unread")
@@ -59,7 +59,7 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
             if self.__label_filter
             in (label_marker[0] for label_marker in headline.labels)
         ]
-        return ExtractedData(
+        return ExtractedDataOnline(
             extractor_name=self.name,
             object_key=uri,
             object_uri=uri,

@@ -10,7 +10,7 @@ import requests
 from ...models import MediaCollection, Tag
 from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
-from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
+from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
 from .base import CollectionExtractor
@@ -71,15 +71,15 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
             last_release_date
         )
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[TvmazeShowEmbedded]:
         show_id = self.__require_show_id(uri)
-        return ExtractedDataLight(
+        return ExtractedDataOffline[TvmazeShowEmbedded](
             extractor_name=self.name,
             object_key=str(show_id),
             object_uri=self.__get_show_uri(show_id),
         )
-    def _extract_online(self, uri: str) -> ExtractedData[TvmazeShowEmbedded]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[TvmazeShowEmbedded]:
         show_id = self.__require_show_id(uri)
         api_uri = self.__get_show_api_uri(show_id)
         res = requests.get(
@@ -91,7 +91,7 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
             },
         )
         data = res.json()
-        return ExtractedData(
+        return ExtractedDataOnline[TvmazeShowEmbedded](
             extractor_name=self.name,
             object_key=str(show_id),
             object_uri=self.__get_show_uri(show_id),
@@ -125,7 +125,7 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
             add_embedding(episode, "show", data)
             self._inject_episode(
                 collection=object,
-                data=ExtractedData[TvmazeEpisodeEmbedded](
+                data=ExtractedDataOnline[TvmazeEpisodeEmbedded](
                     extractor_name="tvmaze",
                     object_key=str(episode["id"]),
                     object_uri=f"tvmaze:///episodes/{episode['id']}",

@@ -3,17 +3,20 @@ from __future__ import annotations
 from datetime import datetime
 import logging
 import re
-from typing import Dict
+from typing import Dict, TypeAlias
 from pony import orm  # TODO remove
 import youtubesearchpython
 from ...models import MediaCollection
-from ..generic import ExtractedData, ExtractedDataLight, SuitableLevel
+from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
 from .base import CollectionExtractor
-class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
+DataType: TypeAlias = Dict
+class YouTubeCollectionExtractor(CollectionExtractor[DataType]):
     __uri_regex = re.compile(
         r"^https?://(www\.)?youtube\.com/(channel/|playlist\?list=)(?P<id>[^/&?]+)"
@@ -59,15 +62,15 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
             last_release_date
         )
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[DataType]:
         playlist_id = self.__convert_if_required(self.__get_id(uri))
-        return ExtractedDataLight(
+        return ExtractedDataOffline[DataType](
             extractor_name=self.name,
             object_key=playlist_id,
             object_uri=uri,
         )
-    def _extract_online(self, uri: str) -> ExtractedData[Dict]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[DataType]:
         orig_id = self.__get_id(uri)
         playlist_id = self.__convert_if_required(orig_id)
         playlist_link = f"https://www.youtube.com/playlist?list={playlist_id}"
@@ -89,7 +92,7 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
         logging.debug(
             f"Retrieved {len(playlist.videos)} videos from playlist {playlist_link!r}"
         )
-        return ExtractedData(
+        return ExtractedDataOnline[DataType](
             extractor_name=self.name,
             object_key=playlist_id,
             object_uri=uri,
@@ -99,7 +102,7 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
             },
         )
-    def _update_object_raw(self, object: MediaCollection, data: Dict) -> None:
+    def _update_object_raw(self, object: MediaCollection, data: DataType) -> None:
         info = data["info"]
         is_channel = self.__is_channel_id(info["id"])
         object.title = (

@@ -64,15 +64,6 @@ class ExtractedDataLight:
             extractor_key=self.object_key,
         )
-@dataclass
-class ExtractedData(ExtractedDataLight, Generic[T]):
-    data: T = dataclasses.field(repr=False, compare=False)
-    @property
-    def has_data(self) -> bool:
-        return self.data is not None
     def load_media(self) -> Optional[MediaElement]:
         return MediaElement.get(
             extractor_name=self.extractor_name,
@@ -86,6 +77,39 @@ class ExtractedData(ExtractedDataLight, Generic[T]):
         )
+@dataclass
+class ExtractedDataOffline(ExtractedDataLight, Generic[T]):
+    data: Optional[T] = dataclasses.field(default=None, repr=False, compare=False)
+    @property
+    def has_data(self) -> bool:
+        return self.data is not None
+    @property
+    def online_type(self) -> ExtractedDataOnline[T]:
+        if self.data is None:
+            raise Exception("Explicit type requires data to be set")
+        return ExtractedDataOnline[T](
+            object_uri=self.object_uri,
+            extractor_name=self.extractor_name,
+            object_key=self.object_key,
+            data=self.data,
+        )
+@dataclass
+class ExtractedDataOnline(ExtractedDataOffline[T]):
+    data: T = dataclasses.field(repr=False, compare=False)
+    @property
+    def has_data(self) -> bool:
+        return True
+    @property
+    def online_type(self) -> ExtractedDataOnline[T]:
+        return self
 @dataclass
 class AuthorExtractedData(ExtractedDataLight):
     author_name: str
@@ -111,10 +135,10 @@ class GeneralExtractor(Generic[E, T]):
     def check_uri(uri: str) -> Optional[E]:
         raise NotImplementedError()
-    def _create_object(self, data: ExtractedData[T]) -> E:
+    def _create_object(self, data: ExtractedDataOffline[T]) -> E:
         raise NotImplementedError()
-    def _load_object(self, data: ExtractedData[T]) -> Optional[E]:
+    def _load_object(self, data: ExtractedDataOffline[T]) -> Optional[E]:
         raise NotImplementedError()
     # abstract (for specific extractor classes)
@@ -128,33 +152,35 @@ class GeneralExtractor(Generic[E, T]):
     def _cache_expired(self, object: E) -> bool:
         return False
-    def _extract_offline_only(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline_only(self, uri: str) -> ExtractedDataOffline[T]:
         raise NotImplementedError()
-    def _extract_online(self, uri: str) -> ExtractedData[T]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[T]:
         raise NotImplementedError()
     def _update_object_raw(self, object: E, data: T) -> None:
         raise NotImplementedError()
-    def _update_hook(self, object: E, data: ExtractedData[T]) -> None:
+    def _update_hook(self, object: E, data: ExtractedDataOnline[T]) -> None:
         return None
     # defined
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[T]:
         return (
             self._extract_offline_only(uri)
             if self.can_extract_offline(uri)
             else self._extract_online(uri)
         )
-    def _extract_required(self, data: ExtractedData[T]) -> ExtractedData[T]:
+    def _extract_required(
+        self, data: ExtractedDataOffline[T]
+    ) -> ExtractedDataOnline[T]:
         if data.has_data:
-            return data
+            return data.online_type
         return self._extract_online(data.object_uri)
-    def _update_object(self, object: E, data: ExtractedData[T]) -> E:
+    def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> E:
         object.uri = data.object_uri
         self._update_object_raw(object, data.data)
         self._update_hook(object, data)
@@ -175,7 +201,7 @@ class GeneralExtractor(Generic[E, T]):
         logging.debug(f"Updating info for media: {data!r}")
         return self._update_object(object, data)
-    def inject_object(self, data: ExtractedData[T]) -> E:
+    def inject_object(self, data: ExtractedDataOnline[T]) -> E:
         object = self._load_object(data)
         data = self._extract_required(data)
         if object is None:
@@ -183,15 +209,15 @@ class GeneralExtractor(Generic[E, T]):
             object = self._create_object(data)
         return self._update_object(object, data)
-    def store_object(self, data: ExtractedData[T]) -> E:
+    def store_object(self, data: ExtractedDataOffline[T]) -> E:
         object = self._load_object(data)
         if object is not None:
             logging.debug(f"Found object already in database: {data!r}")
             return object
-        data = self._extract_required(data)
-        logging.debug(f"Store info for object: {data!r}")
-        object = self._create_object(data)
-        return self._update_object(object, data)
+        full_data = self._extract_required(data)
+        logging.debug(f"Store info for object: {full_data!r}")
+        object = self._create_object(full_data)
+        return self._update_object(object, full_data)
     def extract_and_store(self, uri: str) -> E:
         object = self.check_uri(uri)
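
For context on the generic module hunks above: _extract_required is now the single place where an offline result gets promoted to the online variant, either via online_type (no second fetch) or by falling back to _extract_online. Below is a compact, self-contained sketch of that control flow using heavily simplified stand-in classes; DemoExtractor, its URI scheme, and the payload are hypothetical and only mirror the shape of GeneralExtractor:

from __future__ import annotations

from dataclasses import dataclass
from typing import Generic, Optional, TypeVar

T = TypeVar("T")


@dataclass
class Offline(Generic[T]):  # stand-in for ExtractedDataOffline[T]
    object_uri: str
    data: Optional[T] = None

    @property
    def has_data(self) -> bool:
        return self.data is not None

    @property
    def online_type(self) -> Online[T]:
        if self.data is None:
            raise Exception("Explicit type requires data to be set")
        return Online(object_uri=self.object_uri, data=self.data)


@dataclass
class Online(Offline[T]):  # stand-in for ExtractedDataOnline[T]
    data: T  # required, no default: the payload is always present


class DemoExtractor:
    """Hypothetical extractor mirroring the offline/online flow of GeneralExtractor."""

    def _extract_offline(self, uri: str) -> Offline[str]:
        # cheap path: identity only, no payload
        return Offline(object_uri=uri)

    def _extract_online(self, uri: str) -> Online[str]:
        # expensive path: actually fetch the payload
        return Online(object_uri=uri, data=f"payload for {uri}")

    def _extract_required(self, data: Offline[str]) -> Online[str]:
        # promote in place when possible, otherwise fall back to a full fetch
        if data.has_data:
            return data.online_type
        return self._extract_online(data.object_uri)


extractor = DemoExtractor()
offline = extractor._extract_offline("demo:///1")
full_data = extractor._extract_required(offline)  # fetches, since no payload yet
assert full_data.has_data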

@@ -4,7 +4,12 @@ import logging
 from typing import Optional, TypeVar
 from ...models import MediaCollection, MediaElement, MediaUriMapping
-from ..generic import AuthorExtractedData, ExtractedData, GeneralExtractor
+from ..generic import (
+    AuthorExtractedData,
+    ExtractedDataOnline,
+    ExtractedDataOffline,
+    GeneralExtractor,
+)
 from ..collection.base import CollectionExtractor
@@ -35,10 +40,10 @@ class MediaExtractor(GeneralExtractor[MediaElement, T]):
             return elem
         return None
-    def _create_object(self, data: ExtractedData[T]) -> MediaElement:
+    def _create_object(self, data: ExtractedDataOffline[T]) -> MediaElement:
         return data.create_media()
-    def _load_object(self, data: ExtractedData[T]) -> Optional[MediaElement]:
+    def _load_object(self, data: ExtractedDataOffline[T]) -> Optional[MediaElement]:
         return data.load_media()
     def __create_author_collection(
@@ -78,5 +83,5 @@ class MediaExtractor(GeneralExtractor[MediaElement, T]):
         collection = self.__get_author_collection(author_data)
         collection.add_episode(element)
-    def _update_hook(self, object: MediaElement, data: ExtractedData[T]) -> None:
+    def _update_hook(self, object: MediaElement, data: ExtractedDataOnline[T]) -> None:
         self.__add_to_author_collection(object, data.data)

@@ -8,7 +8,12 @@ from pony import orm
 from ...models import MediaElement, MediaThumbnail, Query, Tag
 from ..all.tmdb import TmdbMovieData, TMDB_REGEX_URI
-from ..generic import ExtractedData, ExtractedDataLight, ExtractionError, SuitableLevel
+from ..generic import (
+    ExtractedDataOnline,
+    ExtractedDataOffline,
+    ExtractionError,
+    SuitableLevel,
+)
 from .base import MediaExtractor
@@ -41,18 +46,18 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]):
     def can_extract_offline(self, uri: str) -> bool:
         return True
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[TmdbMovieData]:
         movie_id = self.__get_movie_id(uri)
-        return ExtractedDataLight(
+        return ExtractedDataOffline[TmdbMovieData](
             extractor_name=self.name,
             object_key=str(movie_id),
             object_uri=uri,
         )
-    def _extract_online(self, uri: str) -> ExtractedData[TmdbMovieData]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[TmdbMovieData]:
         movie_id = self.__get_movie_id(uri)
         data = TmdbMovieData.from_id(movie_id)
-        return ExtractedData(
+        return ExtractedDataOnline[TmdbMovieData](
             extractor_name=self.name,
             object_key=f"movie:{movie_id}",
             object_uri=uri,

@@ -7,8 +7,13 @@ from typing import Optional
 import requests
 from ...models import MediaElement, MediaThumbnail
-from ..all.tvmaze import TvmazeEpisodeEmbedded, select_best_image
-from ..generic import ExtractedData, ExtractedDataLight, ExtractionError, SuitableLevel
+from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, select_best_image
+from ..generic import (
+    ExtractedDataOnline,
+    ExtractedDataOffline,
+    ExtractionError,
+    SuitableLevel,
+)
 from .base import MediaExtractor
@@ -54,15 +59,15 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
     def can_extract_offline(self, uri: str) -> bool:
         return True
-    def _extract_offline(self, uri: str) -> ExtractedDataLight:
+    def _extract_offline(self, uri: str) -> ExtractedDataOffline[TvmazeEpisodeEmbedded]:
         episode_id = self.__get_episode_id(uri)
-        return ExtractedDataLight(
+        return ExtractedDataOffline[TvmazeEpisodeEmbedded](
             extractor_name=self.name,
             object_key=str(episode_id),
             object_uri=uri,
         )
-    def _extract_online(self, uri: str) -> ExtractedData[TvmazeEpisodeEmbedded]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[TvmazeEpisodeEmbedded]:
         episode_id = self.__get_episode_id(uri)
         if episode_id is None:
             raise Exception(f"Expected {uri!r} to be extractable")
@@ -76,7 +81,7 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
             },
         )
         data = res.json()
-        return ExtractedData(
+        return ExtractedDataOnline[TvmazeEpisodeEmbedded](
             extractor_name=self.name,
             object_key=str(episode_id),
             object_uri=uri,
@@ -84,7 +89,9 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
         )
     def _update_object_raw(
-        self, object: MediaElement, data: TvmazeEpisodeEmbedded
+        self,
+        object: MediaElement,
+        data: TvmazeEpisodeEmbedded,
     ) -> None:
         # sanity check
         airstamp = data.get("airstamp")

@@ -14,7 +14,7 @@ from ...models import (
 )
 from ..generic import (
     AuthorExtractedData,
-    ExtractedData,
+    ExtractedDataOnline,
     ExtractionError,
     SuitableLevel,
 )
@@ -91,7 +91,7 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
             author_name=data["channel"]["name"],
         )
-    def _extract_online(self, uri: str) -> ExtractedData[YoutubeVideoData]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[YoutubeVideoData]:
         logging.info(f"Request info using youtube_search_python for {uri!r}")
         uri_match = self.__uri_regex.match(uri)
         if not uri_match:
@@ -106,7 +106,7 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
             raise ExtractionError() from e
         if vid_data["isLiveNow"]:
             raise ExtractionError("Video is live, so pass extraction")
-        return ExtractedData[YoutubeVideoData](
+        return ExtractedDataOnline[YoutubeVideoData](
            object_uri=uri,
            extractor_name=self.name,
            object_key=vid_data["id"],

@@ -12,7 +12,12 @@ from ...models import (
     thumbnail_sort_key,
 )
 from ..all.ytdl import get_video_info, YtdlErrorException
-from ..generic import AuthorExtractedData, ExtractedData, ExtractionError, SuitableLevel
+from ..generic import (
+    AuthorExtractedData,
+    ExtractedDataOnline,
+    ExtractionError,
+    SuitableLevel,
+)
 from .base import MediaExtractor
@@ -48,7 +53,7 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
             else None,
         )
-    def _extract_online(self, uri: str) -> ExtractedData[Dict]:
+    def _extract_online(self, uri: str) -> ExtractedDataOnline[Dict]:
         logging.info(f"Request info using youtube-dl for {uri!r}")
         try:
             vid_data = get_video_info(uri)
@@ -58,7 +63,7 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
             raise ExtractionError("Video is live, so pass extraction")
         ytdl_extractor_key = vid_data.get("extractor_key") or vid_data["ie_key"]
         ytdl_video_id = vid_data["id"]
-        return ExtractedData[Dict](
+        return ExtractedDataOnline[Dict](
             object_uri=uri,
             extractor_name=self.name,
             object_key=f"{ytdl_extractor_key}:{ytdl_video_id}",
