diff --git a/server/app.py b/server/app.py index af0a7bd..981593f 100644 --- a/server/app.py +++ b/server/app.py @@ -790,17 +790,17 @@ def refresh_collections() -> ResponseReturnValue: orm.select(c.id for c in MediaCollection if c.keep_updated) ) errors = [] - failed_colls = set[int]() + changed_colls = list[int]() for coll_id in collection_ids: + coll = MediaCollection[coll_id] try: - coll = MediaCollection[coll_id] - collection_update(coll) + change_state = collection_update(coll) orm.commit() + if change_state.may_has_changed: + changed_colls.append(coll_id) # TODO make Exception more specific except Exception as e: orm.rollback() - failed_colls.add(coll_id) - coll = MediaCollection[coll_id] errors.append( { "collection": { @@ -813,9 +813,7 @@ def refresh_collections() -> ResponseReturnValue: }, }, ) - # TODO detect changed collections properly to speed up cache rebuild - # meaning check if collection really changed - update_element_lookup_cache(collection_ids - failed_colls) + update_element_lookup_cache(changed_colls) if errors: return ( { diff --git a/server/entertainment_decider/extractors/collection/__init__.py b/server/entertainment_decider/extractors/collection/__init__.py index 2571cc7..d381cde 100644 --- a/server/entertainment_decider/extractors/collection/__init__.py +++ b/server/entertainment_decider/extractors/collection/__init__.py @@ -5,6 +5,7 @@ from typing import Dict, Tuple from ...config import app_config from ...models import MediaCollection +from ..generic import ChangedReport from ..helpers import expect_suitable_extractor from .base import CollectionExtractor from .aggregated import AggregatedCollectionExtractor @@ -39,9 +40,9 @@ def collection_expect_extractor(uri: str) -> CollectionExtractor: def collection_update( collection: MediaCollection, check_cache_expired: bool = True, -) -> None: +) -> ChangedReport: ex = collection_expect_extractor(collection.uri) - ex.update_object( + return ex.update_object( object=collection, check_cache_expired=check_cache_expired, ) diff --git a/server/entertainment_decider/extractors/collection/aggregated.py b/server/entertainment_decider/extractors/collection/aggregated.py index 31471ca..beab9bd 100644 --- a/server/entertainment_decider/extractors/collection/aggregated.py +++ b/server/entertainment_decider/extractors/collection/aggregated.py @@ -6,7 +6,12 @@ from typing import List, Set, TypeAlias from pony import orm from ...models import MediaCollection, MediaCollectionLink, MediaElement -from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel +from ..generic import ( + ChangedReport, + ExtractedDataOnline, + ExtractedDataOffline, + SuitableLevel, +) from .base import CollectionExtractor @@ -72,7 +77,11 @@ class AggregatedCollectionExtractor(CollectionExtractor[DataType]): ], ) - def _update_object_raw(self, object: MediaCollection, data: DataType) -> None: + def _update_object_raw( + self, + object: MediaCollection, + data: DataType, + ) -> ChangedReport: if object.title is None or "[aggregated]" not in object.title: object.title = f"[aggregated] {object.uri}" object.creator = None @@ -91,3 +100,4 @@ class AggregatedCollectionExtractor(CollectionExtractor[DataType]): orm.delete(link for link in object.media_links if link.element.id in all_links) for uri_link in list(object.uris): uri_link.delete() + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/collection/tmdb.py b/server/entertainment_decider/extractors/collection/tmdb.py index 05af2a0..18287fc 100644 --- a/server/entertainment_decider/extractors/collection/tmdb.py +++ b/server/entertainment_decider/extractors/collection/tmdb.py @@ -12,7 +12,12 @@ from ..all.tmdb import ( TMDB_REGEX_URI, TmdbKeywordData, ) -from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel +from ..generic import ( + ChangedReport, + ExtractedDataOnline, + ExtractedDataOffline, + SuitableLevel, +) from .base import CollectionExtractor @@ -84,7 +89,7 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]): self, object: MediaCollection, data: TmdbCollectionData, - ) -> None: + ) -> ChangedReport: # extract data object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}" object.description = data.description or "" @@ -103,6 +108,7 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]): ) if element: orm.commit() + return ChangedReport.ChangedSome # TODO improve class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]): @@ -123,7 +129,7 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]): self, object: MediaCollection, data: TmdbKeywordData, - ) -> None: + ) -> ChangedReport: # extract data object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}" object.release_date = data.release_date @@ -141,3 +147,4 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]): ) if element: orm.commit() + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/collection/tt_rss.py b/server/entertainment_decider/extractors/collection/tt_rss.py index 86b8309..998af78 100644 --- a/server/entertainment_decider/extractors/collection/tt_rss.py +++ b/server/entertainment_decider/extractors/collection/tt_rss.py @@ -8,7 +8,12 @@ from pony import orm # TODO remove from ...models import MediaCollection from ..all.tt_rss import HeadlineList, TtRssConnectionParameter, TtRssUri -from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel +from ..generic import ( + ChangedReport, + ExtractedDataOnline, + ExtractedDataOffline, + SuitableLevel, +) from .base import CollectionExtractor @@ -66,7 +71,11 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]): data=data, ) - def _update_object_raw(self, object: MediaCollection, data: HeadlineList) -> None: + def _update_object_raw( + self, + object: MediaCollection, + data: HeadlineList, + ) -> ChangedReport: if not object.title: object.title = object.uri object.creator = None @@ -83,3 +92,4 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]): rss_uri.set_read(self.__params, readed_headlines) if object.watch_in_order_auto: object.watch_in_order = False # no order available + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/collection/tvmaze.py b/server/entertainment_decider/extractors/collection/tvmaze.py index bcdefa2..8bf5532 100644 --- a/server/entertainment_decider/extractors/collection/tvmaze.py +++ b/server/entertainment_decider/extractors/collection/tvmaze.py @@ -10,7 +10,12 @@ import requests from ...models import MediaCollection, Tag from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding -from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel +from ..generic import ( + ChangedReport, + ExtractedDataOnline, + ExtractedDataOffline, + SuitableLevel, +) from .base import CollectionExtractor @@ -102,7 +107,7 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]): self, object: MediaCollection, data: TvmazeShowEmbedded, - ) -> None: + ) -> ChangedReport: object.title = f"[tvmaze] {data['name']}" object.description = data.get("summary", "") object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d") @@ -134,3 +139,4 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]): season=episode["season"], episode=episode["number"], ) + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/collection/youtube.py b/server/entertainment_decider/extractors/collection/youtube.py index 9a3b72a..025d1f0 100644 --- a/server/entertainment_decider/extractors/collection/youtube.py +++ b/server/entertainment_decider/extractors/collection/youtube.py @@ -9,7 +9,12 @@ from pony import orm # TODO remove import youtubesearchpython from ...models import MediaCollection -from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel +from ..generic import ( + ChangedReport, + ExtractedDataOnline, + ExtractedDataOffline, + SuitableLevel, +) from .base import CollectionExtractor @@ -102,7 +107,11 @@ class YouTubeCollectionExtractor(CollectionExtractor[DataType]): }, ) - def _update_object_raw(self, object: MediaCollection, data: DataType) -> None: + def _update_object_raw( + self, + object: MediaCollection, + data: DataType, + ) -> ChangedReport: info = data["info"] is_channel = self.__is_channel_id(info["id"]) object.title = ( @@ -140,3 +149,4 @@ class YouTubeCollectionExtractor(CollectionExtractor[DataType]): f"https://www.youtube.com/channel/{info['channel']['id']}" ) ) + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/generic.py b/server/entertainment_decider/extractors/generic.py index 6ccbcae..66ad348 100644 --- a/server/entertainment_decider/extractors/generic.py +++ b/server/entertainment_decider/extractors/generic.py @@ -3,6 +3,7 @@ from __future__ import annotations import dataclasses from dataclasses import dataclass from datetime import datetime +import enum from enum import Enum import logging from typing import Generic, Optional, TypeVar @@ -13,6 +14,24 @@ from ..models import MediaCollection, MediaElement T = TypeVar("T") +class ChangedReport(Enum): + StayedSame = enum.auto() + """Declares that the action did not change anything. + + This requires that really nothing changed. If unsure, use ChangedSome. + """ + ChangedSome = enum.auto() + """Declares that something (might) have changed. + + It is not required that something really changed, + this could also mean that there is currently no better way to determine if something really changed. + """ + + @property + def may_has_changed(self) -> bool: + return self != self.StayedSame + + class SuitableLevel(Enum): NO = (False, False) @@ -158,7 +177,7 @@ class GeneralExtractor(Generic[E, T]): def _extract_online(self, uri: str) -> ExtractedDataOnline[T]: raise NotImplementedError() - def _update_object_raw(self, object: E, data: T) -> None: + def _update_object_raw(self, object: E, data: T) -> ChangedReport: raise NotImplementedError() def _update_hook(self, object: E, data: ExtractedDataOnline[T]) -> None: @@ -180,14 +199,18 @@ class GeneralExtractor(Generic[E, T]): return data.online_type return self._extract_online(data.object_uri) - def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> E: + def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> ChangedReport: object.uri = data.object_uri self._update_object_raw(object, data.data) self._update_hook(object, data) object.last_updated = datetime.now() - return object + return ChangedReport.ChangedSome # TODO improve - def update_object(self, object: E, check_cache_expired: bool = True) -> E: + def update_object( + self, + object: E, + check_cache_expired: bool = True, + ) -> ChangedReport: if ( object.was_extracted and check_cache_expired @@ -196,7 +219,7 @@ class GeneralExtractor(Generic[E, T]): logging.debug( f"Skip info for element as already extracted and cache valid: {object.title!r}" ) - return object + return ChangedReport.StayedSame data = self._extract_online(object.uri) logging.debug(f"Updating info for media: {data!r}") return self._update_object(object, data) @@ -207,7 +230,8 @@ class GeneralExtractor(Generic[E, T]): if object is None: logging.debug(f"Store info for object: {data!r}") object = self._create_object(data) - return self._update_object(object, data) + self._update_object(object, data) + return object def store_object(self, data: ExtractedDataOffline[T]) -> E: object = self._load_object(data) @@ -217,7 +241,8 @@ class GeneralExtractor(Generic[E, T]): full_data = self._extract_required(data) logging.debug(f"Store info for object: {full_data!r}") object = self._create_object(full_data) - return self._update_object(object, full_data) + self._update_object(object, full_data) + return object def extract_and_store(self, uri: str) -> E: object = self.check_uri(uri) diff --git a/server/entertainment_decider/extractors/media/tmdb.py b/server/entertainment_decider/extractors/media/tmdb.py index 804b8f4..de2dbfc 100644 --- a/server/entertainment_decider/extractors/media/tmdb.py +++ b/server/entertainment_decider/extractors/media/tmdb.py @@ -9,6 +9,7 @@ from pony import orm from ...models import MediaElement, MediaThumbnail, Query, Tag from ..all.tmdb import TmdbMovieData, TMDB_REGEX_URI from ..generic import ( + ChangedReport, ExtractedDataOnline, ExtractedDataOffline, ExtractionError, @@ -64,7 +65,11 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]): data=data, ) - def _update_object_raw(self, object: MediaElement, data: TmdbMovieData) -> None: + def _update_object_raw( + self, + object: MediaElement, + data: TmdbMovieData, + ) -> ChangedReport: # sanity check if not data.was_released: raise ExtractionError( @@ -91,3 +96,4 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]): ) if len(tag_list) == 1: object.tag_list.add(tag_list[0]) + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/media/tvmaze.py b/server/entertainment_decider/extractors/media/tvmaze.py index 8be6bf4..22f83be 100644 --- a/server/entertainment_decider/extractors/media/tvmaze.py +++ b/server/entertainment_decider/extractors/media/tvmaze.py @@ -9,6 +9,7 @@ import requests from ...models import MediaElement, MediaThumbnail from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, select_best_image from ..generic import ( + ChangedReport, ExtractedDataOnline, ExtractedDataOffline, ExtractionError, @@ -92,7 +93,7 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]): self, object: MediaElement, data: TvmazeEpisodeEmbedded, - ) -> None: + ) -> ChangedReport: # sanity check airstamp = data.get("airstamp") if airstamp is None: # not released yet @@ -124,3 +125,4 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]): self.__get_episode_custom_uri(data["id"]), ) ) + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/media/youtube.py b/server/entertainment_decider/extractors/media/youtube.py index e6a15ec..4101b5d 100644 --- a/server/entertainment_decider/extractors/media/youtube.py +++ b/server/entertainment_decider/extractors/media/youtube.py @@ -14,6 +14,7 @@ from ...models import ( ) from ..generic import ( AuthorExtractedData, + ChangedReport, ExtractedDataOnline, ExtractionError, SuitableLevel, @@ -113,7 +114,11 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]): data=vid_data, ) - def _update_object_raw(self, object: MediaElement, data: YoutubeVideoData) -> None: + def _update_object_raw( + self, + object: MediaElement, + data: YoutubeVideoData, + ) -> ChangedReport: object.title = f"{data['title']} - {data['channel']['name']}" object.description = data.get("description") if data.get("thumbnails"): @@ -133,3 +138,4 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]): f"https://youtube.com/watch?v={data['id']}", ) ) + return ChangedReport.ChangedSome # TODO improve diff --git a/server/entertainment_decider/extractors/media/ytdl.py b/server/entertainment_decider/extractors/media/ytdl.py index 395633f..ebf0648 100644 --- a/server/entertainment_decider/extractors/media/ytdl.py +++ b/server/entertainment_decider/extractors/media/ytdl.py @@ -14,6 +14,7 @@ from ...models import ( from ..all.ytdl import get_video_info, YtdlErrorException from ..generic import ( AuthorExtractedData, + ChangedReport, ExtractedDataOnline, ExtractionError, SuitableLevel, @@ -70,7 +71,7 @@ class YtdlMediaExtractor(MediaExtractor[Dict]): data=vid_data, ) - def _update_object_raw(self, object: MediaElement, data: Dict) -> None: + def _update_object_raw(self, object: MediaElement, data: Dict) -> ChangedReport: object.title = ( f"{data['title']} - {data['uploader']}" if "uploader" in data @@ -96,3 +97,4 @@ class YtdlMediaExtractor(MediaExtractor[Dict]): object.thumbnail = MediaThumbnail.from_uri(data["thumbnail"]) object.release_date = datetime.strptime(data["upload_date"], "%Y%m%d") object.length = int(data["duration"]) + return ChangedReport.ChangedSome # TODO improve