Improved change detection for updates

master
Felix Stupp 2 years ago
parent 5bbef2a5a1
commit eaa2bcbfe2
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

@ -790,17 +790,17 @@ def refresh_collections() -> ResponseReturnValue:
orm.select(c.id for c in MediaCollection if c.keep_updated) orm.select(c.id for c in MediaCollection if c.keep_updated)
) )
errors = [] errors = []
failed_colls = set[int]() changed_colls = list[int]()
for coll_id in collection_ids: for coll_id in collection_ids:
coll = MediaCollection[coll_id]
try: try:
coll = MediaCollection[coll_id] change_state = collection_update(coll)
collection_update(coll)
orm.commit() orm.commit()
if change_state.may_has_changed:
changed_colls.append(coll_id)
# TODO make Exception more specific # TODO make Exception more specific
except Exception as e: except Exception as e:
orm.rollback() orm.rollback()
failed_colls.add(coll_id)
coll = MediaCollection[coll_id]
errors.append( errors.append(
{ {
"collection": { "collection": {
@ -813,9 +813,7 @@ def refresh_collections() -> ResponseReturnValue:
}, },
}, },
) )
# TODO detect changed collections properly to speed up cache rebuild update_element_lookup_cache(changed_colls)
# meaning check if collection really changed
update_element_lookup_cache(collection_ids - failed_colls)
if errors: if errors:
return ( return (
{ {

@ -5,6 +5,7 @@ from typing import Dict, Tuple
from ...config import app_config from ...config import app_config
from ...models import MediaCollection from ...models import MediaCollection
from ..generic import ChangedReport
from ..helpers import expect_suitable_extractor from ..helpers import expect_suitable_extractor
from .base import CollectionExtractor from .base import CollectionExtractor
from .aggregated import AggregatedCollectionExtractor from .aggregated import AggregatedCollectionExtractor
@ -39,9 +40,9 @@ def collection_expect_extractor(uri: str) -> CollectionExtractor:
def collection_update( def collection_update(
collection: MediaCollection, collection: MediaCollection,
check_cache_expired: bool = True, check_cache_expired: bool = True,
) -> None: ) -> ChangedReport:
ex = collection_expect_extractor(collection.uri) ex = collection_expect_extractor(collection.uri)
ex.update_object( return ex.update_object(
object=collection, object=collection,
check_cache_expired=check_cache_expired, check_cache_expired=check_cache_expired,
) )

@ -6,7 +6,12 @@ from typing import List, Set, TypeAlias
from pony import orm from pony import orm
from ...models import MediaCollection, MediaCollectionLink, MediaElement from ...models import MediaCollection, MediaCollectionLink, MediaElement
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor from .base import CollectionExtractor
@ -72,7 +77,11 @@ class AggregatedCollectionExtractor(CollectionExtractor[DataType]):
], ],
) )
def _update_object_raw(self, object: MediaCollection, data: DataType) -> None: def _update_object_raw(
self,
object: MediaCollection,
data: DataType,
) -> ChangedReport:
if object.title is None or "[aggregated]" not in object.title: if object.title is None or "[aggregated]" not in object.title:
object.title = f"[aggregated] {object.uri}" object.title = f"[aggregated] {object.uri}"
object.creator = None object.creator = None
@ -91,3 +100,4 @@ class AggregatedCollectionExtractor(CollectionExtractor[DataType]):
orm.delete(link for link in object.media_links if link.element.id in all_links) orm.delete(link for link in object.media_links if link.element.id in all_links)
for uri_link in list(object.uris): for uri_link in list(object.uris):
uri_link.delete() uri_link.delete()
return ChangedReport.ChangedSome # TODO improve

@ -12,7 +12,12 @@ from ..all.tmdb import (
TMDB_REGEX_URI, TMDB_REGEX_URI,
TmdbKeywordData, TmdbKeywordData,
) )
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor from .base import CollectionExtractor
@ -84,7 +89,7 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]):
self, self,
object: MediaCollection, object: MediaCollection,
data: TmdbCollectionData, data: TmdbCollectionData,
) -> None: ) -> ChangedReport:
# extract data # extract data
object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}" object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}"
object.description = data.description or "" object.description = data.description or ""
@ -103,6 +108,7 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]):
) )
if element: if element:
orm.commit() orm.commit()
return ChangedReport.ChangedSome # TODO improve
class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]): class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
@ -123,7 +129,7 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
self, self,
object: MediaCollection, object: MediaCollection,
data: TmdbKeywordData, data: TmdbKeywordData,
) -> None: ) -> ChangedReport:
# extract data # extract data
object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}" object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}"
object.release_date = data.release_date object.release_date = data.release_date
@ -141,3 +147,4 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
) )
if element: if element:
orm.commit() orm.commit()
return ChangedReport.ChangedSome # TODO improve

@ -8,7 +8,12 @@ from pony import orm # TODO remove
from ...models import MediaCollection from ...models import MediaCollection
from ..all.tt_rss import HeadlineList, TtRssConnectionParameter, TtRssUri from ..all.tt_rss import HeadlineList, TtRssConnectionParameter, TtRssUri
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor from .base import CollectionExtractor
@ -66,7 +71,11 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
data=data, data=data,
) )
def _update_object_raw(self, object: MediaCollection, data: HeadlineList) -> None: def _update_object_raw(
self,
object: MediaCollection,
data: HeadlineList,
) -> ChangedReport:
if not object.title: if not object.title:
object.title = object.uri object.title = object.uri
object.creator = None object.creator = None
@ -83,3 +92,4 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
rss_uri.set_read(self.__params, readed_headlines) rss_uri.set_read(self.__params, readed_headlines)
if object.watch_in_order_auto: if object.watch_in_order_auto:
object.watch_in_order = False # no order available object.watch_in_order = False # no order available
return ChangedReport.ChangedSome # TODO improve

@ -10,7 +10,12 @@ import requests
from ...models import MediaCollection, Tag from ...models import MediaCollection, Tag
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor from .base import CollectionExtractor
@ -102,7 +107,7 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
self, self,
object: MediaCollection, object: MediaCollection,
data: TvmazeShowEmbedded, data: TvmazeShowEmbedded,
) -> None: ) -> ChangedReport:
object.title = f"[tvmaze] {data['name']}" object.title = f"[tvmaze] {data['name']}"
object.description = data.get("summary", "") object.description = data.get("summary", "")
object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d") object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d")
@ -134,3 +139,4 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
season=episode["season"], season=episode["season"],
episode=episode["number"], episode=episode["number"],
) )
return ChangedReport.ChangedSome # TODO improve

@ -9,7 +9,12 @@ from pony import orm # TODO remove
import youtubesearchpython import youtubesearchpython
from ...models import MediaCollection from ...models import MediaCollection
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor from .base import CollectionExtractor
@ -102,7 +107,11 @@ class YouTubeCollectionExtractor(CollectionExtractor[DataType]):
}, },
) )
def _update_object_raw(self, object: MediaCollection, data: DataType) -> None: def _update_object_raw(
self,
object: MediaCollection,
data: DataType,
) -> ChangedReport:
info = data["info"] info = data["info"]
is_channel = self.__is_channel_id(info["id"]) is_channel = self.__is_channel_id(info["id"])
object.title = ( object.title = (
@ -140,3 +149,4 @@ class YouTubeCollectionExtractor(CollectionExtractor[DataType]):
f"https://www.youtube.com/channel/{info['channel']['id']}" f"https://www.youtube.com/channel/{info['channel']['id']}"
) )
) )
return ChangedReport.ChangedSome # TODO improve

@ -3,6 +3,7 @@ from __future__ import annotations
import dataclasses import dataclasses
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
import enum
from enum import Enum from enum import Enum
import logging import logging
from typing import Generic, Optional, TypeVar from typing import Generic, Optional, TypeVar
@ -13,6 +14,24 @@ from ..models import MediaCollection, MediaElement
T = TypeVar("T") T = TypeVar("T")
class ChangedReport(Enum):
    """Report whether an update action may have changed its target object."""

    StayedSame = enum.auto()
    """Declares that the action did not change anything.

    This requires that really nothing changed. If unsure, use ChangedSome.
    """

    ChangedSome = enum.auto()
    """Declares that something might have changed.

    It is not required that something really changed; this can also mean
    that there is currently no better way to determine whether something
    really changed.
    """

    @property
    def may_has_changed(self) -> bool:
        """Return ``True`` for every report except ``StayedSame``."""
        # Enum members are singletons, so compare by identity, and look the
        # member up on the class instead of through the instance.
        return self is not ChangedReport.StayedSame
class SuitableLevel(Enum): class SuitableLevel(Enum):
NO = (False, False) NO = (False, False)
@ -158,7 +177,7 @@ class GeneralExtractor(Generic[E, T]):
def _extract_online(self, uri: str) -> ExtractedDataOnline[T]: def _extract_online(self, uri: str) -> ExtractedDataOnline[T]:
raise NotImplementedError() raise NotImplementedError()
def _update_object_raw(self, object: E, data: T) -> None: def _update_object_raw(self, object: E, data: T) -> ChangedReport:
raise NotImplementedError() raise NotImplementedError()
def _update_hook(self, object: E, data: ExtractedDataOnline[T]) -> None: def _update_hook(self, object: E, data: ExtractedDataOnline[T]) -> None:
@ -180,14 +199,18 @@ class GeneralExtractor(Generic[E, T]):
return data.online_type return data.online_type
return self._extract_online(data.object_uri) return self._extract_online(data.object_uri)
def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> E: def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> ChangedReport:
object.uri = data.object_uri object.uri = data.object_uri
self._update_object_raw(object, data.data) self._update_object_raw(object, data.data)
self._update_hook(object, data) self._update_hook(object, data)
object.last_updated = datetime.now() object.last_updated = datetime.now()
return object return ChangedReport.ChangedSome # TODO improve
def update_object(self, object: E, check_cache_expired: bool = True) -> E: def update_object(
self,
object: E,
check_cache_expired: bool = True,
) -> ChangedReport:
if ( if (
object.was_extracted object.was_extracted
and check_cache_expired and check_cache_expired
@ -196,7 +219,7 @@ class GeneralExtractor(Generic[E, T]):
logging.debug( logging.debug(
f"Skip info for element as already extracted and cache valid: {object.title!r}" f"Skip info for element as already extracted and cache valid: {object.title!r}"
) )
return object return ChangedReport.StayedSame
data = self._extract_online(object.uri) data = self._extract_online(object.uri)
logging.debug(f"Updating info for media: {data!r}") logging.debug(f"Updating info for media: {data!r}")
return self._update_object(object, data) return self._update_object(object, data)
@ -207,7 +230,8 @@ class GeneralExtractor(Generic[E, T]):
if object is None: if object is None:
logging.debug(f"Store info for object: {data!r}") logging.debug(f"Store info for object: {data!r}")
object = self._create_object(data) object = self._create_object(data)
return self._update_object(object, data) self._update_object(object, data)
return object
def store_object(self, data: ExtractedDataOffline[T]) -> E: def store_object(self, data: ExtractedDataOffline[T]) -> E:
object = self._load_object(data) object = self._load_object(data)
@ -217,7 +241,8 @@ class GeneralExtractor(Generic[E, T]):
full_data = self._extract_required(data) full_data = self._extract_required(data)
logging.debug(f"Store info for object: {full_data!r}") logging.debug(f"Store info for object: {full_data!r}")
object = self._create_object(full_data) object = self._create_object(full_data)
return self._update_object(object, full_data) self._update_object(object, full_data)
return object
def extract_and_store(self, uri: str) -> E: def extract_and_store(self, uri: str) -> E:
object = self.check_uri(uri) object = self.check_uri(uri)

@ -9,6 +9,7 @@ from pony import orm
from ...models import MediaElement, MediaThumbnail, Query, Tag from ...models import MediaElement, MediaThumbnail, Query, Tag
from ..all.tmdb import TmdbMovieData, TMDB_REGEX_URI from ..all.tmdb import TmdbMovieData, TMDB_REGEX_URI
from ..generic import ( from ..generic import (
ChangedReport,
ExtractedDataOnline, ExtractedDataOnline,
ExtractedDataOffline, ExtractedDataOffline,
ExtractionError, ExtractionError,
@ -64,7 +65,11 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]):
data=data, data=data,
) )
def _update_object_raw(self, object: MediaElement, data: TmdbMovieData) -> None: def _update_object_raw(
self,
object: MediaElement,
data: TmdbMovieData,
) -> ChangedReport:
# sanity check # sanity check
if not data.was_released: if not data.was_released:
raise ExtractionError( raise ExtractionError(
@ -91,3 +96,4 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]):
) )
if len(tag_list) == 1: if len(tag_list) == 1:
object.tag_list.add(tag_list[0]) object.tag_list.add(tag_list[0])
return ChangedReport.ChangedSome # TODO improve

@ -9,6 +9,7 @@ import requests
from ...models import MediaElement, MediaThumbnail from ...models import MediaElement, MediaThumbnail
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, select_best_image from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, select_best_image
from ..generic import ( from ..generic import (
ChangedReport,
ExtractedDataOnline, ExtractedDataOnline,
ExtractedDataOffline, ExtractedDataOffline,
ExtractionError, ExtractionError,
@ -92,7 +93,7 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
self, self,
object: MediaElement, object: MediaElement,
data: TvmazeEpisodeEmbedded, data: TvmazeEpisodeEmbedded,
) -> None: ) -> ChangedReport:
# sanity check # sanity check
airstamp = data.get("airstamp") airstamp = data.get("airstamp")
if airstamp is None: # not released yet if airstamp is None: # not released yet
@ -124,3 +125,4 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
self.__get_episode_custom_uri(data["id"]), self.__get_episode_custom_uri(data["id"]),
) )
) )
return ChangedReport.ChangedSome # TODO improve

@ -14,6 +14,7 @@ from ...models import (
) )
from ..generic import ( from ..generic import (
AuthorExtractedData, AuthorExtractedData,
ChangedReport,
ExtractedDataOnline, ExtractedDataOnline,
ExtractionError, ExtractionError,
SuitableLevel, SuitableLevel,
@ -113,7 +114,11 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
data=vid_data, data=vid_data,
) )
def _update_object_raw(self, object: MediaElement, data: YoutubeVideoData) -> None: def _update_object_raw(
self,
object: MediaElement,
data: YoutubeVideoData,
) -> ChangedReport:
object.title = f"{data['title']} - {data['channel']['name']}" object.title = f"{data['title']} - {data['channel']['name']}"
object.description = data.get("description") object.description = data.get("description")
if data.get("thumbnails"): if data.get("thumbnails"):
@ -133,3 +138,4 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
f"https://youtube.com/watch?v={data['id']}", f"https://youtube.com/watch?v={data['id']}",
) )
) )
return ChangedReport.ChangedSome # TODO improve

@ -14,6 +14,7 @@ from ...models import (
from ..all.ytdl import get_video_info, YtdlErrorException from ..all.ytdl import get_video_info, YtdlErrorException
from ..generic import ( from ..generic import (
AuthorExtractedData, AuthorExtractedData,
ChangedReport,
ExtractedDataOnline, ExtractedDataOnline,
ExtractionError, ExtractionError,
SuitableLevel, SuitableLevel,
@ -70,7 +71,7 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
data=vid_data, data=vid_data,
) )
def _update_object_raw(self, object: MediaElement, data: Dict) -> None: def _update_object_raw(self, object: MediaElement, data: Dict) -> ChangedReport:
object.title = ( object.title = (
f"{data['title']} - {data['uploader']}" f"{data['title']} - {data['uploader']}"
if "uploader" in data if "uploader" in data
@ -96,3 +97,4 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
object.thumbnail = MediaThumbnail.from_uri(data["thumbnail"]) object.thumbnail = MediaThumbnail.from_uri(data["thumbnail"])
object.release_date = datetime.strptime(data["upload_date"], "%Y%m%d") object.release_date = datetime.strptime(data["upload_date"], "%Y%m%d")
object.length = int(data["duration"]) object.length = int(data["duration"])
return ChangedReport.ChangedSome # TODO improve

Loading…
Cancel
Save