Improved change detection for updates

master
Felix Stupp 1 year ago
parent 5bbef2a5a1
commit eaa2bcbfe2
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

@ -790,17 +790,17 @@ def refresh_collections() -> ResponseReturnValue:
orm.select(c.id for c in MediaCollection if c.keep_updated)
)
errors = []
failed_colls = set[int]()
changed_colls = list[int]()
for coll_id in collection_ids:
coll = MediaCollection[coll_id]
try:
coll = MediaCollection[coll_id]
collection_update(coll)
change_state = collection_update(coll)
orm.commit()
if change_state.may_has_changed:
changed_colls.append(coll_id)
# TODO make Exception more specific
except Exception as e:
orm.rollback()
failed_colls.add(coll_id)
coll = MediaCollection[coll_id]
errors.append(
{
"collection": {
@ -813,9 +813,7 @@ def refresh_collections() -> ResponseReturnValue:
},
},
)
# TODO detect changed collections properly to speed up cache rebuild
# meaning check if collection really changed
update_element_lookup_cache(collection_ids - failed_colls)
update_element_lookup_cache(changed_colls)
if errors:
return (
{

@ -5,6 +5,7 @@ from typing import Dict, Tuple
from ...config import app_config
from ...models import MediaCollection
from ..generic import ChangedReport
from ..helpers import expect_suitable_extractor
from .base import CollectionExtractor
from .aggregated import AggregatedCollectionExtractor
@ -39,9 +40,9 @@ def collection_expect_extractor(uri: str) -> CollectionExtractor:
def collection_update(
collection: MediaCollection,
check_cache_expired: bool = True,
) -> None:
) -> ChangedReport:
ex = collection_expect_extractor(collection.uri)
ex.update_object(
return ex.update_object(
object=collection,
check_cache_expired=check_cache_expired,
)

@ -6,7 +6,12 @@ from typing import List, Set, TypeAlias
from pony import orm
from ...models import MediaCollection, MediaCollectionLink, MediaElement
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor
@ -72,7 +77,11 @@ class AggregatedCollectionExtractor(CollectionExtractor[DataType]):
],
)
def _update_object_raw(self, object: MediaCollection, data: DataType) -> None:
def _update_object_raw(
self,
object: MediaCollection,
data: DataType,
) -> ChangedReport:
if object.title is None or "[aggregated]" not in object.title:
object.title = f"[aggregated] {object.uri}"
object.creator = None
@ -91,3 +100,4 @@ class AggregatedCollectionExtractor(CollectionExtractor[DataType]):
orm.delete(link for link in object.media_links if link.element.id in all_links)
for uri_link in list(object.uris):
uri_link.delete()
return ChangedReport.ChangedSome # TODO improve

@ -12,7 +12,12 @@ from ..all.tmdb import (
TMDB_REGEX_URI,
TmdbKeywordData,
)
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor
@ -84,7 +89,7 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]):
self,
object: MediaCollection,
data: TmdbCollectionData,
) -> None:
) -> ChangedReport:
# extract data
object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}"
object.description = data.description or ""
@ -103,6 +108,7 @@ class TmdbCollectionExtractor(TmdbBaseExtractor[TmdbCollectionData]):
)
if element:
orm.commit()
return ChangedReport.ChangedSome # TODO improve
class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
@ -123,7 +129,7 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
self,
object: MediaCollection,
data: TmdbKeywordData,
) -> None:
) -> ChangedReport:
# extract data
object.title = f"[tmdb] [{self.TMDB_CLASS}] {data.title}"
object.release_date = data.release_date
@ -141,3 +147,4 @@ class TmdbKeywordExtractor(TmdbBaseExtractor[TmdbKeywordData]):
)
if element:
orm.commit()
return ChangedReport.ChangedSome # TODO improve

@ -8,7 +8,12 @@ from pony import orm # TODO remove
from ...models import MediaCollection
from ..all.tt_rss import HeadlineList, TtRssConnectionParameter, TtRssUri
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor
@ -66,7 +71,11 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
data=data,
)
def _update_object_raw(self, object: MediaCollection, data: HeadlineList) -> None:
def _update_object_raw(
self,
object: MediaCollection,
data: HeadlineList,
) -> ChangedReport:
if not object.title:
object.title = object.uri
object.creator = None
@ -83,3 +92,4 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
rss_uri.set_read(self.__params, readed_headlines)
if object.watch_in_order_auto:
object.watch_in_order = False # no order available
return ChangedReport.ChangedSome # TODO improve

@ -10,7 +10,12 @@ import requests
from ...models import MediaCollection, Tag
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, add_embedding
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor
@ -102,7 +107,7 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
self,
object: MediaCollection,
data: TvmazeShowEmbedded,
) -> None:
) -> ChangedReport:
object.title = f"[tvmaze] {data['name']}"
object.description = data.get("summary", "")
object.release_date = datetime.strptime(data["premiered"], "%Y-%m-%d")
@ -134,3 +139,4 @@ class TvmazeCollectionExtractor(CollectionExtractor[TvmazeShowEmbedded]):
season=episode["season"],
episode=episode["number"],
)
return ChangedReport.ChangedSome # TODO improve

@ -9,7 +9,12 @@ from pony import orm # TODO remove
import youtubesearchpython
from ...models import MediaCollection
from ..generic import ExtractedDataOnline, ExtractedDataOffline, SuitableLevel
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
SuitableLevel,
)
from .base import CollectionExtractor
@ -102,7 +107,11 @@ class YouTubeCollectionExtractor(CollectionExtractor[DataType]):
},
)
def _update_object_raw(self, object: MediaCollection, data: DataType) -> None:
def _update_object_raw(
self,
object: MediaCollection,
data: DataType,
) -> ChangedReport:
info = data["info"]
is_channel = self.__is_channel_id(info["id"])
object.title = (
@ -140,3 +149,4 @@ class YouTubeCollectionExtractor(CollectionExtractor[DataType]):
f"https://www.youtube.com/channel/{info['channel']['id']}"
)
)
return ChangedReport.ChangedSome # TODO improve

@ -3,6 +3,7 @@ from __future__ import annotations
import dataclasses
from dataclasses import dataclass
from datetime import datetime
import enum
from enum import Enum
import logging
from typing import Generic, Optional, TypeVar
@ -13,6 +14,24 @@ from ..models import MediaCollection, MediaElement
T = TypeVar("T")
class ChangedReport(Enum):
    """Outcome of an update action: whether it may have modified anything."""

    StayedSame = enum.auto()
    """Declares that the action did not change anything.

    This requires that really nothing changed. If unsure, use ChangedSome.
    """

    ChangedSome = enum.auto()
    """Declares that something might have changed.

    It is not required that something really changed; this can also mean
    that there is currently no better way to determine whether something
    really changed.
    """

    @property
    def may_has_changed(self) -> bool:
        """Return ``True`` for every report except ``StayedSame``."""
        # Enum members are singletons, so an identity check is sufficient.
        # The member is looked up on the class rather than through ``self``
        # to avoid relying on member-from-member attribute access.
        return self is not ChangedReport.StayedSame
class SuitableLevel(Enum):
NO = (False, False)
@ -158,7 +177,7 @@ class GeneralExtractor(Generic[E, T]):
def _extract_online(self, uri: str) -> ExtractedDataOnline[T]:
raise NotImplementedError()
def _update_object_raw(self, object: E, data: T) -> None:
def _update_object_raw(self, object: E, data: T) -> ChangedReport:
raise NotImplementedError()
def _update_hook(self, object: E, data: ExtractedDataOnline[T]) -> None:
@ -180,14 +199,18 @@ class GeneralExtractor(Generic[E, T]):
return data.online_type
return self._extract_online(data.object_uri)
def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> E:
def _update_object(self, object: E, data: ExtractedDataOnline[T]) -> ChangedReport:
object.uri = data.object_uri
self._update_object_raw(object, data.data)
self._update_hook(object, data)
object.last_updated = datetime.now()
return object
return ChangedReport.ChangedSome # TODO improve
def update_object(self, object: E, check_cache_expired: bool = True) -> E:
def update_object(
self,
object: E,
check_cache_expired: bool = True,
) -> ChangedReport:
if (
object.was_extracted
and check_cache_expired
@ -196,7 +219,7 @@ class GeneralExtractor(Generic[E, T]):
logging.debug(
f"Skip info for element as already extracted and cache valid: {object.title!r}"
)
return object
return ChangedReport.StayedSame
data = self._extract_online(object.uri)
logging.debug(f"Updating info for media: {data!r}")
return self._update_object(object, data)
@ -207,7 +230,8 @@ class GeneralExtractor(Generic[E, T]):
if object is None:
logging.debug(f"Store info for object: {data!r}")
object = self._create_object(data)
return self._update_object(object, data)
self._update_object(object, data)
return object
def store_object(self, data: ExtractedDataOffline[T]) -> E:
object = self._load_object(data)
@ -217,7 +241,8 @@ class GeneralExtractor(Generic[E, T]):
full_data = self._extract_required(data)
logging.debug(f"Store info for object: {full_data!r}")
object = self._create_object(full_data)
return self._update_object(object, full_data)
self._update_object(object, full_data)
return object
def extract_and_store(self, uri: str) -> E:
object = self.check_uri(uri)

@ -9,6 +9,7 @@ from pony import orm
from ...models import MediaElement, MediaThumbnail, Query, Tag
from ..all.tmdb import TmdbMovieData, TMDB_REGEX_URI
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
ExtractionError,
@ -64,7 +65,11 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]):
data=data,
)
def _update_object_raw(self, object: MediaElement, data: TmdbMovieData) -> None:
def _update_object_raw(
self,
object: MediaElement,
data: TmdbMovieData,
) -> ChangedReport:
# sanity check
if not data.was_released:
raise ExtractionError(
@ -91,3 +96,4 @@ class TmdbMovieMediaExtractor(MediaExtractor[TmdbMovieData]):
)
if len(tag_list) == 1:
object.tag_list.add(tag_list[0])
return ChangedReport.ChangedSome # TODO improve

@ -9,6 +9,7 @@ import requests
from ...models import MediaElement, MediaThumbnail
from ..all.tvmaze import TvmazeEpisodeEmbedded, TvmazeShowEmbedded, select_best_image
from ..generic import (
ChangedReport,
ExtractedDataOnline,
ExtractedDataOffline,
ExtractionError,
@ -92,7 +93,7 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
self,
object: MediaElement,
data: TvmazeEpisodeEmbedded,
) -> None:
) -> ChangedReport:
# sanity check
airstamp = data.get("airstamp")
if airstamp is None: # not released yet
@ -124,3 +125,4 @@ class TvmazeMediaExtractor(MediaExtractor[TvmazeEpisodeEmbedded]):
self.__get_episode_custom_uri(data["id"]),
)
)
return ChangedReport.ChangedSome # TODO improve

@ -14,6 +14,7 @@ from ...models import (
)
from ..generic import (
AuthorExtractedData,
ChangedReport,
ExtractedDataOnline,
ExtractionError,
SuitableLevel,
@ -113,7 +114,11 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
data=vid_data,
)
def _update_object_raw(self, object: MediaElement, data: YoutubeVideoData) -> None:
def _update_object_raw(
self,
object: MediaElement,
data: YoutubeVideoData,
) -> ChangedReport:
object.title = f"{data['title']} - {data['channel']['name']}"
object.description = data.get("description")
if data.get("thumbnails"):
@ -133,3 +138,4 @@ class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
f"https://youtube.com/watch?v={data['id']}",
)
)
return ChangedReport.ChangedSome # TODO improve

@ -14,6 +14,7 @@ from ...models import (
from ..all.ytdl import get_video_info, YtdlErrorException
from ..generic import (
AuthorExtractedData,
ChangedReport,
ExtractedDataOnline,
ExtractionError,
SuitableLevel,
@ -70,7 +71,7 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
data=vid_data,
)
def _update_object_raw(self, object: MediaElement, data: Dict) -> None:
def _update_object_raw(self, object: MediaElement, data: Dict) -> ChangedReport:
object.title = (
f"{data['title']} - {data['uploader']}"
if "uploader" in data
@ -96,3 +97,4 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
object.thumbnail = MediaThumbnail.from_uri(data["thumbnail"])
object.release_date = datetime.strptime(data["upload_date"], "%Y%m%d")
object.length = int(data["duration"])
return ChangedReport.ChangedSome # TODO improve

Loading…
Cancel
Save