From 86520ae30e68f5b1e95e9191d1d5d554541915d4 Mon Sep 17 00:00:00 2001 From: Felix Stupp Date: Fri, 8 Oct 2021 14:25:20 +0200 Subject: [PATCH] Added search for suitable extractor automatically --- .../extractors/collection/__init__.py | 5 ++-- .../extractors/collection/tt_rss.py | 10 +++++++- .../extractors/collection/youtube.py | 5 +++- .../extractors/generic.py | 20 +++++++++++++-- .../extractors/helpers.py | 25 +++++++++++++++++++ .../extractors/media/__init__.py | 5 ++-- .../extractors/media/ytdl.py | 5 +++- 7 files changed, 66 insertions(+), 9 deletions(-) create mode 100644 server/entertainment_decider/extractors/helpers.py diff --git a/server/entertainment_decider/extractors/collection/__init__.py b/server/entertainment_decider/extractors/collection/__init__.py index 1b28dc9..7d11f4d 100644 --- a/server/entertainment_decider/extractors/collection/__init__.py +++ b/server/entertainment_decider/extractors/collection/__init__.py @@ -4,6 +4,7 @@ from typing import Dict from ...config import app_config from ...models import MediaCollection +from ..helpers import expect_suitable_extractor from .base import CollectionExtractor from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter from .youtube import YouTubeCollectionExtractor @@ -15,9 +16,9 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = { "youtube": YouTubeCollectionExtractor(), } -def collection_extract_uri(extractor_name: str, uri: str) -> MediaCollection: +def collection_extract_uri(uri: str) -> MediaCollection: elem: MediaCollection = CollectionExtractor.check_uri(uri) - ex = COLLECTION_EXTRACTORS[extractor_name] + ex = expect_suitable_extractor(COLLECTION_EXTRACTORS.values(), uri) if not elem: elem = ex.extract_and_store(uri) else: diff --git a/server/entertainment_decider/extractors/collection/tt_rss.py b/server/entertainment_decider/extractors/collection/tt_rss.py index cc2bb3f..fd3a3bd 100644 --- a/server/entertainment_decider/extractors/collection/tt_rss.py +++ 
b/server/entertainment_decider/extractors/collection/tt_rss.py @@ -14,7 +14,7 @@ from tinytinypy import Connection from tinytinypy.main import Headline from ...models import MediaCollection -from ..generic import ExtractedData, ExtractionError +from ..generic import ExtractedData, ExtractionError, SuitableLevel from .base import CollectionExtractor @@ -85,6 +85,11 @@ class TtRssUri: id: Optional[str] options: Dict[str, str] + @classmethod + def uri_suitable(cls, uri: str) -> bool: + parts = url.urlparse(uri) + return parts.scheme == cls.scheme + @classmethod def from_str_uri(cls, uri: str) -> "TtRssUri": parts = url.urlparse(uri, scheme=cls.scheme) @@ -124,6 +129,9 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]): def __decode_uri(self, uri: str) -> TtRssUri: return TtRssUri.from_str_uri(uri) + def uri_suitable(self, uri: str) -> SuitableLevel: + return SuitableLevel.ALWAYS if TtRssUri.uri_suitable(uri) else SuitableLevel.NO + def can_extract_offline(self, uri: str) -> bool: return True diff --git a/server/entertainment_decider/extractors/collection/youtube.py b/server/entertainment_decider/extractors/collection/youtube.py index 37b8ec3..15e6bce 100644 --- a/server/entertainment_decider/extractors/collection/youtube.py +++ b/server/entertainment_decider/extractors/collection/youtube.py @@ -9,7 +9,7 @@ from pony import orm # TODO remove import youtubesearchpython from ...models import MediaCollection -from ..generic import ExtractedData, ExtractionError +from ..generic import ExtractedData, ExtractionError, SuitableLevel from .base import CollectionExtractor @@ -45,6 +45,9 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]): def __init__(self): super().__init__("youtube") + def uri_suitable(self, uri: str) -> SuitableLevel: + return SuitableLevel.ALWAYS if self.__uri_regex.match(uri) else SuitableLevel.NO + def can_extract_offline(self, uri: str) -> bool: return True diff --git a/server/entertainment_decider/extractors/generic.py 
b/server/entertainment_decider/extractors/generic.py index 8f3f00c..816c75e 100644 --- a/server/entertainment_decider/extractors/generic.py +++ b/server/entertainment_decider/extractors/generic.py @@ -3,6 +3,7 @@ from __future__ import annotations import dataclasses from dataclasses import dataclass from datetime import datetime +from enum import Enum import logging from typing import Dict, Generic, Optional, TypeVar @@ -12,6 +13,21 @@ from ..models import MediaCollection, MediaElement T = TypeVar("T") +class SuitableLevel(Enum): + + NO = (False, False) + FALLBACK = (True, False) + ALWAYS = (True, True) + + @property + def can_accept(self): + return self.value[0] + + @property + def accept_immediately(self): + return self.value[1] + + class ExtractionError(Exception): pass @@ -84,8 +100,8 @@ class GeneralExtractor(Generic[E, T]): # abstract (for specific extractor classes) - #def uri_suitable(self, uri: str) -> bool: - # raise NotImplementedError() + def uri_suitable(self, uri: str) -> SuitableLevel: + raise NotImplementedError() def can_extract_offline(self, uri: str) -> bool: return False diff --git a/server/entertainment_decider/extractors/helpers.py b/server/entertainment_decider/extractors/helpers.py new file mode 100644 index 0000000..beb28dd --- /dev/null +++ b/server/entertainment_decider/extractors/helpers.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import Iterable, Optional, TypeVar + +from .generic import ExtractionError, GeneralExtractor + + +T = TypeVar("T", bound=GeneralExtractor) + + +def search_suitable_extractor(extractor_list: Iterable[T], uri: str) -> Optional[T]: + best_bet: Optional[T] = None + for extractor in extractor_list: + match = extractor.uri_suitable(uri) + if match.accept_immediately: + return extractor + if match.can_accept and best_bet is None: + best_bet = extractor + return best_bet + +def expect_suitable_extractor(extractor_list: Iterable[T], uri: str) -> T: + extractor = 
search_suitable_extractor(extractor_list, uri) + if extractor is None: + raise ExtractionError(f"No suitable extractor found for uri {uri!r}") + return extractor diff --git a/server/entertainment_decider/extractors/media/__init__.py b/server/entertainment_decider/extractors/media/__init__.py index 1b2036d..fe23407 100644 --- a/server/entertainment_decider/extractors/media/__init__.py +++ b/server/entertainment_decider/extractors/media/__init__.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Dict from ...models import MediaElement +from ..helpers import expect_suitable_extractor from .base import MediaExtractor from .ytdl import YtdlMediaExtractor @@ -11,8 +12,8 @@ MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = { "ytdl": YtdlMediaExtractor(), } -def media_extract_uri(extractor_name: str, uri: str) -> MediaElement: +def media_extract_uri(uri: str) -> MediaElement: elem: MediaElement = MediaExtractor.check_uri(uri) if not elem: - elem = MEDIA_EXTRACTORS[extractor_name].extract_and_store(uri) + elem = expect_suitable_extractor(MEDIA_EXTRACTORS.values(), uri).extract_and_store(uri) return elem diff --git a/server/entertainment_decider/extractors/media/ytdl.py b/server/entertainment_decider/extractors/media/ytdl.py index 6f14c1f..11be766 100644 --- a/server/entertainment_decider/extractors/media/ytdl.py +++ b/server/entertainment_decider/extractors/media/ytdl.py @@ -10,7 +10,7 @@ from jsoncache import ApplicationCache from ...common import call from ...models import MediaElement -from ..generic import AuthorExtractedData, ExtractedData, ExtractionError +from ..generic import AuthorExtractedData, ExtractedData, ExtractionError, SuitableLevel from .base import MediaExtractor @@ -54,6 +54,9 @@ class YtdlMediaExtractor(MediaExtractor[Dict]): def __init__(self): super().__init__("ytdl") + def uri_suitable(self, uri: str) -> SuitableLevel: + return SuitableLevel.FALLBACK + def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]: 
video_extractor_key = data.get("extractor_key") or data["ie_key"] author_key = data.get("channel_id") or data.get("uploader_id")