Added automatic search for a suitable extractor

3 years ago · 86520ae30e
parent beaf99e539
commit 86520ae30e
7 changed files with 66 additions and 9 deletions
--- a/server/entertainment_decider/extractors/collection/__init__.py
+++ b/server/entertainment_decider/extractors/collection/__init__.py
@ -4,6 +4,7 @@ from typing import Dict
 from ...config import app_config
 from ...models import MediaCollection
 from ..helpers import expect_suitable_extractor
 from .base import CollectionExtractor
 from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
 from .youtube import YouTubeCollectionExtractor
@ -15,9 +16,9 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
    "youtube": YouTubeCollectionExtractor(),
 }
-def collection_extract_uri(extractor_name: str, uri: str) -> MediaCollection:
+def collection_extract_uri(uri: str) -> MediaCollection:
    elem: MediaCollection = CollectionExtractor.check_uri(uri)
-    ex = COLLECTION_EXTRACTORS[extractor_name]
+    ex = expect_suitable_extractor(COLLECTION_EXTRACTORS, uri)
    if not elem:
        elem = ex.extract_and_store(uri)
    else:
--- a/server/entertainment_decider/extractors/collection/tt_rss.py
+++ b/server/entertainment_decider/extractors/collection/tt_rss.py
@ -14,7 +14,7 @@ from tinytinypy import Connection
 from tinytinypy.main import Headline
 from ...models import MediaCollection
-from ..generic import ExtractedData, ExtractionError
+from ..generic import ExtractedData, ExtractionError, SuitableLevel
 from .base import CollectionExtractor
@ -85,6 +85,11 @@ class TtRssUri:
    id: Optional[str]
    options: Dict[str, str]
    @classmethod
    def uri_suitable(cls, uri: str) -> bool:
        """Tell whether ``uri`` carries this class's scheme.

        Only the ``scheme`` attribute of ``url.urlparse(uri)`` is compared
        against ``cls.scheme``; no other part of the uri is inspected.
        """
        return url.urlparse(uri).scheme == cls.scheme
    @classmethod
    def from_str_uri(cls, uri: str) -> "TtRssUri":
        parts = url.urlparse(uri, scheme=cls.scheme)
@ -124,6 +129,9 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
    def __decode_uri(self, uri: str) -> TtRssUri:
        """Turn a uri string into a ``TtRssUri`` by delegating to ``TtRssUri.from_str_uri``."""
        return TtRssUri.from_str_uri(uri)
    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Rate how well this extractor fits ``uri``.

        Returns ``SuitableLevel.ALWAYS`` when ``TtRssUri.uri_suitable`` accepts
        the uri and ``SuitableLevel.NO`` otherwise.
        """
        if TtRssUri.uri_suitable(uri):
            return SuitableLevel.ALWAYS
        return SuitableLevel.NO
    def can_extract_offline(self, uri: str) -> bool:
        """Always answer ``True``; the ``uri`` argument is not consulted."""
        return True
--- a/server/entertainment_decider/extractors/collection/youtube.py
+++ b/server/entertainment_decider/extractors/collection/youtube.py
@ -9,7 +9,7 @@ from pony import orm # TODO remove
 import youtubesearchpython
 from ...models import MediaCollection
-from ..generic import ExtractedData, ExtractionError
+from ..generic import ExtractedData, ExtractionError, SuitableLevel
 from .base import CollectionExtractor
@ -45,6 +45,9 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
    def __init__(self):
        """Initialise the base class with the fixed name ``"youtube"``."""
        super().__init__("youtube")
    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Rate how well this extractor fits ``uri``.

        Returns ``SuitableLevel.ALWAYS`` when the extractor's private uri regex
        matches at the start of ``uri`` and ``SuitableLevel.NO`` otherwise.
        """
        if self.__uri_regex.match(uri):
            return SuitableLevel.ALWAYS
        return SuitableLevel.NO
    def can_extract_offline(self, uri: str) -> bool:
        """Always answer ``True``; the ``uri`` argument is not consulted."""
        return True
--- a/server/entertainment_decider/extractors/generic.py
+++ b/server/entertainment_decider/extractors/generic.py
@ -3,6 +3,7 @@ from __future__ import annotations
 import dataclasses
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
 import logging
 from typing import Dict, Generic, Optional, TypeVar
@ -12,6 +13,21 @@ from ..models import MediaCollection, MediaElement
 T = TypeVar("T")
 class SuitableLevel(Enum):
    """Answer levels an extractor can give when asked about a uri.

    Every member's value is a ``(can_accept, accept_immediately)`` pair of
    booleans, exposed through the two read-only properties below.
    """

    # (can_accept, accept_immediately)
    NO = (False, False)
    FALLBACK = (True, False)
    ALWAYS = (True, True)

    @property
    def can_accept(self) -> bool:
        """First flag of the member's value pair."""
        accept, _ = self.value
        return accept

    @property
    def accept_immediately(self) -> bool:
        """Second flag of the member's value pair."""
        _, immediately = self.value
        return immediately
 class ExtractionError(Exception):
    """Exception type for extraction failures; adds nothing to ``Exception``."""
@ -84,8 +100,8 @@ class GeneralExtractor(Generic[E, T]):
    # abstract (for specific extractor classes)
-    #def uri_suitable(self, uri: str) -> bool:
+    def uri_suitable(self, uri: str) -> SuitableLevel:
-    #    raise NotImplementedError()
+        raise NotImplementedError()
    def can_extract_offline(self, uri: str) -> bool:
        """Always answer ``False`` here; the ``uri`` argument is not consulted."""
        return False
--- a/server/entertainment_decider/extractors/helpers.py
+++ b/server/entertainment_decider/extractors/helpers.py
@ -0,0 +1,25 @@
 from __future__ import annotations
 from typing import Iterable, Optional, TypeVar
 from .generic import ExtractionError, GeneralExtractor
 T = TypeVar("T", bound=GeneralExtractor)
 def search_suitable_extractor(extractor_list: Iterable[T], uri: str) -> Optional[T]:
    """Pick the extractor that should handle ``uri``, if any.

    Every candidate is asked via ``uri_suitable(uri)``. The first candidate
    whose answer has ``accept_immediately`` set wins outright. Otherwise the
    first candidate whose answer has ``can_accept`` set is returned.

    ``extractor_list`` may be any iterable of extractors, or a mapping whose
    values are extractors (for example a name -> extractor registry). For a
    mapping the values are searched, because iterating the mapping itself
    yields its keys, which have no ``uri_suitable`` method.

    Returns ``None`` when no candidate accepts the uri.
    """
    # Function-scope import so the module's import block needs no change.
    from collections.abc import Mapping

    if isinstance(extractor_list, Mapping):
        candidates = extractor_list.values()
    else:
        candidates = extractor_list
    best_bet: Optional[T] = None
    for extractor in candidates:
        level = extractor.uri_suitable(uri)
        if level.accept_immediately:
            return extractor
        # Remember only the first acceptable candidate; later ones never replace it.
        if level.can_accept and best_bet is None:
            best_bet = extractor
    return best_bet
 def expect_suitable_extractor(extractor_list: Iterable[T], uri: str) -> T:
    """Return the extractor ``search_suitable_extractor`` selects for ``uri``.

    Raises:
        ExtractionError: if the search yields ``None``.
    """
    found = search_suitable_extractor(extractor_list, uri)
    if found is not None:
        return found
    raise ExtractionError(f"No suitable extractor found for uri {uri!r}")
--- a/server/entertainment_decider/extractors/media/__init__.py
+++ b/server/entertainment_decider/extractors/media/__init__.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import Dict
 from ...models import MediaElement
 from ..helpers import expect_suitable_extractor
 from .base import MediaExtractor
 from .ytdl import YtdlMediaExtractor
@ -11,8 +12,8 @@ MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
    "ytdl": YtdlMediaExtractor(),
 }
-def media_extract_uri(extractor_name: str, uri: str) -> MediaElement:
+def media_extract_uri(uri: str) -> MediaElement:
    elem: MediaElement = MediaExtractor.check_uri(uri)
    if not elem:
-        elem = MEDIA_EXTRACTORS[extractor_name].extract_and_store(uri)
+        elem = expect_suitable_extractor(MEDIA_EXTRACTORS, uri).extract_and_store(uri)
    return elem
--- a/server/entertainment_decider/extractors/media/ytdl.py
+++ b/server/entertainment_decider/extractors/media/ytdl.py
@ -10,7 +10,7 @@ from jsoncache import ApplicationCache
 from ...common import call
 from ...models import MediaElement
-from ..generic import AuthorExtractedData, ExtractedData, ExtractionError
+from ..generic import AuthorExtractedData, ExtractedData, ExtractionError, SuitableLevel
 from .base import  MediaExtractor
@ -54,6 +54,9 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
    def __init__(self):
        """Initialise the base class with the fixed name ``"ytdl"``."""
        super().__init__("ytdl")
    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Answer ``SuitableLevel.FALLBACK`` for every uri; the argument is not inspected."""
        return SuitableLevel.FALLBACK
    def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]:
        video_extractor_key = data.get("extractor_key") or data["ie_key"]
        author_key = data.get("channel_id") or data.get("uploader_id")