Added search for suitable extractor automatically

4 years ago · 86520ae30e
parent beaf99e539
commit 86520ae30e
7 changed files with 66 additions and 9 deletions
--- a/server/entertainment_decider/extractors/collection/__init__.py
+++ b/server/entertainment_decider/extractors/collection/__init__.py
@ -4,6 +4,7 @@ from typing import Dict

 from ...config import app_config
 from ...models import MediaCollection
+from ..helpers import expect_suitable_extractor
 from .base import CollectionExtractor
 from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
 from .youtube import YouTubeCollectionExtractor
@ -15,9 +16,9 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
    "youtube": YouTubeCollectionExtractor(),
 }

-def collection_extract_uri(extractor_name: str, uri: str) -> MediaCollection:
+def collection_extract_uri(uri: str) -> MediaCollection:
    elem: MediaCollection = CollectionExtractor.check_uri(uri)
-    ex = COLLECTION_EXTRACTORS[extractor_name]
+    ex = expect_suitable_extractor(COLLECTION_EXTRACTORS, uri)
    if not elem:
        elem = ex.extract_and_store(uri)
    else:
--- a/server/entertainment_decider/extractors/collection/tt_rss.py
+++ b/server/entertainment_decider/extractors/collection/tt_rss.py
@ -14,7 +14,7 @@ from tinytinypy import Connection
 from tinytinypy.main import Headline

 from ...models import MediaCollection
-from ..generic import ExtractedData, ExtractionError
+from ..generic import ExtractedData, ExtractionError, SuitableLevel
 from .base import CollectionExtractor


@ -85,6 +85,11 @@ class TtRssUri:
    id: Optional[str]
    options: Dict[str, str]

+    @classmethod
+    def uri_suitable(cls, uri: str) -> bool:
+        """Tell whether ``uri`` explicitly carries this class's scheme.
+
+        No default scheme is passed to the parser here, so a URI without a
+        scheme part is reported as not suitable.
+        """
+        return url.urlparse(uri).scheme == cls.scheme
+
+
    @classmethod
    def from_str_uri(cls, uri: str) -> "TtRssUri":
        parts = url.urlparse(uri, scheme=cls.scheme)
@ -124,6 +129,9 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
    def __decode_uri(self, uri: str) -> TtRssUri:
        return TtRssUri.from_str_uri(uri)

+    def uri_suitable(self, uri: str) -> SuitableLevel:
+        """Claim ``uri`` outright when ``TtRssUri`` accepts it, else decline."""
+        if TtRssUri.uri_suitable(uri):
+            return SuitableLevel.ALWAYS
+        return SuitableLevel.NO
+
    def can_extract_offline(self, uri: str) -> bool:
        return True

--- a/server/entertainment_decider/extractors/collection/youtube.py
+++ b/server/entertainment_decider/extractors/collection/youtube.py
@ -9,7 +9,7 @@ from pony import orm # TODO remove
 import youtubesearchpython

 from ...models import MediaCollection
-from ..generic import ExtractedData, ExtractionError
+from ..generic import ExtractedData, ExtractionError, SuitableLevel
 from .base import CollectionExtractor


@ -45,6 +45,9 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
    def __init__(self):
        super().__init__("youtube")

+    def uri_suitable(self, uri: str) -> SuitableLevel:
+        """Claim ``uri`` outright when the class's URI regex matches it, else decline."""
+        if self.__uri_regex.match(uri):
+            return SuitableLevel.ALWAYS
+        return SuitableLevel.NO
+
    def can_extract_offline(self, uri: str) -> bool:
        return True

--- a/server/entertainment_decider/extractors/generic.py
+++ b/server/entertainment_decider/extractors/generic.py
@ -3,6 +3,7 @@ from __future__ import annotations
 import dataclasses
 from dataclasses import dataclass
 from datetime import datetime
+from enum import Enum
 import logging
 from typing import Dict, Generic, Optional, TypeVar

@ -12,6 +13,21 @@ from ..models import MediaCollection, MediaElement
 T = TypeVar("T")


+class SuitableLevel(Enum):
+    """How strongly an extractor claims a URI.
+
+    Every member's value is a ``(can_accept, accept_immediately)`` pair of
+    booleans; the two properties below expose the pair by name.
+    """
+
+    NO = (False, False)
+    FALLBACK = (True, False)
+    ALWAYS = (True, True)
+
+    @property
+    def can_accept(self) -> bool:
+        """First flag of the member's value."""
+        accepts, _ = self.value
+        return accepts
+
+    @property
+    def accept_immediately(self) -> bool:
+        """Second flag of the member's value."""
+        _, immediately = self.value
+        return immediately
+
+
 class ExtractionError(Exception):
    pass

@ -84,8 +100,8 @@ class GeneralExtractor(Generic[E, T]):

    # abstract (for specific extractor classes)

-    #def uri_suitable(self, uri: str) -> bool:
-    #    raise NotImplementedError()
+    def uri_suitable(self, uri: str) -> SuitableLevel:
+        """Report how well this extractor suits ``uri``.
+
+        Abstract: this base implementation always raises
+        ``NotImplementedError``; specific extractor classes override it.
+        """
+        raise NotImplementedError()

    def can_extract_offline(self, uri: str) -> bool:
        return False
--- a/server/entertainment_decider/extractors/helpers.py
+++ b/server/entertainment_decider/extractors/helpers.py
@ -0,0 +1,25 @@
+from __future__ import annotations
+
+from typing import Iterable, Optional, TypeVar
+
+from .generic import ExtractionError, GeneralExtractor
+
+
+T = TypeVar("T", bound=GeneralExtractor)
+
+
+def search_suitable_extractor(extractor_list: Iterable[T], uri: str) -> Optional[T]:
+    """Pick the extractor best suited for ``uri``.
+
+    The candidates are asked in order. The first one whose ``uri_suitable``
+    result has ``accept_immediately`` set wins right away. Otherwise the
+    first candidate that merely ``can_accept`` the URI is returned.
+
+    :param extractor_list: extractors to try, either as a plain iterable or
+        as a mapping whose values are the extractors (e.g. a name registry).
+    :param uri: the URI to find an extractor for.
+    :return: the chosen extractor, or ``None`` if no candidate accepts.
+    """
+    from collections.abc import Mapping
+
+    # Callers pass their name -> extractor registry dicts. Iterating a
+    # mapping yields its keys (the names), so unwrap the extractor objects.
+    if isinstance(extractor_list, Mapping):
+        candidates = extractor_list.values()
+    else:
+        candidates = extractor_list
+    best_bet: Optional[T] = None
+    for extractor in candidates:
+        match = extractor.uri_suitable(uri)
+        if match.accept_immediately:
+            return extractor
+        # Remember only the first fallback candidate; later ones do not replace it.
+        if match.can_accept and best_bet is None:
+            best_bet = extractor
+    return best_bet
+
+def expect_suitable_extractor(extractor_list: Iterable[T], uri: str) -> T:
+    """Like ``search_suitable_extractor``, but never returns ``None``.
+
+    :raises ExtractionError: if the search finds no extractor for ``uri``.
+    """
+    found = search_suitable_extractor(extractor_list, uri)
+    if found is not None:
+        return found
+    raise ExtractionError(f"No suitable extractor found for uri {uri!r}")
--- a/server/entertainment_decider/extractors/media/__init__.py
+++ b/server/entertainment_decider/extractors/media/__init__.py
@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import Dict

 from ...models import MediaElement
+from ..helpers import expect_suitable_extractor
 from .base import MediaExtractor
 from .ytdl import YtdlMediaExtractor

@ -11,8 +12,8 @@ MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
    "ytdl": YtdlMediaExtractor(),
 }

-def media_extract_uri(extractor_name: str, uri: str) -> MediaElement:
+def media_extract_uri(uri: str) -> MediaElement:
+    """Return the media element for ``uri``, extracting it when needed.
+
+    ``MediaExtractor.check_uri`` is consulted first (presumably a lookup of
+    an already known element; confirm against its definition). Only when
+    that yields a falsy result is an extractor picked automatically from
+    ``MEDIA_EXTRACTORS`` and asked to extract and store the element.
+
+    :raises ExtractionError: if no registered extractor accepts ``uri``.
+    """
    elem: MediaElement = MediaExtractor.check_uri(uri)
    if not elem:
-        elem = MEDIA_EXTRACTORS[extractor_name].extract_and_store(uri)
+        # MEDIA_EXTRACTORS maps names to extractors. Iterating the dict itself
+        # would yield the name strings, so hand over the extractor objects.
+        elem = expect_suitable_extractor(MEDIA_EXTRACTORS.values(), uri).extract_and_store(uri)
    return elem
--- a/server/entertainment_decider/extractors/media/ytdl.py
+++ b/server/entertainment_decider/extractors/media/ytdl.py
@ -10,7 +10,7 @@ from jsoncache import ApplicationCache

 from ...common import call
 from ...models import MediaElement
-from ..generic import AuthorExtractedData, ExtractedData, ExtractionError
+from ..generic import AuthorExtractedData, ExtractedData, ExtractionError, SuitableLevel
 from .base import  MediaExtractor


@ -54,6 +54,9 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
    def __init__(self):
        super().__init__("ytdl")

+    def uri_suitable(self, uri: str) -> SuitableLevel:
+        """Offer this extractor as a fallback for every URI.
+
+        ``uri`` is not inspected; the answer is always
+        ``SuitableLevel.FALLBACK``.
+        """
+        return SuitableLevel.FALLBACK
+
    def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]:
        video_extractor_key = data.get("extractor_key") or data["ie_key"]
        author_key = data.get("channel_id") or data.get("uploader_id")