Added search for suitable extractor automatically

master
Felix Stupp 3 years ago
parent beaf99e539
commit 86520ae30e
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

@ -4,6 +4,7 @@ from typing import Dict
from ...config import app_config
from ...models import MediaCollection
from ..helpers import expect_suitable_extractor
from .base import CollectionExtractor
from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
from .youtube import YouTubeCollectionExtractor
@ -15,9 +16,9 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
"youtube": YouTubeCollectionExtractor(),
}
def collection_extract_uri(extractor_name: str, uri: str) -> MediaCollection:
def collection_extract_uri(uri: str) -> MediaCollection:
elem: MediaCollection = CollectionExtractor.check_uri(uri)
ex = COLLECTION_EXTRACTORS[extractor_name]
ex = expect_suitable_extractor(COLLECTION_EXTRACTORS, uri)
if not elem:
elem = ex.extract_and_store(uri)
else:

@ -14,7 +14,7 @@ from tinytinypy import Connection
from tinytinypy.main import Headline
from ...models import MediaCollection
from ..generic import ExtractedData, ExtractionError
from ..generic import ExtractedData, ExtractionError, SuitableLevel
from .base import CollectionExtractor
@ -85,6 +85,11 @@ class TtRssUri:
id: Optional[str]
options: Dict[str, str]
@classmethod
def uri_suitable(cls, uri: str) -> bool:
    """Tell whether *uri* carries the URI scheme declared on this class.

    The URI is split with ``url.urlparse`` and only its scheme component is
    compared against ``cls.scheme``; no other part of the URI is inspected.
    """
    parsed_scheme = url.urlparse(uri).scheme
    return parsed_scheme == cls.scheme
@classmethod
def from_str_uri(cls, uri: str) -> "TtRssUri":
parts = url.urlparse(uri, scheme=cls.scheme)
@ -124,6 +129,9 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
def __decode_uri(self, uri: str) -> TtRssUri:
    """Parse *uri* into a ``TtRssUri``.

    Pure delegation to ``TtRssUri.from_str_uri``; no instance state is read.
    """
    return TtRssUri.from_str_uri(uri)
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how well this extractor fits *uri*.

    Returns ``SuitableLevel.ALWAYS`` when ``TtRssUri.uri_suitable`` accepts
    the URI, and ``SuitableLevel.NO`` otherwise.
    """
    if TtRssUri.uri_suitable(uri):
        return SuitableLevel.ALWAYS
    return SuitableLevel.NO
def can_extract_offline(self, uri: str) -> bool:
    """Report whether *uri* can be extracted offline.

    Always returns ``True``; the URI itself is not inspected.
    """
    return True

@ -9,7 +9,7 @@ from pony import orm # TODO remove
import youtubesearchpython
from ...models import MediaCollection
from ..generic import ExtractedData, ExtractionError
from ..generic import ExtractedData, ExtractionError, SuitableLevel
from .base import CollectionExtractor
@ -45,6 +45,9 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
def __init__(self):
    """Initialise the base class with the fixed string ``"youtube"``."""
    # NOTE(review): presumably the extractor's name as used for registry keys — confirm against the base class.
    super().__init__("youtube")
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how well this extractor fits *uri*.

    Returns ``SuitableLevel.ALWAYS`` when the extractor's private URI regex
    matches at the start of *uri*, and ``SuitableLevel.NO`` otherwise.
    """
    if self.__uri_regex.match(uri):
        return SuitableLevel.ALWAYS
    return SuitableLevel.NO
def can_extract_offline(self, uri: str) -> bool:
    """Report whether *uri* can be extracted offline.

    Always returns ``True``; the URI itself is not inspected.
    """
    return True

@ -3,6 +3,7 @@ from __future__ import annotations
import dataclasses
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
import logging
from typing import Dict, Generic, Optional, TypeVar
@ -12,6 +13,21 @@ from ..models import MediaCollection, MediaElement
T = TypeVar("T")
class SuitableLevel(Enum):
    """Degree to which an extractor considers itself suitable for a URI.

    Every member's value is a ``(can_accept, accept_immediately)`` pair of
    booleans; the two properties below expose the respective component.
    """

    # Neither flag set.
    NO = (False, False)
    # Can accept, but not immediately.
    FALLBACK = (True, False)
    # Both flags set.
    ALWAYS = (True, True)

    @property
    def can_accept(self):
        """First flag of the member's value pair."""
        accepts, _ = self.value
        return accepts

    @property
    def accept_immediately(self):
        """Second flag of the member's value pair."""
        _, immediately = self.value
        return immediately
class ExtractionError(Exception):
    """Exception type raised by extraction code; adds nothing to ``Exception``."""
@ -84,8 +100,8 @@ class GeneralExtractor(Generic[E, T]):
# abstract (for specific extractor classes)
#def uri_suitable(self, uri: str) -> bool:
# raise NotImplementedError()
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how suitable this extractor is for *uri*.

    Abstract hook: this base implementation only raises, so concrete
    extractor classes have to override it.

    Raises:
        NotImplementedError: always, unless overridden.
    """
    raise NotImplementedError()
def can_extract_offline(self, uri: str) -> bool:
    """Report whether *uri* can be extracted offline.

    This base implementation always returns ``False`` without inspecting the
    URI; other extractors in this code base override it to return ``True``.
    """
    return False

@ -0,0 +1,25 @@
from __future__ import annotations
from typing import Iterable, Optional, TypeVar
from .generic import ExtractionError, GeneralExtractor
T = TypeVar("T", bound=GeneralExtractor)
def search_suitable_extractor(extractor_list: Iterable[T], uri: str) -> Optional[T]:
    """Pick the extractor best suited for *uri*.

    Candidates are asked in iteration order via ``uri_suitable(uri)``. The
    first one whose level has ``accept_immediately`` set wins outright. If no
    candidate accepts immediately, the first one whose level has
    ``can_accept`` set is returned as a fallback.

    Args:
        extractor_list: Either an iterable of extractors or a mapping whose
            *values* are extractors (e.g. a name -> extractor registry).
        uri: The URI an extractor is searched for.

    Returns:
        The chosen extractor, or ``None`` when no candidate can accept *uri*.
    """
    # Imported locally so the module's top-level imports stay untouched.
    from collections.abc import Mapping

    # Iterating a dict yields its keys; for a name -> extractor registry those
    # are plain strings without ``uri_suitable``, so look at the values instead.
    if isinstance(extractor_list, Mapping):
        candidates = extractor_list.values()
    else:
        candidates = extractor_list

    best_bet: Optional[T] = None
    for extractor in candidates:
        match = extractor.uri_suitable(uri)
        if match.accept_immediately:
            return extractor
        # Only the first acceptable candidate is remembered as fallback.
        if match.can_accept and best_bet is None:
            best_bet = extractor
    return best_bet
def expect_suitable_extractor(extractor_list: Iterable[T], uri: str) -> T:
    """Like ``search_suitable_extractor``, but never returns ``None``.

    Args:
        extractor_list: Candidates handed on to ``search_suitable_extractor``.
        uri: The URI an extractor is searched for.

    Returns:
        The extractor found by ``search_suitable_extractor``.

    Raises:
        ExtractionError: if the search yields ``None``.
    """
    found = search_suitable_extractor(extractor_list, uri)
    if found is not None:
        return found
    raise ExtractionError(f"No suitable extractor found for uri {uri!r}")

@ -3,6 +3,7 @@ from __future__ import annotations
from typing import Dict
from ...models import MediaElement
from ..helpers import expect_suitable_extractor
from .base import MediaExtractor
from .ytdl import YtdlMediaExtractor
@ -11,8 +12,8 @@ MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
"ytdl": YtdlMediaExtractor(),
}
def media_extract_uri(extractor_name: str, uri: str) -> MediaElement:
def media_extract_uri(uri: str) -> MediaElement:
elem: MediaElement = MediaExtractor.check_uri(uri)
if not elem:
elem = MEDIA_EXTRACTORS[extractor_name].extract_and_store(uri)
elem = expect_suitable_extractor(MEDIA_EXTRACTORS, uri).extract_and_store(uri)
return elem

@ -10,7 +10,7 @@ from jsoncache import ApplicationCache
from ...common import call
from ...models import MediaElement
from ..generic import AuthorExtractedData, ExtractedData, ExtractionError
from ..generic import AuthorExtractedData, ExtractedData, ExtractionError, SuitableLevel
from .base import MediaExtractor
@ -54,6 +54,9 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
def __init__(self):
    """Initialise the base class with the fixed string ``"ytdl"``."""
    # NOTE(review): presumably the extractor's name as used for registry keys — confirm against the base class.
    super().__init__("ytdl")
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how suitable this extractor is for *uri*.

    Always returns ``SuitableLevel.FALLBACK``; the URI is not inspected.
    """
    return SuitableLevel.FALLBACK
def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]:
video_extractor_key = data.get("extractor_key") or data["ie_key"]
author_key = data.get("channel_id") or data.get("uploader_id")

Loading…
Cancel
Save