Added automatic search for a suitable extractor

master
Felix Stupp 3 years ago
parent beaf99e539
commit 86520ae30e
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

@ -4,6 +4,7 @@ from typing import Dict
from ...config import app_config from ...config import app_config
from ...models import MediaCollection from ...models import MediaCollection
from ..helpers import expect_suitable_extractor
from .base import CollectionExtractor from .base import CollectionExtractor
from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter from .tt_rss import TtRssCollectionExtractor, TtRssConnectionParameter
from .youtube import YouTubeCollectionExtractor from .youtube import YouTubeCollectionExtractor
@ -15,9 +16,9 @@ COLLECTION_EXTRACTORS: Dict[str, CollectionExtractor] = {
"youtube": YouTubeCollectionExtractor(), "youtube": YouTubeCollectionExtractor(),
} }
def collection_extract_uri(extractor_name: str, uri: str) -> MediaCollection: def collection_extract_uri(uri: str) -> MediaCollection:
elem: MediaCollection = CollectionExtractor.check_uri(uri) elem: MediaCollection = CollectionExtractor.check_uri(uri)
ex = COLLECTION_EXTRACTORS[extractor_name] ex = expect_suitable_extractor(COLLECTION_EXTRACTORS, uri)
if not elem: if not elem:
elem = ex.extract_and_store(uri) elem = ex.extract_and_store(uri)
else: else:

@ -14,7 +14,7 @@ from tinytinypy import Connection
from tinytinypy.main import Headline from tinytinypy.main import Headline
from ...models import MediaCollection from ...models import MediaCollection
from ..generic import ExtractedData, ExtractionError from ..generic import ExtractedData, ExtractionError, SuitableLevel
from .base import CollectionExtractor from .base import CollectionExtractor
@ -85,6 +85,11 @@ class TtRssUri:
id: Optional[str] id: Optional[str]
options: Dict[str, str] options: Dict[str, str]
@classmethod
def uri_suitable(cls, uri: str) -> bool:
    """Tell whether ``uri`` uses the URI scheme of this class.

    Only the scheme component of the parsed URI is compared against
    ``cls.scheme``; the rest of the URI is not inspected here.
    """
    return url.urlparse(uri).scheme == cls.scheme
@classmethod @classmethod
def from_str_uri(cls, uri: str) -> "TtRssUri": def from_str_uri(cls, uri: str) -> "TtRssUri":
parts = url.urlparse(uri, scheme=cls.scheme) parts = url.urlparse(uri, scheme=cls.scheme)
@ -124,6 +129,9 @@ class TtRssCollectionExtractor(CollectionExtractor[HeadlineList]):
def __decode_uri(self, uri: str) -> TtRssUri: def __decode_uri(self, uri: str) -> TtRssUri:
return TtRssUri.from_str_uri(uri) return TtRssUri.from_str_uri(uri)
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how well this extractor fits ``uri``.

    Returns ``SuitableLevel.ALWAYS`` when ``TtRssUri.uri_suitable`` accepts
    the URI and ``SuitableLevel.NO`` otherwise.
    """
    if TtRssUri.uri_suitable(uri):
        return SuitableLevel.ALWAYS
    return SuitableLevel.NO
def can_extract_offline(self, uri: str) -> bool: def can_extract_offline(self, uri: str) -> bool:
return True return True

@ -9,7 +9,7 @@ from pony import orm # TODO remove
import youtubesearchpython import youtubesearchpython
from ...models import MediaCollection from ...models import MediaCollection
from ..generic import ExtractedData, ExtractionError from ..generic import ExtractedData, ExtractionError, SuitableLevel
from .base import CollectionExtractor from .base import CollectionExtractor
@ -45,6 +45,9 @@ class YouTubeCollectionExtractor(CollectionExtractor[Dict]):
def __init__(self): def __init__(self):
super().__init__("youtube") super().__init__("youtube")
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how well this extractor fits ``uri``.

    Returns ``SuitableLevel.ALWAYS`` when the extractor's URI regex matches
    at the start of ``uri`` and ``SuitableLevel.NO`` otherwise.
    """
    if self.__uri_regex.match(uri):
        return SuitableLevel.ALWAYS
    return SuitableLevel.NO
def can_extract_offline(self, uri: str) -> bool: def can_extract_offline(self, uri: str) -> bool:
return True return True

@ -3,6 +3,7 @@ from __future__ import annotations
import dataclasses import dataclasses
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime from datetime import datetime
from enum import Enum
import logging import logging
from typing import Dict, Generic, Optional, TypeVar from typing import Dict, Generic, Optional, TypeVar
@ -12,6 +13,21 @@ from ..models import MediaCollection, MediaElement
T = TypeVar("T") T = TypeVar("T")
class SuitableLevel(Enum):
    """How well an extractor fits a given URI.

    Each member's value is a ``(can_accept, accept_immediately)`` pair that
    is exposed through the two properties below.
    """

    # (can_accept, accept_immediately)
    NO = (False, False)
    FALLBACK = (True, False)
    ALWAYS = (True, True)

    @property
    def can_accept(self):
        """First flag of the value pair: the extractor may handle the URI."""
        accepted, _ = self.value
        return accepted

    @property
    def accept_immediately(self):
        """Second flag of the value pair: the extractor can be picked at once."""
        _, immediately = self.value
        return immediately
class ExtractionError(Exception): class ExtractionError(Exception):
pass pass
@ -84,8 +100,8 @@ class GeneralExtractor(Generic[E, T]):
# abstract (for specific extractor classes) # abstract (for specific extractor classes)
#def uri_suitable(self, uri: str) -> bool: def uri_suitable(self, uri: str) -> SuitableLevel:
# raise NotImplementedError() raise NotImplementedError()
def can_extract_offline(self, uri: str) -> bool: def can_extract_offline(self, uri: str) -> bool:
return False return False

@ -0,0 +1,25 @@
from __future__ import annotations

from typing import Iterable, Mapping, Optional, TypeVar, Union

from .generic import ExtractionError, GeneralExtractor
# Type variable bound to GeneralExtractor, used by the helper signatures in this
# module so the returned extractor has the same type as the ones passed in.
T = TypeVar("T", bound=GeneralExtractor)
def search_suitable_extractor(extractor_list: Union[Iterable[T], Mapping[str, T]], uri: str) -> Optional[T]:
    """Find the extractor that fits ``uri`` best.

    Args:
        extractor_list: The candidate extractors, either as a plain iterable
            of extractors or as a mapping whose *values* are the extractors
            (e.g. a name -> extractor registry).
        uri: The URI to find an extractor for.

    Returns:
        The first extractor whose ``uri_suitable(uri)`` result has
        ``accept_immediately`` set. If there is none, the first extractor
        whose result has ``can_accept`` set. ``None`` if no extractor
        accepts the URI.
    """
    # Iterating a mapping yields its keys; for a name -> extractor registry
    # those are plain strings without ``uri_suitable``, so look at the values.
    if isinstance(extractor_list, Mapping):
        candidates = extractor_list.values()
    else:
        candidates = extractor_list
    best_bet: Optional[T] = None
    for extractor in candidates:
        match = extractor.uri_suitable(uri)
        if match.accept_immediately:
            return extractor
        # Remember only the first fallback; keep scanning for an immediate match.
        if best_bet is None and match.can_accept:
            best_bet = extractor
    return best_bet
def expect_suitable_extractor(extractor_list: Iterable[T], uri: str) -> T:
    """Return a suitable extractor for ``uri`` or fail loudly.

    Delegates the lookup to ``search_suitable_extractor`` and turns a
    missing result into an error.

    Raises:
        ExtractionError: If the search yields no extractor for ``uri``.
    """
    found = search_suitable_extractor(extractor_list, uri)
    if found is not None:
        return found
    raise ExtractionError(f"No suitable extractor found for uri {uri!r}")

@ -3,6 +3,7 @@ from __future__ import annotations
from typing import Dict from typing import Dict
from ...models import MediaElement from ...models import MediaElement
from ..helpers import expect_suitable_extractor
from .base import MediaExtractor from .base import MediaExtractor
from .ytdl import YtdlMediaExtractor from .ytdl import YtdlMediaExtractor
@ -11,8 +12,8 @@ MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
"ytdl": YtdlMediaExtractor(), "ytdl": YtdlMediaExtractor(),
} }
def media_extract_uri(extractor_name: str, uri: str) -> MediaElement: def media_extract_uri(uri: str) -> MediaElement:
elem: MediaElement = MediaExtractor.check_uri(uri) elem: MediaElement = MediaExtractor.check_uri(uri)
if not elem: if not elem:
elem = MEDIA_EXTRACTORS[extractor_name].extract_and_store(uri) elem = expect_suitable_extractor(MEDIA_EXTRACTORS, uri).extract_and_store(uri)
return elem return elem

@ -10,7 +10,7 @@ from jsoncache import ApplicationCache
from ...common import call from ...common import call
from ...models import MediaElement from ...models import MediaElement
from ..generic import AuthorExtractedData, ExtractedData, ExtractionError from ..generic import AuthorExtractedData, ExtractedData, ExtractionError, SuitableLevel
from .base import MediaExtractor from .base import MediaExtractor
@ -54,6 +54,9 @@ class YtdlMediaExtractor(MediaExtractor[Dict]):
def __init__(self): def __init__(self):
super().__init__("ytdl") super().__init__("ytdl")
def uri_suitable(self, uri: str) -> SuitableLevel:
    """Rate how well this extractor fits ``uri``.

    The URI is not inspected: every URI is rated ``SuitableLevel.FALLBACK``.
    """
    return SuitableLevel.FALLBACK
def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]: def _get_author_data(self, data: Dict) -> Optional[AuthorExtractedData]:
video_extractor_key = data.get("extractor_key") or data["ie_key"] video_extractor_key = data.get("extractor_key") or data["ie_key"]
author_key = data.get("channel_id") or data.get("uploader_id") author_key = data.get("channel_id") or data.get("uploader_id")

Loading…
Cancel
Save