Added youtube media extractor based on new version of youtubesearchpython

master
Felix Stupp 3 years ago
parent 075b0a544c
commit d0c2aab529
Signed by: zocker
GPG Key ID: 93E1BD26F6B02FB7

@ -2,13 +2,16 @@ from __future__ import annotations
from typing import Dict
from ...models import MediaElement
from ..helpers import expect_suitable_extractor
from .base import MediaExtractor
from .youtube import YoutubeMediaExtractor
from .ytdl import YtdlMediaExtractor
MEDIA_EXTRACTORS: Dict[str, MediaExtractor] = {
"youtube": YoutubeMediaExtractor(),
"ytdl": YtdlMediaExtractor(),
}

@ -0,0 +1,83 @@
from __future__ import annotations
from datetime import datetime
import logging
import re
from typing import List, Optional, TypedDict
from youtubesearchpython import ResultMode, Video
from ...models import MediaElement
from ..generic import AuthorExtractedData, ExtractedData, SuitableLevel
from .base import MediaExtractor
class YoutubeDuration(TypedDict):
secondsText: str
class YoutubeViewCount(TypedDict):
text: str
class YoutubeThumbnailData(TypedDict):
url: str
width: int
height: int
class YoutubeChannelData(TypedDict):
name: str
id: str
link: str
class YoutubeVideoData(TypedDict):
id: str
title: str
duration: YoutubeDuration
viewCount: YoutubeViewCount
thumbnails: List[YoutubeThumbnailData]
description: str
channel: YoutubeChannelData
allowRatings: bool
averageRating: float
keywords: List[str]
isLiveContent: bool
publishDate: str
uploadDate: str
isLiveNow: bool
link: str
class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
__uri_regex = re.compile(r"^https?://((www\.)?youtube\.com/watch\?v=|youtu\.be/)(?P<id>[^/&?]+)")
def __init__(self):
super().__init__("ytdl")
def uri_suitable(self, uri: str) -> SuitableLevel:
return SuitableLevel.always_or_no(self.__uri_regex.match(uri))
def _get_author_data(self, data: YoutubeVideoData) -> Optional[AuthorExtractedData]:
return AuthorExtractedData(
object_uri = data["channel"]["link"],
extractor_name = self.name,
object_key = f"author:{data['channel']['id']}",
author_name = data["channel"]["name"]
)
def _extract_online(self, uri: str) -> ExtractedData[YoutubeVideoData]:
logging.info(f"Request info using youtube_search_python for {uri!r}")
vid_data: YoutubeVideoData = Video.getInfo(
videoLink = uri,
mode = ResultMode.dict,
)
return ExtractedData[YoutubeVideoData](
object_uri=uri,
extractor_name=self.name,
object_key=vid_data["id"],
data=vid_data,
)
def _update_object_raw(self, object: MediaElement, data: YoutubeVideoData):
object.title = f"{data['title']} - {data['channel']['name']}"
object.release_date = datetime.strptime(data.get("uploadDate") or data["publishDate"], "%Y-%m-%d")
object.length = int(data["duration"]["secondsText"])

@ -3,7 +3,7 @@ pony>=0.7.14
pyyaml>=5.4.1
rss-parser>=0.2.3
youtube-dl>=2021.6.6
youtube-search-python>=1.4.9
youtube-search-python>=1.5.1
# custom, local requirements; TODO remove or change to persistent dependency
/home/zocker/Repositories/python-jsoncache

Loading…
Cancel
Save