# (Web-viewer residue, not part of the module:)
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 91 lines
# 2.5 KiB
# Python

from __future__ import annotations
from datetime import datetime
import logging
import re
from typing import List, Optional, TypedDict
from youtubesearchpython import ResultMode, Video
from ...models import MediaElement
from ..generic import AuthorExtractedData, ExtractedData, SuitableLevel
from .base import MediaExtractor
class YoutubeDuration(TypedDict):
    """Shape of the ``duration`` entry inside a YouTube video info dict."""

    # Numeric text; the extractor in this module parses it with int().
    # Presumably the video length in seconds, going by the key name.
    secondsText: str
class YoutubeViewCount(TypedDict):
    """Shape of the ``viewCount`` entry inside a YouTube video info dict."""

    # View count delivered as text; the exact formatting is not visible from
    # this module.
    text: str
class YoutubeThumbnailData(TypedDict):
    """Shape of one item of the ``thumbnails`` list of a video info dict."""

    # Location of the thumbnail image.
    url: str
    # Image dimensions (presumably pixels -- not established by this module).
    width: int
    height: int
class YoutubeChannelData(TypedDict):
    """Shape of the ``channel`` entry inside a YouTube video info dict.

    The extractor in this module uses all three fields to describe the
    video's author.
    """

    # Display name; used as the author name and appended to the media title.
    name: str
    # Channel identifier; used to build the author's object key.
    id: str
    # Channel URL; used as the author's object URI.
    link: str
class YoutubeVideoData(TypedDict):
    """Shape of the video info dict the extractor in this module works with.

    It is the declared type of the value obtained from ``Video.getInfo`` in
    dict mode. Only ``id``, ``title``, ``duration``, ``channel`` and the two
    date fields are read by the code in this module; the remaining keys are
    declared for completeness.
    """

    # Video identifier; used as the object key of the extracted data.
    id: str
    # Video title; combined with the channel name for the media title.
    title: str
    duration: YoutubeDuration
    viewCount: YoutubeViewCount
    thumbnails: List[YoutubeThumbnailData]
    description: str
    channel: YoutubeChannelData
    allowRatings: bool
    averageRating: float
    keywords: List[str]
    isLiveContent: bool
    # Date strings; the extractor prefers ``uploadDate`` and falls back to
    # ``publishDate`` when the former is missing or empty.
    publishDate: str
    uploadDate: str
    isLiveNow: bool
    # URL of the video (presumably the canonical watch link -- not
    # established by this module).
    link: str
class YoutubeMediaExtractor(MediaExtractor[YoutubeVideoData]):
    """Media extractor for single YouTube videos.

    Recognises ``youtube.com/watch?v=<id>`` and ``youtu.be/<id>`` links,
    fetches their metadata through ``youtubesearchpython.Video.getInfo`` and
    copies title, release date and length onto a ``MediaElement``.
    """

    # Accepts http(s) watch links (optionally with "www.") and youtu.be short
    # links. The id group stops at "/", "&" or "?" and the pattern is anchored
    # at both ends, so links carrying extra path or query parts do not match.
    # The pattern text is kept verbatim; re.VERBOSE ignores its line breaks.
    __uri_regex = re.compile(r"""^
https?://(
(www\.)?youtube\.com/(
watch\?v=
)|
youtu\.be/
)(?P<id>[^/&?]+)
$""", re.VERBOSE)

    def __init__(self):
        """Pass ``"ytdl"`` to the base ``MediaExtractor`` constructor."""
        super().__init__("ytdl")

    def uri_suitable(self, uri: str) -> SuitableLevel:
        """Report whether *uri* matches the supported YouTube link pattern.

        The regex match result (a match object or ``None``) is handed to
        ``SuitableLevel.always_or_no``, which decides the returned level.
        """
        return SuitableLevel.always_or_no(self.__uri_regex.match(uri))

    def _get_author_data(self, data: YoutubeVideoData) -> Optional[AuthorExtractedData]:
        """Build the author description from the video's ``channel`` entry.

        The channel link becomes the object URI, the channel id (prefixed
        with ``author:``) the object key and the channel name the author name.
        """
        channel = data["channel"]
        return AuthorExtractedData(
            object_uri=channel["link"],
            extractor_name=self.name,
            object_key=f"author:{channel['id']}",
            author_name=channel["name"],
        )

    def _extract_online(self, uri: str) -> ExtractedData[YoutubeVideoData]:
        """Fetch the info dict for *uri* and wrap it in ``ExtractedData``.

        The video id reported in the fetched data is used as the object key.
        """
        # Lazy %-style arguments: the message is only rendered when the
        # record is actually emitted; the resulting text is unchanged.
        logging.info("Request info using youtube_search_python for %r", uri)
        vid_data: YoutubeVideoData = Video.getInfo(
            videoLink=uri,
            mode=ResultMode.dict,
        )
        return ExtractedData[YoutubeVideoData](
            object_uri=uri,
            extractor_name=self.name,
            object_key=vid_data["id"],
            data=vid_data,
        )

    def _update_object_raw(self, object: MediaElement, data: YoutubeVideoData):
        """Copy title, release date and length from *data* onto *object*.

        Raises:
            KeyError: if a required key is missing from *data*.
            ValueError: if the date or the duration text cannot be parsed.
        """
        object.title = f"{data['title']} - {data['channel']['name']}"
        # Prefer the upload date; fall back to the publish date when the
        # former is missing or empty.
        raw_date = data.get("uploadDate") or data["publishDate"]
        # Only the leading YYYY-MM-DD part is parsed, so a value that carries
        # a time/offset suffix after the date is accepted as well; plain dates
        # behave exactly as before.
        # NOTE(review): upstream responses reportedly switched to full
        # ISO-8601 timestamps -- confirm against the installed
        # youtubesearchpython version.
        object.release_date = datetime.strptime(raw_date[:10], "%Y-%m-%d")
        object.length = int(data["duration"]["secondsText"])