extractors/collection/youtube: migrate to yt-dlp

6 months ago · c503b3f148
parent ae85d6ae0c
commit c503b3f148
1 changed files with 34 additions and 20 deletions
--- a/server/entertainment_decider/extractors/collection/youtube.py
+++ b/server/entertainment_decider/extractors/collection/youtube.py
@ -11,7 +11,7 @@ from typing import (
 )
 from pony import orm  # TODO remove
-import youtubesearchpython
+from yt_dlp import YoutubeDL
 from ...models import MediaCollection
 from ..all.youtube import (
@ -27,6 +27,11 @@ from ..generic import (
 from .base import CollectionExtractor
 YTDLP_OPTS = {
    "extract_flat": "in_playlist",
 }
 class PlaylistChannel(TypedDict):
    name: str
    id: str
@ -122,32 +127,21 @@ class YouTubeCollectionExtractor(CollectionExtractor[PlaylistData]):
        orig_id = self.__get_id(uri)
        playlist_id = self.__convert_if_required(orig_id)
        playlist_link = f"https://www.youtube.com/playlist?list={playlist_id}"
        is_channel = self.__is_channel_id(playlist_id)
        logging.info(f"Request Youtube playlist {playlist_link!r}")
-        playlist = youtubesearchpython.Playlist(playlist_link)
+        with YoutubeDL(YTDLP_OPTS) as ydl:
-        try:
+            info = ydl.extract_info(
-            while playlist.hasMoreVideos:
+                playlist_link,
-                playlist.getNextVideos()
+                download=False,
-        except Exception as e:
+            )
-            # TODO improve check of Exception kind if possible
+            playlist = self.__adapt_ytdlp_format(ydl.sanitize_info(info))
            if is_channel and "invalid status code" in str(e.args[0]).lower():
                # Partial Update on channels can be accepted because newest videos are at the top
                logging.warning(
                    f"Failed to retrieve channel completely, proceed with partial update"
                )
            else:
                raise e
        logging.debug(
-            f"Retrieved {len(playlist.videos)} videos from playlist {playlist_link!r}"
+            f"Retrieved {len(playlist['videos'])} videos from playlist {playlist_link!r}"
        )
        return ExtractedDataOnline[PlaylistData](
            extractor_name=self.name,
            object_key=playlist_id,
            object_uri=uri,
-            data={
+            data=playlist,
                "info": playlist.info["info"],
                "videos": playlist.videos,
            },
        )
    def _update_object_raw(
@ -193,3 +187,23 @@ class YouTubeCollectionExtractor(CollectionExtractor[PlaylistData]):
            )
        )
        return ChangedReport.ChangedSome  # TODO improve
    @staticmethod
    def __adapt_ytdlp_format(ytdlp_info) -> PlaylistData:
        return {
            "info": {
                "id": ytdlp_info["id"],
                "title": ytdlp_info["title"],
                "channel": {
                    "id": ytdlp_info["channel_id"],
                    "name": ytdlp_info["channel"],
                },
                "link": ytdlp_info["webpage_url"],
            },
            "videos": [
                {
                    "id": elem["id"],
                }
                for elem in ytdlp_info["entries"]
            ],
        }