[niconico] Add playlist extractors and refactor (#2915)

Authored by: Lesmiscore
2 years ago · f494ddada8
parent 02fc6feb6e
commit f494ddada8
2 changed files with 155 additions and 35 deletions
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@ -1011,11 +1011,12 @@ from .nick import (
    NickNightIE,
    NickRuIE,
 )
-
 from .niconico import (
    NiconicoIE,
    NiconicoPlaylistIE,
    NiconicoUserIE,
+    NiconicoSeriesIE,
+    NiconicoHistoryIE,
    NicovideoSearchDateIE,
    NicovideoSearchIE,
    NicovideoSearchURLIE,
--- a/yt_dlp/extractor/niconico.py
+++ b/yt_dlp/extractor/niconico.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals

 import datetime
 import itertools
+import functools
 import json
 import re

@ -12,6 +13,7 @@ from ..compat import (
    compat_str,
    compat_parse_qs,
    compat_urllib_parse_urlparse,
+    compat_HTTPError,
 )
 from ..utils import (
    ExtractorError,
@ -24,7 +26,9 @@ from ..utils import (
    PostProcessingError,
    remove_start,
    str_or_none,
+    traverse_obj,
    try_get,
+    unescapeHTML,
    unified_timestamp,
    urlencode_postdata,
    xpath_text,
@ -606,8 +610,61 @@ class NiconicoIE(InfoExtractor):
        }


-class NiconicoPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?nicovideo\.jp/(?:user/\d+/|my/)?mylist/(?P<id>\d+)'
+class NiconicoPlaylistBaseIE(InfoExtractor):
+    _PAGE_SIZE = 100
+
+    _API_HEADERS = {
+        'X-Frontend-ID': '6',
+        'X-Frontend-Version': '0',
+        'X-Niconico-Language': 'en-us'
+    }
+
+    def _call_api(self, list_id, resource, query):
+        "Implement this in child class"
+        pass
+
+    @staticmethod
+    def _parse_owner(item):
+        return {
+            'uploader': traverse_obj(item, ('owner', 'name')),
+            'uploader_id': traverse_obj(item, ('owner', 'id')),
+        }
+
+    def _fetch_page(self, list_id, page):
+        page += 1
+        resp = self._call_api(list_id, 'page %d' % page, {
+            'page': page,
+            'pageSize': self._PAGE_SIZE,
+        })
+        # this is needed to support both mylist and user
+        for video in traverse_obj(resp, ('items', ..., ('video', None))) or []:
+            video_id = video.get('id')
+            if not video_id:
+                # skip {"video": {"id": "blablabla", ...}}
+                continue
+            count = video.get('count') or {}
+            get_count = lambda x: int_or_none(count.get(x))
+            yield {
+                '_type': 'url',
+                'id': video_id,
+                'title': video.get('title'),
+                'url': f'https://www.nicovideo.jp/watch/{video_id}',
+                'description': video.get('shortDescription'),
+                'duration': int_or_none(video.get('duration')),
+                'view_count': get_count('view'),
+                'comment_count': get_count('comment'),
+                'thumbnail': traverse_obj(video, ('thumbnail', ('nHdUrl', 'largeUrl', 'listingUrl', 'url'))),
+                'ie_key': NiconicoIE.ie_key(),
+                **self._parse_owner(video),
+            }
+
+    def _entries(self, list_id):
+        return OnDemandPagedList(functools.partial(self._fetch_page, list_id), self._PAGE_SIZE)
+
+
+class NiconicoPlaylistIE(NiconicoPlaylistBaseIE):
+    IE_NAME = 'niconico:playlist'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/(?:user/\d+/)?(?:my/)?mylist/(?:#/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.nicovideo.jp/mylist/27411728',
@ -618,48 +675,110 @@ class NiconicoPlaylistIE(InfoExtractor):
            'uploader': 'のっく',
            'uploader_id': '805442',
        },
-        'playlist_mincount': 225,
+        'playlist_mincount': 291,
    }, {
        'url': 'https://www.nicovideo.jp/user/805442/mylist/27411728',
        'only_matching': True,
+    }, {
+        'url': 'https://www.nicovideo.jp/my/mylist/#/68048635',
+        'only_matching': True,
    }]

-    _API_HEADERS = {
-        'X-Frontend-ID': '6',
-        'X-Frontend-Version': '0'
-    }
+    def _call_api(self, list_id, resource, query):
+        return self._download_json(
+            f'https://nvapi.nicovideo.jp/v2/mylists/{list_id}', list_id,
+            f'Downloading {resource}', query=query,
+            headers=self._API_HEADERS)['data']['mylist']

    def _real_extract(self, url):
        list_id = self._match_id(url)
+        mylist = self._call_api(list_id, 'list', {
+            'pageSize': 1,
+        })
+        return self.playlist_result(
+            self._entries(list_id), list_id,
+            mylist.get('name'), mylist.get('description'), **self._parse_owner(mylist))

-        def get_page_data(pagenum, pagesize):
-            return self._download_json(
-                'http://nvapi.nicovideo.jp/v2/mylists/' + list_id, list_id,
-                query={'page': 1 + pagenum, 'pageSize': pagesize},
-                headers=self._API_HEADERS).get('data').get('mylist')
-
-        data = get_page_data(0, 1)
-        title = data.get('name')
-        description = data.get('description')
-        uploader = data.get('owner').get('name')
-        uploader_id = data.get('owner').get('id')
-
-        def pagefunc(pagenum):
-            data = get_page_data(pagenum, 25)
-            return ({
-                '_type': 'url',
-                'url': 'http://www.nicovideo.jp/watch/' + item.get('watchId'),
-            } for item in data.get('items'))

-        return {
-            '_type': 'playlist',
-            'id': list_id,
-            'title': title,
-            'description': description,
-            'uploader': uploader,
-            'uploader_id': uploader_id,
-            'entries': OnDemandPagedList(pagefunc, 25),
-        }
+class NiconicoSeriesIE(InfoExtractor):
+    IE_NAME = 'niconico:series'
+    _VALID_URL = r'https?://(?:(?:www\.|sp\.)?nicovideo\.jp|nico\.ms)/series/(?P<id>\d+)'
+
+    _TESTS = [{
+        'url': 'https://www.nicovideo.jp/series/110226',
+        'info_dict': {
+            'id': '110226',
+            'title': 'ご立派ァ！のシリーズ',
+        },
+        'playlist_mincount': 10,  # as of 2021/03/17
+    }, {
+        'url': 'https://www.nicovideo.jp/series/12312/',
+        'info_dict': {
+            'id': '12312',
+            'title': 'バトルスピリッツ　お勧めカード紹介(調整中)',
+        },
+        'playlist_mincount': 97,  # as of 2021/03/17
+    }, {
+        'url': 'https://nico.ms/series/203559',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        list_id = self._match_id(url)
+        webpage = self._download_webpage(f'https://www.nicovideo.jp/series/{list_id}', list_id)
+
+        title = self._search_regex(
+            (r'<title>「(.+)（全',
+             r'<div class="TwitterShareButton"\s+data-text="(.+)\s+https:'),
+            webpage, 'title', fatal=False)
+        if title:
+            title = unescapeHTML(title)
+        playlist = [
+            self.url_result(f'https://www.nicovideo.jp/watch/{v_id}', video_id=v_id)
+            for v_id in re.findall(r'href="/watch/([a-z0-9]+)" data-href="/watch/\1', webpage)]
+        return self.playlist_result(playlist, list_id, title)
+
+
+class NiconicoHistoryIE(NiconicoPlaylistBaseIE):
+    IE_NAME = 'niconico:history'
+    IE_DESC = 'NicoNico user history. Requires cookies.'
+    _VALID_URL = r'https?://(?:www\.|sp\.)?nicovideo\.jp/my/history'
+
+    _TESTS = [{
+        'note': 'PC page, with /video',
+        'url': 'https://www.nicovideo.jp/my/history/video',
+        'only_matching': True,
+    }, {
+        'note': 'PC page, without /video',
+        'url': 'https://www.nicovideo.jp/my/history',
+        'only_matching': True,
+    }, {
+        'note': 'mobile page, with /video',
+        'url': 'https://sp.nicovideo.jp/my/history/video',
+        'only_matching': True,
+    }, {
+        'note': 'mobile page, without /video',
+        'url': 'https://sp.nicovideo.jp/my/history',
+        'only_matching': True,
+    }]
+
+    def _call_api(self, list_id, resource, query):
+        return self._download_json(
+            'https://nvapi.nicovideo.jp/v1/users/me/watch/history', 'history',
+            f'Downloading {resource}', query=query,
+            headers=self._API_HEADERS)['data']
+
+    def _real_extract(self, url):
+        list_id = 'history'
+        try:
+            mylist = self._call_api(list_id, 'list', {
+                'pageSize': 1,
+            })
+        except ExtractorError as e:
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
+                self.raise_login_required('You have to be logged in to get your watch history')
+            raise
+        return self.playlist_result(self._entries(list_id), list_id, **self._parse_owner(mylist))


 class NicovideoSearchBaseIE(InfoExtractor):