|
|
@ -4,13 +4,16 @@ from __future__ import unicode_literals
|
|
|
|
import hashlib
|
|
|
|
import hashlib
|
|
|
|
import itertools
|
|
|
|
import itertools
|
|
|
|
import json
|
|
|
|
import json
|
|
|
|
|
|
|
|
import functools
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
|
|
|
|
import math
|
|
|
|
|
|
|
|
|
|
|
|
from .common import InfoExtractor, SearchInfoExtractor
|
|
|
|
from .common import InfoExtractor, SearchInfoExtractor
|
|
|
|
from ..compat import (
|
|
|
|
from ..compat import (
|
|
|
|
compat_str,
|
|
|
|
compat_str,
|
|
|
|
compat_parse_qs,
|
|
|
|
compat_parse_qs,
|
|
|
|
compat_urlparse,
|
|
|
|
compat_urlparse,
|
|
|
|
|
|
|
|
compat_urllib_parse_urlparse
|
|
|
|
)
|
|
|
|
)
|
|
|
|
from ..utils import (
|
|
|
|
from ..utils import (
|
|
|
|
ExtractorError,
|
|
|
|
ExtractorError,
|
|
|
@ -24,6 +27,7 @@ from ..utils import (
|
|
|
|
unified_timestamp,
|
|
|
|
unified_timestamp,
|
|
|
|
unsmuggle_url,
|
|
|
|
unsmuggle_url,
|
|
|
|
urlencode_postdata,
|
|
|
|
urlencode_postdata,
|
|
|
|
|
|
|
|
OnDemandPagedList
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -535,6 +539,75 @@ class BilibiliChannelIE(InfoExtractor):
|
|
|
|
return self.playlist_result(self._entries(list_id), list_id)
|
|
|
|
return self.playlist_result(self._entries(list_id), list_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BilibiliCategoryIE(InfoExtractor):
|
|
|
|
|
|
|
|
IE_NAME = 'Bilibili category extractor'
|
|
|
|
|
|
|
|
_MAX_RESULTS = 1000000
|
|
|
|
|
|
|
|
_VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
|
|
|
|
|
|
|
|
_TESTS = [{
|
|
|
|
|
|
|
|
'url': 'https://www.bilibili.com/v/kichiku/mad',
|
|
|
|
|
|
|
|
'info_dict': {
|
|
|
|
|
|
|
|
'id': 'kichiku: mad',
|
|
|
|
|
|
|
|
'title': 'kichiku: mad'
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
'playlist_mincount': 45,
|
|
|
|
|
|
|
|
'params': {
|
|
|
|
|
|
|
|
'playlistend': 45
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _fetch_page(self, api_url, num_pages, query, page_num):
|
|
|
|
|
|
|
|
parsed_json = self._download_json(
|
|
|
|
|
|
|
|
api_url, query, query={'Search_key': query, 'pn': page_num},
|
|
|
|
|
|
|
|
note='Extracting results from page %s of %s' % (page_num, num_pages))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
|
|
|
|
|
|
|
|
if not video_list:
|
|
|
|
|
|
|
|
raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for video in video_list:
|
|
|
|
|
|
|
|
yield self.url_result(
|
|
|
|
|
|
|
|
'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _entries(self, category, subcategory, query):
|
|
|
|
|
|
|
|
# map of categories : subcategories : RIDs
|
|
|
|
|
|
|
|
rid_map = {
|
|
|
|
|
|
|
|
'kichiku': {
|
|
|
|
|
|
|
|
'mad': 26,
|
|
|
|
|
|
|
|
'manual_vocaloid': 126,
|
|
|
|
|
|
|
|
'guide': 22,
|
|
|
|
|
|
|
|
'theatre': 216,
|
|
|
|
|
|
|
|
'course': 127
|
|
|
|
|
|
|
|
},
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if category not in rid_map:
|
|
|
|
|
|
|
|
raise ExtractorError('The supplied category, %s, is not supported. List of supported categories: %s' % (category, list(rid_map.keys())))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if subcategory not in rid_map[category]:
|
|
|
|
|
|
|
|
raise ExtractorError('The subcategory, %s, isn\'t supported for this category. Supported subcategories: %s' % (subcategory, list(rid_map[category].keys())))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
rid_value = rid_map[category][subcategory]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
|
|
|
|
|
|
|
|
page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
|
|
|
|
|
|
|
|
page_data = try_get(page_json, lambda x: x['data']['page'], dict)
|
|
|
|
|
|
|
|
count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
|
|
|
|
|
|
|
|
if count is None or not size:
|
|
|
|
|
|
|
|
raise ExtractorError('Failed to calculate either page count or size')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
num_pages = math.ceil(count / size)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return OnDemandPagedList(functools.partial(
|
|
|
|
|
|
|
|
self._fetch_page, api_url, num_pages, query), size)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
|
|
|
|
u = compat_urllib_parse_urlparse(url)
|
|
|
|
|
|
|
|
category, subcategory = u.path.split('/')[2:4]
|
|
|
|
|
|
|
|
query = '%s: %s' % (category, subcategory)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return self.playlist_result(self._entries(category, subcategory, query), query, query)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BiliBiliSearchIE(SearchInfoExtractor):
|
|
|
|
class BiliBiliSearchIE(SearchInfoExtractor):
|
|
|
|
IE_DESC = 'Bilibili video search, "bilisearch" keyword'
|
|
|
|
IE_DESC = 'Bilibili video search, "bilisearch" keyword'
|
|
|
|
_MAX_RESULTS = 100000
|
|
|
|
_MAX_RESULTS = 100000
|
|
|
|