From 15b252dfd2c6807fe57afc5a95e59abadb32ccd2 Mon Sep 17 00:00:00 2001
From: c-basalt <117849907+c-basalt@users.noreply.github.com>
Date: Sat, 11 Nov 2023 15:02:59 -0500
Subject: [PATCH] [ie/weibo] Fix extraction (#8463)

Closes #8445
Authored by: c-basalt
---
 yt_dlp/extractor/weibo.py | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/yt_dlp/extractor/weibo.py b/yt_dlp/extractor/weibo.py
index b0c3052b6..2fca745aa 100644
--- a/yt_dlp/extractor/weibo.py
+++ b/yt_dlp/extractor/weibo.py
@@ -1,3 +1,4 @@
+import json
 import random
 import itertools
 import urllib.parse
@@ -18,24 +19,33 @@ from ..utils import (
 
 
 class WeiboBaseIE(InfoExtractor):
-    def _update_visitor_cookies(self, video_id):
+    def _update_visitor_cookies(self, visitor_url, video_id):
+        headers = {'Referer': visitor_url}
+        chrome_ver = self._search_regex(
+            r'Chrome/(\d+)', self.get_param('http_headers')['User-Agent'], 'user agent version', default='90')
         visitor_data = self._download_json(
             'https://passport.weibo.com/visitor/genvisitor', video_id,
             note='Generating first-visit guest request',
-            transform_source=strip_jsonp,
+            headers=headers, transform_source=strip_jsonp,
             data=urlencode_postdata({
                 'cb': 'gen_callback',
-                'fp': '{"os":"2","browser":"Gecko57,0,0,0","fonts":"undefined","screenInfo":"1440*900*24","plugins":""}',
-            }))
+                'fp': json.dumps({
+                    'os': '1',
+                    'browser': f'Chrome{chrome_ver},0,0,0',
+                    'fonts': 'undefined',
+                    'screenInfo': '1920*1080*24',
+                    'plugins': ''
+                }, separators=(',', ':'))}))['data']
 
         self._download_webpage(
             'https://passport.weibo.com/visitor/visitor', video_id,
             note='Running first-visit callback to get guest cookies',
-            query={
+            headers=headers, query={
                 'a': 'incarnate',
-                't': visitor_data['data']['tid'],
-                'w': 2,
-                'c': '%03d' % visitor_data['data']['confidence'],
+                't': visitor_data['tid'],
+                'w': 3 if visitor_data.get('new_tid') else 2,
+                'c': f'{visitor_data.get("confidence", 100):03d}',
+                'gc': '',
                 'cb': 'cross_domain',
                 'from': 'weibo',
                 '_rand': random.random(),
@@ -44,7 +54,7 @@ class WeiboBaseIE(InfoExtractor):
     def _weibo_download_json(self, url, video_id, *args, fatal=True, note='Downloading JSON metadata', **kwargs):
         webpage, urlh = self._download_webpage_handle(url, video_id, *args, fatal=fatal, note=note, **kwargs)
         if urllib.parse.urlparse(urlh.url).netloc == 'passport.weibo.com':
-            self._update_visitor_cookies(video_id)
+            self._update_visitor_cookies(urlh.url, video_id)
             webpage = self._download_webpage(url, video_id, *args, fatal=fatal, note=note, **kwargs)
         return self._parse_json(webpage, video_id, fatal=fatal)
 