@ -6,7 +6,7 @@ import string
import time
import time
from . common import InfoExtractor
from . common import InfoExtractor
from . . compat import compat_urllib_parse_u nquote, compat_urllib_parse_u rlparse
from . . compat import compat_urllib_parse_u rlparse
from . . networking import HEADRequest
from . . networking import HEADRequest
from . . utils import (
from . . utils import (
ExtractorError ,
ExtractorError ,
@ -15,7 +15,6 @@ from ..utils import (
UserNotLive ,
UserNotLive ,
determine_ext ,
determine_ext ,
format_field ,
format_field ,
get_first ,
int_or_none ,
int_or_none ,
join_nonempty ,
join_nonempty ,
merge_dicts ,
merge_dicts ,
@ -219,8 +218,8 @@ class TikTokBaseIE(InfoExtractor):
def extract_addr ( addr , add_meta = { } ) :
def extract_addr ( addr , add_meta = { } ) :
parsed_meta , res = parse_url_key ( addr . get ( ' url_key ' , ' ' ) )
parsed_meta , res = parse_url_key ( addr . get ( ' url_key ' , ' ' ) )
if res :
if res :
known_resolutions . setdefault ( res , { } ) . setdefault ( ' height ' , add_meta. get ( ' height ' ) or addr . get ( ' height ' ) )
known_resolutions . setdefault ( res , { } ) . setdefault ( ' height ' , int_or_none( addr . get ( ' height ' ) ) )
known_resolutions [ res ] . setdefault ( ' width ' , add_meta. get ( ' width ' ) or addr . get ( ' width ' ) )
known_resolutions [ res ] . setdefault ( ' width ' , int_or_none( addr . get ( ' width ' ) ) )
parsed_meta . update ( known_resolutions . get ( res , { } ) )
parsed_meta . update ( known_resolutions . get ( res , { } ) )
add_meta . setdefault ( ' height ' , int_or_none ( res [ : - 1 ] ) )
add_meta . setdefault ( ' height ' , int_or_none ( res [ : - 1 ] ) )
return [ {
return [ {
@ -237,22 +236,26 @@ class TikTokBaseIE(InfoExtractor):
# Hack: Add direct video links first to prioritize them when removing duplicate formats
# Hack: Add direct video links first to prioritize them when removing duplicate formats
formats = [ ]
formats = [ ]
width = int_or_none ( video_info . get ( ' width ' ) )
height = int_or_none ( video_info . get ( ' height ' ) )
if video_info . get ( ' play_addr ' ) :
if video_info . get ( ' play_addr ' ) :
formats . extend ( extract_addr ( video_info [ ' play_addr ' ] , {
formats . extend ( extract_addr ( video_info [ ' play_addr ' ] , {
' format_id ' : ' play_addr ' ,
' format_id ' : ' play_addr ' ,
' format_note ' : ' Direct video ' ,
' format_note ' : ' Direct video ' ,
' vcodec ' : ' h265 ' if traverse_obj (
' vcodec ' : ' h265 ' if traverse_obj (
video_info , ' is_bytevc1 ' , ' is_h265 ' ) else ' h264 ' , # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
video_info , ' is_bytevc1 ' , ' is_h265 ' ) else ' h264 ' , # TODO: Check for "direct iOS" videos, like https://www.tiktok.com/@cookierun_dev/video/7039716639834656002
' width ' : video_info. get ( ' width' ) ,
' width ' : width,
' height ' : video_info. get ( ' height' ) ,
' height ' : height,
} ) )
} ) )
if video_info . get ( ' download_addr ' ) :
if video_info . get ( ' download_addr ' ) :
formats . extend ( extract_addr ( video_info [ ' download_addr ' ] , {
download_addr = video_info [ ' download_addr ' ]
dl_width = int_or_none ( download_addr . get ( ' width ' ) )
formats . extend ( extract_addr ( download_addr , {
' format_id ' : ' download_addr ' ,
' format_id ' : ' download_addr ' ,
' format_note ' : ' Download video %s ' % ( ' , watermarked ' if video_info . get ( ' has_watermark ' ) else ' ' ) ,
' format_note ' : ' Download video %s ' % ( ' , watermarked ' if video_info . get ( ' has_watermark ' ) else ' ' ) ,
' vcodec ' : ' h264 ' ,
' vcodec ' : ' h264 ' ,
' width ' : video_info. get ( ' width ' ) ,
' width ' : dl_width or width ,
' height ' : video_info. get ( ' height ' ) ,
' height ' : try_call( lambda : int ( dl_width / 0.5625 ) ) or height , # download_addr['height'] is wrong
' preference ' : - 2 if video_info . get ( ' has_watermark ' ) else - 1 ,
' preference ' : - 2 if video_info . get ( ' has_watermark ' ) else - 1 ,
} ) )
} ) )
if video_info . get ( ' play_addr_h264 ' ) :
if video_info . get ( ' play_addr_h264 ' ) :
@ -921,20 +924,23 @@ class DouyinIE(TikTokBaseIE):
_VALID_URL = r ' https?://(?:www \ .)?douyin \ .com/video/(?P<id>[0-9]+) '
_VALID_URL = r ' https?://(?:www \ .)?douyin \ .com/video/(?P<id>[0-9]+) '
_TESTS = [ {
_TESTS = [ {
' url ' : ' https://www.douyin.com/video/6961737553342991651 ' ,
' url ' : ' https://www.douyin.com/video/6961737553342991651 ' ,
' md5 ' : ' a97db7e3e67eb57bf40735c022ffa228 ' ,
' md5 ' : ' 9ecce7bc5b302601018ecb2871c63a75 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 6961737553342991651 ' ,
' id ' : ' 6961737553342991651 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' #杨超越 小小水手带你去远航❤️ ' ,
' title ' : ' #杨超越 小小水手带你去远航❤️ ' ,
' description ' : ' #杨超越 小小水手带你去远航❤️ ' ,
' description ' : ' #杨超越 小小水手带你去远航❤️ ' ,
' uploader ' : ' 6897520xka ' ,
' uploader_id ' : ' 110403406559 ' ,
' uploader_id ' : ' 110403406559 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' channel_id ' : ' MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' channel_id ' : ' MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' creator ' : ' 杨超越 ' ,
' creator ' : ' 杨超越 ' ,
' duration ' : 19782 ,
' creators ' : [ ' 杨超越 ' ] ,
' duration ' : 19 ,
' timestamp ' : 1620905839 ,
' timestamp ' : 1620905839 ,
' upload_date ' : ' 20210513 ' ,
' upload_date ' : ' 20210513 ' ,
' track ' : ' @杨超越创作的原声 ' ,
' track ' : ' @杨超越创作的原声 ' ,
' artists ' : [ ' 杨超越 ' ] ,
' view_count ' : int ,
' view_count ' : int ,
' like_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' repost_count ' : int ,
@ -943,20 +949,23 @@ class DouyinIE(TikTokBaseIE):
} ,
} ,
} , {
} , {
' url ' : ' https://www.douyin.com/video/6982497745948921092 ' ,
' url ' : ' https://www.douyin.com/video/6982497745948921092 ' ,
' md5 ' : ' 34a87ebff3833357733da3fe17e37c0e ' ,
' md5 ' : ' 15c5e660b7048af3707304e3cc02bbb5 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 6982497745948921092 ' ,
' id ' : ' 6982497745948921092 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' 这个夏日和小羊@杨超越 一起遇见白色幻想 ' ,
' title ' : ' 这个夏日和小羊@杨超越 一起遇见白色幻想 ' ,
' description ' : ' 这个夏日和小羊@杨超越 一起遇见白色幻想 ' ,
' description ' : ' 这个夏日和小羊@杨超越 一起遇见白色幻想 ' ,
' uploader ' : ' 0731chaoyue ' ,
' uploader_id ' : ' 408654318141572 ' ,
' uploader_id ' : ' 408654318141572 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA ' ,
' channel_id ' : ' MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA ' ,
' channel_id ' : ' MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA ' ,
' creator ' : ' 杨超越工作室 ' ,
' creator ' : ' 杨超越工作室 ' ,
' duration ' : 42479 ,
' creators ' : [ ' 杨超越工作室 ' ] ,
' duration ' : 42 ,
' timestamp ' : 1625739481 ,
' timestamp ' : 1625739481 ,
' upload_date ' : ' 20210708 ' ,
' upload_date ' : ' 20210708 ' ,
' track ' : ' @杨超越工作室创作的原声 ' ,
' track ' : ' @杨超越工作室创作的原声 ' ,
' artists ' : [ ' 杨超越工作室 ' ] ,
' view_count ' : int ,
' view_count ' : int ,
' like_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' repost_count ' : int ,
@ -965,20 +974,23 @@ class DouyinIE(TikTokBaseIE):
} ,
} ,
} , {
} , {
' url ' : ' https://www.douyin.com/video/6953975910773099811 ' ,
' url ' : ' https://www.douyin.com/video/6953975910773099811 ' ,
' md5 ' : ' dde3302460f19db59c47060ff013b902 ' ,
' md5 ' : ' 0e6443758b8355db9a3c34864a4276be ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 6953975910773099811 ' ,
' id ' : ' 6953975910773099811 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' #一起看海 出现在你的夏日里 ' ,
' title ' : ' #一起看海 出现在你的夏日里 ' ,
' description ' : ' #一起看海 出现在你的夏日里 ' ,
' description ' : ' #一起看海 出现在你的夏日里 ' ,
' uploader ' : ' 6897520xka ' ,
' uploader_id ' : ' 110403406559 ' ,
' uploader_id ' : ' 110403406559 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' channel_id ' : ' MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' channel_id ' : ' MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' creator ' : ' 杨超越 ' ,
' creator ' : ' 杨超越 ' ,
' duration ' : 17343 ,
' creators ' : [ ' 杨超越 ' ] ,
' duration ' : 17 ,
' timestamp ' : 1619098692 ,
' timestamp ' : 1619098692 ,
' upload_date ' : ' 20210422 ' ,
' upload_date ' : ' 20210422 ' ,
' track ' : ' @杨超越创作的原声 ' ,
' track ' : ' @杨超越创作的原声 ' ,
' artists ' : [ ' 杨超越 ' ] ,
' view_count ' : int ,
' view_count ' : int ,
' like_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' repost_count ' : int ,
@ -1004,20 +1016,23 @@ class DouyinIE(TikTokBaseIE):
' skip ' : ' No longer available ' ,
' skip ' : ' No longer available ' ,
} , {
} , {
' url ' : ' https://www.douyin.com/video/6963263655114722595 ' ,
' url ' : ' https://www.douyin.com/video/6963263655114722595 ' ,
' md5 ' : ' cf9f11f0ec45d131445ec2f06766e122 ' ,
' md5 ' : ' 1440bcf59d8700f8e014da073a4dfea8 ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 6963263655114722595 ' ,
' id ' : ' 6963263655114722595 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' #哪个爱豆的105度最甜 换个角度看看我哈哈 ' ,
' title ' : ' #哪个爱豆的105度最甜 换个角度看看我哈哈 ' ,
' description ' : ' #哪个爱豆的105度最甜 换个角度看看我哈哈 ' ,
' description ' : ' #哪个爱豆的105度最甜 换个角度看看我哈哈 ' ,
' uploader ' : ' 6897520xka ' ,
' uploader_id ' : ' 110403406559 ' ,
' uploader_id ' : ' 110403406559 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' uploader_url ' : ' https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' channel_id ' : ' MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' channel_id ' : ' MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98 ' ,
' creator ' : ' 杨超越 ' ,
' creator ' : ' 杨超越 ' ,
' duration ' : 15115 ,
' creators ' : [ ' 杨超越 ' ] ,
' duration ' : 15 ,
' timestamp ' : 1621261163 ,
' timestamp ' : 1621261163 ,
' upload_date ' : ' 20210517 ' ,
' upload_date ' : ' 20210517 ' ,
' track ' : ' @杨超越创作的原声 ' ,
' track ' : ' @杨超越创作的原声 ' ,
' artists ' : [ ' 杨超越 ' ] ,
' view_count ' : int ,
' view_count ' : int ,
' like_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' repost_count ' : int ,
@ -1025,34 +1040,23 @@ class DouyinIE(TikTokBaseIE):
' thumbnail ' : r ' re:https?://.+ \ .jpe?g ' ,
' thumbnail ' : r ' re:https?://.+ \ .jpe?g ' ,
} ,
} ,
} ]
} ]
_APP_VERSIONS = [ ( ' 23.3.0 ' , ' 230300 ' ) ]
_APP_NAME = ' aweme '
_AID = 1128
_API_HOSTNAME = ' aweme.snssdk.com '
_UPLOADER_URL_FORMAT = ' https://www.douyin.com/user/ %s '
_UPLOADER_URL_FORMAT = ' https://www.douyin.com/user/ %s '
_WEBPAGE_HOST = ' https://www.douyin.com/ '
_WEBPAGE_HOST = ' https://www.douyin.com/ '
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
try :
detail = traverse_obj ( self . _download_json (
return self . _extract_aweme_app ( video_id )
' https://www.douyin.com/aweme/v1/web/aweme/detail/ ' , video_id ,
except ExtractorError as e :
' Downloading web detail JSON ' , ' Failed to download web detail JSON ' ,
e . expected = True
query = { ' aweme_id ' : video_id } , fatal = False ) , ( ' aweme_detail ' , { dict } ) )
self . to_screen ( f ' { e } ; trying with webpage ' )
if not detail :
webpage = self . _download_webpage ( url , video_id )
render_data = self . _search_json (
r ' <script [^>]* \ bid=[ \' " ]RENDER_DATA[ \' " ][^>]*> ' , webpage , ' render data ' , video_id ,
contains_pattern = r ' % 7B(?s:.+) % 7D ' , fatal = False , transform_source = compat_urllib_parse_unquote )
if not render_data :
# TODO: Run verification challenge code to generate signature cookies
# TODO: Run verification challenge code to generate signature cookies
cookies = self . _get_cookies ( self . _WEBPAGE_HOST )
expected = not cookies . get ( ' s_v_web_id ' ) or not cookies . get ( ' ttwid ' )
raise ExtractorError (
raise ExtractorError (
' Fresh cookies (not necessarily logged in) are needed ' , expected = expected )
' Fresh cookies (not necessarily logged in) are needed ' ,
expected = not self . _get_cookies ( self . _WEBPAGE_HOST ) . get ( ' s_v_web_id ' ) )
return self . _parse_aweme_video_ web( get_first ( render_data , ( ' aweme ' , ' detail ' ) ) , url , video_id )
return self . _parse_aweme_video_app ( detail )
class TikTokVMIE ( InfoExtractor ) :
class TikTokVMIE ( InfoExtractor ) :