@ -1,9 +1,11 @@
import json
import re
import urllib . error
from . common import InfoExtractor
from . periscope import PeriscopeBaseIE , PeriscopeIE
from . . compat import functools # isort: split
from . . compat import (
compat_HTTPError ,
compat_parse_qs ,
compat_urllib_parse_unquote ,
compat_urllib_parse_urlparse ,
@ -18,6 +20,7 @@ from ..utils import (
str_or_none ,
strip_or_none ,
traverse_obj ,
try_call ,
try_get ,
unified_timestamp ,
update_url_query ,
@ -28,8 +31,12 @@ from ..utils import (
class TwitterBaseIE ( InfoExtractor ) :
_API_BASE = ' https://api.twitter.com/1.1/ '
_GRAPHQL_API_BASE = ' https://twitter.com/i/api/graphql/ '
_TOKENS = {
' AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs % 3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA ' : None ,
' AAAAAAAAAAAAAAAAAAAAAPYXBAAAAAAACLXUNDekMxqa8h %2F 40K4moUkGsoc % 3DTYfbDKbT3jJPCEVnMYqilB28NHfOPqkca3qaAxGfsyKCs0wRbw ' : None ,
}
_BASE_REGEX = r ' https?://(?:(?:www|m(?:obile)?) \ .)?(?:twitter \ .com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid \ .onion)/ '
_GUEST_TOKEN = None
def _extract_variant_formats ( self , variant , video_id ) :
variant_url = variant . get ( ' url ' )
@ -81,28 +88,73 @@ class TwitterBaseIE(InfoExtractor):
' height ' : int ( m . group ( ' height ' ) ) ,
} )
def _call_api ( self , path , video_id , query = { } ) :
headers = {
' Authorization ' : ' Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs % 3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA ' ,
}
token = self . _get_cookies ( self . _API_BASE ) . get ( ' ct0 ' )
if token :
headers [ ' x-csrf-token ' ] = token . value
if not self . _GUEST_TOKEN :
self . _GUEST_TOKEN = self . _download_json (
@functools.cached_property
def is_logged_in ( self ) :
return bool ( self . _get_cookies ( self . _API_BASE ) . get ( ' auth_token ' ) )
def _call_api ( self , path , video_id , query = { } , graphql = False ) :
cookies = self . _get_cookies ( self . _API_BASE )
headers = { }
csrf_cookie = cookies . get ( ' ct0 ' )
if csrf_cookie :
headers [ ' x-csrf-token ' ] = csrf_cookie . value
if self . is_logged_in :
headers . update ( {
' x-twitter-auth-type ' : ' OAuth2Session ' ,
' x-twitter-client-language ' : ' en ' ,
' x-twitter-active-user ' : ' yes ' ,
} )
result , last_error = None , None
for bearer_token in self . _TOKENS :
headers [ ' Authorization ' ] = f ' Bearer { bearer_token } '
if not self . is_logged_in :
if not self . _TOKENS [ bearer_token ] :
headers . pop ( ' x-guest-token ' , None )
guest_token_response = self . _download_json (
self . _API_BASE + ' guest/activate.json ' , video_id ,
' Downloading guest token ' , data = b ' ' ,
headers = headers ) [ ' guest_token ' ]
headers [ ' x-guest-token ' ] = self . _GUEST_TOKEN
' Downloading guest token ' , data = b ' ' , headers = headers )
self . _TOKENS [ bearer_token ] = guest_token_response . get ( ' guest_token ' )
if not self . _TOKENS [ bearer_token ] :
raise ExtractorError ( ' Could not retrieve guest token ' )
headers [ ' x-guest-token ' ] = self . _TOKENS [ bearer_token ]
try :
return self . _download_json (
self . _API_BASE + path , video_id , headers = headers , query = query )
allowed_status = { 400 , 403 , 404 } if graphql else { 403 }
result = self . _download_json (
( self . _GRAPHQL_API_BASE if graphql else self . _API_BASE ) + path ,
video_id , headers = headers , query = query , expected_status = allowed_status )
break
except ExtractorError as e :
if isinstance ( e . cause , compat_HTTPError ) and e . cause . code == 403 :
raise ExtractorError ( self . _parse_json (
e . cause . read ( ) . decode ( ) ,
video_id ) [ ' errors ' ] [ 0 ] [ ' message ' ] , expected = True )
if last_error :
raise last_error
elif not isinstance ( e . cause , urllib . error . HTTPError ) or e . cause . code != 404 :
raise
last_error = e
self . report_warning (
' Twitter API gave 404 response, retrying with deprecated token. '
' Only one media item can be extracted ' )
if result . get ( ' errors ' ) :
error_message = ' , ' . join ( set ( traverse_obj (
result , ( ' errors ' , . . . , ' message ' ) , expected_type = str ) ) ) or ' Unknown error '
raise ExtractorError ( f ' Error(s) while querying api: { error_message } ' , expected = True )
assert result is not None
return result
def _build_graphql_query ( self , media_id ) :
raise NotImplementedError ( ' Method must be implemented to support GraphQL ' )
def _call_graphql_api ( self , endpoint , media_id ) :
data = self . _build_graphql_query ( media_id )
query = { key : json . dumps ( value , separators = ( ' , ' , ' : ' ) ) for key , value in data . items ( ) }
return traverse_obj ( self . _call_api ( endpoint , media_id , query = query , graphql = True ) , ' data ' )
class TwitterCardIE ( InfoExtractor ) :
@ -113,7 +165,7 @@ class TwitterCardIE(InfoExtractor):
' url ' : ' https://twitter.com/i/cards/tfw/v1/560070183650213889 ' ,
# MD5 checksums are different in different places
' info_dict ' : {
' id ' : ' 5600701 8 36502 1388 9' ,
' id ' : ' 5600701 31976 392705 ' ,
' ext ' : ' mp4 ' ,
' title ' : " Twitter - You can now shoot, edit and share video on Twitter. Capture life ' s most moving moments from your perspective. " ,
' description ' : ' md5:18d3e24bb4f6e5007487dd546e53bd96 ' ,
@ -123,6 +175,13 @@ class TwitterCardIE(InfoExtractor):
' duration ' : 30.033 ,
' timestamp ' : 1422366112 ,
' upload_date ' : ' 20150127 ' ,
' age_limit ' : 0 ,
' comment_count ' : int ,
' tags ' : [ ] ,
' repost_count ' : int ,
' like_count ' : int ,
' display_id ' : ' 560070183650213889 ' ,
' uploader_url ' : ' https://twitter.com/Twitter ' ,
} ,
} ,
{
@ -137,7 +196,14 @@ class TwitterCardIE(InfoExtractor):
' uploader_id ' : ' NASA ' ,
' timestamp ' : 1437408129 ,
' upload_date ' : ' 20150720 ' ,
' uploader_url ' : ' https://twitter.com/NASA ' ,
' age_limit ' : 0 ,
' comment_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' tags ' : [ ' PlutoFlyby ' ] ,
} ,
' params ' : { ' format ' : ' [protocol=https] ' }
} ,
{
' url ' : ' https://twitter.com/i/cards/tfw/v1/654001591733886977 ' ,
@ -150,12 +216,27 @@ class TwitterCardIE(InfoExtractor):
' upload_date ' : ' 20111013 ' ,
' uploader ' : ' OMG! UBUNTU! ' ,
' uploader_id ' : ' omgubuntu ' ,
' channel_url ' : ' https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ ' ,
' channel_id ' : ' UCIiSwcm9xiFb3Y4wjzR41eQ ' ,
' channel_follower_count ' : int ,
' chapters ' : ' count:8 ' ,
' uploader_url ' : ' http://www.youtube.com/user/omgubuntu ' ,
' duration ' : 138 ,
' categories ' : [ ' Film & Animation ' ] ,
' age_limit ' : 0 ,
' comment_count ' : int ,
' availability ' : ' public ' ,
' like_count ' : int ,
' thumbnail ' : ' https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg ' ,
' view_count ' : int ,
' tags ' : ' count:12 ' ,
' channel ' : ' OMG! UBUNTU! ' ,
' playable_in_embed ' : True ,
} ,
' add_ie ' : [ ' Youtube ' ] ,
} ,
{
' url ' : ' https://twitter.com/i/cards/tfw/v1/665289828897005568 ' ,
' md5 ' : ' 6dabeaca9e68cbb71c99c322a4b42a11 ' ,
' info_dict ' : {
' id ' : ' iBb2x00UVlv ' ,
' ext ' : ' mp4 ' ,
@ -164,9 +245,17 @@ class TwitterCardIE(InfoExtractor):
' uploader ' : ' ArsenalTerje ' ,
' title ' : ' Vine by ArsenalTerje ' ,
' timestamp ' : 1447451307 ,
' alt_title ' : ' Vine by ArsenalTerje ' ,
' comment_count ' : int ,
' like_count ' : int ,
' thumbnail ' : r ' re:^https?://[^?#]+ \ .jpg ' ,
' view_count ' : int ,
' repost_count ' : int ,
} ,
' add_ie ' : [ ' Vine ' ] ,
} , {
' params ' : { ' skip_download ' : ' m3u8 ' } ,
} ,
{
' url ' : ' https://twitter.com/i/videos/tweet/705235433198714880 ' ,
' md5 ' : ' 884812a2adc8aaf6fe52b15ccbfa3b88 ' ,
' info_dict ' : {
@ -180,7 +269,8 @@ class TwitterCardIE(InfoExtractor):
' upload_date ' : ' 20160303 ' ,
} ,
' skip ' : ' This content is no longer available. ' ,
} , {
} ,
{
' url ' : ' https://twitter.com/i/videos/752274308186120192 ' ,
' only_matching ' : True ,
} ,
@ -211,7 +301,6 @@ class TwitterIE(TwitterBaseIE):
' duration ' : 12.922 ,
' timestamp ' : 1442188653 ,
' upload_date ' : ' 20150913 ' ,
' age_limit ' : 18 ,
' uploader_url ' : ' https://twitter.com/freethenipple ' ,
' comment_count ' : int ,
' repost_count ' : int ,
@ -239,10 +328,10 @@ class TwitterIE(TwitterBaseIE):
' id ' : ' 665052190608723968 ' ,
' display_id ' : ' 665052190608723968 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Star Wars - A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. ' ,
' title ' : ' md5:3f57ab5d35116537a2ae7345cd0060d8 ' ,
' description ' : ' A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ ' ,
' uploader_id ' : ' starwars ' ,
' uploader ' : ' Star Wars' ,
' uploader ' : r ' re: Star Wars.* ' ,
' timestamp ' : 1447395772 ,
' upload_date ' : ' 20151113 ' ,
' uploader_url ' : ' https://twitter.com/starwars ' ,
@ -487,7 +576,7 @@ class TwitterIE(TwitterBaseIE):
' uploader_url ' : ' https://twitter.com/oshtru ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' duration ' : 30.03 ,
' timestamp ' : 1665025050. 0,
' timestamp ' : 166502505 0,
' comment_count ' : int ,
' repost_count ' : int ,
' like_count ' : int ,
@ -505,7 +594,7 @@ class TwitterIE(TwitterBaseIE):
' uploader_id ' : ' UltimaShadowX ' ,
' uploader_url ' : ' https://twitter.com/UltimaShadowX ' ,
' upload_date ' : ' 20221005 ' ,
' timestamp ' : 1664992565.0 ,
' timestamp ' : 1664992565 ,
' comment_count ' : int ,
' repost_count ' : int ,
' like_count ' : int ,
@ -514,6 +603,121 @@ class TwitterIE(TwitterBaseIE):
} ,
' playlist_count ' : 4 ,
' params ' : { ' skip_download ' : True } ,
} , {
' url ' : ' https://twitter.com/MesoMax919/status/1575560063510810624 ' ,
' info_dict ' : {
' id ' : ' 1575559336759263233 ' ,
' display_id ' : ' 1575560063510810624 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' md5:eec26382babd0f7c18f041db8ae1c9c9 ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg ' ,
' description ' : ' md5:95aea692fda36a12081b9629b02daa92 ' ,
' uploader ' : ' Max Olson ' ,
' uploader_id ' : ' MesoMax919 ' ,
' uploader_url ' : ' https://twitter.com/MesoMax919 ' ,
' duration ' : 21.321 ,
' timestamp ' : 1664477766 ,
' upload_date ' : ' 20220929 ' ,
' comment_count ' : int ,
' repost_count ' : int ,
' like_count ' : int ,
' tags ' : [ ' HurricaneIan ' ] ,
' age_limit ' : 0 ,
} ,
} , {
# Adult content, uses old token
# Fails if not logged in (GraphQL)
' url ' : ' https://twitter.com/Rizdraws/status/1575199173472927762 ' ,
' info_dict ' : {
' id ' : ' 1575199163847000068 ' ,
' display_id ' : ' 1575199173472927762 ' ,
' ext ' : ' mp4 ' ,
' title ' : str ,
' description ' : str ,
' uploader ' : str ,
' uploader_id ' : ' Rizdraws ' ,
' uploader_url ' : ' https://twitter.com/Rizdraws ' ,
' upload_date ' : ' 20220928 ' ,
' timestamp ' : 1664391723 ,
' thumbnail ' : ' re:^https?://.* \\ .jpg ' ,
' like_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
' age_limit ' : 18 ,
' tags ' : [ ]
} ,
' expected_warnings ' : [ ' 404 ' ] ,
} , {
# Description is missing one https://t.co url (GraphQL)
' url ' : ' https://twitter.com/Srirachachau/status/1395079556562706435 ' ,
' playlist_mincount ' : 2 ,
' info_dict ' : {
' id ' : ' 1395079556562706435 ' ,
' title ' : str ,
' tags ' : [ ] ,
' uploader ' : str ,
' like_count ' : int ,
' upload_date ' : ' 20210519 ' ,
' age_limit ' : 0 ,
' repost_count ' : int ,
' description ' : ' Here it is! Finished my gothic western cartoon. Pretty proud of it. It \' s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw https://t.co/kbXZrozlY7 ' ,
' uploader_id ' : ' Srirachachau ' ,
' comment_count ' : int ,
' uploader_url ' : ' https://twitter.com/Srirachachau ' ,
' timestamp ' : 1621447860 ,
} ,
} , {
# Description is missing one https://t.co url (GraphQL)
' url ' : ' https://twitter.com/DavidToons_/status/1578353380363501568 ' ,
' playlist_mincount ' : 2 ,
' info_dict ' : {
' id ' : ' 1578353380363501568 ' ,
' title ' : str ,
' uploader_id ' : ' DavidToons_ ' ,
' repost_count ' : int ,
' like_count ' : int ,
' uploader ' : str ,
' timestamp ' : 1665143744 ,
' uploader_url ' : ' https://twitter.com/DavidToons_ ' ,
' description ' : ' Chris sounds like Linda from Bob \' s Burgers, so as an animator: this had to be done. https://t.co/glfQdgfFXH https://t.co/WgJauwIW1w ' ,
' tags ' : [ ] ,
' comment_count ' : int ,
' upload_date ' : ' 20221007 ' ,
' age_limit ' : 0 ,
} ,
} , {
' url ' : ' https://twitter.com/primevideouk/status/1578401165338976258 ' ,
' playlist_count ' : 2 ,
' info_dict ' : {
' id ' : ' 1578401165338976258 ' ,
' title ' : str ,
' description ' : ' md5:659a6b517a034b4cee5d795381a2dc41 ' ,
' uploader ' : str ,
' uploader_id ' : ' primevideouk ' ,
' timestamp ' : 1665155137 ,
' upload_date ' : ' 20221007 ' ,
' age_limit ' : 0 ,
' uploader_url ' : ' https://twitter.com/primevideouk ' ,
' comment_count ' : int ,
' repost_count ' : int ,
' like_count ' : int ,
' tags ' : [ ' TheRingsOfPower ' ] ,
} ,
} , {
# Twitter Spaces
' url ' : ' https://twitter.com/MoniqueCamarra/status/1550101959377551360 ' ,
' info_dict ' : {
' id ' : ' 1lPJqmBeeNAJb ' ,
' ext ' : ' m4a ' ,
' title ' : ' EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West ' ,
' uploader ' : r ' re:Monique Camarra.+? ' ,
' uploader_id ' : ' MoniqueCamarra ' ,
' live_status ' : ' was_live ' ,
' description ' : ' md5:acce559345fd49f129c20dbcda3f1201 ' ,
' timestamp ' : 1658407771464 ,
} ,
' add_ie ' : [ ' TwitterSpaces ' ] ,
' params ' : { ' skip_download ' : ' m3u8 ' } ,
} , {
# onion route
' url ' : ' https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273 ' ,
@ -552,10 +756,77 @@ class TwitterIE(TwitterBaseIE):
' only_matching ' : True ,
} ]
def _graphql_to_legacy ( self , data , twid ) :
result = traverse_obj ( data , (
' threaded_conversation_with_injections_v2 ' , ' instructions ' , 0 , ' entries ' ,
lambda _ , v : v [ ' entryId ' ] == f ' tweet- { twid } ' , ' content ' , ' itemContent ' ,
' tweet_results ' , ' result '
) , expected_type = dict , default = { } , get_all = False )
if ' tombstone ' in result :
cause = traverse_obj ( result , ( ' tombstone ' , ' text ' , ' text ' ) , expected_type = str )
raise ExtractorError ( f ' Twitter API says: { cause or " Unknown error " } ' , expected = True )
status = result . get ( ' legacy ' , { } )
status . update ( traverse_obj ( result , {
' user ' : ( ' core ' , ' user_results ' , ' result ' , ' legacy ' ) ,
' card ' : ( ' card ' , ' legacy ' ) ,
' quoted_status ' : ( ' quoted_status_result ' , ' result ' , ' legacy ' ) ,
} , expected_type = dict , default = { } ) )
# extra transformation is needed since result does not match legacy format
binding_values = {
binding_value . get ( ' key ' ) : binding_value . get ( ' value ' )
for binding_value in traverse_obj ( status , ( ' card ' , ' binding_values ' , . . . ) , expected_type = dict )
}
if binding_values :
status [ ' card ' ] [ ' binding_values ' ] = binding_values
return status
def _build_graphql_query ( self , media_id ) :
return {
' variables ' : {
' focalTweetId ' : media_id ,
' includePromotedContent ' : True ,
' with_rux_injections ' : False ,
' withBirdwatchNotes ' : True ,
' withCommunity ' : True ,
' withDownvotePerspective ' : False ,
' withQuickPromoteEligibilityTweetFields ' : True ,
' withReactionsMetadata ' : False ,
' withReactionsPerspective ' : False ,
' withSuperFollowsTweetFields ' : True ,
' withSuperFollowsUserFields ' : True ,
' withV2Timeline ' : True ,
' withVoice ' : True ,
} ,
' features ' : {
' graphql_is_translatable_rweb_tweet_is_translatable_enabled ' : False ,
' interactive_text_enabled ' : True ,
' responsive_web_edit_tweet_api_enabled ' : True ,
' responsive_web_enhance_cards_enabled ' : True ,
' responsive_web_graphql_timeline_navigation_enabled ' : False ,
' responsive_web_text_conversations_enabled ' : False ,
' responsive_web_uc_gql_enabled ' : True ,
' standardized_nudges_misinfo ' : True ,
' tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled ' : False ,
' tweetypie_unmention_optimization_enabled ' : True ,
' unified_cards_ad_metadata_container_dynamic_card_content_query_enabled ' : True ,
' verified_phone_label_enabled ' : False ,
' vibe_api_enabled ' : True ,
} ,
}
def _real_extract ( self , url ) :
twid = self . _match_id ( url )
status = self . _call_api (
' statuses/show/ %s .json ' % twid , twid , {
if self . is_logged_in or self . _configuration_arg ( ' force_graphql ' ) :
self . write_debug ( f ' Using GraphQL API (Auth = { self . is_logged_in } ) ' )
result = self . _call_graphql_api ( ' zZXycP0V6H7m-2r0mOnFcA/TweetDetail ' , twid )
status = self . _graphql_to_legacy ( result , twid )
else :
status = self . _call_api ( f ' statuses/show/ { twid } .json ' , twid , {
' cards_platform ' : ' Web-12 ' ,
' include_cards ' : 1 ,
' include_reply_count ' : 1 ,
@ -569,7 +840,7 @@ class TwitterIE(TwitterBaseIE):
user = status . get ( ' user ' ) or { }
uploader = user . get ( ' name ' )
if uploader :
title = ' %s - %s ' % ( uploader , title )
title = f' { uploader } - { title } '
uploader_id = user . get ( ' screen_name ' )
tags = [ ]
@ -642,31 +913,37 @@ class TwitterIE(TwitterBaseIE):
card_name = card [ ' name ' ] . split ( ' : ' ) [ - 1 ]
if card_name == ' player ' :
return {
yield {
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' player_url ' ) ,
}
elif card_name == ' periscope_broadcast ' :
return {
yield {
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' url ' ) or get_binding_value ( ' player_url ' ) ,
' ie_key ' : PeriscopeIE . ie_key ( ) ,
}
elif card_name == ' broadcast ' :
return {
yield {
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' broadcast_url ' ) ,
' ie_key ' : TwitterBroadcastIE . ie_key ( ) ,
}
elif card_name == ' audiospace ' :
yield {
' _type ' : ' url ' ,
' url ' : f ' https://twitter.com/i/spaces/ { get_binding_value ( " id " ) } ' ,
' ie_key ' : TwitterSpacesIE . ie_key ( ) ,
}
elif card_name == ' summary ' :
return {
yield {
' _type ' : ' url ' ,
' url ' : get_binding_value ( ' card_url ' ) ,
}
elif card_name == ' unified_card ' :
media_entities = self . _parse_json ( get_binding_value ( ' unified_card ' ) , twid ) [ ' media_entities ' ]
media = traverse_obj ( media_entities , . . . , expected_type = dict , get_all = False )
return extract_from_video_info ( media )
unified_card = self . _parse_json ( get_binding_value ( ' unified_card ' ) , twid )
yield from map ( extract_from_video_info , traverse_obj (
unified_card , ( ' media_entities ' , . . . ) , expected_type = dict ) )
# amplify, promo_video_website, promo_video_convo, appplayer,
# video_direct_message, poll2choice_video, poll3choice_video,
# poll4choice_video, ...
@ -690,7 +967,7 @@ class TwitterIE(TwitterBaseIE):
' height ' : int_or_none ( image . get ( ' height ' ) ) ,
} )
return {
yield {
' formats ' : formats ,
' subtitles ' : subtitles ,
' thumbnails ' : thumbnails ,
@ -700,11 +977,8 @@ class TwitterIE(TwitterBaseIE):
media_path = ( ( None , ' quoted_status ' ) , ' extended_entities ' , ' media ' , lambda _ , m : m [ ' type ' ] != ' photo ' )
videos = map ( extract_from_video_info , traverse_obj ( status , media_path , expected_type = dict ) )
entries = [ { * * info , * * data , ' display_id ' : twid } for data in videos if data ]
data = extract_from_card_info ( status . get ( ' card ' ) )
if data :
entries . append ( { * * info , * * data , ' display_id ' : twid } )
cards = extract_from_card_info ( status . get ( ' card ' ) )
entries = [ { * * info , * * data , ' display_id ' : twid } for data in ( * videos , * cards ) ]
if not entries :
expanded_url = traverse_obj ( status , ( ' entities ' , ' urls ' , 0 , ' expanded_url ' ) , expected_type = url_or_none )
@ -730,13 +1004,14 @@ class TwitterAmplifyIE(TwitterBaseIE):
_TEST = {
' url ' : ' https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951 ' ,
' md5 ' : ' 7df102d0b9fd7066b86f3159f8e81bf6 ' ,
' md5 ' : ' fec25801d18a4557c5c9f33d2c379ffa ' ,
' info_dict ' : {
' id ' : ' 0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Twitter Video ' ,
' thumbnail ' : ' re:^https?://.* ' ,
} ,
' params ' : { ' format ' : ' [protocol=https] ' } ,
}
def _real_extract ( self , url ) :
@ -745,7 +1020,7 @@ class TwitterAmplifyIE(TwitterBaseIE):
vmap_url = self . _html_search_meta (
' twitter:amplify:vmap ' , webpage , ' vmap url ' )
formats = self . _extract_formats_from_vmap_url ( vmap_url , video_id )
formats , _ = self . _extract_formats_from_vmap_url ( vmap_url , video_id )
thumbnails = [ ]
thumbnail = self . _html_search_meta (
@ -793,6 +1068,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
' title ' : ' Andrea May Sahouri - Periscope Broadcast ' ,
' uploader ' : ' Andrea May Sahouri ' ,
' uploader_id ' : ' 1PXEdBZWpGwKe ' ,
' thumbnail ' : r ' re:^https?://[^?#]+ \ .jpg \ ?token= ' ,
' view_count ' : int ,
} ,
}
@ -804,7 +1081,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
info = self . _parse_broadcast_data ( broadcast , broadcast_id )
media_key = broadcast [ ' media_key ' ]
source = self . _call_api (
' live_video_stream/status/ ' + media_key , media_key ) [ ' source ' ]
f' live_video_stream/status/ { media_key } ' , media_key ) [ ' source ' ]
m3u8_url = source . get ( ' noRedirectPlaybackUrl ' ) or source [ ' location ' ]
if ' /live_video_stream/geoblocked/ ' in m3u8_url :
self . raise_geo_restricted ( )
@ -816,6 +1093,100 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
return info
class TwitterSpacesIE ( TwitterBaseIE ) :
IE_NAME = ' twitter:spaces '
_VALID_URL = TwitterBaseIE . _BASE_REGEX + r ' i/spaces/(?P<id>[0-9a-zA-Z] {13} ) '
_TWITTER_GRAPHQL = ' https://twitter.com/i/api/graphql/HPEisOmj1epUNLCWTYhUWw/ '
_TESTS = [ {
' url ' : ' https://twitter.com/i/spaces/1RDxlgyvNXzJL ' ,
' info_dict ' : {
' id ' : ' 1RDxlgyvNXzJL ' ,
' ext ' : ' m4a ' ,
' title ' : ' King Carlo e la mossa Kansas City per fare il Grande Centro ' ,
' description ' : ' Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe ' ,
' uploader ' : r ' re:Lucio Di Gaetano.*? ' ,
' uploader_id ' : ' luciodigaetano ' ,
' live_status ' : ' was_live ' ,
' timestamp ' : 1659877956397 ,
} ,
' params ' : { ' skip_download ' : ' m3u8 ' } ,
} ]
SPACE_STATUS = {
' notstarted ' : ' is_upcoming ' ,
' ended ' : ' was_live ' ,
' running ' : ' is_live ' ,
' timedout ' : ' post_live ' ,
}
def _build_graphql_query ( self , space_id ) :
return {
' variables ' : {
' id ' : space_id ,
' isMetatagsQuery ' : True ,
' withDownvotePerspective ' : False ,
' withReactionsMetadata ' : False ,
' withReactionsPerspective ' : False ,
' withReplays ' : True ,
' withSuperFollowsUserFields ' : True ,
' withSuperFollowsTweetFields ' : True ,
} ,
' features ' : {
' dont_mention_me_view_api_enabled ' : True ,
' interactive_text_enabled ' : True ,
' responsive_web_edit_tweet_api_enabled ' : True ,
' responsive_web_enhance_cards_enabled ' : True ,
' responsive_web_uc_gql_enabled ' : True ,
' spaces_2022_h2_clipping ' : True ,
' spaces_2022_h2_spaces_communities ' : False ,
' standardized_nudges_misinfo ' : True ,
' tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled ' : False ,
' vibe_api_enabled ' : True ,
} ,
}
def _real_extract ( self , url ) :
space_id = self . _match_id ( url )
space_data = self . _call_graphql_api ( ' HPEisOmj1epUNLCWTYhUWw/AudioSpaceById ' , space_id ) [ ' audioSpace ' ]
if not space_data :
raise ExtractorError ( ' Twitter Space not found ' , expected = True )
metadata = space_data [ ' metadata ' ]
live_status = try_call ( lambda : self . SPACE_STATUS [ metadata [ ' state ' ] . lower ( ) ] )
formats = [ ]
if live_status == ' is_upcoming ' :
self . raise_no_formats ( ' Twitter Space not started yet ' , expected = True )
elif live_status == ' post_live ' :
self . raise_no_formats ( ' Twitter Space ended but not downloadable yet ' , expected = True )
else :
source = self . _call_api (
f ' live_video_stream/status/ { metadata [ " media_key " ] } ' , metadata [ ' media_key ' ] ) [ ' source ' ]
# XXX: Native downloader does not work
formats = self . _extract_m3u8_formats (
traverse_obj ( source , ' noRedirectPlaybackUrl ' , ' location ' ) ,
metadata [ ' media_key ' ] , ' m4a ' , ' m3u8 ' , live = live_status == ' is_live ' )
for fmt in formats :
fmt . update ( { ' vcodec ' : ' none ' , ' acodec ' : ' aac ' } )
participants = ' , ' . join ( traverse_obj (
space_data , ( ' participants ' , ' speakers ' , . . . , ' display_name ' ) ) ) or ' nobody yet '
return {
' id ' : space_id ,
' title ' : metadata . get ( ' title ' ) ,
' description ' : f ' Twitter Space participated by { participants } ' ,
' uploader ' : traverse_obj (
metadata , ( ' creator_results ' , ' result ' , ' legacy ' , ' name ' ) ) ,
' uploader_id ' : traverse_obj (
metadata , ( ' creator_results ' , ' result ' , ' legacy ' , ' screen_name ' ) ) ,
' live_status ' : live_status ,
' timestamp ' : metadata . get ( ' created_at ' ) ,
' formats ' : formats ,
}
class TwitterShortenerIE ( TwitterBaseIE ) :
IE_NAME = ' twitter:shortener '
_VALID_URL = r ' https?://t.co/(?P<id>[^?]+)|tco:(?P<eid>[^?]+) '