@ -5,7 +5,6 @@ from .common import InfoExtractor
from . . compat import compat_urlparse
from . . compat import compat_urlparse
from . . utils import (
from . . utils import (
parse_count ,
parse_count ,
unified_strdate ,
unified_timestamp ,
unified_timestamp ,
remove_end ,
remove_end ,
determine_ext ,
determine_ext ,
@ -25,6 +24,16 @@ class NitterIE(InfoExtractor):
' nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion ' ,
' nitter.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd.onion ' ,
' i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion ' ,
' i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad.onion ' ,
' 26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion ' ,
' 26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid.onion ' ,
' vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad.onion ' ,
' iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd.onion ' ,
' erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad.onion ' ,
' ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd.onion ' ,
' jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid.onion ' ,
' nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad.onion ' ,
' nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd.onion ' ,
' nitter.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd.onion ' ,
' ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad.onion ' ,
' ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd.onion ' ,
' nitter.i2p ' ,
' nitter.i2p ' ,
' u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p ' ,
' u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa.b32.i2p ' ,
@ -36,28 +45,55 @@ class NitterIE(InfoExtractor):
' nitter.42l.fr ' ,
' nitter.42l.fr ' ,
' nitter.pussthecat.org ' ,
' nitter.pussthecat.org ' ,
' nitter.nixnet.services ' ,
' nitter.nixnet.services ' ,
' nitter.mastodont.cat ' ,
' nitter.tedomum.net ' ,
' nitter.fdn.fr ' ,
' nitter.fdn.fr ' ,
' nitter.1d4.us ' ,
' nitter.1d4.us ' ,
' nitter.kavin.rocks ' ,
' nitter.kavin.rocks ' ,
' tweet.lambda.dance ' ,
' nitter.cc ' ,
' nitter.vxempire.xyz ' ,
' nitter.unixfox.eu ' ,
' nitter.unixfox.eu ' ,
' nitter.domain.glass ' ,
' nitter.domain.glass ' ,
' nitter.himiko.cloud ' ,
' nitter.eu ' ,
' nitter.eu ' ,
' nitter.namazso.eu ' ,
' nitter.namazso.eu ' ,
' nitter.mailstation.de ' ,
' nitter.actionsack.com ' ,
' nitter.actionsack.com ' ,
' nitter.cattube.org ' ,
' nitter.dark.fail ' ,
' birdsite.xanny.family ' ,
' birdsite.xanny.family ' ,
' nitter.40two.app ' ,
' nitter.hu ' ,
' nitter.skrep.in ' ,
' twitr.gq ' ,
' nitter.moomoo.me ' ,
' nittereu.moomoo.me ' ,
' bird.from.tf ' ,
' nitter.it ' ,
' twitter.censors.us ' ,
' twitter.grimneko.de ' ,
' nitter.alefvanoon.xyz ' ,
' n.hyperborea.cloud ' ,
' nitter.ca ' ,
' twitter.076.ne.jp ' ,
' twitter.mstdn.social ' ,
' nitter.fly.dev ' ,
' notabird.site ' ,
' nitter.weiler.rocks ' ,
' nitter.silkky.cloud ' ,
' nitter.sethforprivacy.com ' ,
' nttr.stream ' ,
' nitter.cutelab.space ' ,
' nitter.nl ' ,
' nitter.mint.lgbt ' ,
' nitter.bus-hit.me ' ,
' fuckthesacklers.network ' ,
' nitter.govt.land ' ,
' nitter.datatunnel.xyz ' ,
' nitter.esmailelbob.xyz ' ,
' tw.artemislena.eu ' ,
' de.nttr.stream ' ,
' nitter.winscloud.net ' ,
' nitter.tiekoetter.com ' ,
' nitter.spaceint.fr ' ,
' twtr.bch.bar ' ,
' nitter.exonip.de ' ,
' nitter.mastodon.pro ' ,
' nitter.notraxx.ch ' ,
# not in the list anymore
# not in the list anymore
' nitter.skrep.in ' ,
' nitter.snopyta.org ' ,
' nitter.snopyta.org ' ,
)
)
@ -68,96 +104,121 @@ class NitterIE(InfoExtractor):
# official, rate limited
# official, rate limited
' nitter.net ' ,
' nitter.net ' ,
# offline
# offline
' is-nitter.resolv.ee ' ,
' lu-nitter.resolv.ee ' ,
' nitter.13ad.de ' ,
' nitter.13ad.de ' ,
' nitter.40two.app ' ,
' nitter.cattube.org ' ,
' nitter.cc ' ,
' nitter.dark.fail ' ,
' nitter.himiko.cloud ' ,
' nitter.koyu.space ' ,
' nitter.mailstation.de ' ,
' nitter.mastodont.cat ' ,
' nitter.tedomum.net ' ,
' nitter.tokhmi.xyz ' ,
' nitter.weaponizedhumiliation.com ' ,
' nitter.weaponizedhumiliation.com ' ,
' nitter.vxempire.xyz ' ,
' tweet.lambda.dance ' ,
)
)
INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
INSTANCES = NON_HTTP_INSTANCES + HTTP_INSTANCES + DEAD_INSTANCES
_INSTANCES_RE = ' (?: ' + ' | ' . join ( [ re . escape ( instance ) for instance in INSTANCES ] ) + ' ) '
_INSTANCES_RE = f' (?: { " | " . join ( map ( re . escape , INSTANCES ) ) } ) '
_VALID_URL = r ' https?:// %(instance)s /(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)? ' % { ' instance ' : _INSTANCES_RE }
_VALID_URL = fr' https?:// { _INSTANCES_RE } /(?P<uploader_id>.+)/status/(?P<id>[0-9]+)(#.)? '
current_instance = random . choice ( HTTP_INSTANCES )
current_instance = random . choice ( HTTP_INSTANCES )
_TESTS = [
_TESTS = [
{
{
# GIF (wrapped in mp4)
# GIF (wrapped in mp4)
' url ' : ' https:// %s /firefox/status/1314279897502629888#m ' % current_instance ,
' url ' : f' https:// { current_instance } /firefox/status/1314279897502629888#m ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1314279897502629888 ' ,
' id ' : ' 1314279897502629888 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Firefox 🔥 - You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n \n Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg \n \n #UnfckTheInternet ' ,
' title ' : ' md5:7890a9277da4639ab624dd899424c5d8 ' ,
' description ' : ' You know the old saying, if you see something say something. Now you actually can with the YouTube regrets extension. \n \n Report harmful YouTube recommendations so others can avoid watching them. ➡️ https://mzl.la/3iFIiyg \n \n #UnfckTheInternet ' ,
' description ' : ' md5:5fea96a4d3716c350f8b95b21b3111fe ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Firefox 🔥 ' ,
' uploader ' : ' Firefox 🔥 ' ,
' uploader_id ' : ' firefox ' ,
' uploader_id ' : ' firefox ' ,
' uploader_url ' : ' https:// %s /firefox ' % current_instance ,
' uploader_url ' : f' https:// { current_instance } /firefox ' ,
' upload_date ' : ' 20201008 ' ,
' upload_date ' : ' 20201008 ' ,
' timestamp ' : 1602183720 ,
' timestamp ' : 1602183720 ,
' like_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
} ,
} ,
} , { # normal video
} , { # normal video
' url ' : ' https:// %s /Le___Doc/status/1299715685392756737#m ' % current_instance ,
' url ' : f' https:// { current_instance } /Le___Doc/status/1299715685392756737#m ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1299715685392756737 ' ,
' id ' : ' 1299715685392756737 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Le Doc - " Je ne prédis jamais rien " \n D Raoult, Août 2020... ' ,
' title ' : ' re:^.* - " Je ne prédis jamais rien " \n D Raoult, Août 2020... ' ,
' description ' : ' " Je ne prédis jamais rien " \n D Raoult, Août 2020... ' ,
' description ' : ' " Je ne prédis jamais rien " \n D Raoult, Août 2020... ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Le Doc' ,
' uploader ' : ' re:^ Le * Doc' ,
' uploader_id ' : ' Le___Doc ' ,
' uploader_id ' : ' Le___Doc ' ,
' uploader_url ' : ' https:// %s /Le___Doc ' % current_instance ,
' uploader_url ' : f' https:// { current_instance } /Le___Doc ' ,
' upload_date ' : ' 20200829 ' ,
' upload_date ' : ' 20200829 ' ,
' timestamp ' : 159871134 1 ,
' timestamp ' : 159871134 0 ,
' view_count ' : int ,
' view_count ' : int ,
' like_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
' comment_count ' : int ,
} ,
} ,
} , { # video embed in a "Streaming Political Ads" box
} , { # video embed in a "Streaming Political Ads" box
' url ' : ' https:// %s /mozilla/status/1321147074491092994#m ' % current_instance ,
' url ' : f' https:// { current_instance } /mozilla/status/1321147074491092994#m ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 1321147074491092994 ' ,
' id ' : ' 1321147074491092994 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : " Mozilla - Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? \n \n This isn ' t a real political ad, but if you ' re watching streaming TV in the U.S., chances are you ' ve seen quite a few. \n \n Learn more ➡️ https://mzl.la/StreamingAds " ,
' title ' : ' md5:8290664aabb43b9189145c008386bf12 ' ,
' description ' : " Are you being targeted with weird, ominous or just plain annoying political ads while streaming your favorite shows? \n \n This isn ' t a real political ad, but if you ' re watching streaming TV in the U.S., chances are you ' ve seen quite a few. \n \n Learn more ➡️ https://mzl.la/StreamingAds " ,
' description ' : ' md5:9cf2762d49674bc416a191a689fb2aaa ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Mozilla ' ,
' uploader ' : ' Mozilla ' ,
' uploader_id ' : ' mozilla ' ,
' uploader_id ' : ' mozilla ' ,
' uploader_url ' : ' https:// %s /mozilla ' % current_instance ,
' uploader_url ' : f' https:// { current_instance } /mozilla ' ,
' upload_date ' : ' 20201027 ' ,
' upload_date ' : ' 20201027 ' ,
' timestamp ' : 1603820982
' timestamp ' : 1603820940 ,
' view_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
} ,
} ,
' expected_warnings ' : [ ' Ignoring subtitle tracks found in the HLS manifest ' ] ,
} , { # not the first tweet but main-tweet
} , { # not the first tweet but main-tweet
' url ' : ' https:// %s /TheNaturalNu/status/1379050895539724290#m ' % current_instance ,
' url ' : f' https:// { current_instance } /firefox/status/1354848277481414657#m ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' 13 79050895539724290 ' ,
' id ' : ' 13 54848277481414657 ' ,
' ext ' : ' mp4 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Dorothy Zbornak - This had me hollering!! ' ,
' title ' : ' md5:bef647f03bd1c6b15b687ea70dfc9700 ' ,
' description ' : ' This had me hollering!! ' ,
' description ' : ' md5:5efba25e2f9dac85ebcd21160cb4341f ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' thumbnail ' : r ' re:^https?://.* \ .jpg$ ' ,
' uploader ' : ' Dorothy Zbornak ' ,
' uploader ' : ' Firefox 🔥 ' ,
' uploader_id ' : ' TheNaturalNu ' ,
' uploader_id ' : ' firefox ' ,
' uploader_url ' : ' https:// %s /TheNaturalNu ' % current_instance ,
' uploader_url ' : f ' https:// { current_instance } /firefox ' ,
' timestamp ' : 1617626329 ,
' upload_date ' : ' 20210128 ' ,
' upload_date ' : ' 20210405 '
' timestamp ' : 1611855960 ,
' view_count ' : int ,
' like_count ' : int ,
' repost_count ' : int ,
' comment_count ' : int ,
}
}
}
}
]
]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_ id( url )
video_id , uploader_id = self . _match_ val id_url ( url ) . group ( ' id ' , ' uploader_id ' )
parsed_url = compat_urlparse . urlparse ( url )
parsed_url = compat_urlparse . urlparse ( url )
base_url = ' %s :// %s ' % ( parsed_url . scheme , parsed_url . netloc )
base_url = f' { parsed_url . scheme } :// { parsed_url . netloc } '
self . _set_cookie ( parsed_url . netloc , ' hlsPlayback ' , ' on ' )
self . _set_cookie ( parsed_url . netloc , ' hlsPlayback ' , ' on ' )
full_webpage = self . _download_webpage ( url , video_id )
full_webpage = webpage = self . _download_webpage ( url , video_id )
main_tweet_start = full_webpage . find ( ' class= " main-tweet " ' )
main_tweet_start = full_webpage . find ( ' class= " main-tweet " ' )
if main_tweet_start > 0 :
if main_tweet_start > 0 :
webpage = full_webpage [ main_tweet_start : ]
webpage = full_webpage [ main_tweet_start : ]
if not webpage :
webpage = full_webpage
video_url = ' %s %s ' % ( base_url , self . _html_search_regex ( r ' (?:<video[^>]+data-url|<source[^>]+src)= " ([^ " ]+) " ' , webpage , ' video url ' ) )
video_url = ' %s %s ' % ( base_url , self . _html_search_regex (
r ' (?:<video[^>]+data-url|<source[^>]+src)= " ([^ " ]+) " ' , webpage , ' video url ' ) )
ext = determine_ext ( video_url )
ext = determine_ext ( video_url )
if ext == ' unknown_video ' :
if ext == ' unknown_video ' :
@ -168,61 +229,49 @@ class NitterIE(InfoExtractor):
' ext ' : ext
' ext ' : ext
} ]
} ]
title = self . _og_search_description ( full_webpage )
title = description = self . _og_search_description ( full_webpage ) or self . _html_search_regex (
if not title :
r ' <div class= " tweet-content[^>]+>([^<]+)</div> ' , webpage , ' title ' , fatal = False )
title = self . _html_search_regex ( r ' <div class= " tweet-content[^>]+>([^<]+)</div> ' , webpage , ' title ' )
description = title
mobj = self . _match_valid_url ( url )
uploader_id = self . _html_search_regex (
uploader_id = (
r ' <a class= " username " [^>]+title= " @([^ " ]+) " ' , webpage , ' uploader id ' , fatal = False ) or uploader_id
mobj . group ( ' uploader_id ' )
or self . _html_search_regex ( r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False )
uploader = self . _html_search_regex (
)
r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False )
if uploader :
title = f ' { uploader } - { title } '
if uploader_id :
counts = {
uploader_url = ' %s / %s ' % ( base_url , uploader_id )
f ' { x [ 0 ] } _count ' : self . _html_search_regex (
fr ' <span[^>]+class= " icon- { x [ 1 ] } [^>]*></span>([^<]*)</div> ' ,
webpage , f ' { x [ 0 ] } count ' , fatal = False )
for x in ( ( ' view ' , ' play ' ) , ( ' like ' , ' heart ' ) , ( ' repost ' , ' retweet ' ) , ( ' comment ' , ' comment ' ) )
}
counts = { field : 0 if count == ' ' else parse_count ( count ) for field , count in counts . items ( ) }
uploader = self . _html_search_regex ( r ' <a class= " fullname " [^>]+title= " ([^ " ]+) " ' , webpage , ' uploader name ' , fatal = False )
thumbnail = (
self . _html_search_meta ( ' og:image ' , full_webpage , ' thumbnail url ' )
or remove_end ( ' %s %s ' % ( base_url , self . _html_search_regex (
r ' <video[^>]+poster= " ([^ " ]+) " ' , webpage , ' thumbnail url ' , fatal = False ) ) , ' % 3Asmall ' ) )
if uploader :
thumbnails = [
title = ' %s - %s ' % ( uploader , title )
{ ' id ' : id , ' url ' : f ' { thumbnail } %3A { id } ' }
for id in ( ' thumb ' , ' small ' , ' large ' , ' medium ' , ' orig ' )
view_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-play[^>]*></span> \ s([^<]+)</div> ' , webpage , ' view count ' , fatal = False ) )
]
like_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-heart[^>]*></span> \ s([^<]+)</div> ' , webpage , ' like count ' , fatal = False ) )
repost_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-retweet[^>]*></span> \ s([^<]+)</div> ' , webpage , ' repost count ' , fatal = False ) )
date = self . _html_search_regex (
comment_count = parse_count ( self . _html_search_regex ( r ' <span[^>]+class= " icon-comment[^>]*></span> \ s([^<]+)</div> ' , webpage , ' repost count ' , fatal = False ) )
r ' <span[^>]+class= " tweet-date " [^>]*><a[^>]+title= " ([^ " ]+) " ' ,
webpage , ' upload date ' , default = ' ' ) . replace ( ' · ' , ' ' )
thumbnail = self . _html_search_meta ( ' og:image ' , full_webpage , ' thumbnail url ' )
if not thumbnail :
thumbnail = ' %s %s ' % ( base_url , self . _html_search_regex ( r ' <video[^>]+poster= " ([^ " ]+) " ' , webpage , ' thumbnail url ' , fatal = False ) )
thumbnail = remove_end ( thumbnail , ' % 3Asmall ' )
thumbnails = [ ]
thumbnail_ids = ( ' thumb ' , ' small ' , ' large ' , ' medium ' , ' orig ' )
for id in thumbnail_ids :
thumbnails . append ( {
' id ' : id ,
' url ' : thumbnail + ' % 3A ' + id ,
} )
date = self . _html_search_regex ( r ' <span[^>]+class= " tweet-date " [^>]*><a[^>]+title= " ([^ " ]+) " ' , webpage , ' upload date ' , fatal = False )
upload_date = unified_strdate ( date )
timestamp = unified_timestamp ( date )
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' title ' : title ,
' title ' : title ,
' description ' : description ,
' description ' : description ,
' uploader ' : uploader ,
' uploader ' : uploader ,
' timestamp ' : timestamp,
' timestamp ' : unified_timestamp ( date ) ,
' uploader_id ' : uploader_id ,
' uploader_id ' : uploader_id ,
' uploader_url ' : uploader_url ,
' uploader_url ' : f ' { base_url } / { uploader_id } ' ,
' view_count ' : view_count ,
' like_count ' : like_count ,
' repost_count ' : repost_count ,
' comment_count ' : comment_count ,
' formats ' : formats ,
' formats ' : formats ,
' thumbnails ' : thumbnails ,
' thumbnails ' : thumbnails ,
' thumbnail ' : thumbnail ,
' thumbnail ' : thumbnail ,
' upload_date ' : upload_date ,
* * counts ,
}
}