@ -1,4 +1,6 @@
# encoding: utf-8
# encoding: utf-8
from __future__ import unicode_literals
import json
import json
import re
import re
import itertools
import itertools
@ -31,54 +33,55 @@ class VimeoIE(InfoExtractor):
( ? P < id > [ 0 - 9 ] + )
( ? P < id > [ 0 - 9 ] + )
/ ? ( ? : [ ? & ] . * ) ? ( ? : [ #].*)?$'''
/ ? ( ? : [ ? & ] . * ) ? ( ? : [ #].*)?$'''
_NETRC_MACHINE = ' vimeo '
_NETRC_MACHINE = ' vimeo '
IE_NAME = u ' vimeo '
IE_NAME = ' vimeo '
_TESTS = [
_TESTS = [
{
{
u ' url ' : u ' http://vimeo.com/56015672#at=0 ' ,
' url ' : ' http://vimeo.com/56015672#at=0 ' ,
u ' file ' : u ' 56015672.mp4 ' ,
' file ' : ' 56015672.mp4 ' ,
u ' md5 ' : u ' 8879b6cc097e987f02484baf890129e5 ' ,
' md5 ' : ' 8879b6cc097e987f02484baf890129e5 ' ,
u ' info_dict ' : {
' info_dict ' : {
u " upload_date " : u " 20121220 " ,
" upload_date " : " 20121220 " ,
u " description " : u " This is a test case for youtube-dl. \n For more information, see github.com/rg3/youtube-dl \n Test chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550 " ,
" description " : " This is a test case for youtube-dl. \n For more information, see github.com/rg3/youtube-dl \n Test chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550 " ,
u " uploader_id " : u " user7108434 " ,
" uploader_id " : " user7108434 " ,
u " uploader " : u " Filippo Valsorda " ,
" uploader " : " Filippo Valsorda " ,
u " title " : u " youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550 " ,
" title " : " youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550 " ,
} ,
} ,
} ,
} ,
{
{
u ' url ' : u ' http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876 ' ,
' url ' : ' http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876 ' ,
u ' file ' : u ' 68093876.mp4 ' ,
' file ' : ' 68093876.mp4 ' ,
u ' md5 ' : u ' 3b5ca6aa22b60dfeeadf50b72e44ed82 ' ,
' md5 ' : ' 3b5ca6aa22b60dfeeadf50b72e44ed82 ' ,
u ' note ' : u ' Vimeo Pro video (#1197) ' ,
' note ' : ' Vimeo Pro video (#1197) ' ,
u ' info_dict ' : {
' info_dict ' : {
u ' uploader_id ' : u ' openstreetmapus ' ,
' uploader_id ' : ' openstreetmapus ' ,
u ' uploader ' : u ' OpenStreetMap US ' ,
' uploader ' : ' OpenStreetMap US ' ,
u ' title ' : u ' Andy Allan - Putting the Carto into OpenStreetMap Cartography ' ,
' title ' : ' Andy Allan - Putting the Carto into OpenStreetMap Cartography ' ,
} ,
} ,
} ,
} ,
{
{
u ' url ' : u ' http://player.vimeo.com/video/54469442 ' ,
' url ' : ' http://player.vimeo.com/video/54469442 ' ,
u ' file ' : u ' 54469442.mp4 ' ,
' file ' : ' 54469442.mp4 ' ,
u ' md5 ' : u ' 619b811a4417aa4abe78dc653becf511 ' ,
' md5 ' : ' 619b811a4417aa4abe78dc653becf511 ' ,
u ' note ' : u ' Videos that embed the url in the player page ' ,
' note ' : ' Videos that embed the url in the player page ' ,
u ' info_dict ' : {
' info_dict ' : {
u ' title ' : u ' Kathy Sierra: Building the minimum Badass User, Business of Software ' ,
' title ' : ' Kathy Sierra: Building the minimum Badass User, Business of Software ' ,
u ' uploader ' : u ' The BLN & Business of Software ' ,
' uploader ' : ' The BLN & Business of Software ' ,
' uploader_id ' : ' theblnbusinessofsoftware ' ,
} ,
} ,
} ,
} ,
{
{
u ' url ' : u ' http://vimeo.com/68375962 ' ,
' url ' : ' http://vimeo.com/68375962 ' ,
u ' file ' : u ' 68375962.mp4 ' ,
' file ' : ' 68375962.mp4 ' ,
u ' md5 ' : u ' aaf896bdb7ddd6476df50007a0ac0ae7 ' ,
' md5 ' : ' aaf896bdb7ddd6476df50007a0ac0ae7 ' ,
u ' note ' : u ' Video protected with password ' ,
' note ' : ' Video protected with password ' ,
u ' info_dict ' : {
' info_dict ' : {
u ' title ' : u ' youtube-dl password protected test video ' ,
' title ' : ' youtube-dl password protected test video ' ,
u ' upload_date ' : u ' 20130614 ' ,
' upload_date ' : ' 20130614 ' ,
u ' uploader_id ' : u ' user18948128 ' ,
' uploader_id ' : ' user18948128 ' ,
u ' uploader ' : u ' Jaime Marquínez Ferrándiz ' ,
' uploader ' : ' Jaime Marquínez Ferrándiz ' ,
} ,
} ,
u ' params ' : {
' params ' : {
u ' videopassword ' : u ' youtube-dl ' ,
' videopassword ' : ' youtube-dl ' ,
} ,
} ,
} ,
} ,
]
]
@ -100,12 +103,12 @@ class VimeoIE(InfoExtractor):
login_request = compat_urllib_request . Request ( login_url , data )
login_request = compat_urllib_request . Request ( login_url , data )
login_request . add_header ( ' Content-Type ' , ' application/x-www-form-urlencoded ' )
login_request . add_header ( ' Content-Type ' , ' application/x-www-form-urlencoded ' )
login_request . add_header ( ' Cookie ' , ' xsrft= %s ' % token )
login_request . add_header ( ' Cookie ' , ' xsrft= %s ' % token )
self . _download_webpage ( login_request , None , False , u ' Wrong login info ' )
self . _download_webpage ( login_request , None , False , ' Wrong login info ' )
def _verify_video_password ( self , url , video_id , webpage ) :
def _verify_video_password ( self , url , video_id , webpage ) :
password = self . _downloader . params . get ( ' videopassword ' , None )
password = self . _downloader . params . get ( ' videopassword ' , None )
if password is None :
if password is None :
raise ExtractorError ( u ' This video is protected by a password, use the --video-password option ' )
raise ExtractorError ( ' This video is protected by a password, use the --video-password option ' )
token = re . search ( r ' xsrft: \' (.*?) \' ' , webpage ) . group ( 1 )
token = re . search ( r ' xsrft: \' (.*?) \' ' , webpage ) . group ( 1 )
data = compat_urllib_parse . urlencode ( { ' password ' : password ,
data = compat_urllib_parse . urlencode ( { ' password ' : password ,
' token ' : token } )
' token ' : token } )
@ -118,8 +121,8 @@ class VimeoIE(InfoExtractor):
password_request . add_header ( ' Content-Type ' , ' application/x-www-form-urlencoded ' )
password_request . add_header ( ' Content-Type ' , ' application/x-www-form-urlencoded ' )
password_request . add_header ( ' Cookie ' , ' xsrft= %s ' % token )
password_request . add_header ( ' Cookie ' , ' xsrft= %s ' % token )
self . _download_webpage ( password_request , video_id ,
self . _download_webpage ( password_request , video_id ,
u ' Verifying the password ' ,
' Verifying the password ' ,
u ' Wrong password ' )
' Wrong password ' )
def _real_initialize ( self ) :
def _real_initialize ( self ) :
self . _login ( )
self . _login ( )
@ -134,7 +137,7 @@ class VimeoIE(InfoExtractor):
# Extract ID from URL
# Extract ID from URL
mobj = re . match ( self . _VALID_URL , url )
mobj = re . match ( self . _VALID_URL , url )
if mobj is None :
if mobj is None :
raise ExtractorError ( u ' Invalid URL: %s ' % url )
raise ExtractorError ( ' Invalid URL: %s ' % url )
video_id = mobj . group ( ' id ' )
video_id = mobj . group ( ' id ' )
if mobj . group ( ' pro ' ) or mobj . group ( ' player ' ) :
if mobj . group ( ' pro ' ) or mobj . group ( ' player ' ) :
@ -155,7 +158,7 @@ class VimeoIE(InfoExtractor):
try :
try :
try :
try :
config_url = self . _html_search_regex (
config_url = self . _html_search_regex (
r ' data-config-url= " (.+?) " ' , webpage , u ' config URL ' )
r ' data-config-url= " (.+?) " ' , webpage , ' config URL ' )
config_json = self . _download_webpage ( config_url , video_id )
config_json = self . _download_webpage ( config_url , video_id )
config = json . loads ( config_json )
config = json . loads ( config_json )
except RegexNotFoundError :
except RegexNotFoundError :
@ -166,18 +169,18 @@ class VimeoIE(InfoExtractor):
config_re = r ' %s =( { .+?}); ' % re . escape ( m_variable_name . group ( 1 ) )
config_re = r ' %s =( { .+?}); ' % re . escape ( m_variable_name . group ( 1 ) )
else :
else :
config_re = [ r ' = { config:( { .+?}),assets: ' , r ' (?:[abc])=( { .+?}); ' ]
config_re = [ r ' = { config:( { .+?}),assets: ' , r ' (?:[abc])=( { .+?}); ' ]
config = self . _search_regex ( config_re , webpage , u ' info section ' ,
config = self . _search_regex ( config_re , webpage , ' info section ' ,
flags = re . DOTALL )
flags = re . DOTALL )
config = json . loads ( config )
config = json . loads ( config )
except Exception as e :
except Exception as e :
if re . search ( ' The creator of this video has not given you permission to embed it on this domain. ' , webpage ) :
if re . search ( ' The creator of this video has not given you permission to embed it on this domain. ' , webpage ) :
raise ExtractorError ( u ' The author has restricted the access to this video, try with the " --referer " option ' )
raise ExtractorError ( ' The author has restricted the access to this video, try with the " --referer " option ' )
if re . search ( ' <form[^>]+?id= " pw_form " ' , webpage ) is not None :
if re . search ( ' <form[^>]+?id= " pw_form " ' , webpage ) is not None :
self . _verify_video_password ( url , video_id , webpage )
self . _verify_video_password ( url , video_id , webpage )
return self . _real_extract ( url )
return self . _real_extract ( url )
else :
else :
raise ExtractorError ( u ' Unable to extract info section ' ,
raise ExtractorError ( ' Unable to extract info section ' ,
cause = e )
cause = e )
else :
else :
if config . get ( ' view ' ) == 4 :
if config . get ( ' view ' ) == 4 :
@ -216,9 +219,9 @@ class VimeoIE(InfoExtractor):
video_upload_date = mobj . group ( 1 ) + mobj . group ( 2 ) + mobj . group ( 3 )
video_upload_date = mobj . group ( 1 ) + mobj . group ( 2 ) + mobj . group ( 3 )
try :
try :
view_count = int ( self . _search_regex ( r ' UserPlays:( \ d+) ' , webpage , u ' view count ' ) )
view_count = int ( self . _search_regex ( r ' UserPlays:( \ d+) ' , webpage , ' view count ' ) )
like_count = int ( self . _search_regex ( r ' UserLikes:( \ d+) ' , webpage , u ' like count ' ) )
like_count = int ( self . _search_regex ( r ' UserLikes:( \ d+) ' , webpage , ' like count ' ) )
comment_count = int ( self . _search_regex ( r ' UserComments:( \ d+) ' , webpage , u ' comment count ' ) )
comment_count = int ( self . _search_regex ( r ' UserComments:( \ d+) ' , webpage , ' comment count ' ) )
except RegexNotFoundError :
except RegexNotFoundError :
# This info is only available in vimeo.com/{id} urls
# This info is only available in vimeo.com/{id} urls
view_count = None
view_count = None
@ -259,7 +262,7 @@ class VimeoIE(InfoExtractor):
for key in ( ' other ' , ' sd ' , ' hd ' ) :
for key in ( ' other ' , ' sd ' , ' hd ' ) :
formats + = files [ key ]
formats + = files [ key ]
if len ( formats ) == 0 :
if len ( formats ) == 0 :
raise ExtractorError ( u ' No known codec found ' )
raise ExtractorError ( ' No known codec found ' )
return {
return {
' id ' : video_id ,
' id ' : video_id ,
@ -278,7 +281,7 @@ class VimeoIE(InfoExtractor):
class VimeoChannelIE ( InfoExtractor ) :
class VimeoChannelIE ( InfoExtractor ) :
IE_NAME = u ' vimeo:channel '
IE_NAME = ' vimeo:channel '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/channels/(?P<id>[^/]+) '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/channels/(?P<id>[^/]+) '
_MORE_PAGES_INDICATOR = r ' <a.+?rel= " next " '
_MORE_PAGES_INDICATOR = r ' <a.+?rel= " next " '
_TITLE_RE = r ' <link rel= " alternate " [^>]+?title= " (.*?) " '
_TITLE_RE = r ' <link rel= " alternate " [^>]+?title= " (.*?) " '
@ -287,14 +290,14 @@ class VimeoChannelIE(InfoExtractor):
return ' %s /videos/page: %d / ' % ( base_url , pagenum )
return ' %s /videos/page: %d / ' % ( base_url , pagenum )
def _extract_list_title ( self , webpage ) :
def _extract_list_title ( self , webpage ) :
return self . _html_search_regex ( self . _TITLE_RE , webpage , u ' list title ' )
return self . _html_search_regex ( self . _TITLE_RE , webpage , ' list title ' )
def _extract_videos ( self , list_id , base_url ) :
def _extract_videos ( self , list_id , base_url ) :
video_ids = [ ]
video_ids = [ ]
for pagenum in itertools . count ( 1 ) :
for pagenum in itertools . count ( 1 ) :
webpage = self . _download_webpage (
webpage = self . _download_webpage (
self . _page_url ( base_url , pagenum ) , list_id ,
self . _page_url ( base_url , pagenum ) , list_id ,
u ' Downloading page %s ' % pagenum )
' Downloading page %s ' % pagenum )
video_ids . extend ( re . findall ( r ' id= " clip_( \ d+?) " ' , webpage ) )
video_ids . extend ( re . findall ( r ' id= " clip_( \ d+?) " ' , webpage ) )
if re . search ( self . _MORE_PAGES_INDICATOR , webpage , re . DOTALL ) is None :
if re . search ( self . _MORE_PAGES_INDICATOR , webpage , re . DOTALL ) is None :
break
break
@ -314,7 +317,7 @@ class VimeoChannelIE(InfoExtractor):
class VimeoUserIE ( VimeoChannelIE ) :
class VimeoUserIE ( VimeoChannelIE ) :
IE_NAME = u ' vimeo:user '
IE_NAME = ' vimeo:user '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/(?P<name>[^/]+)(?:[#?]|$) '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/(?P<name>[^/]+)(?:[#?]|$) '
_TITLE_RE = r ' <a[^>]+?class= " user " >([^<>]+?)</a> '
_TITLE_RE = r ' <a[^>]+?class= " user " >([^<>]+?)</a> '
@ -331,7 +334,7 @@ class VimeoUserIE(VimeoChannelIE):
class VimeoAlbumIE ( VimeoChannelIE ) :
class VimeoAlbumIE ( VimeoChannelIE ) :
IE_NAME = u ' vimeo:album '
IE_NAME = ' vimeo:album '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/album/(?P<id> \ d+) '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/album/(?P<id> \ d+) '
_TITLE_RE = r ' <header id= " page_header " > \ n \ s*<h1>(.*?)</h1> '
_TITLE_RE = r ' <header id= " page_header " > \ n \ s*<h1>(.*?)</h1> '
@ -345,7 +348,7 @@ class VimeoAlbumIE(VimeoChannelIE):
class VimeoGroupsIE ( VimeoAlbumIE ) :
class VimeoGroupsIE ( VimeoAlbumIE ) :
IE_NAME = u ' vimeo:group '
IE_NAME = ' vimeo:group '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/groups/(?P<name>[^/]+) '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/groups/(?P<name>[^/]+) '
def _extract_list_title ( self , webpage ) :
def _extract_list_title ( self , webpage ) :
@ -358,8 +361,8 @@ class VimeoGroupsIE(VimeoAlbumIE):
class VimeoReviewIE ( InfoExtractor ) :
class VimeoReviewIE ( InfoExtractor ) :
IE_NAME = u ' vimeo:review '
IE_NAME = ' vimeo:review '
IE_DESC = u ' Review pages on vimeo '
IE_DESC = ' Review pages on vimeo '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/[^/]+/review/(?P<id>[^/]+) '
_VALID_URL = r ' (?:https?://)?vimeo. \ com/[^/]+/review/(?P<id>[^/]+) '
_TEST = {
_TEST = {
' url ' : ' https://vimeo.com/user21297594/review/75524534/3c257a1b5d ' ,
' url ' : ' https://vimeo.com/user21297594/review/75524534/3c257a1b5d ' ,