@ -1,7 +1,6 @@
from __future__ import unicode_literals
from __future__ import unicode_literals
import re
import re
import itertools
from . common import InfoExtractor
from . common import InfoExtractor
from . . compat import (
from . . compat import (
@ -46,20 +45,16 @@ class MixcloudIE(InfoExtractor):
} ,
} ,
} ]
} ]
def _get_url ( self , track_id , template_url , server_number ) :
def _check_url ( self , url , track_id , ext ) :
boundaries = ( 1 , 30 )
try :
for nr in server_numbers ( server_number , boundaries ) :
# We only want to know if the request succeeds
url = template_url % nr
# don't download the whole file
try :
self . _request_webpage (
# We only want to know if the request succeeds
HEADRequest ( url ) , track_id ,
# don't download the whole file
' Trying %s URL ' % ext )
self . _request_webpage (
return True
HEADRequest ( url ) , track_id ,
except ExtractorError :
' Checking URL %d / %d ... ' % ( nr , boundaries [ - 1 ] ) )
return False
return url
except ExtractorError :
pass
return None
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
mobj = re . match ( self . _VALID_URL , url )
mobj = re . match ( self . _VALID_URL , url )
@ -72,15 +67,10 @@ class MixcloudIE(InfoExtractor):
preview_url = self . _search_regex (
preview_url = self . _search_regex (
r ' \ s(?:data-preview-url|m-preview)= " ([^ " ]+) " ' , webpage , ' preview url ' )
r ' \ s(?:data-preview-url|m-preview)= " ([^ " ]+) " ' , webpage , ' preview url ' )
song_url = preview_url . replace ( ' /previews/ ' , ' /c/originals/ ' )
song_url = preview_url . replace ( ' /previews/ ' , ' /c/originals/ ' )
server_number = int ( self . _search_regex ( r ' stream( \ d+) ' , song_url , ' server number ' ) )
if not self . _check_url ( song_url , track_id , ' mp3 ' ) :
template_url = re . sub ( r ' (stream \ d*) ' , ' stream %d ' , song_url )
song_url = song_url . replace ( ' .mp3 ' , ' .m4a ' ) . replace ( ' originals/ ' , ' m4a/64/ ' )
final_song_url = self . _get_url ( track_id , template_url , server_number )
if not self . _check_url ( song_url , track_id , ' m4a ' ) :
if final_song_url is None :
raise ExtractorError ( ' Unable to extract track url ' )
self . to_screen ( ' Trying with m4a extension ' )
template_url = template_url . replace ( ' .mp3 ' , ' .m4a ' ) . replace ( ' originals/ ' , ' m4a/64/ ' )
final_song_url = self . _get_url ( track_id , template_url , server_number )
if final_song_url is None :
raise ExtractorError ( ' Unable to extract track url ' )
PREFIX = (
PREFIX = (
r ' m-play-on-spacebar[^>]+ '
r ' m-play-on-spacebar[^>]+ '
@ -107,7 +97,7 @@ class MixcloudIE(InfoExtractor):
return {
return {
' id ' : track_id ,
' id ' : track_id ,
' title ' : title ,
' title ' : title ,
' url ' : final_ song_url,
' url ' : song_url,
' description ' : description ,
' description ' : description ,
' thumbnail ' : thumbnail ,
' thumbnail ' : thumbnail ,
' uploader ' : uploader ,
' uploader ' : uploader ,
@ -115,35 +105,3 @@ class MixcloudIE(InfoExtractor):
' view_count ' : view_count ,
' view_count ' : view_count ,
' like_count ' : like_count ,
' like_count ' : like_count ,
}
}
def server_numbers(first, boundaries):
    """Yield server numbers to try, in descending order of probable availability.

    Starts from ``first`` (i.e. the number of the server hosting the preview
    file) and moves further and further away from it, alternating between
    the next higher number (up to the upper boundary) and the next lower
    number (down to the lower boundary). Once one side is exhausted, the
    remaining numbers of the other side follow in order. Namely:

        server_numbers(2, (1, 5))
        # Where the preview server is 2, min number is 1 and max is 5.
        # Yields: 2, 3, 1, 4, 5

    Why not random numbers or increasing sequences? Since from what has been
    observed, full length files seem to be hosted on servers whose number is
    closer to that of the preview; to be confirmed.

    ``first`` is always yielded as-is; it is not clamped to ``boundaries``.

    Raises ValueError if ``boundaries`` does not have exactly two elements.
    Because this is a generator, the error surfaces on first iteration, not
    at call time.
    """
    zip_longest = getattr(itertools, 'zip_longest', None)
    if zip_longest is None:
        # python 2.x
        zip_longest = itertools.izip_longest

    if len(boundaries) != 2:
        raise ValueError("boundaries should be a two-element tuple")
    # Named so as not to shadow the ``min`` / ``max`` builtins.
    lowest, highest = boundaries

    highs = range(first + 1, highest + 1)
    lows = range(first - 1, lowest - 1, -1)

    yield first
    for pair in zip_longest(highs, lows):
        for number in pair:
            # zip_longest pads the shorter side with None. Test for None
            # explicitly: a plain truthiness filter would also discard a
            # legitimate server number 0.
            if number is not None:
                yield number