mirror of https://github.com/yt-dlp/yt-dlp
Merge branch 'master' into subtitles_rework
commit
72836fcee4
@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import textwrap
|
||||||
|
|
||||||
|
# We must be able to import youtube_dl
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
|
||||||
|
import youtube_dl
|
||||||
|
|
||||||
|
def main():
    """Render ``supportedsites.html`` from ``supportedsites.html.in``.

    Both files are opened relative to the current working directory.  The
    ``@SITES@`` placeholder of the template is replaced by one ``<li>`` line
    per extractor returned by ``youtube_dl.gen_extractors()``, sorted by
    ``IE_NAME`` and indented with a tab.
    """
    with open('supportedsites.html.in', 'r', encoding='utf-8') as tmplf:
        template = tmplf.read()

    ie_htmls = []
    for ie in sorted(youtube_dl.gen_extractors(), key=lambda i: i.IE_NAME):
        ie_html = '<b>{}</b>'.format(ie.IE_NAME)
        # IE_DESC is optional: extractors without it only get their name.
        try:
            ie_html += ': {}'.format(ie.IE_DESC)
        except AttributeError:
            pass
        if ie.working() == False:
            ie_html += ' (Currently broken)'
        ie_htmls.append('<li>{}</li>'.format(ie_html))

    template = template.replace('@SITES@', textwrap.indent('\n'.join(ie_htmls), '\t'))

    with open('supportedsites.html', 'w', encoding='utf-8') as sitesf:
        sitesf.write(template)


if __name__ == '__main__':
    main()
|
@ -0,0 +1,202 @@
|
|||||||
|
__all__ = ['aes_encrypt', 'key_expansion', 'aes_ctr_decrypt', 'aes_decrypt_text']
|
||||||
|
|
||||||
|
import base64
|
||||||
|
from math import ceil
|
||||||
|
|
||||||
|
from .utils import bytes_to_intlist, intlist_to_bytes
|
||||||
|
|
||||||
|
BLOCK_SIZE_BYTES = 16
|
||||||
|
|
||||||
|
def aes_ctr_decrypt(data, key, counter):
    """
    Decrypt with aes in counter mode

    @param {int[]} data        cipher
    @param {int[]} key         16/24/32-Byte cipher key
    @param {instance} counter  Instance whose next_value function (@returns {int[]}  16-Byte block)
                               returns the next counter block
    @returns {int[]}           decrypted data
    """
    expanded_key = key_expansion(key)
    block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES))

    decrypted_data = []
    for i in range(block_count):
        counter_block = counter.next_value()
        block = data[i * BLOCK_SIZE_BYTES:(i + 1) * BLOCK_SIZE_BYTES]
        # Zero-pad a short final block so it can be XORed with a full
        # keystream block; the padding is cut off again below.
        block += [0] * (BLOCK_SIZE_BYTES - len(block))

        cipher_counter_block = aes_encrypt(counter_block, expanded_key)
        decrypted_data += xor(block, cipher_counter_block)
    decrypted_data = decrypted_data[:len(data)]

    return decrypted_data
|
||||||
|
|
||||||
|
def key_expansion(data):
    """
    Generate key schedule

    @param {int[]} data  16/24/32-Byte cipher key
    @returns {int[]}     176/208/240-Byte expanded key
    """
    data = data[:]  # copy, the caller's key must not be modified
    rcon_iteration = 1
    key_size_bytes = len(data)
    expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES

    # Each pass of the loop appends one key-size worth of bytes, 4 at a time.
    while len(data) < expanded_key_size_bytes:
        # First word of the pass goes through the key schedule core.
        temp = data[-4:]
        temp = key_schedule_core(temp, rcon_iteration)
        rcon_iteration += 1
        data += xor(temp, data[-key_size_bytes:4 - key_size_bytes])

        for _ in range(3):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes:4 - key_size_bytes])

        # 32-Byte keys substitute one additional word per pass.
        if key_size_bytes == 32:
            temp = data[-4:]
            temp = sub_bytes(temp)
            data += xor(temp, data[-key_size_bytes:4 - key_size_bytes])

        for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0):
            temp = data[-4:]
            data += xor(temp, data[-key_size_bytes:4 - key_size_bytes])
    # The last pass may overshoot the required size.
    data = data[:expanded_key_size_bytes]

    return data
|
||||||
|
|
||||||
|
def aes_encrypt(data, expanded_key):
    """
    Encrypt one block with aes

    @param {int[]} data          16-Byte state
    @param {int[]} expanded_key  176/208/240-Byte expanded key
    @returns {int[]}             16-Byte cipher
    """
    rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1

    # Initial round key addition.
    data = xor(data, expanded_key[:BLOCK_SIZE_BYTES])
    for i in range(1, rounds + 1):
        data = sub_bytes(data)
        data = shift_rows(data)
        # The final round skips the column mixing.
        if i != rounds:
            data = mix_columns(data)
        data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES:(i + 1) * BLOCK_SIZE_BYTES])

    return data
|
||||||
|
|
||||||
|
def aes_decrypt_text(data, password, key_size_bytes):
    """
    Decrypt text
    - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter
    - The cipher key is retrieved by encrypting the first 16 Byte of 'password'
      with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's)
    - Mode of operation is 'counter'

    @param {str} data                    Base64 encoded string
    @param {str,unicode} password        Password (will be encoded with utf-8)
    @param {int} key_size_bytes          Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit
    @returns {str}                       Decrypted data
    """
    NONCE_LENGTH_BYTES = 8

    data = bytes_to_intlist(base64.b64decode(data))
    password = bytes_to_intlist(password.encode('utf-8'))

    # Truncate or zero-pad the password to exactly key_size_bytes.
    key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password))
    # NOTE(review): for key_size_bytes == 24 the repetition factor is 1, so
    # the derived key is only 16 Bytes long - confirm this is intended.
    key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES)

    nonce = data[:NONCE_LENGTH_BYTES]
    cipher = data[NONCE_LENGTH_BYTES:]

    class Counter:
        # Counter block: nonce in the high Bytes, zeroed counter below it.
        __value = nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)

        def next_value(self):
            # Hand out the current block, then advance by one.
            temp = self.__value
            self.__value = inc(self.__value)
            return temp

    decrypted_data = aes_ctr_decrypt(cipher, key, Counter())
    plaintext = intlist_to_bytes(decrypted_data)

    return plaintext
|
||||||
|
|
||||||
|
# Round constants, indexed by the rcon_iteration passed to key_schedule_core.
RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36)
# Byte substitution table used by sub_bytes (256 entries, indexed by byte value).
SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
        0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
        0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
        0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
        0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
        0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
        0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
        0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
        0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
        0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
        0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
        0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
        0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
        0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
        0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
        0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16)
# Per-row multipliers applied to a 4-Byte column by mix_column.
MIX_COLUMN_MATRIX = ((2, 3, 1, 1),
                     (1, 2, 3, 1),
                     (1, 1, 2, 3),
                     (3, 1, 1, 2))
|
||||||
|
|
||||||
|
def sub_bytes(data):
    """
    Substitute every Byte through SBOX

    @param {int[]} data  Bytes to substitute (each used as index into SBOX)
    @returns {int[]}     substituted Bytes
    """
    return [SBOX[x] for x in data]
|
||||||
|
|
||||||
|
def rotate(data):
    """
    Rotate a list left by one position

    @param {int[]} data  non-empty list
    @returns {int[]}     new list with the first element moved to the end
    """
    return data[1:] + [data[0]]
|
||||||
|
|
||||||
|
def key_schedule_core(data, rcon_iteration):
    """
    Transform one word of the key schedule

    @param {int[]} data          word to transform
    @param {int} rcon_iteration  index into RCON
    @returns {int[]}             rotated and substituted word whose first
                                 Byte is XORed with RCON[rcon_iteration]
    """
    data = rotate(data)
    data = sub_bytes(data)
    data[0] = data[0] ^ RCON[rcon_iteration]

    return data
|
||||||
|
|
||||||
|
def xor(data1, data2):
    """
    XOR two lists element-wise

    @param {int[]} data1  first operand
    @param {int[]} data2  second operand
    @returns {int[]}      pairwise XOR, as long as the shorter operand
    """
    return [x ^ y for x, y in zip(data1, data2)]
|
||||||
|
|
||||||
|
def mix_column(data):
    """
    Mix one column using MIX_COLUMN_MATRIX

    @param {int[]} data  4-Byte column
    @returns {int[]}     4-Byte mixed column
    """
    data_mixed = []
    for row in range(4):
        mixed = 0
        for column in range(4):
            addend = data[column]
            if MIX_COLUMN_MATRIX[row][column] in (2, 3):
                # Multiply by 2: shift left, and on overflow past 8 bits
                # drop the carry and XOR with 0x1b.
                addend <<= 1
                if addend > 0xff:
                    addend &= 0xff
                    addend ^= 0x1b
                # Multiply by 3 is (multiply by 2) XOR the original Byte.
                if MIX_COLUMN_MATRIX[row][column] == 3:
                    addend ^= data[column]
            mixed ^= addend & 0xff
        data_mixed.append(mixed)
    return data_mixed
|
||||||
|
|
||||||
|
def mix_columns(data):
    """
    Apply mix_column to each of the four 4-Byte columns of the state

    @param {int[]} data  16-Byte state
    @returns {int[]}     16-Byte mixed state
    """
    data_mixed = []
    for i in range(4):
        column = data[i * 4:(i + 1) * 4]
        data_mixed += mix_column(column)
    return data_mixed
|
||||||
|
|
||||||
|
def shift_rows(data):
    """
    Reorder the state: the Byte in row r of each output column is taken
    from input column (column + r) modulo 4

    @param {int[]} data  16-Byte state, stored column by column
    @returns {int[]}     16-Byte shifted state
    """
    data_shifted = []
    for column in range(4):
        for row in range(4):
            data_shifted.append(data[((column + row) & 0b11) * 4 + row])
    return data_shifted
|
||||||
|
|
||||||
|
def inc(data):
    """
    Increment a list of Bytes interpreted as a big-endian number by one

    @param {int[]} data  Bytes (0-255); the argument is left unmodified
    @returns {int[]}     incremented copy; all-255 input wraps to all zeros
    """
    data = data[:]  # copy
    # Walk from the least significant Byte, carrying while Bytes overflow.
    for i in range(len(data) - 1, -1, -1):
        if data[i] == 255:
            data[i] = 0
        else:
            data[i] = data[i] + 1
            break
    return data
|
@ -0,0 +1,75 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_HTTPError,
|
||||||
|
compat_str,
|
||||||
|
compat_urllib_parse,
|
||||||
|
compat_urllib_parse_urlparse,
|
||||||
|
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AddAnimeIE(InfoExtractor):
    """Information extractor for add-anime.net video pages."""

    # The dot of "watch_video.php" is escaped so it only matches a literal dot.
    _VALID_URL = r'^http://(?:\w+\.)?add-anime\.net/watch_video\.php\?(?:.*?)v=(?P<video_id>[\w_]+)(?:.*)'
    IE_NAME = u'AddAnime'
    _TEST = {
        u'url': u'http://www.add-anime.net/watch_video.php?v=24MR3YO5SAS9',
        u'file': u'24MR3YO5SAS9.flv',
        u'md5': u'1036a0e0cd307b95bd8a8c3a5c8cfaf1',
        u'info_dict': {
            u"description": u"One Piece 606",
            u"title": u"One Piece 606"
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video at `url`.

        If the first download fails with an HTTP error, the error page is
        treated as a challenge form: the arithmetic task found in it is
        solved, the answer is submitted, and the page is downloaded again.
        """
        try:
            mobj = re.match(self._VALID_URL, url)
            video_id = mobj.group('video_id')
            webpage = self._download_webpage(url, video_id)
        except ExtractorError as ee:
            # Only HTTP errors carry a challenge page; re-raise anything else.
            if not isinstance(ee.cause, compat_HTTPError):
                raise

            redir_webpage = ee.cause.read().decode('utf-8')
            action = self._search_regex(
                r'<form id="challenge-form" action="([^"]+)"',
                redir_webpage, u'Redirect form')
            vc = self._search_regex(
                r'<input type="hidden" name="jschl_vc" value="([^"]+)"/>',
                redir_webpage, u'redirect vc value')
            av = re.search(
                r'a\.value = ([0-9]+)[+]([0-9]+)[*]([0-9]+);',
                redir_webpage)
            if av is None:
                raise ExtractorError(u'Cannot find redirect math task')
            av_res = int(av.group(1)) + int(av.group(2)) * int(av.group(3))

            parsed_url = compat_urllib_parse_urlparse(url)
            # The expected answer is the task result plus the host name length.
            av_val = av_res + len(parsed_url.netloc)
            confirm_url = (
                parsed_url.scheme + u'://' + parsed_url.netloc +
                action + '?' +
                compat_urllib_parse.urlencode({
                    'jschl_vc': vc, 'jschl_answer': compat_str(av_val)}))
            self._download_webpage(
                confirm_url, video_id,
                note=u'Confirming after redirect')
            webpage = self._download_webpage(url, video_id)

        video_url = self._search_regex(r"var normal_video_file = '(.*?)';",
                                       webpage, u'video file URL')
        video_title = self._og_search_title(webpage)
        video_description = self._og_search_description(webpage)

        return {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'ext': 'flv',
            'title': video_title,
            'description': video_description
        }
|
@ -0,0 +1,166 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class AppleTrailersIE(InfoExtractor):
    """Information extractor for trailers.apple.com movie pages.

    A movie page yields a playlist with one entry per trailer listed in the
    page's ``includes/playlists/web.inc`` snippet.
    """

    # Host name dots are escaped so they only match literal dots.
    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
    _TEST = {
        u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
        u"playlist": [
            {
                u"file": u"manofsteel-trailer4.mov",
                u"md5": u"11874af099d480cc09e103b189805d5f",
                u"info_dict": {
                    u"duration": 111,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_11624.jpg",
                    u"title": u"Trailer 4",
                    u"upload_date": u"20130523",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer3.mov",
                u"md5": u"07a0a262aae5afe68120eed61137ab34",
                u"info_dict": {
                    u"duration": 182,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_10793.jpg",
                    u"title": u"Trailer 3",
                    u"upload_date": u"20130417",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-trailer.mov",
                u"md5": u"e401fde0813008e3307e54b6f384cff1",
                u"info_dict": {
                    u"duration": 148,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_8703.jpg",
                    u"title": u"Trailer",
                    u"upload_date": u"20121212",
                    u"uploader_id": u"wb",
                },
            },
            {
                u"file": u"manofsteel-teaser.mov",
                u"md5": u"76b392f2ae9e7c98b22913c10a639c97",
                u"info_dict": {
                    u"duration": 93,
                    u"thumbnail": u"http://trailers.apple.com/trailers/wb/manofsteel/images/thumbnail_6899.jpg",
                    u"title": u"Teaser",
                    u"upload_date": u"20120721",
                    u"uploader_id": u"wb",
                },
            }
        ]
    }

    def _real_extract(self, url):
        """Return a playlist dict with one video entry per listed trailer."""
        mobj = re.match(self._VALID_URL, url)
        movie = mobj.group('movie')
        uploader_id = mobj.group('company')

        playlist_url = url.partition(u'?')[0] + u'/includes/playlists/web.inc'
        playlist_snippet = self._download_webpage(playlist_url, movie)
        # Drop <script> elements and add a single root element so that the
        # snippet can be handed to the XML parser.
        playlist_cleaned = re.sub(r'(?s)<script>.*?</script>', u'', playlist_snippet)
        playlist_html = u'<html>' + playlist_cleaned + u'</html>'

        # Parsed size pages keyed by size id, shared between all trailers.
        size_cache = {}

        doc = xml.etree.ElementTree.fromstring(playlist_html)
        playlist = []
        for li in doc.findall('./div/ul/li'):
            title = li.find('.//h3').text
            video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
            thumbnail = li.find('.//img').attrib['src']

            date_el = li.find('.//p')
            upload_date = None
            m = re.search(r':\s?(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2})', date_el.text)
            if m:
                # The page only carries a two-digit year.
                upload_date = u'20' + m.group('year') + m.group('month') + m.group('day')
            runtime_el = date_el.find('./br')
            m = re.search(r':\s?(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime_el.tail)
            duration = None
            if m:
                duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))

            formats = []
            for formats_el in li.findall('.//a'):
                if formats_el.attrib['class'] != 'OverlayPanel':
                    continue
                target = formats_el.attrib['target']

                format_code = formats_el.text
                if 'Automatic' in format_code:
                    continue

                size_q = formats_el.attrib['href']
                size_id = size_q.rpartition('#videos-')[2]
                if size_id not in size_cache:
                    size_url = url + size_q
                    sizepage_html = self._download_webpage(
                        size_url, movie,
                        note=u'Downloading size info %s' % size_id,
                        errnote=u'Error while downloading size info %s' % size_id,
                    )
                    _doc = xml.etree.ElementTree.fromstring(sizepage_html)
                    size_cache[size_id] = _doc

                sizepage_doc = size_cache[size_id]
                links = sizepage_doc.findall('.//{http://www.w3.org/1999/xhtml}ul/{http://www.w3.org/1999/xhtml}li/{http://www.w3.org/1999/xhtml}a')
                for vid_a in links:
                    href = vid_a.get('href')
                    # Only follow the link that belongs to this trailer.
                    if not href.endswith(target):
                        continue
                    detail_q = href.partition('#')[0]
                    detail_url = url + '/' + detail_q

                    m = re.match(r'includes/(?P<detail_id>[^/]+)/', detail_q)
                    detail_id = m.group('detail_id')

                    detail_html = self._download_webpage(
                        detail_url, movie,
                        note=u'Downloading detail %s %s' % (detail_id, size_id),
                        errnote=u'Error while downloading detail %s %s' % (detail_id, size_id)
                    )
                    detail_doc = xml.etree.ElementTree.fromstring(detail_html)
                    movie_link_el = detail_doc.find('.//{http://www.w3.org/1999/xhtml}a')
                    assert movie_link_el.get('class') == 'movieLink'
                    movie_link = movie_link_el.get('href').partition('?')[0].replace('_', '_h')
                    ext = determine_ext(movie_link)
                    assert ext == 'mov'

                    formats.append({
                        'format': format_code,
                        'ext': ext,
                        'url': movie_link,
                    })

            info = {
                '_type': 'video',
                'id': video_id,
                'title': title,
                'formats': formats,
                'duration': duration,
                'thumbnail': thumbnail,
                'upload_date': upload_date,
                'uploader_id': uploader_id,
                'user_agent': 'QuickTime compatible (youtube-dl)',
            }
            # TODO: Remove when #980 has been merged
            info['url'] = formats[-1]['url']
            info['ext'] = formats[-1]['ext']

            playlist.append(info)

        return {
            '_type': 'playlist',
            'id': movie,
            'entries': playlist,
        }
|
@ -0,0 +1,35 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class Canalc2IE(InfoExtractor):
    """Information extractor for canalc2.tv video pages."""

    # Public attribute name, matching the other extractors; the previous
    # "_IE_NAME" spelling is not the attribute read elsewhere.
    IE_NAME = 'canalc2.tv'
    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'

    _TEST = {
        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
        u'file': u'12163.mp4',
        u'md5': u'060158428b650f896c542dfbb3d6487f',
        u'info_dict': {
            u'title': u'Terrasses du Numérique'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video at `url`."""
        video_id = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, video_id)
        file_name = self._search_regex(
            r"so\.addVariable\('file','(.*?)'\);",
            webpage, 'file name')
        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name

        title = self._html_search_regex(
            r'class="evenement8">(.*?)</a>', webpage, u'title')

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
|
@ -0,0 +1,58 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
|
||||||
|
class CNNIE(InfoExtractor):
    """Information extractor for cnn.com video pages."""

    _VALID_URL = r'''(?x)https?://(edition\.)?cnn\.com/video/(data/.+?|\?)/
        (?P<path>.+?/(?P<title>[^/]+?)(?:\.cnn|(?=&)))'''

    _TESTS = [{
        u'url': u'http://edition.cnn.com/video/?/video/sports/2013/06/09/nadal-1-on-1.cnn',
        u'file': u'sports_2013_06_09_nadal-1-on-1.cnn.mp4',
        u'md5': u'3e6121ea48df7e2259fe73a0628605c4',
        u'info_dict': {
            u'title': u'Nadal wins 8th French Open title',
            u'description': u'World Sport\'s Amanda Davies chats with 2013 French Open champion Rafael Nadal.',
        },
    },
    {
        u"url": u"http://edition.cnn.com/video/?/video/us/2013/08/21/sot-student-gives-epic-speech.georgia-institute-of-technology&utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+rss%2Fcnn_topstories+%28RSS%3A+Top+Stories%29",
        u"file": u"us_2013_08_21_sot-student-gives-epic-speech.georgia-institute-of-technology.mp4",
        u"md5": u"b5cc60c60a3477d185af8f19a2a26f4e",
        u"info_dict": {
            u"title": "Student's epic speech stuns new freshmen",
            u"description": "A Georgia Tech student welcomes the incoming freshmen with an epic speech backed by music from \"2001: A Space Odyssey.\""
        }
    }]

    def _real_extract(self, url):
        """Return the info dict for the video at `url`.

        The video file and the thumbnail that sort last (by width, height
        and bitrate, respectively by height and width) are selected.
        """
        mobj = re.match(self._VALID_URL, url)
        path = mobj.group('path')
        page_title = mobj.group('title')
        info_url = u'http://cnn.com/video/data/3.0/%s/index.xml' % path
        info_xml = self._download_webpage(info_url, page_title)
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        formats = []
        for f in info.findall('files/file'):
            # Entries whose bitrate attribute does not start with
            # "<digits>x<digits>" are ignored.
            mf = re.match(r'(\d+)x(\d+)(?:_(.*)k)?', f.attrib['bitrate'])
            if mf is not None:
                formats.append((int(mf.group(1)), int(mf.group(2)), int(mf.group(3) or 0), f.text))
        formats = sorted(formats)
        (_, _, _, video_path) = formats[-1]
        video_url = 'http://ht.cdn.turner.com/cnn/big%s' % video_path

        thumbnails = sorted([((int(t.attrib['height']), int(t.attrib['width'])), t.text) for t in info.findall('images/image')])
        thumbs_dict = [{'resolution': res, 'url': t_url} for (res, t_url) in thumbnails]

        return {'id': info.attrib['id'],
                'title': info.find('headline').text,
                'url': video_url,
                'ext': determine_ext(video_url),
                'thumbnail': thumbnails[-1][1],
                'thumbnails': thumbs_dict,
                'description': info.find('description').text,
                }
|
@ -0,0 +1,74 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
determine_ext,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class DaumIE(InfoExtractor):
    """Information extractor for tvpot.daum.net clips."""

    _VALID_URL = r'https?://tvpot\.daum\.net/.*?clipid=(?P<id>\d+)'
    IE_NAME = u'daum.net'

    _TEST = {
        u'url': u'http://tvpot.daum.net/clip/ClipView.do?clipid=52554690',
        u'file': u'52554690.mp4',
        u'info_dict': {
            u'title': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'description': u'DOTA 2GETHER 시즌2 6회 - 2부',
            u'upload_date': u'20130831',
            u'duration': 3868,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the clip at `url`.

        One format entry is built per ``output_list`` element of the formats
        document; each needs its own request to resolve the media URL.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        canonical_url = 'http://tvpot.daum.net/v/%s' % video_id
        webpage = self._download_webpage(canonical_url, video_id)
        full_id = self._search_regex(r'<link rel="video_src" href=".+?vid=(.+?)"',
                                     webpage, u'full id')
        query = compat_urllib_parse.urlencode({'vid': full_id})
        info_xml = self._download_webpage(
            'http://tvpot.daum.net/clip/ClipInfoXml.do?' + query, video_id,
            u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://videofarm.daum.net/controller/api/open/v1_2/MovieData.apixml?' + query,
            video_id, u'Downloading video formats info')
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        self.to_screen(u'%s: Getting video urls' % video_id)
        formats = []
        for format_el in urls.findall('result/output_list/output_list'):
            profile = format_el.attrib['profile']
            format_query = compat_urllib_parse.urlencode({
                'vid': full_id,
                'profile': profile,
            })
            url_xml = self._download_webpage(
                'http://videofarm.daum.net/controller/api/open/v1_2/MovieLocation.apixml?' + format_query,
                video_id, note=False)
            url_doc = xml.etree.ElementTree.fromstring(url_xml.encode('utf-8'))
            format_url = url_doc.find('result/url').text
            formats.append({
                'url': format_url,
                'ext': determine_ext(format_url),
                'format_id': profile,
            })

        info = {
            'id': video_id,
            'title': info_doc.find('TITLE').text,
            'formats': formats,
            'thumbnail': self._og_search_thumbnail(webpage),
            'description': info_doc.find('CONTENTS').text,
            'duration': int(info_doc.find('DURATION').text),
            # Only the first 8 characters of REGDTTM are kept.
            'upload_date': info_doc.find('REGDTTM').text[:8],
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
|
@ -0,0 +1,39 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class DefenseGouvFrIE(InfoExtractor):
    """Information extractor for defense.gouv.fr webtv pages."""

    # Public attribute name, matching the other extractors; the previous
    # "_IE_NAME" spelling is not the attribute read elsewhere.
    IE_NAME = 'defense.gouv.fr'
    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/'
                  r'ligthboxvideo/base-de-medias/webtv/(.*)')

    _TEST = {
        u'url': (u'http://www.defense.gouv.fr/layout/set/ligthboxvideo/'
                 u'base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1'),
        u'file': u'11213.mp4',
        u'md5': u'75bba6124da7e63d2d60b5244ec9430c',
        "info_dict": {
            "title": "attaque-chimique-syrienne-du-21-aout-2013-1"
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the video at `url`.

        The title is the trailing part of the URL; the media URL is taken
        from the first rendition of the JSON config.
        """
        title = re.match(self._VALID_URL, url).group(1)
        webpage = self._download_webpage(url, title)
        video_id = self._search_regex(
            r"flashvars.pvg_id=\"(\d+)\";",
            webpage, 'ID')

        json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'
                    + video_id)
        info = self._download_webpage(json_url, title,
                                      'Downloading JSON config')
        video_url = json.loads(info)['renditions'][0]['url']

        return {'id': video_id,
                'ext': 'mp4',
                'url': video_url,
                'title': title,
                }
|
@ -0,0 +1,37 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import determine_ext
|
||||||
|
|
||||||
|
class HarkIE(InfoExtractor):
    """Information extractor for hark.com clips."""

    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
    _TEST = {
        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
        u'file': u'mmbzyhkgny.mp3',
        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
        u'info_dict': {
            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
            u'duration': 11,
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the clip at `url`.

        All fields are read from the clip's JSON document.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        json_url = "http://www.hark.com/clips/%s.json" % (video_id)
        info_json = self._download_webpage(json_url, video_id)
        info = json.loads(info_json)
        final_url = info['url']

        return {'id': video_id,
                'url': final_url,
                'title': info['name'],
                'ext': determine_ext(final_url),
                'description': info['description'],
                'thumbnail': info['image_original'],
                'duration': info['duration'],
                }
|
@ -0,0 +1,55 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
import operator
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class MetacriticIE(InfoExtractor):
    """Information extractor for metacritic.com trailer pages."""

    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the trailer at `url`.

        Formats are sorted by ascending rate, and the last one is merged
        into the result.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The xml is not well formatted, there are raw '&'; escape them so
        # that the document can be parsed.
        info_xml = self._download_webpage('http://www.metacritic.com/video_data?video=' + video_id,
                                          video_id, u'Downloading info xml').replace('&', '&amp;')
        info = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # The document may list several clips; pick the requested one.
        clip = next(c for c in info.findall('playList/clip') if c.find('id').text == video_id)
        formats = []
        for videoFile in clip.findall('httpURI/videoFile'):
            rate_str = videoFile.find('rate').text
            video_url = videoFile.find('filePath').text
            formats.append({
                'url': video_url,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(r'<b>Description:</b>(.*?)</p>',
                                              webpage, u'description', flags=re.DOTALL)

        info = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
|
@ -0,0 +1,74 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
get_element_by_id,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TechTVMITIE(InfoExtractor):
    """Information extractor for videos hosted on techtv.mit.edu."""

    IE_NAME = u'techtv.mit.edu'
    _VALID_URL = r'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
        u'file': u'25418.mp4',
        u'md5': u'1f8cb3e170d41fd74add04d3c9330e5f',
        u'info_dict': {
            u'title': u'MIT DNA Learning Center Set',
            u'description': u'md5:82313335e8a8a3f243351ba55bc1b474',
        },
    }

    def _real_extract(self, url):
        """Return the info dict of the video referenced by ``url``.

        The ``videos`` page of the id is always the one downloaded, whether
        ``url`` is a ``videos`` or an ``embeds`` address.  The entry with
        the highest ``bitrate`` in the page's ``bitrates`` list is used.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page_url = 'http://techtv.mit.edu/videos/%s' % video_id
        raw_page = self._download_webpage(page_url, video_id)

        # Title and description are looked up in a copy of the page that
        # has its HTML comments removed.
        comment_re = re.compile(u'<!--.*?-->', re.S)
        clean_page = comment_re.sub(u'', raw_page)

        base_url = self._search_regex(
            r'ipadUrl: \'(.+?cloudfront.net/)', raw_page, u'base url')
        formats_json = self._search_regex(
            r'bitrates: (\[.+?\])', raw_page, u'video formats')
        formats = json.loads(formats_json)
        # Ascending by bitrate: the best format is the last element.
        formats.sort(key=lambda fmt: fmt['bitrate'])

        title = get_element_by_id('edit-title', clean_page)
        description = clean_html(
            get_element_by_id('edit-description', clean_page))
        thumbnail = self._search_regex(
            r'playlist:.*?url: \'(.+?)\'',
            raw_page, u'thumbnail', flags=re.DOTALL)

        # The format urls carry a 'mp4:' marker that is dropped before the
        # path is appended to the base url.
        best_path = formats[-1]['url'].replace('mp4:', '')
        return {
            'id': video_id,
            'title': title,
            'url': base_url + best_path,
            'ext': 'mp4',
            'description': description,
            'thumbnail': thumbnail,
        }
|
||||||
|
|
||||||
|
|
||||||
|
class MITIE(TechTVMITIE):
    """Information extractor for video.mit.edu watch pages.

    The watch page only embeds a player in an iframe; the iframe address
    is handed over to the 'TechTVMIT' extractor.
    """

    IE_NAME = u'video.mit.edu'
    _VALID_URL = r'https?://video\.mit\.edu/watch/(?P<title>[^/]+)'

    _TEST = {
        u'url': u'http://video.mit.edu/watch/the-government-is-profiling-you-13222/',
        u'file': u'21783.mp4',
        u'md5': u'7db01d5ccc1895fc5010e9c9e13648da',
        u'info_dict': {
            u'title': u'The Government is Profiling You',
            u'description': u'md5:ad5795fe1e1623b73620dbfd47df9afd',
        },
    }

    def _real_extract(self, url):
        """Return a url result pointing at the first iframe of the page."""
        page_title = re.match(self._VALID_URL, url).group('title')
        webpage = self._download_webpage(url, page_title)

        status = '%s: Extracting %s url' % (page_title, TechTVMITIE.IE_NAME)
        self.to_screen(status)

        embed_url = self._search_regex(
            r'<iframe .*?src="(.+?)"', webpage, u'embed url')
        return self.url_result(embed_url, ie='TechTVMIT')
|
@ -0,0 +1,73 @@
|
|||||||
|
# encoding: utf-8
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urllib_parse,
|
||||||
|
ExtractorError,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NaverIE(InfoExtractor):
    """Information extractor for tvcast.naver.com videos."""

    _VALID_URL = r'https?://tvcast\.naver\.com/v/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://tvcast.naver.com/v/81652',
        u'file': u'81652.mp4',
        u'info_dict': {
            u'title': u'[9월 모의고사 해설강의][수학_김상희] 수학 A형 16~20번',
            u'description': u'합격불변의 법칙 메가스터디 | 메가스터디 수학 김상희 선생님이 9월 모의고사 수학A형 16번에서 20번까지 해설강의를 공개합니다.',
            u'upload_date': u'20130903',
        },
    }

    def _real_extract(self, url):
        """Return the info dict of the video referenced by ``url``.

        The web page provides the ``vid``/``inKey`` pair of the player,
        which is then used to query two XML services: one for the video
        metadata and one for the available encodings.  RTMP encodings are
        ignored.

        Raises ExtractorError when the player parameters cannot be found
        in the page or when no non-RTMP encoding is offered.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group(1)
        webpage = self._download_webpage(url, video_id)

        m_id = re.search(
            r'var rmcPlayer = new nhn.rmcnmv.RMCVideoPlayer\("(.+?)", "(.+?)"',
            webpage)
        if m_id is None:
            raise ExtractorError(u'couldn\'t extract vid and key')
        vid = m_id.group(1)
        key = m_id.group(2)

        query = compat_urllib_parse.urlencode({'vid': vid, 'inKey': key})
        query_urls = compat_urllib_parse.urlencode({
            'masterVid': vid,
            'protocol': 'p2p',
            'inKey': key,
        })
        info_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/videoInfo.nhn?' + query,
            video_id, u'Downloading video info')
        urls_xml = self._download_webpage(
            'http://serviceapi.rmcnmv.naver.com/flash/playableEncodingOption.nhn?' + query_urls,
            video_id, u'Downloading video formats info')
        info_doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        urls_doc = xml.etree.ElementTree.fromstring(urls_xml.encode('utf-8'))

        formats = []
        for format_el in urls_doc.findall('EncodingOptions/EncodingOption'):
            domain = format_el.find('Domain').text
            if domain.startswith('rtmp'):
                continue
            formats.append({
                'url': domain + format_el.find('uri').text,
                'ext': 'mp4',
                'width': int(format_el.find('width').text),
                'height': int(format_el.find('height').text),
            })
        # Without this check an RTMP-only video would fail further down
        # with a bare IndexError on formats[-1].
        if not formats:
            raise ExtractorError(u'couldn\'t find any non-rtmp video format')

        info = {
            'id': video_id,
            'title': info_doc.find('Subject').text,
            'formats': formats,
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            # WriteDate is dot separated; dropping the dots gives the
            # upload_date string.
            'upload_date': info_doc.find('WriteDate').text.replace('.', ''),
            'view_count': int(info_doc.find('PlayCount').text),
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info
|
@ -0,0 +1,33 @@
|
|||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import find_xpath_attr, compat_str
|
||||||
|
|
||||||
|
|
||||||
|
class NBCNewsIE(InfoExtractor):
    """Information extractor for www.nbcnews.com video pages."""

    _VALID_URL = r'https?://www\.nbcnews\.com/video/.+?/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.nbcnews.com/video/nbc-news/52753292',
        u'file': u'52753292.flv',
        u'md5': u'47abaac93c6eaf9ad37ee6c4463a5179',
        u'info_dict': {
            u'title': u'Crew emerges after four-month Mars food study',
            u'description': u'md5:24e632ffac72b35f8b67a12d1b6ddfc1',
        },
    }

    def _real_extract(self, url):
        """Return the info dict built from the video's XML description."""
        video_id = re.match(self._VALID_URL, url).group('id')
        info_url = 'http://www.nbcnews.com/id/%s/displaymode/1219' % video_id
        info_xml = self._download_webpage(info_url, video_id)
        root = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))
        video = root.find('video')

        title = video.find('headline').text
        # The flash video and the thumbnail are both 'media' elements,
        # told apart by their 'type' attribute.
        video_url = find_xpath_attr(video, 'media', 'type', 'flashVideo').text
        description = compat_str(video.find('caption').text)
        thumbnail = find_xpath_attr(video, 'media', 'type', 'thumbnail').text

        return {
            'id': video_id,
            'title': title,
            'ext': 'flv',
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@ -0,0 +1,54 @@
|
|||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
ExtractorError,
|
||||||
|
find_xpath_attr,
|
||||||
|
)
|
||||||
|
|
||||||
|
class ORFIE(InfoExtractor):
    """Information extractor for tvthek.orf.at episode and topic pages."""

    # The dots of the host name are escaped so that only the real host
    # matches.
    _VALID_URL = r'https?://tvthek\.orf\.at/(programs/.+?/episodes|topics/.+?)/(?P<id>\d+)'

    def _real_extract(self, url):
        """Return the list of videos contained in the page at ``url``.

        The page embeds a url-encoded flash configuration XML (which holds
        the video urls) and a JSON playlist (which holds id and title of
        each entry); the two are walked in parallel.

        Raises ExtractorError when an item offers none of the known
        qualities.
        """
        mobj = re.match(self._VALID_URL, url)
        playlist_id = mobj.group('id')
        webpage = self._download_webpage(url, playlist_id)

        flash_xml = self._search_regex(
            r'ORF\.flashXML = \'(.+?)\'', webpage, u'flash xml')
        # Run the value through parse_qs to undo its url encoding.
        flash_xml = compat_urlparse.parse_qs('xml=' + flash_xml)['xml'][0]
        flash_config = xml.etree.ElementTree.fromstring(flash_xml.encode('utf-8'))
        playlist_json = self._search_regex(
            r'playlist\': \'(\[.*?\])\'', webpage, u'playlist').replace(r'\"', '"')
        playlist = json.loads(playlist_json)

        videos = []
        ns = '{http://tempuri.org/XMLSchema.xsd}'
        xpath = '%(ns)sPlaylist/%(ns)sItems/%(ns)sItem' % {'ns': ns}
        webpage_description = self._og_search_description(webpage)
        for (i, (item, info)) in enumerate(zip(flash_config.findall(xpath), playlist), 1):
            # Get best quality url: the qualities are tried in this order
            # and the first one that is present wins.
            rtmp_url = None
            for q in ['Q6A', 'Q4A', 'Q1A']:
                video_url = find_xpath_attr(item, '%sVideoUrl' % ns, 'quality', q)
                if video_url is not None:
                    rtmp_url = video_url.text
                    break
            if rtmp_url is None:
                raise ExtractorError(u'Couldn\'t get video url: %s' % info['id'])
            # Fall back to the description of the whole page when the
            # entry has no paragraph of its own.
            description = self._html_search_regex(
                r'id="playlist_entry_%s".*?<p>(.*?)</p>' % i, webpage,
                u'description', default=webpage_description, flags=re.DOTALL)
            videos.append({
                '_type': 'video',
                'id': info['id'],
                'title': info['title'],
                'url': rtmp_url,
                'ext': 'flv',
                'description': description,
            })

        return videos
|
@ -0,0 +1,42 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
clean_html,
|
||||||
|
compat_parse_qs,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class Ro220IE(InfoExtractor):
    """Information extractor for 220.ro videos."""

    IE_NAME = '220.ro'
    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
        u'file': u'LYV6doKo7f.mp4',
        u'md5': u'03af18b73a07b4088753930db7a34add',
        u'info_dict': {
            u"title": u"Luati-le Banii sez 4 ep 1",
            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
        }
    }

    def _real_extract(self, url):
        """Return the info dict read from the player's flashVars."""
        video_id = re.match(self._VALID_URL, url).group('video_id')
        webpage = self._download_webpage(url, video_id)

        # All the metadata is stored as a query string in the flashVars
        # parameter of the embedded player.
        flash_vars = compat_parse_qs(self._search_regex(
            r'<param name="flashVars" value="([^"]+)"',
            webpage, u'flashVars'))

        video_url = flash_vars['videoURL'][0]
        title = flash_vars['title'][0]
        description = clean_html(flash_vars['desc'][0])
        thumbnail = flash_vars['preview'][0]

        return {
            '_type': 'video',
            'id': video_id,
            'ext': 'mp4',
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }
|
@ -0,0 +1,73 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
import xml.etree.ElementTree
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
|
||||||
|
|
||||||
|
class TriluliluIE(InfoExtractor):
    """Information extractor for trilulilu.ro videos."""

    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
    _TEST = {
        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
        u'file': u"big-buck-bunny-1.mp4",
        u'info_dict': {
            u"title": u"Big Buck Bunny",
            u"description": u":) pentru copilul din noi",
        },
        # Server ignores Range headers (--test)
        u"params": {
            u"skip_download": True
        }
    }

    def _real_extract(self, url):
        """Return the info dict of the video referenced by ``url``.

        Title, thumbnail and description come from the page's Open Graph
        data.  The ``block_flash_vars`` JSON object of the page supplies
        the server, hash and user id needed to download the list of
        formats and to build one stream url per format.
        """
        video_id = re.match(self._VALID_URL, url).group('video_id')
        webpage = self._download_webpage(url, video_id)

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        log = json.loads(self._search_regex(
            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info'))

        format_url = (
            u'http://fs%(server)s.trilulilu.ro/%(hash)s/video-formats2' % log)
        format_str = self._download_webpage(
            format_url, video_id,
            note=u'Downloading formats',
            errnote=u'Error while downloading formats')
        format_doc = xml.etree.ElementTree.fromstring(format_str)

        # '%%s' survives this first substitution as '%s', which is filled
        # in with the format name below.
        video_url_template = (
            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
            u'&source=site&hash=%(hash)s&username=%(userid)s&'
            u'key=ministhebest&format=%%s&sig=&exp=' % log)

        formats = []
        for fnode in format_doc.findall('./formats/format'):
            formats.append({
                'format': fnode.text,
                'url': video_url_template % fnode.text,
            })

        info = {
            '_type': 'video',
            'id': video_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

        # TODO: Remove when #980 has been merged
        last_format = formats[-1]
        info['url'] = last_format['url']
        # The extension is the part of the format name before the first '-'.
        info['ext'] = last_format['format'].partition('-')[0]

        return info
|
@ -0,0 +1,56 @@
|
|||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
from .common import InfoExtractor
|
||||||
|
from ..utils import (
|
||||||
|
compat_urlparse,
|
||||||
|
get_element_by_id,
|
||||||
|
clean_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
class VeeHDIE(InfoExtractor):
    """Information extractor for veehd.com videos."""

    # The dot of the host name is escaped so that only the real host
    # matches.
    _VALID_URL = r'https?://veehd\.com/video/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://veehd.com/video/4686958',
        u'file': u'4686958.mp4',
        u'info_dict': {
            u'title': u'Time Lapse View from Space ( ISS)',
            u'uploader_id': u'spotted',
            u'description': u'md5:f0094c4cf3a72e22bc4e4239ef767ad7',
        },
    }

    def _real_extract(self, url):
        """Return the info dict of the video referenced by ``url``.

        The video url is not part of the main page: it is read from the
        JSON configuration of the player page that the main page loads
        into an iframe.  The other fields are scraped from the main page.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        player_path = self._search_regex(
            r'\$\("#playeriframe"\)\.attr\({src : "(.+?)"',
            webpage, u'player path')
        # Resolve the player path against the page url.
        player_url = compat_urlparse.urljoin(url, player_path)
        player_page = self._download_webpage(
            player_url, video_id, u'Downloading player page')
        config_json = self._search_regex(
            r'value=\'config=({.+?})\'', player_page, u'config json')
        config = json.loads(config_json)

        video_url = compat_urlparse.unquote(config['clip']['url'])
        # Only the text before the last '|' of the element is kept.
        title = clean_html(
            get_element_by_id('videoName', webpage).rpartition('|')[0])
        uploader_id = self._html_search_regex(
            r'<a href="/profile/\d+">(.+?)</a>', webpage, u'uploader')
        thumbnail = self._search_regex(
            r'<img id="veehdpreview" src="(.+?)"', webpage, u'thumbnail')
        description = self._html_search_regex(
            r'<td class="infodropdown".*?<div>(.*?)<ul',
            webpage, u'description', flags=re.DOTALL)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'url': video_url,
            'ext': 'mp4',
            'uploader_id': uploader_id,
            'thumbnail': thumbnail,
            'description': description,
        }
|
@ -1,2 +1,2 @@
|
|||||||
|
|
||||||
__version__ = '2013.08.22'
|
__version__ = '2013.09.06.1'
|
||||||
|
Loading…
Reference in New Issue