Merge remote-tracking branch 'origin/master'

pull/8/head
Philipp Hagemeister 10 years ago
commit 71a6eaff83

@ -65,6 +65,7 @@ __authors__ = (
'Tobias Bell', 'Tobias Bell',
'Naglis Jonaitis', 'Naglis Jonaitis',
'Charles Chen', 'Charles Chen',
'Hassaan Ali',
) )
__license__ = 'Public Domain' __license__ = 'Public Domain'

@ -53,6 +53,7 @@ from .cnn import (
from .collegehumor import CollegeHumorIE from .collegehumor import CollegeHumorIE
from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE from .comedycentral import ComedyCentralIE, ComedyCentralShowsIE
from .condenast import CondeNastIE from .condenast import CondeNastIE
from .cracked import CrackedIE
from .criterion import CriterionIE from .criterion import CriterionIE
from .crunchyroll import CrunchyrollIE from .crunchyroll import CrunchyrollIE
from .cspan import CSpanIE from .cspan import CSpanIE
@ -252,6 +253,7 @@ from .rutube import (
RutubePersonIE, RutubePersonIE,
) )
from .rutv import RUTVIE from .rutv import RUTVIE
from .sapo import SapoIE
from .savefrom import SaveFromIE from .savefrom import SaveFromIE
from .scivee import SciVeeIE from .scivee import SciVeeIE
from .screencast import ScreencastIE from .screencast import ScreencastIE
@ -399,6 +401,7 @@ from .youtube import (
YoutubeUserIE, YoutubeUserIE,
YoutubeWatchLaterIE, YoutubeWatchLaterIE,
) )
from .zdf import ZDFIE from .zdf import ZDFIE

@ -0,0 +1,65 @@
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_iso8601,
str_to_int,
)
class CrackedIE(InfoExtractor):
    """Information extractor for video pages hosted on cracked.com."""

    # Accepts http(s) URLs of the form cracked.com/video_<digits>_<slug>.html,
    # with an optional "www." prefix; the digits are captured as the video id.
    _VALID_URL = r'https?://(?:www\.)?cracked\.com/video_(?P<id>\d+)_[\da-z-]+\.html'

    # Sample URL together with the metadata expected for it.
    _TEST = {
        'url': 'http://www.cracked.com/video_19006_4-plot-holes-you-didnt-notice-in-your-favorite-movies.html',
        'md5': '4b29a5eeec292cd5eca6388c7558db9e',
        'info_dict': {
            'id': '19006',
            'ext': 'mp4',
            'title': '4 Plot Holes You Didn\'t Notice in Your Favorite Movies',
            'description': 'md5:3b909e752661db86007d10e5ec2df769',
            'timestamp': 1405659600,
            'upload_date': '20140718',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the cracked.com video page at ``url``.

        The page is downloaded once; the media URL, Open Graph title and
        description, publication time and the view/comment counters are
        all scraped from that HTML. Width and height are taken from the
        media file name when it ends in ``_<width>X<height>.mp4``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')
        page = self._download_webpage(url, video_id)

        # Two known places for the media URL: a JavaScript variable or a
        # plain <video> tag.
        media_url = self._html_search_regex(
            [
                r'var\s+CK_vidSrc\s*=\s*"([^"]+)"',
                r'<video\s+src="([^"]+)"',
            ],
            page, 'video URL')

        og_title = self._og_search_title(page)
        og_description = self._og_search_description(page)

        published = self._html_search_regex(
            r'<time datetime="([^"]+)"', page, 'upload date', fatal=False)
        # The last six characters are dropped before parsing (presumably a
        # "+HH:MM" style UTC offset -- TODO confirm against the page markup).
        # A falsy search result is passed through unchanged.
        timestamp = parse_iso8601(published[:-6]) if published else published

        # Counters are optional (fatal=False); str_to_int receives whatever
        # the search produced.
        views = str_to_int(self._html_search_regex(
            r'<span class="views" id="viewCounts">([\d,\.]+) Views</span>',
            page, 'view count', fatal=False))
        comments = str_to_int(self._html_search_regex(
            r'<span id="commentCounts">([\d,\.]+)</span>',
            page, 'comment count', fatal=False))

        # Dimensions are only known when encoded in the file name.
        size = re.search(r'_(?P<width>\d+)X(?P<height>\d+)\.mp4$', media_url)
        frame_width = int(size.group('width')) if size else None
        frame_height = int(size.group('height')) if size else None

        return {
            'id': video_id,
            'url': media_url,
            'title': og_title,
            'description': og_description,
            'timestamp': timestamp,
            'view_count': views,
            'comment_count': comments,
            'height': frame_height,
            'width': frame_width,
        }

@ -48,7 +48,7 @@ class PluzzIE(FranceTVBaseInfoExtractor):
class FranceTvInfoIE(FranceTVBaseInfoExtractor): class FranceTvInfoIE(FranceTVBaseInfoExtractor):
IE_NAME = 'francetvinfo.fr' IE_NAME = 'francetvinfo.fr'
_VALID_URL = r'https?://www\.francetvinfo\.fr/.*/(?P<title>.+)\.html' _VALID_URL = r'https?://(?:www|mobile)\.francetvinfo\.fr/.*/(?P<title>.+)\.html'
_TESTS = [{ _TESTS = [{
'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html', 'url': 'http://www.francetvinfo.fr/replay-jt/france-3/soir-3/jt-grand-soir-3-lundi-26-aout-2013_393427.html',
@ -211,7 +211,7 @@ class GenerationQuoiIE(InfoExtractor):
class CultureboxIE(FranceTVBaseInfoExtractor): class CultureboxIE(FranceTVBaseInfoExtractor):
IE_NAME = 'culturebox.francetvinfo.fr' IE_NAME = 'culturebox.francetvinfo.fr'
_VALID_URL = r'https?://culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)' _VALID_URL = r'https?://(?:m\.)?culturebox\.francetvinfo\.fr/(?P<name>.*?)(\?|$)'
_TEST = { _TEST = {
'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813', 'url': 'http://culturebox.francetvinfo.fr/einstein-on-the-beach-au-theatre-du-chatelet-146813',

@ -11,7 +11,7 @@ from ..utils import (
class MLBIE(InfoExtractor): class MLBIE(InfoExtractor):
_VALID_URL = r'http?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)' _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
_TESTS = [ _TESTS = [
{ {
'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', 'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',

@ -0,0 +1,119 @@
# encoding: utf-8
from __future__ import unicode_literals
import re
from .common import InfoExtractor
from ..utils import (
parse_duration,
unified_strdate,
)
class SapoIE(InfoExtractor):
    """Information extractor for SAPO Vídeos (videos.sapo.* sites)."""

    IE_DESC = 'SAPO Vídeos'

    # Optional "v2." or "www." prefix, one of the listed country TLDs, and a
    # 20-character alphanumeric video id.
    _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})'

    # Sample URLs with the metadata expected for each.
    _TESTS = [
        {
            'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi',
            'md5': '79ee523f6ecb9233ac25075dee0eda83',
            'note': 'SD video',
            'info_dict': {
                'id': 'UBz95kOtiWYUMTA5Ghfi',
                'ext': 'mp4',
                'title': 'Benfica - Marcas na Hitória',
                'description': 'md5:c9082000a128c3fd57bf0299e1367f22',
                'duration': 264,
                'uploader': 'tiago_1988',
                'upload_date': '20080229',
                'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'],
            },
        },
        {
            'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF',
            'md5': '90a2f283cfb49193fe06e861613a72aa',
            'note': 'HD video',
            'info_dict': {
                'id': 'IyusNAZ791ZdoCY5H5IF',
                'ext': 'mp4',
                'title': 'Codebits VII - Report',
                'description': 'md5:6448d6fd81ce86feac05321f354dbdc8',
                'duration': 144,
                'uploader': 'codebits',
                'upload_date': '20140427',
                'categories': ['codebits', 'codebits2014'],
            },
        },
        {
            'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz',
            'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac',
            'note': 'v2 video',
            'info_dict': {
                'id': 'yLqjzPtbTimsn2wWBKHz',
                'ext': 'mp4',
                'title': 'Hipnose Condicionativa 4',
                'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40',
                'duration': 692,
                'uploader': 'sapozen',
                'upload_date': '20090609',
                'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'],
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the SAPO video page at ``url``.

        All metadata comes from the video's RSS2 document fetched from
        rd3.videos.sapo.pt; the first ``./channel/item`` element of that
        document is read. An SD format is always produced, and an HD
        (1280x720) format is added when the feed's HD flag is ``true``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        feed = self._download_xml(
            'http://rd3.videos.sapo.pt/%s/rss2' % video_id, video_id)
        item = feed.find('./channel/item')

        def text_of(path):
            # Text content of the child element of the feed item at ``path``.
            return item.find(path).text

        title = text_of('./title')
        description = text_of('./{http://videos.sapo.pt/mrss/}synopse')
        thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url')
        duration = parse_duration(text_of('./{http://videos.sapo.pt/mrss/}time'))
        uploader = text_of('./{http://videos.sapo.pt/mrss/}author')
        upload_date = unified_strdate(text_of('./pubDate'))
        view_count = int(text_of('./{http://videos.sapo.pt/mrss/}views'))
        comment_count = int(text_of('./{http://videos.sapo.pt/mrss/}comment_count'))

        # Tags are whitespace-separated; an empty or missing text means no
        # categories.
        raw_tags = text_of('./{http://videos.sapo.pt/mrss/}tags')
        if raw_tags:
            categories = raw_tags.split()
        else:
            categories = []

        # The m18 flag marks adult-only content.
        age_limit = 0
        if text_of('./{http://videos.sapo.pt/mrss/}m18') == 'true':
            age_limit = 18

        sd_url = text_of('./{http://videos.sapo.pt/mrss/}videoFile')
        # videoSize is "<width>x<height>".
        dimensions = text_of('./{http://videos.sapo.pt/mrss/}videoSize').split('x')

        sd_format = {
            'url': sd_url,
            'ext': 'mp4',
            'format_id': 'sd',
            'width': int(dimensions[0]),
            'height': int(dimensions[1]),
        }
        formats = [sd_format]

        if text_of('./{http://videos.sapo.pt/mrss/}HD') == 'true':
            # The HD URL is the SD URL with a trailing /mov/1 replaced by
            # /mov/39.
            hd_format = {
                'url': re.sub(r'/mov/1$', '/mov/39', sd_url),
                'ext': 'mp4',
                'format_id': 'hd',
                'width': 1280,
                'height': 720,
            }
            formats.append(hd_format)

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'uploader': uploader,
            'upload_date': upload_date,
            'view_count': view_count,
            'comment_count': comment_count,
            'categories': categories,
            'age_limit': age_limit,
            'formats': formats,
        }

@ -1194,6 +1194,8 @@ def format_bytes(bytes):
def str_to_int(int_str):
    """Convert a human-formatted integer string such as ``'1,234'`` to int.

    Commas and dots are treated as digit-group separators and removed
    before conversion.

    Args:
        int_str: The text to convert, an ``int``, or ``None``.

    Returns:
        ``None`` when ``int_str`` is ``None`` (so the result of an optional
        lookup can be passed straight through), ``int_str`` itself when it
        is already an ``int``, otherwise the parsed integer.

    Raises:
        ValueError: If the text is not an integer once separators are
            removed.
    """
    if int_str is None:
        return None
    # A value that is already numeric needs no parsing; re.sub would
    # otherwise reject it with a TypeError.
    if isinstance(int_str, int):
        return int_str
    int_str = re.sub(r'[,\.]', u'', int_str)
    return int(int_str)

Loading…
Cancel
Save