added YouJizz extractor

pull/619/head
Jeff Crouse 12 years ago
parent 9a2cf56d51
commit 187da2c093

@ -25,7 +25,7 @@ which means you can modify it, redistribute it or use it however you like.
--list-extractors List all supported extractors and the URLs they --list-extractors List all supported extractors and the URLs they
would handle would handle
## Video Selection: Video Selection:
--playlist-start NUMBER playlist video to start at (default is 1) --playlist-start NUMBER playlist video to start at (default is 1)
--playlist-end NUMBER playlist video to end at (default is last) --playlist-end NUMBER playlist video to end at (default is last)
--match-title REGEX download only matching titles (regex or caseless --match-title REGEX download only matching titles (regex or caseless
@ -34,7 +34,7 @@ which means you can modify it, redistribute it or use it however you like.
caseless sub-string) caseless sub-string)
--max-downloads NUMBER Abort after downloading NUMBER files --max-downloads NUMBER Abort after downloading NUMBER files
## Filesystem Options: Filesystem Options:
-t, --title use title in file name -t, --title use title in file name
--id use video ID in file name --id use video ID in file name
-l, --literal use literal title in file name -l, --literal use literal title in file name
@ -59,7 +59,7 @@ which means you can modify it, redistribute it or use it however you like.
--write-description write video description to a .description file --write-description write video description to a .description file
--write-info-json write video metadata to a .info.json file --write-info-json write video metadata to a .info.json file
## Verbosity / Simulation Options: Verbosity / Simulation Options:
-q, --quiet activates quiet mode -q, --quiet activates quiet mode
-s, --simulate do not download the video and do not write anything -s, --simulate do not download the video and do not write anything
to disk to disk
@ -74,7 +74,7 @@ which means you can modify it, redistribute it or use it however you like.
--console-title display progress in console titlebar --console-title display progress in console titlebar
-v, --verbose print various debugging information -v, --verbose print various debugging information
## Video Format Options: Video Format Options:
-f, --format FORMAT video format code -f, --format FORMAT video format code
--all-formats download all available video formats --all-formats download all available video formats
--prefer-free-formats prefer free video formats unless a specific one is --prefer-free-formats prefer free video formats unless a specific one is
@ -86,12 +86,12 @@ which means you can modify it, redistribute it or use it however you like.
--srt-lang LANG language of the closed captions to download --srt-lang LANG language of the closed captions to download
(optional) use IETF language tags like 'en' (optional) use IETF language tags like 'en'
## Authentication Options: Authentication Options:
-u, --username USERNAME account username -u, --username USERNAME account username
-p, --password PASSWORD account password -p, --password PASSWORD account password
-n, --netrc use .netrc authentication data -n, --netrc use .netrc authentication data
## Post-processing Options: Post-processing Options:
-x, --extract-audio convert video files to audio-only files (requires -x, --extract-audio convert video files to audio-only files (requires
ffmpeg or avconv and ffprobe or avprobe) ffmpeg or avconv and ffprobe or avprobe)
--audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav"; --audio-format FORMAT "best", "aac", "vorbis", "mp3", "m4a", or "wav";
@ -133,7 +133,7 @@ youtube requires an additional signature since September 2012 which is not suppo
The error The error
File "youtube-dl", line 2 File "youtube-dl", line 2
SyntaxError: Non-ASCII character '\x93' ... SyntaxError: Non-ASCII character '\x93' ...
means you're using an outdated version of Python. Please update to Python 2.6 or 2.7. means you're using an outdated version of Python. Please update to Python 2.6 or 2.7.

@ -3439,7 +3439,7 @@ class YouPornIE(InfoExtractor):
return return
self.report_webpage(url) self.report_webpage(url)
# Get the video URL # Get the video title
result = re.search(self.VIDEO_TITLE_RE, webpage) result = re.search(self.VIDEO_TITLE_RE, webpage)
if result is None: if result is None:
self._downloader.trouble(u'ERROR: unable to extract video title') self._downloader.trouble(u'ERROR: unable to extract video title')
@ -3610,3 +3610,87 @@ class PornotubeIE(InfoExtractor):
return [info] return [info]
class YouJizzIE(InfoExtractor):
    """Information extractor for youjizz.com.

    Extraction takes two requests: the video page supplies the title and
    the URL of the embed page, and the embed page supplies the actual media
    URL (passed to the player through ``so.addVariable("file", ...)``).
    """

    # Literal dots are escaped so that '.' cannot match arbitrary characters.
    _VALID_URL = r'^(?:https?://)?(?:\w+\.)?youjizz\.com/videos/([^.]+)\.html$'
    IE_NAME = u'youjizz'
    # Non-greedy so a later '</title>' on the same line is not swallowed.
    VIDEO_TITLE_RE = r'<title>(?P<title>.*?)</title>'
    EMBED_PAGE_RE = r'http://www\.youjizz\.com/videos/embed/(?P<videoid>[0-9]+)'
    SOURCE_RE = r'so\.addVariable\("file",encodeURIComponent\("(?P<source>[^"]+)"\)\);'

    def __init__(self, downloader=None):
        InfoExtractor.__init__(self, downloader)

    @staticmethod
    def _as_unicode(text):
        """Return text as unicode, decoding it from UTF-8 if it is a byte string.

        Calling ``.decode()`` on a value that is already unicode makes
        Python 2 encode it to ASCII first, which fails for any non-ASCII
        character; this helper decodes only when decoding is needed.
        """
        if isinstance(text, bytes):
            return text.decode('utf-8')
        return text

    def report_extract_entry(self, url):
        """Report the media URL that was extracted."""
        self._downloader.to_screen(u'[youjizz] Downloading entry: %s' % self._as_unicode(url))

    def report_webpage(self, url):
        """Report that the video page was downloaded."""
        self._downloader.to_screen(u'[youjizz] Downloaded page: %s' % url)

    def report_title(self, video_title):
        """Report the extracted video title."""
        self._downloader.to_screen(u'[youjizz] Title: %s' % self._as_unicode(video_title))

    def report_embed_page(self, embed_page):
        """Report the URL of the embed page."""
        self._downloader.to_screen(u'[youjizz] Embed Page: %s' % self._as_unicode(embed_page))

    def _real_extract(self, url):
        """Extract the video information for a youjizz.com video page.

        Returns a one-element list holding the info dictionary, or None
        after reporting the problem through the downloader when a page
        cannot be downloaded or an expected pattern is not found.
        """
        # Get webpage content
        try:
            webpage = urllib2.urlopen(url).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % err)
            return
        self.report_webpage(url)

        # Get the video title
        result = re.search(self.VIDEO_TITLE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video title')
            return
        video_title = result.group('title').decode('utf-8').strip()
        self.report_title(video_title)

        # Get the embed page; the whole match is its URL and the named
        # group is the numeric video id.
        result = re.search(self.EMBED_PAGE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract embed page')
            return
        embed_page_url = result.group(0).decode('utf-8').strip()
        video_id = result.group('videoid').decode('utf-8')
        self.report_embed_page(embed_page_url)

        try:
            webpage = urllib2.urlopen(embed_page_url).read()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
            self._downloader.trouble(u'ERROR: unable to download video embed page: %s' % err)
            return

        # Get the video URL
        result = re.search(self.SOURCE_RE, webpage)
        if result is None:
            self._downloader.trouble(u'ERROR: unable to extract video url')
            return
        video_url = result.group('source').decode('utf-8')
        self.report_extract_entry(video_url)

        info = {'id': video_id,
                'url': video_url,
                'uploader': None,
                'upload_date': None,
                'title': video_title,
                'ext': 'flv',
                'format': 'flv',
                'thumbnail': None,
                'description': None,
                'player_url': embed_page_url}

        return [info]

@ -363,7 +363,7 @@ def gen_extractors():
GooglePlusIE(), GooglePlusIE(),
PornotubeIE(), PornotubeIE(),
YouPornIE(), YouPornIE(),
YouJizzIE(),
GenericIE() GenericIE()
] ]

Loading…
Cancel
Save