|
|
|
@ -61,6 +61,23 @@ class MotherlessIE(InfoExtractor):
|
|
|
|
|
# no keywords
|
|
|
|
|
'url': 'http://motherless.com/8B4BBC1',
|
|
|
|
|
'only_matching': True,
|
|
|
|
|
}, {
|
|
|
|
|
# see https://motherless.com/videos/recent for recent videos with
|
|
|
|
|
# upload date in "ago" format (e.g. "20h ago" / "1d ago")
|
|
|
|
|
'url': 'https://motherless.com/3C3E2CF',
|
|
|
|
|
'info_dict': {
|
|
|
|
|
'id': '3C3E2CF',
|
|
|
|
|
'ext': 'mp4',
|
|
|
|
|
'title': 'a/ Hot Teens',
|
|
|
|
|
'categories': list,
|
|
|
|
|
'upload_date': '20210104',
|
|
|
|
|
'uploader_id': 'yonbiw',
|
|
|
|
|
'thumbnail': r're:https?://.*\.jpg',
|
|
|
|
|
'age_limit': 18,
|
|
|
|
|
},
|
|
|
|
|
'params': {
|
|
|
|
|
'skip_download': True,
|
|
|
|
|
},
|
|
|
|
|
}]
|
|
|
|
|
|
|
|
|
|
def _real_extract(self, url):
|
|
|
|
@ -85,29 +102,28 @@ class MotherlessIE(InfoExtractor):
|
|
|
|
|
or 'http://cdn4.videos.motherlessmedia.com/videos/%s.mp4?fs=opencloud' % video_id)
|
|
|
|
|
age_limit = self._rta_search(webpage)
|
|
|
|
|
view_count = str_to_int(self._html_search_regex(
|
|
|
|
|
(r'>([\d,.]+)\s+Views<', # 1,234,567 Views
|
|
|
|
|
r'<strong>Views</strong>\s+([^<]+)<'),
|
|
|
|
|
(r'>([\d,.]+)\s+Views<', r'<strong>Views</strong>\s+([^<]+)<'),
|
|
|
|
|
webpage, 'view count', fatal=False))
|
|
|
|
|
like_count = str_to_int(self._html_search_regex(
|
|
|
|
|
(r'>([\d,.]+)\s+Favorites<', # 1,234 Favorites
|
|
|
|
|
(r'>([\d,.]+)\s+Favorites<',
|
|
|
|
|
r'<strong>Favorited</strong>\s+([^<]+)<'),
|
|
|
|
|
webpage, 'like count', fatal=False))
|
|
|
|
|
|
|
|
|
|
upload_date = self._html_search_regex(
|
|
|
|
|
(r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<',
|
|
|
|
|
r'class=["\']count[^>]+>(\d+[hd])\s+[aA]go<', # 20h/1d ago
|
|
|
|
|
r'<strong>Uploaded</strong>\s+([^<]+)<'), webpage, 'upload date')
|
|
|
|
|
relative = re.match(r'(\d+)([hd])$', upload_date)
|
|
|
|
|
if relative:
|
|
|
|
|
delta = int(relative.group(1))
|
|
|
|
|
unit = relative.group(2)
|
|
|
|
|
if unit == 'h':
|
|
|
|
|
delta_t = datetime.timedelta(hours=delta)
|
|
|
|
|
else: # unit == 'd'
|
|
|
|
|
delta_t = datetime.timedelta(days=delta)
|
|
|
|
|
upload_date = (datetime.datetime.now() - delta_t).strftime('%Y%m%d')
|
|
|
|
|
else:
|
|
|
|
|
upload_date = unified_strdate(upload_date)
|
|
|
|
|
upload_date = unified_strdate(self._search_regex(
|
|
|
|
|
r'class=["\']count[^>]+>(\d+\s+[a-zA-Z]{3}\s+\d{4})<', webpage,
|
|
|
|
|
'upload date', default=None))
|
|
|
|
|
if not upload_date:
|
|
|
|
|
uploaded_ago = self._search_regex(
|
|
|
|
|
r'>\s*(\d+[hd])\s+[aA]go\b', webpage, 'uploaded ago',
|
|
|
|
|
default=None)
|
|
|
|
|
if uploaded_ago:
|
|
|
|
|
delta = int(uploaded_ago[:-1])
|
|
|
|
|
_AGO_UNITS = {
|
|
|
|
|
'h': 'hours',
|
|
|
|
|
'd': 'days',
|
|
|
|
|
}
|
|
|
|
|
kwargs = {_AGO_UNITS.get(uploaded_ago[-1]): delta}
|
|
|
|
|
upload_date = (datetime.datetime.utcnow() - datetime.timedelta(**kwargs)).strftime('%Y%m%d')
|
|
|
|
|
|
|
|
|
|
comment_count = webpage.count('class="media-comment-contents"')
|
|
|
|
|
uploader_id = self._html_search_regex(
|
|
|
|
|