You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

254 lines
10 KiB

import functools
import re
from .common import InfoExtractor
from ..compat import compat_HTTPError
from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
class VideocampusSachsenIE(InfoExtractor):
# Extractor (InfoExtractor subclass) for a single video page; its
# `_real_extract` returns one info dict with formats and subtitles.
# NOTE(review): this copy of the file appears to have lost its indentation and
# a number of lines (closing brackets, the `_INSTANCES` definition, URLs).
# Confirm against the upstream source before relying on it.
#
# URL pattern, verbose mode: the `host` group is an alternation of the
# regex-escaped `_INSTANCES` entries joined with '|'. `_INSTANCES` is not
# defined in the visible lines.
# NOTE(review): `_real_extract` reads the groups `id`, `tmp_id`, `display_id`
# and `embed_id`, but the pattern below contains no alternatives defining
# them - presumably dropped from this copy; TODO confirm.
_VALID_URL = r'''(?x)https?://(?P<host>%s)/(?:
)''' % ('|'.join(map(re.escape, _INSTANCES)))
# Test cases: each entry pairs a 'url' with the expected 'info_dict' fields.
# NOTE(review): every 'url' (and 'thumbnail') value is an empty string here and
# the braces separating the individual test dicts are not visible - the
# entries below run into each other as written.
_TESTS = [
'url': '',
'info_dict': {
'id': 'e6b9349905c1628631f175712250f2a1',
'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
'thumbnail': '',
'ext': 'mp4',
'url': '',
'info_dict': {
'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?',
'description': 'md5:196aa3b0509a526db62f84679522a2f5',
'thumbnail': '',
'display_id': 'Was-ist-selbstgesteuertes-Lernen',
'ext': 'mp4',
'url': '',
'info_dict': {
'id': '09d4ed029002eb1bdda610f1103dd54c',
'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
'thumbnail': '',
'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
'ext': 'mp4',
'url': '',
'info_dict': {
'url': '',
'id': '0183356e41af7bfb83d7667b20d9b6a3',
'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
'description': 'md5:508958bd93e0ca002ac731d94182a54f',
'thumbnail': '',
'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
'ext': 'mp4',
'url': '',
'info_dict': {
'id': 'c8816f1cc942c12b6cce57c835cffd7c',
'title': 'Preisverleihung »Produkte des Jahres 2022«',
'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
'thumbnail': '',
'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
'ext': 'mp4',
'url': '',
'info_dict': {
'id': 'fc99c527e4205b121cb7c74433469262',
'title': 'Was ist selbstgesteuertes Lernen?',
'ext': 'mp4',
def _real_extract(self, url):
# Extract a single video from `url` and return its info dict with the keys
# id, title, description, thumbnail, display_id, formats and subtitles.
# NOTE(review): several lines of this method appear to be missing from this
# copy (see the inline notes); the code is kept exactly as found.
#
# Split the URL into the named groups of _VALID_URL.
host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
'host', 'id', 'tmp_id', 'display_id', 'embed_id')
# The page download is non-fatal; an empty string stands in for a failed fetch.
webpage = self._download_webpage(url, video_id or tmp_id, fatal=False) or ''
if not video_id:
# No id in the URL: use the embed id if present, otherwise search the page.
# NOTE(review): the regex pattern argument seems to be missing from this
# call - only `webpage` and the field name are passed; TODO confirm.
video_id = embed_id or self._html_search_regex(
webpage, 'video_id')
if not (display_id or tmp_id):
# Title, description from embedded page's meta wouldn't be correct
title = self._html_search_regex(r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
description = None
thumbnail = None
# NOTE(review): presumably an `else:` preceded the three meta lookups below;
# without it they would overwrite the values assigned above - confirm.
title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
description = self._html_search_meta(
('og:description', 'twitter:description', 'description'), webpage, fatal=False)
thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)
# Defaults, so that both names exist if the HLS extraction below fails.
formats, subtitles = [], {}
# NOTE(review): the `try:` line matching the `except` below and the manifest
# URL argument of this call are not visible in this copy - TODO confirm.
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
video_id, 'mp4', m3u8_id='hls', fatal=True)
except ExtractorError as e:
# Only HTTP errors with status 404 or 500 fail the condition below.
# NOTE(review): the body of this `if` is not visible (probably a re-raise of
# every other error) - confirm.
if not isinstance(e.cause, compat_HTTPError) or e.cause.code not in (404, 500):
# Fallback format: a direct MP4 URL built from the host and the video id.
formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})
# NOTE(review): the closing brace of the returned dict is not visible here.
return {
'id': video_id,
'title': title,
'description': description,
'thumbnail': thumbnail,
'display_id': display_id,
'formats': formats,
'subtitles': subtitles,
class ViMPPlaylistIE(InfoExtractor):
# Playlist extractor (InfoExtractor subclass); its `_real_extract` handles
# album, category and channel URLs and hands the individual videos to
# VideocampusSachsenIE.
# NOTE(review): this copy appears to have lost its indentation and some lines;
# `_PAGE_SIZE`, which the methods of this class read, is not defined in the
# visible lines - TODO confirm against the upstream source.
IE_NAME = 'ViMP:Playlist'
# URL pattern, verbose mode: unlike VideocampusSachsenIE, the `host` group here
# also captures the scheme. The host alternation reuses
# VideocampusSachsenIE._INSTANCES (regex-escaped, joined with '|').
# NOTE(review): `_real_extract` reads the groups `album_id`, `mode`, `name`
# and `id`, but no alternatives defining them are visible in the pattern below.
_VALID_URL = r'''(?x)(?P<host>https?://(?:%s))/(?:
)''' % '|'.join(map(re.escape, VideocampusSachsenIE._INSTANCES))
# Test cases: one per id prefix seen below (channel-, album-, category-).
# NOTE(review): the 'url' values are empty and the braces closing each
# 'info_dict' and the list itself are not visible, so it cannot be told from
# here whether 'playlist_mincount' belongs inside or beside 'info_dict'.
_TESTS = [{
'url': '',
'info_dict': {
'id': 'channel-3',
'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
'playlist_mincount': 9,
}, {
'url': '',
'info_dict': {
'id': 'album-208',
'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
'playlist_mincount': 4,
}, {
'url': '',
'info_dict': {
'id': 'category-91',
'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
'playlist_mincount': 7,
def _fetch_page(self, host, url_part, id, data, page):
    """Yield a URL result for every video linked from one page of a listing.

    Args:
        host: Scheme-and-host prefix (the ``host`` group of ``_VALID_URL``);
            it is prepended both to the AJAX path and to each link found.
        url_part: Path suffix appended to ``/media/ajax/component/boxList/``.
        id: Identifier handed to the downloader for this request.
        data: Mapping of form fields; URL-encoded and sent as the request
            body.
        page: Page number, sent as the ``page`` query parameter.

    Yields:
        One ``self.url_result(...)`` entry per link, to be handled by
        ``VideocampusSachsenIE``.
    """
    webpage = self._download_webpage(
        f'{host}/media/ajax/component/boxList/{url_part}', id,
        query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))

    # Every double-quoted string containing "/video/" is taken as a video link.
    for video_path in re.findall(r'"([^"]+/video/[^"]+)"', webpage):
        yield self.url_result(host + video_path, VideocampusSachsenIE)
def _real_extract(self, url):
    """Return a lazily paged playlist for an album, category or channel URL.

    The playlist title is taken from the page's ``title`` meta tag, falling
    back to the HTML title. Entries are fetched page by page through
    ``_fetch_page``, ``self._PAGE_SIZE`` items at a time.

    NOTE(review): ``_PAGE_SIZE`` is read from the class but is not defined in
    the visible part of this file - confirm it exists.

    Args:
        url: Playlist URL matching ``_VALID_URL``.

    Returns:
        The result of ``self.playlist_result(...)`` with id
        ``'<mode>-<playlist id>'``.
    """
    host, album_id, mode, name, item_id = self._match_valid_url(url).group(
        'host', 'album_id', 'mode', 'name', 'id')
    # Albums carry their id in `album_id`; categories and channels in `id`.
    playlist_id = album_id or item_id

    # Non-fatal download: a failed fetch only costs us the title.
    webpage = self._download_webpage(url, playlist_id, fatal=False) or ''
    title = (self._html_search_meta('title', webpage, fatal=False)
             or self._html_extract_title(webpage))

    if album_id:
        url_part = f'aid/{album_id}'
    elif mode == 'category':
        url_part = f'category/{name}/category_id/{item_id}'
    else:
        url_part = f'title/{name}/channel/{item_id}'

    # Album URLs yield no `mode` group, so default it after choosing url_part.
    mode = mode or 'album'
    data = {
        'vars[mode]': mode,
        f'vars[{mode}]': playlist_id,
        'vars[context]': '4' if album_id else '1' if mode == 'category' else '3',
        'vars[context_id]': playlist_id,
        'vars[layout]': 'thumb',
        'vars[per_page][thumb]': str(self._PAGE_SIZE),
    }

    # Bind everything except the page number; the paged list supplies it.
    fetch_page = functools.partial(
        self._fetch_page, host, url_part, playlist_id, data)
    return self.playlist_result(
        OnDemandPagedList(fetch_page, self._PAGE_SIZE),
        playlist_title=title, id=f'{mode}-{playlist_id}')