From f1d42a83ab47683ddbe7c66393130f63262aeca0 Mon Sep 17 00:00:00 2001
From: Ashish Gupta <39122144+Ashish0804@users.noreply.github.com>
Date: Tue, 28 Sep 2021 02:31:23 +0530
Subject: [PATCH] [Rumble] Add RumbleChannelIE (#1088)

Authored by: Ashish0804
---
Reviewer notes (ignored by git-am):
  * Registers RumbleChannelIE next to RumbleEmbedIE in extractors.py.
  * _VALID_URL captures two named groups: `url` (the whole channel/user
    URL) and `id` (the path segment after /c/ or /user/); _real_extract
    unpacks them positionally via .groups().
  * entries() requests `<url>?page=N` for N = 1, 2, ... and stops on the
    first HTTP 404; any other ExtractorError is re-raised.
  * Every href matched by the `video-item--a` regex is yielded as a
    url_result with `https://rumble.com` prepended.

 yt_dlp/extractor/extractors.py |  5 ++++-
 yt_dlp/extractor/rumble.py     | 37 +++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py
index 4774a3ebb..93934b682 100644
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1205,7 +1205,10 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
 from .ruhd import RUHDIE
-from .rumble import RumbleEmbedIE
+from .rumble import (
+    RumbleEmbedIE,
+    RumbleChannelIE,
+)
 from .rutube import (
     RutubeIE,
     RutubeChannelIE,
diff --git a/yt_dlp/extractor/rumble.py b/yt_dlp/extractor/rumble.py
index b526de76b..49c1f4485 100644
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@@ -1,15 +1,17 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import itertools
 import re
 
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_str, compat_HTTPError
 from ..utils import (
     determine_ext,
     int_or_none,
     parse_iso8601,
     try_get,
+    ExtractorError,
 )
 
 
@@ -75,3 +77,36 @@ class RumbleEmbedIE(InfoExtractor):
             'channel_url': author.get('url'),
             'duration': int_or_none(video.get('duration')),
         }
+
+
+class RumbleChannelIE(InfoExtractor):
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
+
+    _TESTS = [{
+        'url': 'https://rumble.com/c/Styxhexenhammer666',
+        'playlist_mincount': 1160,
+        'info_dict': {
+            'id': 'Styxhexenhammer666',
+        },
+    }, {
+        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
+        'playlist_count': 4,
+        'info_dict': {
+            'id': 'goldenpoodleharleyeuna',
+        },
+    }]
+
+    def entries(self, url, playlist_id):
+        for page in itertools.count(1):
+            try:
+                webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
+            except ExtractorError as e:
+                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
+                    break
+                raise
+            for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
+                yield self.url_result('https://rumble.com' + video_url)
+
+    def _real_extract(self, url):
+        url, playlist_id = self._match_valid_url(url).groups()
+        return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)