You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
yt-dlp/yt_dlp/extractor/lefigaro.py

137 lines
5.3 KiB
Python

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json
import math
from .common import InfoExtractor
from ..utils import (
InAdvancePagedList,
traverse_obj,
)
class LeFigaroVideoEmbedIE(InfoExtractor):
    """Extractor for ``video.lefigaro.fr/embed/...`` player pages.

    The page is downloaded, its embedded page data is read to find the
    player metadata, and the result is a ``url_result`` pointing at
    ``jwplatform:<videoId>`` with title, description and thumbnail attached.
    """

    _VALID_URL = r'https?://video\.lefigaro\.fr/embed/[^?#]+/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://video.lefigaro.fr/embed/figaro/video/les-francais-ne-veulent-ils-plus-travailler-suivez-en-direct-le-club-le-figaro-idees/',
        'md5': 'a0c3069b7e4c4526abf0053a7713f56f',
        'info_dict': {
            'id': 'g9j7Eovo',
            'title': 'Les Français ne veulent-ils plus travailler ? Retrouvez Le Club Le Figaro Idées',
            'description': 'md5:862b8813148ba4bf10763a65a69dfe41',
            'upload_date': '20230216',
            'timestamp': 1676581615,
            'duration': 3076,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'ext': 'mp4',
        },
    }, {
        'url': 'https://video.lefigaro.fr/embed/figaro/video/intelligence-artificielle-faut-il-sen-mefier/',
        'md5': '319c662943dd777bab835cae1e2d73a5',
        'info_dict': {
            'id': 'LeAgybyc',
            # The apostrophe in "s'en" was missing from the expected title.
            # Restored as an explicit escape for the typographic apostrophe
            # (U+2019) so it cannot be silently dropped again.
            # NOTE(review): confirm the exact code point against the live title.
            'title': 'Intelligence artificielle : faut-il s\u2019en méfier ?',
            'description': 'md5:249d136e3e5934a67c8cb704f8abf4d2',
            'upload_date': '20230124',
            'timestamp': 1674584477,
            'duration': 860,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'ext': 'mp4',
        },
    }]

    # Test pages on the main site (non-/embed/ URLs).
    # NOTE(review): presumably consumed by the framework's embed-detection
    # tests; how _WEBPAGE_TESTS is used is not visible in this file.
    _WEBPAGE_TESTS = [{
        'url': 'https://video.lefigaro.fr/figaro/video/suivez-en-direct-le-club-le-figaro-international-avec-philippe-gelie-9/',
        'md5': '6289f9489efb969e38245f31721596fe',
        'info_dict': {
            'id': 'QChnbPYA',
            'title': 'Où en est le couple franco-allemand ? Retrouvez Le Club Le Figaro International',
            'description': 'md5:6f47235b7e7c93b366fd8ebfa10572ac',
            'upload_date': '20230123',
            'timestamp': 1674503575,
            'duration': 3153,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'age_limit': 0,
            'ext': 'mp4',
        },
    }, {
        'url': 'https://video.lefigaro.fr/figaro/video/la-philosophe-nathalie-sarthou-lajus-est-linvitee-du-figaro-live/',
        'md5': 'f6df814cae53e85937621599d2967520',
        'info_dict': {
            'id': 'QJzqoNbf',
            # Same restoration as above: "l'invitée" had lost its apostrophe.
            # NOTE(review): confirm the exact code point against the live title.
            'title': 'La philosophe Nathalie Sarthou-Lajus est l\u2019invitée du Figaro Live',
            'description': 'md5:c586793bb72e726c83aa257f99a8c8c4',
            'upload_date': '20230217',
            'timestamp': 1676661986,
            'duration': 1558,
            'thumbnail': r're:^https?://[^?#]+\.(?:jpeg|jpg)',
            'age_limit': 0,
            'ext': 'mp4',
        },
    }]

    def _real_extract(self, url):
        """Resolve an embed page URL to a ``jwplatform:`` URL result.

        Raises KeyError if the page data lacks the expected
        props/pageProps/initialProps/pageData/playerData path or 'videoId'.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # The player metadata sits deep inside the page's embedded data blob
        player_data = self._search_nextjs_data(
            webpage, display_id)['props']['pageProps']['initialProps']['pageData']['playerData']

        # 'videoId' is mandatory; the remaining fields are optional extras
        return self.url_result(
            f'jwplatform:{player_data["videoId"]}', title=player_data.get('title'),
            description=player_data.get('description'), thumbnail=player_data.get('poster'))
class LeFigaroVideoSectionIE(InfoExtractor):
    """Playlist extractor for section pages ``video.lefigaro.fr/figaro/<slug>/``.

    Videos are listed through the GraphQL endpoint queried by
    ``_get_api_response``, ``_PAGE_SIZE`` videos per page. Each listed video
    is handed to ``LeFigaroVideoEmbedIE`` through its ``embedUrl``.
    """

    _VALID_URL = r'https?://video\.lefigaro\.fr/figaro/(?P<id>[\w-]+)/?(?:[#?]|$)'
    _TESTS = [{
        'url': 'https://video.lefigaro.fr/figaro/le-club-le-figaro-idees/',
        'info_dict': {
            'id': 'le-club-le-figaro-idees',
            'title': 'Le Club Le Figaro Idées',
        },
        'playlist_mincount': 14,
    }, {
        'url': 'https://video.lefigaro.fr/figaro/factu/',
        'info_dict': {
            'id': 'factu',
            'title': 'Factu',
        },
        'playlist_mincount': 519,
    }]

    # Number of videos requested per API page (sent as 'videosLimit')
    _PAGE_SIZE = 20

    def _get_api_response(self, display_id, page_num, note=None):
        """Download one page of a section listing and return the parsed JSON.

        display_id -- section slug, sent as the 'slug' variable
        page_num   -- page number sent to the API (_real_extract starts at 1)
        note       -- optional progress message passed on to _download_json
        """
        return self._download_json(
            'https://api-graphql.lefigaro.fr/graphql', display_id, note=note,
            query={
                # Identifier of the query to run; the query text is not sent
                'id': 'flive-website_UpdateListPage_1fb260f996bca2d78960805ac382544186b3225f5bedb43ad08b9b8abef79af6',
                # The variables travel as one JSON-encoded query parameter
                'variables': json.dumps({
                    'slug': display_id,
                    'videosLimit': self._PAGE_SIZE,
                    'sort': 'DESC',
                    'order': 'PUBLISHED_AT',
                    'page': page_num,
                }).encode(),
            })

    def _real_extract(self, url):
        """Build a paged playlist of all videos in the section.

        Raises KeyError/IndexError if an API response lacks the expected
        data/playlist, 'videoCount' or jsonLd[0]/itemListElement fields.
        """
        display_id = self._match_id(url)

        # The first page is needed up front for 'videoCount' and 'title'
        first_page = self._get_api_response(display_id, page_num=1)
        playlist_info = first_page['data']['playlist']

        def page_func(page_num):
            # page_num is one less than the API page number
            # (index 0 corresponds to API page 1, fetched above).
            if page_num == 0:
                # Reuse the response we already hold instead of downloading
                # the same page a second time
                api_response = first_page
            else:
                api_response = self._get_api_response(
                    display_id, page_num + 1, note=f'Downloading page {page_num + 1}')

            return [self.url_result(
                video['embedUrl'], LeFigaroVideoEmbedIE, **traverse_obj(video, {
                    'title': 'name',
                    'description': 'description',
                    'thumbnail': 'thumbnailUrl',
                })) for video in api_response['data']['playlist']['jsonLd'][0]['itemListElement']]

        # The page count is known in advance from the reported video total
        entries = InAdvancePagedList(
            page_func, math.ceil(playlist_info['videoCount'] / self._PAGE_SIZE), self._PAGE_SIZE)

        return self.playlist_result(
            entries, playlist_id=display_id, playlist_title=playlist_info.get('title'))