@ -2,7 +2,6 @@ import re
from . common import InfoExtractor
from . common import InfoExtractor
from . . utils import (
from . . utils import (
determine_ext ,
float_or_none ,
float_or_none ,
HEADRequest ,
HEADRequest ,
int_or_none ,
int_or_none ,
@ -13,13 +12,13 @@ from ..utils import (
class LA7IE ( InfoExtractor ) :
class LA7IE ( InfoExtractor ) :
IE_NAME = ' la7.it '
IE_NAME = ' la7.it '
_VALID_URL = r ''' (?x) ( https?://)? (?:
_VALID_URL = r ''' (?x) https?://(?:
( ? : www \. ) ? la7 \. it / ( [ ^ / ] + ) / ( ? : rivedila7 | video ) / |
( ? : www \. ) ? la7 \. it / ( [ ^ / ] + ) / ( ? : rivedila7 | video | news ) / |
tg \. la7 \. it / repliche - tgla7 \? id =
tg \. la7 \. it / repliche - tgla7 \? id =
) ( ? P < id > . + ) '''
) ( ? P < id > . + ) '''
_TESTS = [ {
_TESTS = [ {
# 'src' is a plain URL
# single quality video
' url ' : ' http://www.la7.it/crozza/video/inccool8-02-10-2015-163722 ' ,
' url ' : ' http://www.la7.it/crozza/video/inccool8-02-10-2015-163722 ' ,
' md5 ' : ' 8b613ffc0c4bf9b9e377169fc19c214c ' ,
' md5 ' : ' 8b613ffc0c4bf9b9e377169fc19c214c ' ,
' info_dict ' : {
' info_dict ' : {
@ -29,6 +28,20 @@ class LA7IE(InfoExtractor):
' description ' : ' Benvenuti nell \' incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico ' ,
' description ' : ' Benvenuti nell \' incredibile mondo della INC. COOL. 8. dove “INC.” sta per “Incorporated” “COOL” sta per “fashion” ed Eight sta per il gesto atletico ' ,
' thumbnail ' : ' re:^https?://.* ' ,
' thumbnail ' : ' re:^https?://.* ' ,
' upload_date ' : ' 20151002 ' ,
' upload_date ' : ' 20151002 ' ,
' formats ' : ' count:4 ' ,
} ,
} , {
# multiple quality video
' url ' : ' https://www.la7.it/calcio-femminile/news/il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736 ' ,
' md5 ' : ' d2370e78f75e8d1238cb3a0db9a2eda3 ' ,
' info_dict ' : {
' id ' : ' il-gol-di-lindsey-thomas-fiorentina-vs-milan-serie-a-calcio-femminile-26-11-2022-461736 ' ,
' ext ' : ' mp4 ' ,
' title ' : ' Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile ' ,
' description ' : ' Il gol di Lindsey Thomas | Fiorentina vs Milan | Serie A Calcio Femminile ' ,
' thumbnail ' : ' re:^https?://.* ' ,
' upload_date ' : ' 20221126 ' ,
' formats ' : ' count:8 ' ,
} ,
} ,
} , {
} , {
' url ' : ' http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077 ' ,
' url ' : ' http://www.la7.it/omnibus/rivedila7/omnibus-news-02-07-2016-189077 ' ,
@ -39,7 +52,7 @@ class LA7IE(InfoExtractor):
def _generate_mp4_url ( self , quality , m3u8_formats ) :
def _generate_mp4_url ( self , quality , m3u8_formats ) :
for f in m3u8_formats :
for f in m3u8_formats :
if f [ ' vcodec ' ] != ' none ' and quality in f [ ' url ' ] :
if f [ ' vcodec ' ] != ' none ' and quality in f [ ' url ' ] :
http_url = ' %s %s .mp4 ' % ( self . _HOST , quality )
http_url = f' { self . _HOST } { quality } .mp4 '
urlh = self . _request_webpage (
urlh = self . _request_webpage (
HEADRequest ( http_url ) , quality ,
HEADRequest ( http_url ) , quality ,
@ -58,12 +71,13 @@ class LA7IE(InfoExtractor):
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :
video_id = self . _match_id ( url )
video_id = self . _match_id ( url )
webpage = self . _download_webpage ( url , video_id )
if not url . startswith ( ' http ' ) :
if re . search ( r ' (?i)(drmsupport \ s*: \ s*true) \ s* ' , webpage ) :
url = ' %s // %s ' % ( self . http_scheme ( ) , url )
self . report_drm ( video_id )
webpage = self . _download_webpage ( url , video_id )
video_path = self . _search_regex (
video_path = self . _search_regex ( r ' (/content/.*?).mp4 ' , webpage , ' video_path ' )
r ' (/content/[ \ w/,]+?) \ .mp4(?: \ .csmil)?/master \ .m3u8 ' , webpage , ' video_path ' )
formats = self . _extract_mpd_formats (
formats = self . _extract_mpd_formats (
f ' { self . _HOST } /local/dash/, { video_path } .mp4.urlset/manifest.mpd ' ,
f ' { self . _HOST } /local/dash/, { video_path } .mp4.urlset/manifest.mpd ' ,
@ -90,8 +104,7 @@ class LA7IE(InfoExtractor):
class LA7PodcastEpisodeIE ( InfoExtractor ) :
class LA7PodcastEpisodeIE ( InfoExtractor ) :
IE_NAME = ' la7.it:pod:episode '
IE_NAME = ' la7.it:pod:episode '
_VALID_URL = r ''' (?x)(https?://)?
_VALID_URL = r ' https?://(?:www \ .)?la7 \ .it/[^/]+/podcast/([^/]+-)?(?P<id> \ d+) '
( ? : www \. ) ? la7 \. it / [ ^ / ] + / podcast / ( [ ^ / ] + - ) ? ( ? P < id > \d + ) '''
_TESTS = [ {
_TESTS = [ {
' url ' : ' https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497 ' ,
' url ' : ' https://www.la7.it/voicetown/podcast/la-carezza-delle-memoria-di-carlo-verdone-23-03-2021-371497 ' ,
@ -125,14 +138,15 @@ class LA7PodcastEpisodeIE(InfoExtractor):
webpage , ' video_id ' , group = ' vid ' )
webpage , ' video_id ' , group = ' vid ' )
media_url = self . _search_regex (
media_url = self . _search_regex (
( r ' src :\ s*([ \' " ])(?P<url> . +?mp3.+?)\ 1 ' ,
( r ' src \ s* :\ s*([ \' " ])(?P<url> \ S +?mp3.+?)\ 1 ' ,
r ' data-podcast =([\' " ])(?P<url> . +?mp3.+?)\ 1 ' ) ,
r ' data-podcast \ s* =\ s* ([\' " ])(?P<url> \ S +?mp3.+?)\ 1 ' ) ,
webpage , ' media_url ' , group = ' url ' )
webpage , ' media_url ' , group = ' url ' )
ext = determine_ext ( media_url )
formats = [ {
formats = [ {
' url ' : media_url ,
' url ' : media_url ,
' format_id ' : ext ,
' format_id ' : ' http-mp3 ' ,
' ext ' : ext ,
' ext ' : ' mp3 ' ,
' acodec ' : ' mp3 ' ,
' vcodec ' : ' none ' ,
} ]
} ]
title = self . _html_search_regex (
title = self . _html_search_regex (
@ -173,7 +187,7 @@ class LA7PodcastEpisodeIE(InfoExtractor):
# and title is the same as the show_title
# and title is the same as the show_title
# add the date to the title
# add the date to the title
if date and not date_alt and ppn and ppn . lower ( ) == title . lower ( ) :
if date and not date_alt and ppn and ppn . lower ( ) == title . lower ( ) :
title += ' del %s ' % date
title = f ' { title } del { date } '
return {
return {
' id ' : video_id ,
' id ' : video_id ,
' title ' : title ,
' title ' : title ,
@ -193,7 +207,7 @@ class LA7PodcastEpisodeIE(InfoExtractor):
class LA7PodcastIE ( LA7PodcastEpisodeIE ) : # XXX: Do not subclass from concrete IE
class LA7PodcastIE ( LA7PodcastEpisodeIE ) : # XXX: Do not subclass from concrete IE
IE_NAME = ' la7.it:podcast '
IE_NAME = ' la7.it:podcast '
_VALID_URL = r ' ( https?://)? (www\ .)?la7 \ .it/(?P<id>[^/]+)/podcast/?(?:$|[#?]) '
_VALID_URL = r ' https?://(?: www\ .)?la7 \ .it/(?P<id>[^/]+)/podcast/?(?:$|[#?]) '
_TESTS = [ {
_TESTS = [ {
' url ' : ' https://www.la7.it/propagandalive/podcast ' ,
' url ' : ' https://www.la7.it/propagandalive/podcast ' ,
@ -201,7 +215,7 @@ class LA7PodcastIE(LA7PodcastEpisodeIE): # XXX: Do not subclass from concrete I
' id ' : ' propagandalive ' ,
' id ' : ' propagandalive ' ,
' title ' : " Propaganda Live " ,
' title ' : " Propaganda Live " ,
} ,
} ,
' playlist_count ' : 10 ,
' playlist_count _min ' : 10 ,
} ]
} ]
def _real_extract ( self , url ) :
def _real_extract ( self , url ) :