@ -1088,7 +1088,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} ,
} ,
} ,
} ,
{
{
# artist and track fields should return non-null, per issue #20599
# Youtube Music Auto-generated description
' url ' : ' https://music.youtube.com/watch?v=MgNrAu2pzNs ' ,
' url ' : ' https://music.youtube.com/watch?v=MgNrAu2pzNs ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' MgNrAu2pzNs ' ,
' id ' : ' MgNrAu2pzNs ' ,
@ -1109,11 +1109,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} ,
} ,
} ,
} ,
{
{
# Youtube Music Auto-generated description
# Retrieve 'artist' field from 'Artist:' in video description
# Retrieve 'artist' field from 'Artist:' in video description
# when it is present on youtube music video
# when it is present on youtube music video
# Some videos have release_date and no release_year -
# (release_year should be extracted from release_date)
# https://github.com/ytdl-org/youtube-dl/pull/20742#issuecomment-485740932
' url ' : ' https://www.youtube.com/watch?v=k0jLE7tTwjY ' ,
' url ' : ' https://www.youtube.com/watch?v=k0jLE7tTwjY ' ,
' info_dict ' : {
' info_dict ' : {
' id ' : ' k0jLE7tTwjY ' ,
' id ' : ' k0jLE7tTwjY ' ,
@ -1134,6 +1132,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} ,
} ,
} ,
} ,
{
{
# Youtube Music Auto-generated description
# handle multiple artists on youtube music video
# handle multiple artists on youtube music video
' url ' : ' https://www.youtube.com/watch?v=74qn0eJSjpA ' ,
' url ' : ' https://www.youtube.com/watch?v=74qn0eJSjpA ' ,
' info_dict ' : {
' info_dict ' : {
@ -1155,6 +1154,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
} ,
} ,
} ,
} ,
{
{
# Youtube Music Auto-generated description
# handle youtube music video with release_year and no release_date
# handle youtube music video with release_year and no release_date
' url ' : ' https://www.youtube.com/watch?v=-hcAI0g-f5M ' ,
' url ' : ' https://www.youtube.com/watch?v=-hcAI0g-f5M ' ,
' info_dict ' : {
' info_dict ' : {
@ -2161,36 +2161,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
track = extract_meta ( ' Song ' )
track = extract_meta ( ' Song ' )
artist = extract_meta ( ' Artist ' )
artist = extract_meta ( ' Artist ' )
album = None
release_date = None
# Youtube Music Auto-generated description
release_year = None
album = release_date = release_year = None
if video_description :
description_info = video_description . split ( ' \n \n ' )
mobj = re . search ( r ' (?s)Provided to YouTube by [^ \ n]+ \ n+(?P<track>[^·]+)·(?P<artist>[^ \ n]+) \ n+(?P<album>[^ \ n]+)(?:.+?℗ \ s*(?P<release_year> \ d {4} )(?! \ d))?(?:.+?Released on \ s*: \ s*(?P<release_date> \ d {4} - \ d {2} - \ d {2} ))?(.+? \ nArtist \ s*: \ s*(?P<clean_artist>[^ \ n]+))? ' , video_description )
# If the description of the video has the youtube music auto-generated format, extract additional info
if mobj :
if len ( description_info ) > = 5 and description_info [ - 1 ] == ' Auto-generated by YouTube. ' :
if not track :
track_artist = description_info [ 1 ] . split ( ' · ' )
track = mobj . group ( ' track ' ) . strip ( )
if len ( track_artist ) > = 2 :
if not artist :
if track is None :
artist = mobj . group ( ' clean_artist ' ) or ' , ' . join ( a . strip ( ) for a in mobj . group ( ' artist ' ) . split ( ' · ' ) )
track = track_artist [ 0 ]
# Strip the captured album text itself (stripping the group-name literal is a no-op).
album = mobj . group ( ' album ' ) . strip ( )
if artist is None :
release_year = mobj . group ( ' release_year ' )
artist = re . search ( r ' Artist: ([^ \ n]+) ' , description_info [ - 2 ] )
release_date = mobj . group ( ' release_date ' )
if artist :
if release_date :
artist = artist . group ( 1 )
release_date = release_date . replace ( ' - ' , ' ' )
if artist is None :
if not release_year :
artist = track_artist [ 1 ]
release_year = int ( release_date [ : 4 ] )
# handle multiple artists
if release_year :
if len ( track_artist ) > 2 :
release_year = int ( release_year )
for i in range ( 2 , len ( track_artist ) ) :
artist + = ' , %s ' % track_artist [ i ]
release_year = re . search ( r ' ℗ ([0-9]+) ' , video_description )
if release_year :
release_year = int_or_none ( release_year . group ( 1 ) )
album = description_info [ 2 ]
if description_info [ 4 ] . startswith ( ' Released on: ' ) :
release_date = description_info [ 4 ] . split ( ' : ' ) [ 1 ] . replace ( ' - ' , ' ' )
# extract release_year from release_date if necessary
if release_year is None :
release_year = int_or_none ( release_date [ 0 : 4 ] )
m_episode = re . search (
m_episode = re . search (
r ' <div[^>]+id= " watch7-headline " [^>]*> \ s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b> \ s*S(?P<season> \ d+) \ s*• \ s*E(?P<episode> \ d+)</span> ' ,
r ' <div[^>]+id= " watch7-headline " [^>]*> \ s*<span[^>]*>.*?>(?P<series>[^<]+)</a></b> \ s*S(?P<season> \ d+) \ s*• \ s*E(?P<episode> \ d+)</span> ' ,