[extractor/udemy] Fix lectures that have no URL and detect DRM

Closes #5662
1 year ago · 8d1ddb0805
parent 9bb856998b
commit 8d1ddb0805
1 changed file with 15 additions and 5 deletions
--- a/yt_dlp/extractor/udemy.py
+++ b/yt_dlp/extractor/udemy.py
@@ -11,8 +11,10 @@ from ..utils import (
    int_or_none,
    js_to_json,
    sanitized_Request,
    smuggle_url,
    try_get,
    unescapeHTML,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
 )
@@ -106,7 +108,7 @@ class UdemyIE(InfoExtractor):
            % (course_id, lecture_id),
            lecture_id, 'Downloading lecture JSON', query={
                'fields[lecture]': 'title,description,view_html,asset',
-                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data',
+                'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data,course_is_drmed',
            })
    def _handle_error(self, response):
@@ -199,16 +201,19 @@ class UdemyIE(InfoExtractor):
    def _real_extract(self, url):
        lecture_id = self._match_id(url)
        course_id = unsmuggle_url(url, {})[1].get('course_id')
-        webpage = self._download_webpage(url, lecture_id)
+        webpage = None
-
+        if not course_id:
-        course_id, _ = self._extract_course_info(webpage, lecture_id)
+            webpage = self._download_webpage(url, lecture_id)
            course_id, _ = self._extract_course_info(webpage, lecture_id)
        try:
            lecture = self._download_lecture(course_id, lecture_id)
        except ExtractorError as e:
            # Error could possibly mean we are not enrolled in the course
            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
                webpage = webpage or self._download_webpage(url, lecture_id)
                self._enroll_course(url, webpage, course_id)
                lecture = self._download_lecture(course_id, lecture_id)
            else:
@@ -391,6 +396,9 @@ class UdemyIE(InfoExtractor):
                if f.get('url'):
                    formats.append(f)
        if not formats and asset.get('course_is_drmed'):
            self.report_drm(video_id)
        return {
            'id': video_id,
            'title': title,
@@ -449,7 +457,9 @@ class UdemyCourseIE(UdemyIE):  # XXX: Do not subclass from concrete IE
                if lecture_id:
                    entry = {
                        '_type': 'url_transparent',
-                        'url': 'https://www.udemy.com/%s/learn/v4/t/lecture/%s' % (course_path, entry['id']),
+                        'url': smuggle_url(
                            f'https://www.udemy.com/{course_path}/learn/v4/t/lecture/{entry["id"]}',
                            {'course_id': course_id}),
                        'title': entry.get('title'),
                        'ie_key': UdemyIE.ie_key(),
                    }