[ie/youtube] Remove broken OAuth support (#11558)

Closes #11462 Authored by: bashonly
5 days ago · 52c0ffe40a
parent 637d62a3a9
commit 52c0ffe40a
1 changed files with 18 additions and 222 deletions
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@ -22,7 +22,7 @@ import urllib.parse
 from .common import InfoExtractor, SearchInfoExtractor
 from .openload import PhantomJSwrapper
 from ..jsinterp import JSInterpreter
-from ..networking.exceptions import HTTPError, TransportError, network_exceptions
+from ..networking.exceptions import HTTPError, network_exceptions
 from ..utils import (
    NO_DEFAULT,
    ExtractorError,
@ -50,12 +50,12 @@ from ..utils import (
    parse_iso8601,
    parse_qs,
    qualities,
+    remove_end,
    remove_start,
    smuggle_url,
    str_or_none,
    str_to_int,
    strftime_or_none,
-    time_seconds,
    traverse_obj,
    try_call,
    try_get,
@ -573,208 +573,18 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        self._check_login_required()

    def _perform_login(self, username, password):
-        auth_type, _, user = (username or '').partition('+')
-
-        if auth_type != 'oauth':
-            raise ExtractorError(self._youtube_login_hint, expected=True)
-
-        self._initialize_oauth(user, password)
-
-    '''
-    OAuth 2.0 Device Authorization Grant flow, used by the YouTube TV client (youtube.com/tv).
-
-    For more information regarding OAuth 2.0 and the Device Authorization Grant flow in general, see:
-    - https://developers.google.com/identity/protocols/oauth2/limited-input-device
-    - https://accounts.google.com/.well-known/openid-configuration
-    - https://www.rfc-editor.org/rfc/rfc8628
-    - https://www.rfc-editor.org/rfc/rfc6749
-
-    Note: The official client appears to use a proxied version of the oauth2 endpoints on youtube.com/o/oauth2,
-    which applies some modifications to the response (such as returning errors as 200 OK).
-    Since the client works with the standard API, we will use that as it is well-documented.
-    '''
-
-    _OAUTH_PROFILE = None
-    _OAUTH_ACCESS_TOKEN_CACHE = {}
-    _OAUTH_DISPLAY_ID = 'oauth'
-
-    # YouTube TV (TVHTML5) client. You can find these at youtube.com/tv
-    _OAUTH_CLIENT_ID = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
-    _OAUTH_CLIENT_SECRET = 'SboVhoG9s0rNafixCSGGKXAT'
-    _OAUTH_SCOPE = 'http://gdata.youtube.com https://www.googleapis.com/auth/youtube-paid-content'
-
-    # From https://accounts.google.com/.well-known/openid-configuration
-    # Technically, these should be fetched dynamically and not hard-coded.
-    # However, as these endpoints rarely change, we can risk saving an extra request for every invocation.
-    _OAUTH_DEVICE_AUTHORIZATION_ENDPOINT = 'https://oauth2.googleapis.com/device/code'
-    _OAUTH_TOKEN_ENDPOINT = 'https://oauth2.googleapis.com/token'
-
-    @property
-    def _oauth_cache_key(self):
-        return f'oauth_refresh_token_{self._OAUTH_PROFILE}'
-
-    def _read_oauth_error_response(self, response):
-        return traverse_obj(
-            self._webpage_read_content(response, self._OAUTH_TOKEN_ENDPOINT, self._OAUTH_DISPLAY_ID, fatal=False),
-            ({json.loads}, 'error', {str}))
-
-    def _set_oauth_info(self, token_response):
-        YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.setdefault(self._OAUTH_PROFILE, {}).update({
-            'access_token': token_response['access_token'],
-            'token_type': token_response['token_type'],
-            'expiry': time_seconds(
-                seconds=traverse_obj(token_response, ('expires_in', {float_or_none}), default=300) - 10),
-        })
-        refresh_token = traverse_obj(token_response, ('refresh_token', {str}))
-        if refresh_token:
-            self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
-            YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
-
-    def _initialize_oauth(self, user, refresh_token):
-        self._OAUTH_PROFILE = user or 'default'
-
-        if self._OAUTH_PROFILE in YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE:
-            self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Using cached access token for profile "{self._OAUTH_PROFILE}"')
-            return
-
-        YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE] = {}
-
-        if refresh_token:
-            msg = f'{self._OAUTH_DISPLAY_ID}: Using password input as refresh token'
-            if self.get_param('cachedir') is not False:
-                msg += ' and caching token to disk; you should supply an empty password next time'
-            self.to_screen(msg)
-            self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, refresh_token)
-        else:
-            refresh_token = self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key)
-
-        if refresh_token:
-            YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE[self._OAUTH_PROFILE]['refresh_token'] = refresh_token
-            try:
-                token_response = self._refresh_token(refresh_token)
-            except ExtractorError as e:
-                error_msg = str(e.orig_msg).replace('Failed to refresh access token: ', '')
-                self.report_warning(f'{self._OAUTH_DISPLAY_ID}: Failed to refresh access token: {error_msg}')
-                token_response = self._oauth_authorize
-        else:
-            token_response = self._oauth_authorize
-
-        self._set_oauth_info(token_response)
-        self.write_debug(f'{self._OAUTH_DISPLAY_ID}: Logged in using profile "{self._OAUTH_PROFILE}"')
-
-    def _refresh_token(self, refresh_token):
-        try:
-            token_response = self._download_json(
-                self._OAUTH_TOKEN_ENDPOINT,
-                video_id=self._OAUTH_DISPLAY_ID,
-                note='Refreshing access token',
-                data=json.dumps({
-                    'client_id': self._OAUTH_CLIENT_ID,
-                    'client_secret': self._OAUTH_CLIENT_SECRET,
-                    'refresh_token': refresh_token,
-                    'grant_type': 'refresh_token',
-                }).encode(),
-                headers={'Content-Type': 'application/json'})
-        except ExtractorError as e:
-            if isinstance(e.cause, HTTPError):
-                error = self._read_oauth_error_response(e.cause.response)
-                if error == 'invalid_grant':
-                    # RFC6749 § 5.2
-                    raise ExtractorError(
-                        'Failed to refresh access token: Refresh token is invalid, revoked, or expired (invalid_grant)',
-                        expected=True, video_id=self._OAUTH_DISPLAY_ID)
-                raise ExtractorError(
-                    f'Failed to refresh access token: Authorization server returned error {error}',
-                    video_id=self._OAUTH_DISPLAY_ID)
-            raise
-        return token_response
-
-    @property
-    def _oauth_authorize(self):
-        code_response = self._download_json(
-            self._OAUTH_DEVICE_AUTHORIZATION_ENDPOINT,
-            video_id=self._OAUTH_DISPLAY_ID,
-            note='Initializing authorization flow',
-            data=json.dumps({
-                'client_id': self._OAUTH_CLIENT_ID,
-                'scope': self._OAUTH_SCOPE,
-            }).encode(),
-            headers={'Content-Type': 'application/json'})
-
-        verification_url = traverse_obj(code_response, ('verification_url', {str}))
-        user_code = traverse_obj(code_response, ('user_code', {str}))
-        if not verification_url or not user_code:
+        if username.startswith('oauth'):
            raise ExtractorError(
-                'Authorization server did not provide verification_url or user_code', video_id=self._OAUTH_DISPLAY_ID)
+                f'Login with OAuth is no longer supported. {self._youtube_login_hint}', expected=True)

-        # note: The whitespace is intentional
-        self.to_screen(
-            f'{self._OAUTH_DISPLAY_ID}: To give yt-dlp access to your account, '
-            f'go to  {verification_url}  and enter code  {user_code}')
-
-        # RFC8628 § 3.5: default poll interval is 5 seconds if not provided
-        poll_interval = traverse_obj(code_response, ('interval', {int}), default=5)
-
-        for retry in self.RetryManager():
-            while True:
-                try:
-                    token_response = self._download_json(
-                        self._OAUTH_TOKEN_ENDPOINT,
-                        video_id=self._OAUTH_DISPLAY_ID,
-                        note=False,
-                        errnote='Failed to request access token',
-                        data=json.dumps({
-                            'client_id': self._OAUTH_CLIENT_ID,
-                            'client_secret': self._OAUTH_CLIENT_SECRET,
-                            'device_code': code_response['device_code'],
-                            'grant_type': 'urn:ietf:params:oauth:grant-type:device_code',
-                        }).encode(),
-                        headers={'Content-Type': 'application/json'})
-                except ExtractorError as e:
-                    if isinstance(e.cause, TransportError):
-                        retry.error = e
-                        break
-                    elif isinstance(e.cause, HTTPError):
-                        error = self._read_oauth_error_response(e.cause.response)
-                        if not error:
-                            retry.error = e
-                            break
-
-                        if error == 'authorization_pending':
-                            time.sleep(poll_interval)
-                            continue
-                        elif error == 'expired_token':
-                            raise ExtractorError(
-                                'Authorization timed out', expected=True, video_id=self._OAUTH_DISPLAY_ID)
-                        elif error == 'access_denied':
-                            raise ExtractorError(
-                                'You denied access to an account', expected=True, video_id=self._OAUTH_DISPLAY_ID)
-                        elif error == 'slow_down':
-                            # RFC8628 § 3.5: add 5 seconds to the poll interval
-                            poll_interval += 5
-                            time.sleep(poll_interval)
-                            continue
-                        else:
-                            raise ExtractorError(
-                                f'Authorization server returned an error when fetching access token: {error}',
-                                video_id=self._OAUTH_DISPLAY_ID)
-                    raise
-
-                return token_response
-
-    def _update_oauth(self):
-        token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
-        if token is None or token['expiry'] > time.time():
-            return
-
-        self._set_oauth_info(self._refresh_token(token['refresh_token']))
+        self.report_warning(
+            f'Login with password is not supported for YouTube. {self._youtube_login_hint}')

    @property
    def _youtube_login_hint(self):
-        return ('Use  --username=oauth[+PROFILE] --password=""  to log in using oauth, '
-                f'or else u{self._login_hint(method="cookies")[1:]}. '
-                'See  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#logging-in-with-oauth  for more on how to use oauth. '
-                'See  https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  for help with cookies')
+        return (f'{self._login_hint(method="cookies")}. Also see  '
+                'https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies  '
+                'for tips on effectively exporting YouTube cookies')

    def _check_login_required(self):
        if self._LOGIN_REQUIRED and not self.is_authenticated:
@ -924,7 +734,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):

    @functools.cached_property
    def is_authenticated(self):
-        return self._OAUTH_PROFILE or bool(self._generate_sapisidhash_header())
+        return bool(self._generate_sapisidhash_header())

    def extract_ytcfg(self, video_id, webpage):
        if not webpage:
@ -934,16 +744,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
                default='{}'), video_id, fatal=False) or {}

-    def _generate_oauth_headers(self):
-        self._update_oauth()
-        oauth_token = YoutubeBaseInfoExtractor._OAUTH_ACCESS_TOKEN_CACHE.get(self._OAUTH_PROFILE)
-        if not oauth_token:
-            return {}
-
-        return {
-            'Authorization': f'{oauth_token["token_type"]} {oauth_token["access_token"]}',
-        }
-
    def _generate_cookie_auth_headers(self, *, ytcfg=None, account_syncid=None, session_index=None, origin=None, **kwargs):
        headers = {}
        account_syncid = account_syncid or self._extract_account_syncid(ytcfg)
@ -973,14 +773,10 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
            'Origin': origin,
            'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
            'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client),
-            **self._generate_oauth_headers(),
            **self._generate_cookie_auth_headers(ytcfg=ytcfg, account_syncid=account_syncid, session_index=session_index, origin=origin),
        }
        return filter_dict(headers)

-    def _generate_webpage_headers(self):
-        return self._generate_oauth_headers()
-
    def _download_ytcfg(self, client, video_id):
        url = {
            'web': 'https://www.youtube.com',
@ -990,8 +786,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
        if not url:
            return {}
        webpage = self._download_webpage(
-            url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config',
-            headers=self._generate_webpage_headers())
+            url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
        return self.extract_ytcfg(video_id, webpage) or {}

    @staticmethod
@ -3256,8 +3051,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            code = self._download_webpage(
                player_url, video_id, fatal=fatal,
                note='Downloading player ' + player_id,
-                errnote=f'Download of {player_url} failed',
-                headers=self._generate_webpage_headers())
+                errnote=f'Download of {player_url} failed')
            if code:
                self._code_cache[player_id] = code
        return self._code_cache.get(player_id)
@ -3540,8 +3334,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):

            self._download_webpage(
                url, video_id, f'Marking {label}watched',
-                'Unable to mark watched', fatal=False,
-                headers=self._generate_webpage_headers())
+                'Unable to mark watched', fatal=False)

    @classmethod
    def _extract_from_webpage(cls, url, webpage):
@ -4523,7 +4316,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
            if pp:
                query['pp'] = pp
            webpage = self._download_webpage(
-                webpage_url, video_id, fatal=False, query=query, headers=self._generate_webpage_headers())
+                webpage_url, video_id, fatal=False, query=query)

        master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()

@ -4666,6 +4459,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                    self.raise_geo_restricted(subreason, countries, metadata_available=True)
                reason += f'. {subreason}'
            if reason:
+                if 'sign in' in reason.lower():
+                    reason = remove_end(reason, 'This helps protect our community. Learn more')
+                    reason = f'{remove_end(reason.strip(), ".")}. {self._youtube_login_hint}'
                self.raise_no_formats(reason, expected=True)

        keywords = get_first(video_details, 'keywords', expected_type=list) or []
@ -5811,7 +5607,7 @@ class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor):
        webpage, data = None, None
        for retry in self.RetryManager(fatal=fatal):
            try:
-                webpage = self._download_webpage(url, item_id, note='Downloading webpage', headers=self._generate_webpage_headers())
+                webpage = self._download_webpage(url, item_id, note='Downloading webpage')
                data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
            except ExtractorError as e:
                if isinstance(e.cause, network_exceptions):