From e7e94f2a5c5de78dd281de752f2b3a8663159726 Mon Sep 17 00:00:00 2001 From: David <59258980+zerodytrash@users.noreply.github.com> Date: Sun, 1 Aug 2021 23:13:46 +0200 Subject: [PATCH] [youtube] Add age-gate bypass for unverified accounts (#600) Adds `_creator` variants for each client Authored by: zerodytrash, colethedj, pukkandan --- README.md | 2 +- yt_dlp/extractor/youtube.py | 76 ++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 357362e12..8ce5ed354 100644 --- a/README.md +++ b/README.md @@ -1357,7 +1357,7 @@ Some extractors accept additional arguments which can be passed using `--extract The following extractors use this feature: * **youtube** * `skip`: `hls` or `dash` (or both) to skip download of the respective manifests - * `player_client`: Clients to extract video data from - one or more of `web`, `android`, `ios`, `mweb`, `web_music`, `android_music`, `ios_music`, `web_embedded`, `android_embedded`, `ios_embedded`, `web_agegate`, `android_agegate`, `ios_agegate`, `mweb_agegate` or `all`. By default, `android,web` is used. If the URL is from `music.youtube.com`, `android,web,android_music,web_music` is used. If age-gate is detected, the `_agegate` variants are automatically added. + * `player_client`: Clients to extract video data from. The main clients are `web`, `android`, `ios` and `mweb`. These also have `_music`, `_embedded`, `_agegate`, and `_creator` variants (e.g. `web_embedded`) (`mweb` has only `_agegate`). By default, `android,web` is used, but the agegate and creator variants are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` URLs. You can also use `all` to use all the clients. * `player_skip`: `configs` - skip any requests for client configs and use defaults * `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side). 
* `max_comments`: maximum amount of comments to download (default all). diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 82e27e605..511eee4d7 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -100,6 +100,16 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, + 'web_creator': { + 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'WEB_CREATOR', + 'clientVersion': '1.20210621.00.00', + } + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, + }, 'android': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', 'INNERTUBE_CONTEXT': { @@ -131,6 +141,15 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, }, + 'android_creator': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_CREATOR', + 'clientVersion': '21.24.100', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 14 + }, # ios has HLS live streams # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680 'ios': { @@ -164,6 +183,15 @@ INNERTUBE_CLIENTS = { }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26 }, + 'ios_creator': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'IOS_CREATOR', + 'clientVersion': '21.24.100', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 15 + }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 'mweb': { @@ -1036,17 +1064,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt') - _AGE_GATE_REASONS = ( - 'Sign in to confirm your age', - 'This video may be inappropriate for some users.', - 'Sorry, this content is age-restricted.', - 'Please confirm your age.') - - _AGE_GATE_STATUS_REASONS = ( - 'AGE_VERIFICATION_REQUIRED', - 'AGE_CHECK_REQUIRED' - ) - _GEO_BYPASS = False IE_NAME = 'youtube' @@ -2402,14 +2419,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'racyCheckOk': True } - def _is_agegated(self, player_response): - reasons = 
traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[]) - for reason in reasons: - if reason in self._AGE_GATE_REASONS + self._AGE_GATE_STATUS_REASONS: - return True - if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')) is not None: + @staticmethod + def _is_agegated(player_response): + if traverse_obj(player_response, ('playabilityStatus', 'desktopLegacyAgeGateReason')): return True - return False + + reasons = traverse_obj(player_response, ('playabilityStatus', ('status', 'reason')), default=[]) + AGE_GATE_REASONS = ( + 'confirm your age', 'age-restricted', 'inappropriate', # reason + 'age_verification_required', 'age_check_required', # status + ) + return any(expected in reason for expected in AGE_GATE_REASONS for reason in reasons) + + @staticmethod + def _is_unplayable(player_response): + return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE' def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, identity_token, player_url, initial_pr): @@ -2446,7 +2470,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if smuggled_data.get('is_music_url') or self.is_music_url(url): requested_clients.extend( - f'{client}_music' for client in requested_clients if not client.endswith('_music')) + f'{client}_music' for client in requested_clients if f'{client}_music' in INNERTUBE_CLIENTS) return orderedSet(requested_clients) @@ -2469,6 +2493,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): original_clients = clients clients = clients[::-1] + + def append_client(client_name): + if client_name in INNERTUBE_CLIENTS and client_name not in original_clients: + clients.append(client_name) + while clients: client = clients.pop() player_ytcfg = master_ytcfg if client == 'web' else {} @@ -2482,10 +2511,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if pr: yield pr - if self._is_agegated(pr): - client = f'{client}_agegate' - if client in INNERTUBE_CLIENTS and client not 
in original_clients: - clients.append(client) + # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in + if client.endswith('_agegate') and self._is_unplayable(pr) and self._generate_sapisidhash_header(): + append_client(client.replace('_agegate', '_creator')) + elif self._is_agegated(pr): + append_client(f'{client}_agegate') # Android player_response does not have microFormats which are needed for # extraction of some data. So we return the initial_pr with formats