From 3d3bb1688bfc5373105e6bf7c3d4729cf3f78788 Mon Sep 17 00:00:00 2001 From: pukkandan Date: Sun, 17 Apr 2022 23:19:53 +0530 Subject: [PATCH] [docs] Improve embedding docs and other minor fixes --- CONTRIBUTING.md | 10 +-- README.md | 147 +++++++++++++++++++++++---------- yt_dlp/__init__.py | 5 +- yt_dlp/extractor/kakao.py | 1 + yt_dlp/postprocessor/common.py | 3 +- 5 files changed, 116 insertions(+), 50 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index eff6becac..19888cff4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -374,21 +374,21 @@ When extracting metadata try to do so from multiple sources. For example if `tit #### Example -Say `meta` from the previous example has a `title` and you are about to extract it. Since `title` is a mandatory meta field you should end up with something like: +Say `meta` from the previous example has a `title` and you are about to extract it like: ```python -title = meta['title'] +title = meta.get('title') ``` -If `title` disappears from `meta` in future due to some changes on the hoster's side the extraction would fail since `title` is mandatory. That's expected. +If `title` disappears from `meta` in future due to some changes on the hoster's side the title extraction would fail. -Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback scenario: +Assume that you have some another source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback like: ```python title = meta.get('title') or self._og_search_title(webpage) ``` -This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`. +This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`, making the extractor more robust. 
### Regular expressions diff --git a/README.md b/README.md index 8a8477c9b..197d7b49b 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * youtube-dl tries to remove some superfluous punctuations from filenames. While this can sometimes be helpfull, it is often undesirable. So yt-dlp tries to keep the fields in the filenames as close to their original values as possible. You can use `--compat-options filename-sanitization` to revert to youtube-dl's behavior For ease of use, a few more compat options are available: + * `--compat-options all`: Use all compat options * `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams` * `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect` @@ -166,7 +167,7 @@ You can simply download the [correct binary file](#release-files) for your OS [![Linux](https://img.shields.io/badge/-Linux/MacOS/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) -[![ALl versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) +[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) Note: The manpages, shell completion files etc. 
are available in the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) @@ -485,7 +486,7 @@ You can also fork the project on github and run your fork's [build workflow](.gi -R, --retries RETRIES Number of retries (default is 10), or "infinite" --file-access-retries RETRIES Number of times to retry on file access - error (default is 10), or "infinite" + error (default is 3), or "infinite" --fragment-retries RETRIES Number of retries for a fragment (default is 10), or "infinite" (DASH, hlsnative and ISM) @@ -925,8 +926,8 @@ You can also fork the project on github and run your fork's [build workflow](.gi same codecs and number of streams to be concatable. The "pl_video:" prefix can be used with "--paths" and "--output" to set - the output filename for the split files. - See "OUTPUT TEMPLATE" for details + the output filename for the concatenated + files. See "OUTPUT TEMPLATE" for details --fixup POLICY Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the @@ -1063,8 +1064,9 @@ You can configure yt-dlp by placing any supported command line option to a confi * `%APPDATA%/yt-dlp/config.txt` * `~/yt-dlp.conf` * `~/yt-dlp.conf.txt` - + `%XDG_CONFIG_HOME%` defaults to `~/.config` if undefined. On windows, `%APPDATA%` generally points to `C:\Users\<user name>\AppData\Roaming` and `~` points to `%HOME%` if present, `%USERPROFILE%` (generally `C:\Users\<user name>`), or `%HOMEDRIVE%%HOMEPATH%` + 1. **System Configuration**: `/etc/yt-dlp.conf` For example, with the following configuration file yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under `YouTube` directory in your home directory: @@ -1121,6 +1123,7 @@ The simplest usage of `-o` is not to set any template arguments when downloading It may however also contain special sequences that will be replaced when downloading each video. 
The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). For example, `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations. The field names themselves (the part inside the parenthesis) can also have some special formatting: + 1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. Eg: `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields 1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. Eg: `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d` @@ -1601,7 +1604,9 @@ The general syntax of `--parse-metadata FROM:TO` is to give the name of a field Note that any field created by this can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--add-metadata`. This option also has a few special uses: + * You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. Eg: `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)` will download the first vimeo video found in the description + * You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to `meta_description` field will be added to the `description` field in the file. For example, you can use this to set a different "description" and "synopsis". 
To modify the metadata of individual streams, use the `meta<n>_` prefix (Eg: `meta1_language`). Any value set to the `meta_` field will overwrite all default values. **Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes. @@ -1743,19 +1748,72 @@ From a Python program, you can embed yt-dlp in a more powerful fashion, like thi ```python from yt_dlp import YoutubeDL -ydl_opts = {'format': 'bestaudio'} -with YoutubeDL(ydl_opts) as ydl: - ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc']) +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] +with YoutubeDL() as ydl: + ydl.download(URLS) ``` Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181). -Here's a more complete example demonstrating various functionality: +**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. 
It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example below + +## Embedding examples + +### Extracting information ```python import json import yt_dlp +URL = 'https://www.youtube.com/watch?v=BaW_jenozKc' + +# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions +ydl_opts = {} +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(URL, download=False) + + # ℹ️ ydl.sanitize_info makes the info json-serializable + print(json.dumps(ydl.sanitize_info(info))) +``` +### Download from info-json + +```python +import yt_dlp + +INFO_FILE = 'path/to/video.info.json' + +with yt_dlp.YoutubeDL() as ydl: + error_code = ydl.download_with_info_file(INFO_FILE) + +print('Some videos failed to download' if error_code + else 'All videos successfully downloaded') +``` + +### Extract audio + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +ydl_opts = { + 'format': 'm4a/bestaudio/best', + # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments + 'postprocessors': [{ # Extract audio using ffmpeg + 'key': 'FFmpegExtractAudio', + 'preferredcodec': 'm4a', + }] +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + error_code = ydl.download(URLS) +``` +### Adding logger and progress hook + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] class MyLogger: def debug(self, msg): @@ -1776,23 +1834,51 @@ class MyLogger: print(msg) -# ℹ️ See the docstring of yt_dlp.postprocessor.common.PostProcessor +# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL) +def my_hook(d): + if d['status'] == 'finished': + print('Done downloading, now post-processing ...') + + +ydl_opts = { + 'logger': MyLogger(), + 'progress_hooks': [my_hook], +} + +with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download(URLS) +``` + +### Add a custom PostProcessor + +```python +import yt_dlp + 
+URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + +# ℹ️ See help(yt_dlp.postprocessor.PostProcessor) class MyCustomPP(yt_dlp.postprocessor.PostProcessor): - # ℹ️ See docstring of yt_dlp.postprocessor.common.PostProcessor.run def run(self, info): self.to_screen('Doing stuff') return [], info -# ℹ️ See "progress_hooks" in the docstring of yt_dlp.YoutubeDL -def my_hook(d): - if d['status'] == 'finished': - print('Done downloading, now converting ...') +with yt_dlp.YoutubeDL() as ydl: + ydl.add_post_processor(MyCustomPP()) + ydl.download(URLS) +``` +### Use a custom format selector + +```python +import yt_dlp + +URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc'] + def format_selector(ctx): """ Select the best video and the best audio that won't result in an mkv. - This is just an example and does not handle all cases """ + NOTE: This is just an example and does not handle all cases """ # formats are already sorted worst to best formats = ctx.get('formats')[::-1] @@ -1807,8 +1893,8 @@ def format_selector(ctx): best_audio = next(f for f in formats if ( f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext)) + # These are the minimum required fields for a merged format yield { - # These are the minimum required fields for a merged format 'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}', 'ext': best_video['ext'], 'requested_formats': [best_video, best_audio], @@ -1817,36 +1903,14 @@ def format_selector(ctx): } -# ℹ️ See docstring of yt_dlp.YoutubeDL for a description of the options ydl_opts = { 'format': format_selector, - 'postprocessors': [{ - # Embed metadata in video using ffmpeg. 
- # ℹ️ See yt_dlp.postprocessor.FFmpegMetadataPP for the arguments it accepts - 'key': 'FFmpegMetadata', - 'add_chapters': True, - 'add_metadata': True, - }], - 'logger': MyLogger(), - 'progress_hooks': [my_hook], - # Add custom headers - 'http_headers': {'Referer': 'https://www.google.com'} } - -# ℹ️ See the public functions in yt_dlp.YoutubeDL for for other available functions. -# Eg: "ydl.download", "ydl.download_with_info_file" with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.add_post_processor(MyCustomPP()) - info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc') - - # ℹ️ ydl.sanitize_info makes the info json-serializable - print(json.dumps(ydl.sanitize_info(info))) + ydl.download(URLS) ``` -**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the example above - - # DEPRECATED OPTIONS @@ -1960,8 +2024,7 @@ These options may no longer work as intended These options were deprecated since 2014 and have now been entirely removed -A, --auto-number -o "%(autonumber)s-%(id)s.%(ext)s" - -t, --title -o "%(title)s-%(id)s.%(ext)s" - -l, --literal -o accepts literal names + -t, -l, --title, --literal -o "%(title)s-%(id)s.%(ext)s" # CONTRIBUTING See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions) diff --git a/yt_dlp/__init__.py b/yt_dlp/__init__.py index f339e4cd1..24991e19b 100644 --- a/yt_dlp/__init__.py +++ b/yt_dlp/__init__.py @@ -397,7 +397,8 @@ def validate_options(opts): # Conflicting options report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) 
report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) - report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', opts.exec_cmd.get('before_dl')) + report_conflict('--exec-before-download', 'exec_before_dl_cmd', + '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') @@ -412,7 +413,7 @@ def validate_options(opts): report_conflict('--embed-subs', 'embedsubtitles') report_conflict('--embed-thumbnail', 'embedthumbnail') report_conflict('--extract-audio', 'extractaudio') - report_conflict('--fixup', 'fixup', val1=(opts.fixup or '').lower() in ('', 'never', 'ignore'), default='never') + report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), default='never') report_conflict('--recode-video', 'recodevideo') report_conflict('--remove-chapters', 'remove_chapters', default=[]) report_conflict('--remux-video', 'remuxvideo') diff --git a/yt_dlp/extractor/kakao.py b/yt_dlp/extractor/kakao.py index 8ad1d9efd..a5014d931 100644 --- a/yt_dlp/extractor/kakao.py +++ b/yt_dlp/extractor/kakao.py @@ -105,6 +105,7 @@ class KakaoIE(InfoExtractor): resp = self._parse_json(e.cause.read().decode(), video_id) if resp.get('code') == 'GeoBlocked': self.raise_geo_restricted() + raise fmt_url = traverse_obj(fmt_url_json, ('videoLocation', 'url')) if not fmt_url: diff --git a/yt_dlp/postprocessor/common.py b/yt_dlp/postprocessor/common.py index ce6dec2f5..fdea3a7ea 100644 --- a/yt_dlp/postprocessor/common.py +++ b/yt_dlp/postprocessor/common.py @@ -83,7 +83,8 @@ class PostProcessor(metaclass=PostProcessorMetaClass): write_string(f'DeprecationWarning: {text}') def report_error(self, text, *args, **kwargs): - # Exists only for 
compatibility. Do not use + self.deprecation_warning('"yt_dlp.postprocessor.PostProcessor.report_error" is deprecated. ' + 'raise "yt_dlp.utils.PostProcessingError" instead') if self._downloader: return self._downloader.report_error(text, *args, **kwargs)