From 1890fc6389393ffaa05fa27bd47717f4d862404f Mon Sep 17 00:00:00 2001 From: pukkandan Date: Fri, 3 Jun 2022 21:29:03 +0530 Subject: [PATCH] [cleanup] Misc fixes Cherry-picks from: #3498, #3947 Related: #3949, https://github.com/yt-dlp/yt-dlp/issues/1839#issuecomment-1140313836 Authored by: pukkandan, flashdagger, gamer191 --- .../ISSUE_TEMPLATE/3_site_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/5_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE/6_question.yml | 6 +- .../3_site_feature_request.yml | 2 +- .../ISSUE_TEMPLATE_tmpl/5_feature_request.yml | 2 +- .github/ISSUE_TEMPLATE_tmpl/6_question.yml | 6 +- README.md | 2 +- pyinst.py | 2 +- yt_dlp/YoutubeDL.py | 64 +++++++------ yt_dlp/downloader/fragment.py | 20 ++-- yt_dlp/extractor/common.py | 5 +- yt_dlp/extractor/youtube.py | 93 +++++++++---------- yt_dlp/postprocessor/ffmpeg.py | 2 +- yt_dlp/utils.py | 9 +- 14 files changed, 119 insertions(+), 98 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml index 7a81cede6..b4bb2c839 100644 --- a/.github/ISSUE_TEMPLATE/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/3_site_feature_request.yml @@ -9,7 +9,7 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm reporting a site feature request + - label: I'm requesting a site-specific feature required: true - label: I've verified that I'm running yt-dlp version **2022.05.18** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true diff --git a/.github/ISSUE_TEMPLATE/5_feature_request.yml b/.github/ISSUE_TEMPLATE/5_feature_request.yml index 08115f799..5bfcbb6cb 100644 --- a/.github/ISSUE_TEMPLATE/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE/5_feature_request.yml @@ -9,7 +9,7 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm reporting a feature request + - label: I'm requesting a feature unrelated to a specific site required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true diff --git a/.github/ISSUE_TEMPLATE/6_question.yml b/.github/ISSUE_TEMPLATE/6_question.yml index 030d2cfe7..8b434aef0 100644 --- a/.github/ISSUE_TEMPLATE/6_question.yml +++ b/.github/ISSUE_TEMPLATE/6_question.yml @@ -9,13 +9,13 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm asking a question and **not** reporting a bug/feature request + - label: I'm asking a question and **not** reporting a bug or requesting a feature required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones. DO NOT post duplicates required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: question diff --git a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml index a175b92c3..77e9d3469 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml @@ -9,7 +9,7 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm reporting a site feature request + - label: I'm requesting a site-specific feature required: true - label: I've verified that I'm running yt-dlp version **%(version)s** ([update instructions](https://github.com/yt-dlp/yt-dlp#update)) or later (specify commit) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml index 1f33f09dc..4686c1dff 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml @@ -9,7 +9,7 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm reporting a feature request + - label: I'm requesting a feature unrelated to a specific site required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true diff --git a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml index 030d2cfe7..8b434aef0 100644 --- a/.github/ISSUE_TEMPLATE_tmpl/6_question.yml +++ b/.github/ISSUE_TEMPLATE_tmpl/6_question.yml @@ -9,13 +9,13 @@ body: description: | Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp: options: - - label: I'm asking a question and **not** reporting a bug/feature request + - label: I'm asking a question and **not** reporting a bug or requesting a feature required: true - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme) required: true - - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) + - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones. DO NOT post duplicates required: true - - label: I've searched the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions including closed ones + - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue) required: true - type: textarea id: question diff --git a/README.md b/README.md index 912a42f72..87986e4c3 100644 --- a/README.md +++ b/README.md @@ -1783,7 +1783,7 @@ with YoutubeDL() as ydl: ydl.download(URLS) ``` -Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L181). +Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L180). **Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary. It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information) diff --git a/pyinst.py b/pyinst.py index de3504b35..292f5d719 100644 --- a/pyinst.py +++ b/pyinst.py @@ -105,7 +105,7 @@ def pycryptodome_module(): def set_version_info(exe, version): - if OS_NAME == 'Windows': + if OS_NAME == 'win32': windows_set_version(exe, version) diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5aae25707..e71e85d2e 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -195,13 +195,6 @@ class YoutubeDL: For compatibility, a single list is also accepted print_to_file: A dict with keys WHEN (same as forceprint) mapped to a list of tuples with (template, filename) - forceurl: Force printing final URL. (Deprecated) - forcetitle: Force printing title. (Deprecated) - forceid: Force printing ID. (Deprecated) - forcethumbnail: Force printing thumbnail URL. (Deprecated) - forcedescription: Force printing description. (Deprecated) - forcefilename: Force printing final filename. (Deprecated) - forceduration: Force printing duration. (Deprecated) forcejson: Force printing info_dict as JSON. dump_single_json: Force printing the info_dict of the whole playlist (or video) as a single JSON line. @@ -278,9 +271,6 @@ class YoutubeDL: writedesktoplink: Write a Linux internet shortcut file (.desktop) writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatically generated subtitles to a file - allsubtitles: Deprecated - Use subtitleslangs = ['all'] - Downloads all the subtitles of the video - (requires writesubtitles or writeautomaticsub) listsubtitles: Lists all available subtitles for the video subtitlesformat: The format code for subtitles subtitleslangs: List of languages of the subtitles to download (can be regex). @@ -334,7 +324,6 @@ class YoutubeDL: bidi_workaround: Work around buggy terminals without bidirectional text support, using fridibi debug_printtraffic:Print out sent and received HTTP traffic - include_ads: Download ads as well (deprecated) default_search: Prepend this string if an input url is not valid. 'auto' for elaborate guessing encoding: Use this encoding instead of the system-specified. @@ -350,10 +339,6 @@ class YoutubeDL: * when: When to run the postprocessor. Allowed values are the entries of utils.POSTPROCESS_WHEN Assumed to be 'post_process' if not given - post_hooks: Deprecated - Register a custom postprocessor instead - A list of functions that get called as the final step - for each video file, after all postprocessors have been - called. The filename will be passed as the only argument. progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries * status: One of "downloading", "error", or "finished". @@ -398,8 +383,6 @@ class YoutubeDL: - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: Client-side IP address to bind to. - call_home: Boolean, true iff we are allowed to contact the - yt-dlp servers for debugging. (BROKEN) sleep_interval_requests: Number of seconds to sleep between requests during extraction sleep_interval: Number of seconds to sleep before each download when @@ -440,11 +423,6 @@ class YoutubeDL: external downloader to use for it. The allowed protocols are default|http|ftp|m3u8|dash|rtsp|rtmp|mms. Set the value to 'native' to use the native downloader - hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'} - or {'m3u8': 'ffmpeg'} instead. - Use the native HLS downloader instead of ffmpeg/avconv - if True, otherwise use ffmpeg/avconv if False, otherwise - use downloader suggested by extractor if None. compat_opts: Compatibility options. See "Differences in default behavior". The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort @@ -466,8 +444,6 @@ class YoutubeDL: external_downloader_args, concurrent_fragment_downloads. The following options are used by the post processors: - prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available, - otherwise prefer ffmpeg. (avconv support is deprecated) ffmpeg_location: Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory. postprocessor_args: A dictionary of postprocessor/executable keys (in lower case) @@ -487,12 +463,48 @@ class YoutubeDL: See "EXTRACTOR ARGUMENTS" for details. Eg: {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube - youtube_include_dash_manifest: Deprecated - Use extractor_args instead. + + The following options are deprecated and may be removed in the future: + + forceurl: - Use forceprint + Force printing final URL. + forcetitle: - Use forceprint + Force printing title. + forceid: - Use forceprint + Force printing ID. + forcethumbnail: - Use forceprint + Force printing thumbnail URL. + forcedescription: - Use forceprint + Force printing description. + forcefilename: - Use forceprint + Force printing final filename. + forceduration: - Use forceprint + Force printing duration. + allsubtitles: - Use subtitleslangs = ['all'] + Downloads all the subtitles of the video + (requires writesubtitles or writeautomaticsub) + include_ads: - Doesn't work + Download ads as well + call_home: - Not implemented + Boolean, true iff we are allowed to contact the + yt-dlp servers for debugging. + post_hooks: - Register a custom postprocessor + A list of functions that get called as the final step + for each video file, after all postprocessors have been + called. The filename will be passed as the only argument. + hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}. + Use the native HLS downloader instead of ffmpeg/avconv + if True, otherwise use ffmpeg/avconv if False, otherwise + use downloader suggested by extractor if None. + prefer_ffmpeg: - avconv support is deprecated + If False, use avconv instead of ffmpeg if both are available, + otherwise prefer ffmpeg. + youtube_include_dash_manifest: - Use extractor_args If True (default), DASH manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't care about DASH. (only for youtube) - youtube_include_hls_manifest: Deprecated - Use extractor_args instead. + youtube_include_hls_manifest: - Use extractor_args If True (default), HLS manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py index 493849001..d94cb4956 100644 --- a/yt_dlp/downloader/fragment.py +++ b/yt_dlp/downloader/fragment.py @@ -496,12 +496,20 @@ class FragmentFD(FileDownloader): self.report_warning('The download speed shown is only of one thread. This is a known issue and patches are welcome') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: - for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): - ctx['fragment_filename_sanitized'] = frag_filename - ctx['fragment_index'] = frag_index - result = append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx) - if not result: - return False + try: + for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): + ctx.update({ + 'fragment_filename_sanitized': frag_filename, + 'fragment_index': frag_index, + }) + if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx): + return False + except KeyboardInterrupt: + self._finish_multiline_status() + self.report_error( + 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False) + pool.shutdown(wait=False) + raise else: for fragment in fragments: if not interrupt_trigger[0]: diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index c1a160e82..2e62660c7 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -786,7 +786,8 @@ class InfoExtractor: self.report_warning(errmsg) return False - def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): + def _download_webpage_handle(self, url_or_request, video_id, note=None, errnote=None, fatal=True, + encoding=None, data=None, headers={}, query={}, expected_status=None): """ Return a tuple (page content as string, URL handle). @@ -943,7 +944,7 @@ class InfoExtractor: except ValueError: raise e except ValueError as ve: - errmsg = '%s: Failed to parse JSON ' % video_id + errmsg = f'{video_id}: Failed to parse JSON' if fatal: raise ExtractorError(errmsg, cause=ve) else: diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index c9bdd309d..8b2332dc1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -15,7 +15,7 @@ import time import traceback from .common import InfoExtractor, SearchInfoExtractor -from ..compat import functools +from ..compat import functools # isort: split from ..compat import ( compat_chr, compat_HTTPError, @@ -483,6 +483,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor): if data: return self._parse_json(data, item_id, fatal=fatal) + def _extract_yt_initial_variable(self, webpage, regex, video_id, name): + return self._parse_json(self._search_regex( + (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', + regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True) + @staticmethod def _extract_session_index(*data): """ @@ -2733,54 +2738,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor): chapter_time = lambda chapter: parse_duration(self._get_text(chapter, 'timeDescription')) chapter_title = lambda chapter: self._get_text(chapter, 'title') - return next(( - filter(None, ( - self._extract_chapters( - traverse_obj(contents, (..., 'macroMarkersListItemRenderer')), - chapter_time, chapter_title, duration) - for contents in content_list - ))), []) + return next(filter(None, ( + self._extract_chapters(traverse_obj(contents, (..., 'macroMarkersListItemRenderer')), + chapter_time, chapter_title, duration) + for contents in content_list)), []) - @staticmethod - def _extract_chapters_from_description(description, duration): - chapters = [{'start_time': 0}] - for timestamp, title in re.findall( - r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''): - start = parse_duration(timestamp) - if start and title and chapters[-1]['start_time'] < start < duration: - chapters[-1]['end_time'] = start - chapters.append({ - 'start_time': start, - 'title': title, - }) - chapters[-1]['end_time'] = duration - return chapters[1:] - - def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration): - chapters = [] - last_chapter = {'start_time': 0} - for idx, chapter in enumerate(chapter_list or []): - title = chapter_title(chapter) - start_time = chapter_time(chapter) - if start_time is None: - continue - last_chapter['end_time'] = start_time - if start_time < last_chapter['start_time']: - if idx == 1: - chapters.pop() - self.report_warning('Invalid start time for chapter "%s"' % last_chapter['title']) - else: - self.report_warning(f'Invalid start time for chapter "{title}"') - continue - last_chapter = {'start_time': start_time, 'title': title} - chapters.append(last_chapter) - last_chapter['end_time'] = duration - return chapters + def _extract_chapters_from_description(self, description, duration): + return self._extract_chapters( + re.findall(r'(?m)^((?:\d+:)?\d{1,2}:\d{2})\b\W*\s(.+?)\s*$', description or ''), + chapter_time=lambda x: parse_duration(x[0]), chapter_title=lambda x: x[1], + duration=duration, strict=False) - def _extract_yt_initial_variable(self, webpage, regex, video_id, name): - return self._parse_json(self._search_regex( - (fr'{regex}\s*{self._YT_INITIAL_BOUNDARY_RE}', - regex), webpage, name, default='{}'), video_id, fatal=False, lenient=True) + def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration, strict=True): + if not duration: + return + chapter_list = [{ + 'start_time': chapter_time(chapter), + 'title': chapter_title(chapter), + } for chapter in chapter_list or []] + if not strict: + chapter_list.sort(key=lambda c: c['start_time'] or 0) + + chapters = [{'start_time': 0, 'title': ''}] + for idx, chapter in enumerate(chapter_list): + if chapter['start_time'] is None or not chapter['title']: + self.report_warning(f'Incomplete chapter {idx}') + elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration: + chapters[-1]['end_time'] = chapter['start_time'] + chapters.append(chapter) + else: + self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"') + chapters[-1]['end_time'] = duration + return chapters if len(chapters) > 1 and chapters[1]['start_time'] else chapters[1:] def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') @@ -3663,7 +3652,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Youtube Music Auto-generated description if video_description: - mobj = re.search(r'(?s)(?P[^·\n]+)·(?P[^\n]+)\n+(?P[^\n]+)(?:.+?℗\s*(?P\d{4})(?!\d))?(?:.+?Released on\s*:\s*(?P\d{4}-\d{2}-\d{2}))?(.+?\nArtist\s*:\s*(?P[^\n]+))?.+\nAuto-generated by YouTube\.\s*$', video_description) + mobj = re.search( + r'''(?xs) + (?P[^·\n]+)·(?P[^\n]+)\n+ + (?P[^\n]+) + (?:.+?℗\s*(?P\d{4})(?!\d))? + (?:.+?Released on\s*:\s*(?P\d{4}-\d{2}-\d{2}))? + (.+?\nArtist\s*:\s*(?P[^\n]+))? + .+\nAuto-generated\ by\ YouTube\.\s*$ + ''', video_description) if mobj: release_year = mobj.group('release_year') release_date = mobj.group('release_date') diff --git a/yt_dlp/postprocessor/ffmpeg.py b/yt_dlp/postprocessor/ffmpeg.py index 2a456e567..dad8b7f8f 100644 --- a/yt_dlp/postprocessor/ffmpeg.py +++ b/yt_dlp/postprocessor/ffmpeg.py @@ -776,7 +776,7 @@ class FFmpegMetadataPP(FFmpegPostProcessor): for key, value in info.items(): mobj = re.fullmatch(meta_regex, key) if value is not None and mobj: - metadata[mobj.group('i') or 'common'][mobj.group('key')] = value + metadata[mobj.group('i') or 'common'][mobj.group('key')] = value.replace('\0', '') # Write id3v1 metadata also since Windows Explorer can't handle id3v2 tags yield ('-write_id3v1', '1') diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py index b0300b724..00721eb46 100644 --- a/yt_dlp/utils.py +++ b/yt_dlp/utils.py @@ -1936,7 +1936,7 @@ def intlist_to_bytes(xs): class LockingUnsupportedError(OSError): - msg = 'File locking is not supported on this platform' + msg = 'File locking is not supported' def __init__(self): super().__init__(self.msg) @@ -2061,8 +2061,11 @@ class locked_file: try: self.f.truncate() except OSError as e: - if e.errno != 29: # Illegal seek, expected when self.f is a FIFO - raise e + if e.errno not in ( + errno.ESPIPE, # Illegal seek - expected for FIFO + errno.EINVAL, # Invalid argument - expected for /dev/null + ): + raise return self def unlock(self):