From 93c8410d333c9a61488448c29aabb6fa831e2991 Mon Sep 17 00:00:00 2001
From: "Lesmiscore (Naoya Ozaki)"
Date: Mon, 28 Feb 2022 13:10:54 +0900
Subject: [PATCH] [downloader/fragment] Fix bugs around resuming with Range (#2901)

Authored by: Lesmiscore
---
 yt_dlp/downloader/fragment.py |  2 +-
 yt_dlp/downloader/http.py     | 48 ++++++++++++++++++++++-------------
 yt_dlp/utils.py               | 10 ++++++++
 3 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/yt_dlp/downloader/fragment.py b/yt_dlp/downloader/fragment.py
index 24f4ec959..83a9f81b6 100644
--- a/yt_dlp/downloader/fragment.py
+++ b/yt_dlp/downloader/fragment.py
@@ -178,7 +178,7 @@ class FragmentFD(FileDownloader):
         dl = HttpQuietDownloader(
             self.ydl,
             {
-                'continuedl': True,
+                'continuedl': self.params.get('continuedl', True),
                 'quiet': self.params.get('quiet'),
                 'noprogress': True,
                 'ratelimit': self.params.get('ratelimit'),
diff --git a/yt_dlp/downloader/http.py b/yt_dlp/downloader/http.py
index 34a1eb59b..10ba61024 100644
--- a/yt_dlp/downloader/http.py
+++ b/yt_dlp/downloader/http.py
@@ -5,7 +5,6 @@ import os
 import socket
 import time
 import random
-import re
 
 from .common import FileDownloader
 from ..compat import (
@@ -16,6 +15,7 @@ from ..utils import (
     ContentTooShortError,
     encodeFilename,
     int_or_none,
+    parse_http_range,
     sanitized_Request,
     ThrottledDownload,
     write_xattr,
@@ -59,6 +59,9 @@ class HttpFD(FileDownloader):
         ctx.chunk_size = None
         throttle_start = None
 
+        # parse given Range
+        req_start, req_end, _ = parse_http_range(headers.get('Range'))
+
         if self.params.get('continuedl', True):
             # Establish possible resume length
             if os.path.isfile(encodeFilename(ctx.tmpfilename)):
@@ -91,6 +94,9 @@ class HttpFD(FileDownloader):
                               if not is_test and chunk_size else chunk_size)
             if ctx.resume_len > 0:
                 range_start = ctx.resume_len
+                if req_start is not None:
+                    # offset the beginning of Range to be within request
+                    range_start += req_start
                 if ctx.is_resume:
                     self.report_resuming_byte(ctx.resume_len)
                 ctx.open_mode = 'ab'
@@ -99,7 +105,17 @@ class HttpFD(FileDownloader):
             else:
                 range_start = None
             ctx.is_resume = False
-            range_end = range_start + ctx.chunk_size - 1 if ctx.chunk_size else None
+
+            if ctx.chunk_size:
+                chunk_aware_end = range_start + ctx.chunk_size - 1
+                # we're not allowed to download outside Range
+                range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end)
+            elif req_end is not None:
+                # there's no need for chunked downloads, so download until the end of Range
+                range_end = req_end
+            else:
+                range_end = None
+
             if range_end and ctx.data_len is not None and range_end >= ctx.data_len:
                 range_end = ctx.data_len - 1
             has_range = range_start is not None
@@ -124,23 +140,19 @@ class HttpFD(FileDownloader):
                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799)
                 if has_range:
                     content_range = ctx.data.headers.get('Content-Range')
-                    if content_range:
-                        content_range_m = re.search(r'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range)
+                    content_range_start, content_range_end, content_len = parse_http_range(content_range)
+                    if content_range_start is not None and range_start == content_range_start:
                         # Content-Range is present and matches requested Range, resume is possible
-                        if content_range_m:
-                            if range_start == int(content_range_m.group(1)):
-                                content_range_end = int_or_none(content_range_m.group(2))
-                                content_len = int_or_none(content_range_m.group(3))
-                                accept_content_len = (
-                                    # Non-chunked download
-                                    not ctx.chunk_size
-                                    # Chunked download and requested piece or
-                                    # its part is promised to be served
-                                    or content_range_end == range_end
-                                    or content_len < range_end)
-                                if accept_content_len:
-                                    ctx.data_len = content_len
-                                    return
+                        accept_content_len = (
+                            # Non-chunked download
+                            not ctx.chunk_size
+                            # Chunked download and requested piece or
+                            # its part is promised to be served
+                            or content_range_end == range_end
+                            or content_len < range_end)
+                        if accept_content_len:
+                            ctx.data_len = content_len
+                            return
                     # Content-Range is either not present or invalid. Assuming remote webserver is
                     # trying to send the whole file, resume is not possible, so wiping the local file
                     # and performing entire redownload
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py
index 6ec8da11b..cc08bd130 100644
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5252,6 +5252,16 @@ def join_nonempty(*values, delim='-', from_dict=None):
     return delim.join(map(str, filter(None, values)))
 
 
+def parse_http_range(range):
+    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
+    if not range:
+        return None, None, None
+    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
+    if not crg:
+        return None, None, None
+    return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
+
+
 class Config:
     own_args = None
     filename = None