From 333217f43e58f93fc8088d4854044b907adddce5 Mon Sep 17 00:00:00 2001
From: Felix S
Date: Fri, 23 Apr 2021 10:52:21 +0200
Subject: [PATCH] [downloader/hls] Remove duplicate cues using a sliding window of candidates

---
 yt_dlp/downloader/hls.py | 25 +++++++++++++++++++++++++
 yt_dlp/webvtt.py         | 10 ++++++++++
 2 files changed, 35 insertions(+)

diff --git a/yt_dlp/downloader/hls.py b/yt_dlp/downloader/hls.py
index cee3807ce..c0e52d35d 100644
--- a/yt_dlp/downloader/hls.py
+++ b/yt_dlp/downloader/hls.py
@@ -325,6 +325,31 @@ class HlsFD(FragmentFD):
                     if isinstance(block, webvtt.CueBlock):
                         block.start += adjust
                         block.end += adjust
+
+                        dedup_window = extra_state.setdefault('webvtt_dedup_window', [])
+                        cue = block.as_json
+
+                        # skip the cue if an identical one appears
+                        # in the window of potential duplicates
+                        # and prune the window of unviable candidates
+                        i = 0
+                        skip = True
+                        while i < len(dedup_window):
+                            window_cue = dedup_window[i]
+                            if window_cue == cue:
+                                break
+                            if window_cue['end'] >= cue['start']:
+                                i += 1
+                                continue
+                            del dedup_window[i]
+                        else:
+                            skip = False
+
+                        if skip:
+                            continue
+
+                        # add the cue to the window
+                        dedup_window.append(cue)
                     elif isinstance(block, webvtt.Magic):
                         # XXX: we do not handle MPEGTS overflow
                         if frag_index == 1:
diff --git a/yt_dlp/webvtt.py b/yt_dlp/webvtt.py
index 4d026834a..a184ee369 100644
--- a/yt_dlp/webvtt.py
+++ b/yt_dlp/webvtt.py
@@ -322,6 +322,16 @@ class CueBlock(Block):
         stream.write(self.text)
         stream.write('\n')
 
+    @property
+    def as_json(self):
+        return {
+            'id': self.id,
+            'start': self.start,
+            'end': self.end,
+            'text': self.text,
+            'settings': self.settings,
+        }
+
 
 def parse_fragment(frag_content):
     """