From 6ada33db77456ec0431b380513c780d15feacbb4 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 19 Oct 2023 13:26:55 -0700 Subject: [PATCH] taildrop: fix theoretical race condition in fileDeleter.Init (#9876) It is possible that upon a cold-start, we enqueue a partial file for deletion that is resumed shortly after startup. If the file transfer happens to last longer than deleteDelay, we will delete the partial file, which is unfortunate. The client spent a long time uploading a file, only for it to be accidentally deleted. It's a very rare race, but also a frustrating one if it happens to manifest. Fix the code to only delete partial files that do not have an active puts against it. We also fix a minor bug in ResumeReader where we read b[:blockSize] instead of b[:cs.Size]. The former is the fixed size of 64KiB, while the latter is usually 64KiB, but may be less for the last block. Updates tailscale/corp#14772 Signed-off-by: Joe Tsai --- taildrop/delete.go | 35 +++++++++++++++++++++++------------ taildrop/delete_test.go | 6 +++++- taildrop/resume.go | 2 +- taildrop/retrieve.go | 19 +++++++++---------- taildrop/taildrop.go | 7 ++----- 5 files changed, 40 insertions(+), 29 deletions(-) diff --git a/taildrop/delete.go b/taildrop/delete.go index a058e1a0a..12b95c020 100644 --- a/taildrop/delete.go +++ b/taildrop/delete.go @@ -27,8 +27,8 @@ const deleteDelay = time.Hour type fileDeleter struct { logf logger.Logf clock tstime.DefaultClock - event func(string) // called for certain events; for testing only dir string + event func(string) // called for certain events; for testing only mu sync.Mutex queue list.List @@ -46,11 +46,11 @@ type deleteFile struct { inserted time.Time } -func (d *fileDeleter) Init(logf logger.Logf, clock tstime.DefaultClock, event func(string), dir string) { - d.logf = logf - d.clock = clock - d.dir = dir - d.event = event +func (d *fileDeleter) Init(m *Manager, eventHook func(string)) { + d.logf = m.opts.Logf + d.clock = m.opts.Clock + d.dir = m.opts.Dir + d.event = eventHook // From a cold-start, load the list of partial and deleted files. d.byName = make(map[string]*list.Element) @@ -59,19 +59,30 @@ func (d *fileDeleter) Init(logf logger.Logf, clock tstime.DefaultClock, event fu d.group.Go(func() { d.event("start init") defer d.event("end init") - rangeDir(dir, func(de fs.DirEntry) bool { + rangeDir(d.dir, func(de fs.DirEntry) bool { switch { case d.shutdownCtx.Err() != nil: return false // terminate early case !de.Type().IsRegular(): return true - case strings.Contains(de.Name(), partialSuffix): - d.Insert(de.Name()) - case strings.Contains(de.Name(), deletedSuffix): + case strings.HasSuffix(de.Name(), partialSuffix): + // Only enqueue the file for deletion if there is no active put. + nameID := strings.TrimSuffix(de.Name(), partialSuffix) + if i := strings.LastIndexByte(nameID, '.'); i > 0 { + key := incomingFileKey{ClientID(nameID[i+len("."):]), nameID[:i]} + m.incomingFiles.LoadFunc(key, func(_ *incomingFile, loaded bool) { + if !loaded { + d.Insert(de.Name()) + } + }) + } else { + d.Insert(de.Name()) + } + case strings.HasSuffix(de.Name(), deletedSuffix): // Best-effort immediate deletion of deleted files. name := strings.TrimSuffix(de.Name(), deletedSuffix) - if os.Remove(filepath.Join(dir, name)) == nil { - if os.Remove(filepath.Join(dir, de.Name())) == nil { + if os.Remove(filepath.Join(d.dir, name)) == nil { + if os.Remove(filepath.Join(d.dir, de.Name())) == nil { break } } diff --git a/taildrop/delete_test.go b/taildrop/delete_test.go index 0f87c46c5..9066c7bc8 100644 --- a/taildrop/delete_test.go +++ b/taildrop/delete_test.go @@ -67,8 +67,12 @@ func TestDeleter(t *testing.T) { } eventHook := func(event string) { eventsChan <- event } + var m Manager var fd fileDeleter - fd.Init(t.Logf, tstime.DefaultClock{Clock: clock}, eventHook, dir) + m.opts.Logf = t.Logf + m.opts.Clock = tstime.DefaultClock{Clock: clock} + m.opts.Dir = dir + fd.Init(&m, eventHook) defer fd.Shutdown() insert := func(name string) { t.Helper() diff --git a/taildrop/resume.go b/taildrop/resume.go index 5a6ba4e93..1296df181 100644 --- a/taildrop/resume.go +++ b/taildrop/resume.go @@ -145,7 +145,7 @@ func ResumeReader(r io.Reader, hashNext func() (BlockChecksum, error)) (int64, i } // Read the contents of the next block. - n, err := io.ReadFull(r, b[:blockSize]) + n, err := io.ReadFull(r, b[:cs.Size]) b = b[:n] if err == io.EOF || err == io.ErrUnexpectedEOF { err = nil diff --git a/taildrop/retrieve.go b/taildrop/retrieve.go index ec40c32d7..3e37b492a 100644 --- a/taildrop/retrieve.go +++ b/taildrop/retrieve.go @@ -112,16 +112,15 @@ func (m *Manager) DeleteFile(baseName string) error { err := os.Remove(path) if err != nil && !os.IsNotExist(err) { err = redactError(err) - // Put a retry loop around deletes on Windows. Windows - // file descriptor closes are effectively asynchronous, - // as a bunch of hooks run on/after close, and we can't - // necessarily delete the file for a while after close, - // as we need to wait for everybody to be done with - // it. (on Windows, unlike Unix, a file can't be deleted - // if it's open anywhere) - // So try a few times but ultimately just leave a - // "foo.jpg.deleted" marker file to note that it's - // deleted and we clean it up later. + // Put a retry loop around deletes on Windows. + // + // Windows file descriptor closes are effectively asynchronous, + // as a bunch of hooks run on/after close, + // and we can't necessarily delete the file for a while after close, + // as we need to wait for everybody to be done with it. + // On Windows, unlike Unix, a file can't be deleted if it's open anywhere. + // So try a few times but ultimately just leave a "foo.jpg.deleted" + // marker file to note that it's deleted and we clean it up later. if runtime.GOOS == "windows" { if bo == nil { bo = backoff.NewBackoff("delete-retry", logf, 1*time.Second) diff --git a/taildrop/taildrop.go b/taildrop/taildrop.go index e3e1f5e78..ceda9cd39 100644 --- a/taildrop/taildrop.go +++ b/taildrop/taildrop.go @@ -135,7 +135,7 @@ func (opts ManagerOptions) New() *Manager { opts.SendFileNotify = func() {} } m := &Manager{opts: opts} - m.deleter.Init(opts.Logf, opts.Clock, func(string) {}, opts.Dir) + m.deleter.Init(m, func(string) {}) m.emptySince.Store(-1) // invalidate this cache return m } @@ -250,10 +250,6 @@ func (m *Manager) IncomingFiles() []ipn.PartialFile { return files } -// redacted is a fake path name we use in errors, to avoid -// accidentally logging actual filenames anywhere. -const redacted = "redacted" - type redactedError struct { msg string inner error @@ -270,6 +266,7 @@ func (re *redactedError) Unwrap() error { func redactString(s string) string { hash := adler32.Checksum([]byte(s)) + const redacted = "redacted" var buf [len(redacted) + len(".12345678")]byte b := append(buf[:0], []byte(redacted)...) b = append(b, '.')