mirror of https://github.com/tailscale/tailscale/
taildrop: implement asynchronous file deletion (#9844)
File resumption requires keeping partial files around for some time, but we must still eventually delete them if they are never resumed. Thus, we implement asynchronous file deletion, which can spawn a background goroutine to delete the files. We also use the same mechanism for deleting files on Windows, where a file can't be deleted while there is still an open file handle to it. We can enqueue those with the asynchronous file deleter as well.
Updates tailscale/corp#14772
Signed-off-by: Joe Tsai <joetsai@digital-static.net>
pull/9859/head
parent
33bb2bbfe9
commit
c2a551469c
@ -0,0 +1,182 @@
|
|||||||
|
// Copyright (c) Tailscale Inc & AUTHORS
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package taildrop
|
||||||
|
|
||||||
|
import (
|
||||||
|
"container/list"
|
||||||
|
"context"
|
||||||
|
"io/fs"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"tailscale.com/syncs"
|
||||||
|
"tailscale.com/tstime"
|
||||||
|
"tailscale.com/types/logger"
|
||||||
|
)
|
||||||
|
|
||||||
|
// deleteDelay is how long a file stays queued before it is deleted.
// Lowering it removes deleted and partial files sooner; raising it leaves
// partial files on disk longer so that an interrupted transfer has more
// opportunity to be resumed.
const deleteDelay time.Duration = time.Hour
|
||||||
|
|
||||||
|
// fileDeleter manages asynchronous deletion of files after deleteDelay.
|
||||||
|
type fileDeleter struct {
|
||||||
|
logf logger.Logf
|
||||||
|
clock tstime.DefaultClock
|
||||||
|
event func(string) // called for certain events; for testing only
|
||||||
|
dir string
|
||||||
|
|
||||||
|
mu sync.Mutex
|
||||||
|
queue list.List
|
||||||
|
byName map[string]*list.Element
|
||||||
|
|
||||||
|
emptySignal chan struct{} // signal that the queue is empty
|
||||||
|
group syncs.WaitGroup
|
||||||
|
shutdownCtx context.Context
|
||||||
|
shutdown context.CancelFunc
|
||||||
|
}
|
||||||
|
|
||||||
|
// deleteFile is a queue entry describing one file to delete after deleteDelay.
type deleteFile struct {
	// name is the base name of the file within the deleter's directory.
	name string
	// inserted is when the entry was queued. It is reset to the current
	// time whenever a deletion attempt fails, so the retry happens a full
	// deleteDelay later.
	inserted time.Time
}
|
||||||
|
|
||||||
|
func (d *fileDeleter) Init(logf logger.Logf, clock tstime.DefaultClock, event func(string), dir string) {
|
||||||
|
d.logf = logf
|
||||||
|
d.clock = clock
|
||||||
|
d.dir = dir
|
||||||
|
d.event = event
|
||||||
|
|
||||||
|
// From a cold-start, load the list of partial and deleted files.
|
||||||
|
d.byName = make(map[string]*list.Element)
|
||||||
|
d.emptySignal = make(chan struct{})
|
||||||
|
d.shutdownCtx, d.shutdown = context.WithCancel(context.Background())
|
||||||
|
d.group.Go(func() {
|
||||||
|
d.event("start init")
|
||||||
|
defer d.event("end init")
|
||||||
|
rangeDir(dir, func(de fs.DirEntry) bool {
|
||||||
|
switch {
|
||||||
|
case d.shutdownCtx.Err() != nil:
|
||||||
|
return false // terminate early
|
||||||
|
case !de.Type().IsRegular():
|
||||||
|
return true
|
||||||
|
case strings.Contains(de.Name(), partialSuffix):
|
||||||
|
d.Insert(de.Name())
|
||||||
|
case strings.Contains(de.Name(), deletedSuffix):
|
||||||
|
// Best-effort immediate deletion of deleted files.
|
||||||
|
name := strings.TrimSuffix(de.Name(), deletedSuffix)
|
||||||
|
if os.Remove(filepath.Join(dir, name)) == nil {
|
||||||
|
if os.Remove(filepath.Join(dir, de.Name())) == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Otherwise, enqueue the file for later deletion.
|
||||||
|
d.Insert(de.Name())
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert enqueues baseName for eventual deletion.
|
||||||
|
func (d *fileDeleter) Insert(baseName string) {
|
||||||
|
d.mu.Lock()
|
||||||
|
defer d.mu.Unlock()
|
||||||
|
if d.shutdownCtx.Err() != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if _, ok := d.byName[baseName]; ok {
|
||||||
|
return // already queued for deletion
|
||||||
|
}
|
||||||
|
d.byName[baseName] = d.queue.PushBack(&deleteFile{baseName, d.clock.Now()})
|
||||||
|
if d.queue.Len() == 1 {
|
||||||
|
d.group.Go(func() { d.waitAndDelete(deleteDelay) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// waitAndDelete is an asynchronous deletion goroutine.
|
||||||
|
// At most one waitAndDelete routine is ever running at a time.
|
||||||
|
// It is not started unless there is at least one file in the queue.
|
||||||
|
func (d *fileDeleter) waitAndDelete(wait time.Duration) {
|
||||||
|
tc, ch := d.clock.NewTimer(wait)
|
||||||
|
defer tc.Stop() // cleanup the timer resource if we stop early
|
||||||
|
d.event("start waitAndDelete")
|
||||||
|
defer d.event("end waitAndDelete")
|
||||||
|
select {
|
||||||
|
case <-d.shutdownCtx.Done():
|
||||||
|
case <-d.emptySignal:
|
||||||
|
case now := <-ch:
|
||||||
|
d.mu.Lock()
|
||||||
|
defer d.mu.Unlock()
|
||||||
|
|
||||||
|
// Iterate over all files to delete, and delete anything old enough.
|
||||||
|
var next *list.Element
|
||||||
|
var failed []*list.Element
|
||||||
|
for elem := d.queue.Front(); elem != nil; elem = next {
|
||||||
|
next = elem.Next()
|
||||||
|
file := elem.Value.(*deleteFile)
|
||||||
|
if now.Sub(file.inserted) < deleteDelay {
|
||||||
|
break // everything after this is recently inserted
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete the expired file.
|
||||||
|
if name, ok := strings.CutSuffix(file.name, deletedSuffix); ok {
|
||||||
|
if err := os.Remove(filepath.Join(d.dir, name)); err != nil && !os.IsNotExist(err) {
|
||||||
|
d.logf("could not delete: %v", redactError(err))
|
||||||
|
failed = append(failed, elem)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if err := os.Remove(filepath.Join(d.dir, file.name)); err != nil && !os.IsNotExist(err) {
|
||||||
|
d.logf("could not delete: %v", redactError(err))
|
||||||
|
failed = append(failed, elem)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
d.queue.Remove(elem)
|
||||||
|
delete(d.byName, file.name)
|
||||||
|
d.event("deleted " + file.name)
|
||||||
|
}
|
||||||
|
for _, elem := range failed {
|
||||||
|
elem.Value.(*deleteFile).inserted = now // retry after deleteDelay
|
||||||
|
d.queue.MoveToBack(elem)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there are still some files to delete, retry again later.
|
||||||
|
if d.queue.Len() > 0 {
|
||||||
|
file := d.queue.Front().Value.(*deleteFile)
|
||||||
|
retryAfter := deleteDelay - now.Sub(file.inserted)
|
||||||
|
d.group.Go(func() { d.waitAndDelete(retryAfter) })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove dequeues baseName from eventual deletion.
|
||||||
|
func (d *fileDeleter) Remove(baseName string) {
|
||||||
|
d.mu.Lock()
|
||||||
|
defer d.mu.Unlock()
|
||||||
|
if elem := d.byName[baseName]; elem != nil {
|
||||||
|
d.queue.Remove(elem)
|
||||||
|
delete(d.byName, baseName)
|
||||||
|
// Signal to terminate any waitAndDelete goroutines.
|
||||||
|
if d.queue.Len() == 0 {
|
||||||
|
select {
|
||||||
|
case <-d.shutdownCtx.Done():
|
||||||
|
case d.emptySignal <- struct{}{}:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown shuts down the deleter.
|
||||||
|
// It blocks until all goroutines are stopped.
|
||||||
|
func (d *fileDeleter) Shutdown() {
|
||||||
|
d.mu.Lock() // acquire lock to ensure no new goroutines start after shutdown
|
||||||
|
d.shutdown()
|
||||||
|
d.mu.Unlock()
|
||||||
|
d.group.Wait()
|
||||||
|
}
|
@ -0,0 +1,132 @@
|
|||||||
|
// Copyright (c) Tailscale Inc & AUTHORS
|
||||||
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
package taildrop
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/go-cmp/cmp"
|
||||||
|
"tailscale.com/tstest"
|
||||||
|
"tailscale.com/tstime"
|
||||||
|
"tailscale.com/util/must"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestDeleter(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "foo.partial")))
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "bar.partial")))
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "fizz")))
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "fizz.deleted")))
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "buzz.deleted"))) // lacks a matching "buzz" file
|
||||||
|
|
||||||
|
checkDirectory := func(want ...string) {
|
||||||
|
t.Helper()
|
||||||
|
var got []string
|
||||||
|
for _, de := range must.Get(os.ReadDir(dir)) {
|
||||||
|
got = append(got, de.Name())
|
||||||
|
}
|
||||||
|
slices.Sort(got)
|
||||||
|
slices.Sort(want)
|
||||||
|
if diff := cmp.Diff(got, want); diff != "" {
|
||||||
|
t.Fatalf("directory mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
clock := tstest.NewClock(tstest.ClockOpts{Start: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)})
|
||||||
|
advance := func(d time.Duration) {
|
||||||
|
t.Helper()
|
||||||
|
t.Logf("advance: %v", d)
|
||||||
|
clock.Advance(d)
|
||||||
|
}
|
||||||
|
|
||||||
|
eventsChan := make(chan string, 1000)
|
||||||
|
checkEvents := func(want ...string) {
|
||||||
|
t.Helper()
|
||||||
|
tm := time.NewTimer(10 * time.Second)
|
||||||
|
defer tm.Stop()
|
||||||
|
var got []string
|
||||||
|
for range want {
|
||||||
|
select {
|
||||||
|
case event := <-eventsChan:
|
||||||
|
t.Logf("event: %s", event)
|
||||||
|
got = append(got, event)
|
||||||
|
case <-tm.C:
|
||||||
|
t.Fatalf("timed out waiting for event: got %v, want %v", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
slices.Sort(got)
|
||||||
|
slices.Sort(want)
|
||||||
|
if diff := cmp.Diff(got, want); diff != "" {
|
||||||
|
t.Fatalf("events mismatch (-got +want):\n%s", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eventHook := func(event string) { eventsChan <- event }
|
||||||
|
|
||||||
|
var fd fileDeleter
|
||||||
|
fd.Init(t.Logf, tstime.DefaultClock{Clock: clock}, eventHook, dir)
|
||||||
|
defer fd.Shutdown()
|
||||||
|
insert := func(name string) {
|
||||||
|
t.Helper()
|
||||||
|
t.Logf("insert: %v", name)
|
||||||
|
fd.Insert(name)
|
||||||
|
}
|
||||||
|
remove := func(name string) {
|
||||||
|
t.Helper()
|
||||||
|
t.Logf("remove: %v", name)
|
||||||
|
fd.Remove(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
checkEvents("start init")
|
||||||
|
checkEvents("end init", "start waitAndDelete")
|
||||||
|
checkDirectory("foo.partial", "bar.partial", "buzz.deleted")
|
||||||
|
|
||||||
|
advance(deleteDelay / 2)
|
||||||
|
checkDirectory("foo.partial", "bar.partial", "buzz.deleted")
|
||||||
|
advance(deleteDelay / 2)
|
||||||
|
checkEvents("deleted foo.partial", "deleted bar.partial", "deleted buzz.deleted")
|
||||||
|
checkEvents("end waitAndDelete")
|
||||||
|
checkDirectory()
|
||||||
|
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "one.partial")))
|
||||||
|
insert("one.partial")
|
||||||
|
checkEvents("start waitAndDelete")
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "two.partial")))
|
||||||
|
insert("two.partial")
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "three.partial")))
|
||||||
|
insert("three.partial")
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
must.Do(touchFile(filepath.Join(dir, "four.partial")))
|
||||||
|
insert("four.partial")
|
||||||
|
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
checkEvents("deleted one.partial")
|
||||||
|
checkDirectory("two.partial", "three.partial", "four.partial")
|
||||||
|
checkEvents("end waitAndDelete", "start waitAndDelete")
|
||||||
|
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
checkEvents("deleted two.partial")
|
||||||
|
checkDirectory("three.partial", "four.partial")
|
||||||
|
checkEvents("end waitAndDelete", "start waitAndDelete")
|
||||||
|
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
checkEvents("deleted three.partial")
|
||||||
|
checkDirectory("four.partial")
|
||||||
|
checkEvents("end waitAndDelete", "start waitAndDelete")
|
||||||
|
|
||||||
|
advance(deleteDelay / 4)
|
||||||
|
checkEvents("deleted four.partial")
|
||||||
|
checkDirectory()
|
||||||
|
checkEvents("end waitAndDelete")
|
||||||
|
|
||||||
|
insert("wuzz.partial")
|
||||||
|
checkEvents("start waitAndDelete")
|
||||||
|
remove("wuzz.partial")
|
||||||
|
checkEvents("end waitAndDelete")
|
||||||
|
}
|
Loading…
Reference in New Issue