From 2d786821f6c3696746d4c089b47e4feba7afcaab Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Thu, 22 Apr 2021 09:21:30 -0700 Subject: [PATCH] ipn/ipnlocal: put a retry loop around Windows file deletes oh, Windows. Updates tailscale/corp#1626 Signed-off-by: Brad Fitzpatrick --- ipn/ipnlocal/peerapi.go | 44 +++++++++++++++++++++++++----- ipn/ipnlocal/peerapi_test.go | 52 ++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 7 deletions(-) diff --git a/ipn/ipnlocal/peerapi.go b/ipn/ipnlocal/peerapi.go index 5452218a3..5631789a1 100644 --- a/ipn/ipnlocal/peerapi.go +++ b/ipn/ipnlocal/peerapi.go @@ -28,6 +28,7 @@ import ( "inet.af/netaddr" "tailscale.com/client/tailscale/apitype" "tailscale.com/ipn" + "tailscale.com/logtail/backoff" "tailscale.com/net/interfaces" "tailscale.com/syncs" "tailscale.com/tailcfg" @@ -184,15 +185,44 @@ func (s *peerAPIServer) DeleteFile(baseName string) error { if !ok { return errors.New("bad filename") } - err := os.Remove(path) - if err != nil && !os.IsNotExist(err) { - if pe, ok := err.(*os.PathError); ok { - logf := s.b.logf - logf("peerapi: failed to DeleteFile: %v", pe.Err) // without filename + var bo *backoff.Backoff + logf := s.b.logf + t0 := time.Now() + for { + err := os.Remove(path) + if err != nil && !os.IsNotExist(err) { + if pe, ok := err.(*os.PathError); ok { + pe.Path = "redact" + } + // Put a retry loop around deletes on Windows. Windows + // file descriptor closes are effectively asynchronous, + // as a bunch of hooks run on/after close, and we can't + // necessarily delete the file for a while after close, + // as we need to wait for everybody to be done with + // it. (on Windows, unlike Unix, a file can't be deleted + // while open) + // + // TODO(bradfitz): we might instead want to just keep a + // map of logically deleted files and filter them out in + // WaitingFiles/OpenFile. Then we can keep trying this + // delete in the background and/or in response to future + // WaitingFiles/OpenFile calls, and then remove from the + // logicallyDeleted map. But let's start with this retry + // loop. + if runtime.GOOS == "windows" { + if bo == nil { + bo = backoff.NewBackoff("delete-retry", logf, 1*time.Second) + } + if time.Since(t0) < 10*time.Second { + bo.BackOff(context.Background(), err) + continue + } + } + logf("peerapi: failed to DeleteFile: %v", err) + return err } - return err + return nil } - return nil } func (s *peerAPIServer) OpenFile(baseName string) (rc io.ReadCloser, size int64, err error) { diff --git a/ipn/ipnlocal/peerapi_test.go b/ipn/ipnlocal/peerapi_test.go index 4285fd5d9..f1333d6a1 100644 --- a/ipn/ipnlocal/peerapi_test.go +++ b/ipn/ipnlocal/peerapi_test.go @@ -10,6 +10,7 @@ import ( "io" "io/fs" "io/ioutil" + "math/rand" "net/http" "net/http/httptest" "os" @@ -422,3 +423,54 @@ func TestHandlePeerPut(t *testing.T) { }) } } + +// Windows likes to hold on to file descriptors for some indeterminate +// amount of time after you close them and not let you delete them for +// a bit. So test that we work around that sufficiently. +func TestFileDeleteRace(t *testing.T) { + dir := t.TempDir() + ps := &peerAPIServer{ + b: &LocalBackend{ + logf: t.Logf, + netMap: &netmap.NetworkMap{ + SelfNode: &tailcfg.Node{ + Capabilities: []string{tailcfg.CapabilityFileSharing}, + }, + }, + }, + rootDir: dir, + } + ph := &peerAPIHandler{ + isSelf: true, + peerNode: &tailcfg.Node{ + ComputedName: "some-peer-name", + }, + ps: ps, + } + buf := make([]byte, 2<<20) + for i := 0; i < 30; i++ { + rr := httptest.NewRecorder() + ph.ServeHTTP(rr, httptest.NewRequest("PUT", "/v0/put/foo.txt", bytes.NewReader(buf[:rand.Intn(len(buf))]))) + if res := rr.Result(); res.StatusCode != 200 { + t.Fatal(res.Status) + } + wfs, err := ps.WaitingFiles() + if err != nil { + t.Fatal(err) + } + if len(wfs) != 1 { + t.Fatalf("waiting files = %d; want 1", len(wfs)) + } + + if err := ps.DeleteFile("foo.txt"); err != nil { + t.Fatal(err) + } + wfs, err = ps.WaitingFiles() + if err != nil { + t.Fatal(err) + } + if len(wfs) != 0 { + t.Fatalf("waiting files = %d; want 0", len(wfs)) + } + } +}