From 8b3e88cd094c745f6e57f8ca53edb16792d3fee2 Mon Sep 17 00:00:00 2001 From: James Tucker Date: Fri, 26 Sep 2025 08:06:39 -0700 Subject: [PATCH] wgengine/magicsock: fix rebind debouncing (#17282) On platforms that are causing EPIPE at a high frequency this is resulting in non-working connections, for example when Apple decides to forcefully close UDP sockets due to an unsoliced packet rejection in the firewall. Too frequent rebinds cause a failure to solicit the endpoints triggering the rebinds, that would normally happen via CallMeMaybe. Updates #14551 Updates tailscale/corp#25648 Signed-off-by: James Tucker --- wgengine/magicsock/magicsock.go | 1 + wgengine/magicsock/magicsock_test.go | 40 ++++++++++++++++++++-------- 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 0d8a1e53a..e3cf249c5 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1563,6 +1563,7 @@ func (c *Conn) maybeRebindOnError(err error) { if c.lastErrRebind.Load().Before(time.Now().Add(-5 * time.Second)) { c.logf("magicsock: performing rebind due to %q", reason) + c.lastErrRebind.Store(time.Now()) c.Rebind() go c.ReSTUN(reason) } else { diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index c6be9129d..1f533ddef 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -27,6 +27,7 @@ import ( "sync/atomic" "syscall" "testing" + "testing/synctest" "time" "unsafe" @@ -3114,18 +3115,35 @@ func TestMaybeRebindOnError(t *testing.T) { } t.Run("no-frequent-rebind", func(t *testing.T) { - if runtime.GOOS != "plan9" { - err := fmt.Errorf("outer err: %w", syscall.EPERM) - conn := newTestConn(t) - defer conn.Close() - conn.lastErrRebind.Store(time.Now().Add(-1 * time.Second)) - before := metricRebindCalls.Value() - conn.maybeRebindOnError(err) - after := metricRebindCalls.Value() - if before != after { - t.Errorf("should not rebind within 5 seconds of last") + synctest.Test(t, func(t *testing.T) { + if runtime.GOOS != "plan9" { + err := fmt.Errorf("outer err: %w", syscall.EPERM) + conn := newTestConn(t) + defer conn.Close() + lastRebindTime := time.Now().Add(-1 * time.Second) + conn.lastErrRebind.Store(lastRebindTime) + before := metricRebindCalls.Value() + conn.maybeRebindOnError(err) + after := metricRebindCalls.Value() + if before != after { + t.Errorf("should not rebind within 5 seconds of last") + } + + // ensure that rebinds are performed and store an updated last + // rebind time. + time.Sleep(6 * time.Second) + + conn.maybeRebindOnError(err) + newTime := conn.lastErrRebind.Load() + if newTime == lastRebindTime { + t.Errorf("expected a rebind to occur") + } + if newTime.Sub(lastRebindTime) < 5*time.Second { + t.Errorf("expected at least 5 seconds between %s and %s", lastRebindTime, newTime) + } } - } + + }) }) }