From 449f46c2070f49f58e52d34a01db35926050bb80 Mon Sep 17 00:00:00 2001 From: Charlotte Brandhorst-Satzkorn <46385858+catzkorn@users.noreply.github.com> Date: Mon, 15 Apr 2024 13:57:55 -0700 Subject: [PATCH] wgengine/magicsock: rebind/restun if a syscall.EPERM error is returned (#11711) We have seen in macOS client logs that the "operation not permitted", a syscall.EPERM error, is being returned when traffic is attempted to be sent. This may be caused by security software on the client. This change will perform a rebind and restun if we receive a syscall.EPERM error on clients running darwin. Rebinds will only be called if we haven't performed one specifically for an EPERM error in the past 5 seconds. Updates #11710 Signed-off-by: Charlotte Brandhorst-Satzkorn --- wgengine/magicsock/magicsock.go | 36 ++++++++++++++++++ wgengine/magicsock/magicsock_test.go | 55 ++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 719fbadec..fbd6e90af 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -18,6 +18,7 @@ import ( "strings" "sync" "sync/atomic" + "syscall" "time" "github.com/tailscale/wireguard-go/conn" @@ -305,6 +306,10 @@ type Conn struct { // getPeerByKey optionally specifies a function to look up a peer's // wireguard state by its public key. If nil, it's not used. getPeerByKey func(key.NodePublic) (_ wgint.Peer, ok bool) + + // lastEPERMRebind tracks the last time a rebind was performed + // after experiencing a syscall.EPERM. + lastEPERMRebind syncs.AtomicValue[time.Time] } // SetDebugLoggingEnabled controls whether spammy debug logging is enabled. @@ -1047,6 +1052,8 @@ func (c *Conn) sendUDPBatch(addr netip.AddrPort, buffs [][]byte) (sent bool, err if errors.As(err, &errGSO) { c.logf("magicsock: %s", errGSO.Error()) err = errGSO.RetryErr + } else { + _ = c.maybeRebindOnError(runtime.GOOS, err) } } return err == nil, err @@ -1061,6 +1068,7 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) { sent, err = c.sendUDPStd(ipp, b) if err != nil { metricSendUDPError.Add(1) + _ = c.maybeRebindOnError(runtime.GOOS, err) } else { if sent { metricSendUDP.Add(1) @@ -1069,6 +1077,34 @@ func (c *Conn) sendUDP(ipp netip.AddrPort, b []byte) (sent bool, err error) { return } +// maybeRebindOnError performs a rebind and restun if the error is defined and +// any conditionals are met. +func (c *Conn) maybeRebindOnError(os string, err error) bool { + switch err { + case syscall.EPERM: + why := "operation-not-permitted-rebind" + switch os { + // We currently will only rebind and restun on a syscall.EPERM if it is experienced + // on a client running darwin. + // TODO(charlotte, raggi): expand os options if required. + case "darwin": + // TODO(charlotte): implement a backoff, so we don't end up in a rebind loop for persistent + // EPERMs. + if c.lastEPERMRebind.Load().Before(time.Now().Add(-5 * time.Second)) { + c.logf("magicsock: performing %q", why) + c.lastEPERMRebind.Store(time.Now()) + c.Rebind() + go c.ReSTUN(why) + return true + } + default: + c.logf("magicsock: not performing %q", why) + return false + } + } + return false +} + // sendUDP sends UDP packet b to addr. // See sendAddr's docs on the return value meanings. func (c *Conn) sendUDPStd(addr netip.AddrPort, b []byte) (sent bool, err error) { diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index a4ed26d09..7605bdce9 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -23,6 +23,7 @@ import ( "strings" "sync" "sync/atomic" + "syscall" "testing" "time" "unsafe" @@ -3134,3 +3135,57 @@ func TestMaybeSetNearestDERP(t *testing.T) { }) } } + +func TestMaybeRebindOnError(t *testing.T) { + tstest.PanicOnLog() + tstest.ResourceCheck(t) + + t.Run("darwin should rebind", func(t *testing.T) { + conn, err := NewConn(Options{ + EndpointsFunc: func(eps []tailcfg.Endpoint) {}, + Logf: t.Logf, + }) + if err != nil { + t.Fatal(err) + } + defer conn.Close() + + rebound := conn.maybeRebindOnError("darwin", syscall.EPERM) + if !rebound { + t.Errorf("darwin should rebind on syscall.EPERM") + } + }) + + t.Run("linux should not rebind", func(t *testing.T) { + conn, err := NewConn(Options{ + EndpointsFunc: func(eps []tailcfg.Endpoint) {}, + Logf: t.Logf, + }) + if err != nil { + t.Fatal(err) + } + defer conn.Close() + + rebound := conn.maybeRebindOnError("linux", syscall.EPERM) + if rebound { + t.Errorf("linux should not rebind on syscall.EPERM") + } + }) + + t.Run("should not rebind if recently rebind recently performed", func(t *testing.T) { + conn, err := NewConn(Options{ + EndpointsFunc: func(eps []tailcfg.Endpoint) {}, + Logf: t.Logf, + }) + if err != nil { + t.Fatal(err) + } + defer conn.Close() + + conn.lastEPERMRebind.Store(time.Now().Add(-1 * time.Second)) + rebound := conn.maybeRebindOnError("darwin", syscall.EPERM) + if rebound { + t.Errorf("darwin should not rebind on syscall.EPERM within 5 seconds of last") + } + }) +}