From cb3b281e981185b16264fa80fc6d7562b407030e Mon Sep 17 00:00:00 2001 From: Maisem Ali Date: Tue, 5 Sep 2023 10:28:01 -0700 Subject: [PATCH] ipn/ipnlocal: fix race in enterState It would acquire the lock, calculate `nextState`, relase the lock, then call `enterState` which would acquire the lock again. There were obvious races there which could lead to nil panics as seen in a test in a different repo. ``` panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x2 addr=0x70 pc=0x1050f2c7c] goroutine 42240 [running]: tailscale.com/ipn/ipnlocal.(*LocalBackend).enterStateLockedOnEntry(0x14002154e00, 0x6) tailscale.com/ipn/ipnlocal/local.go:3715 +0x30c tailscale.com/ipn/ipnlocal.(*LocalBackend).enterState(0x14002154e00?, 0x14002e3a140?) tailscale.com/ipn/ipnlocal/local.go:3663 +0x8c tailscale.com/ipn/ipnlocal.(*LocalBackend).stateMachine(0x14001f5e280?) tailscale.com/ipn/ipnlocal/local.go:3836 +0x2c tailscale.com/ipn/ipnlocal.(*LocalBackend).setWgengineStatus(0x14002154e00, 0x14002e3a190, {0x0?, 0x0?}) tailscale.com/ipn/ipnlocal/local.go:1193 +0x4d0 tailscale.com/wgengine.(*userspaceEngine).RequestStatus(0x14005d90300) tailscale.com/wgengine/userspace.go:1051 +0x80 tailscale.com/wgengine.NewUserspaceEngine.func2({0x14002e3a0a0, 0x2, 0x140025cce40?}) tailscale.com/wgengine/userspace.go:318 +0x1a0 tailscale.com/wgengine/magicsock.(*Conn).updateEndpoints(0x14002154700, {0x105c13eaf, 0xf}) tailscale.com/wgengine/magicsock/magicsock.go:531 +0x424 created by tailscale.com/wgengine/magicsock.(*Conn).ReSTUN in goroutine 42077 tailscale.com/wgengine/magicsock/magicsock.go:2142 +0x3a4 ``` Updates tailscale/corp#14480 Signed-off-by: Maisem Ali --- ipn/ipnlocal/local.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index b516b12c0..94cfe1e42 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -3753,10 +3753,11 @@ func (b *LocalBackend) NodeKey() key.NodePublic { return b.pm.CurrentPrefs().Persist().PublicNodeKey() } -// nextState returns the state the backend seems to be in, based on +// nextStateLocked returns the state the backend seems to be in, based on // its internal state. -func (b *LocalBackend) nextState() ipn.State { - b.mu.Lock() +// +// b.mu must be held +func (b *LocalBackend) nextStateLocked() ipn.State { var ( cc = b.cc netMap = b.netMap @@ -3772,10 +3773,9 @@ func (b *LocalBackend) nextState() ipn.State { wantRunning = p.WantRunning() loggedOut = p.LoggedOut() } - b.mu.Unlock() switch { - case !wantRunning && !loggedOut && !blocked && b.hasNodeKey(): + case !wantRunning && !loggedOut && !blocked && b.hasNodeKeyLocked(): return ipn.Stopped case netMap == nil: if (cc != nil && cc.AuthCantContinue()) || loggedOut { @@ -3838,7 +3838,8 @@ func (b *LocalBackend) RequestEngineStatus() { // TODO(apenwarr): use a channel or something to prevent reentrancy? // Or maybe just call the state machine from fewer places. func (b *LocalBackend) stateMachine() { - b.enterState(b.nextState()) + b.mu.Lock() + b.enterStateLockedOnEntry(b.nextStateLocked()) } // stopEngineAndWait deconfigures the local network data plane, and @@ -3900,7 +3901,6 @@ func (b *LocalBackend) resetControlClientLocked() controlclient.Client { // don't want to the user to have to reauthenticate in the future // when they restart the GUI. func (b *LocalBackend) ResetForClientDisconnect() { - defer b.enterState(ipn.Stopped) b.logf("LocalBackend.ResetForClientDisconnect") b.mu.Lock() @@ -3909,7 +3909,6 @@ func (b *LocalBackend) ResetForClientDisconnect() { // Needs to happen without b.mu held. defer prevCC.Shutdown() } - defer b.mu.Unlock() b.setNetMapLocked(nil) b.pm.Reset() @@ -3918,6 +3917,7 @@ func (b *LocalBackend) ResetForClientDisconnect() { b.authURLSticky = "" b.activeLogin = "" b.setAtomicValuesFromPrefsLocked(ipn.PrefsView{}) + b.enterStateLockedOnEntry(ipn.Stopped) } func (b *LocalBackend) ShouldRunSSH() bool { return b.sshAtomicBool.Load() && envknob.CanSSHD() }