ipn/ipnlocal: shut down old control client synchronously on reset

Previously, callers of (*LocalBackend).resetControlClientLocked were supposed
to call Shutdown on the returned controlclient.Client after releasing b.mu.
In #17804, we started calling Shutdown while holding b.mu, which caused
deadlocks during profile switches due to the (*ExecQueue).RunSync implementation.

We first patched this in #18053 by calling Shutdown in a new goroutine,
which avoided the deadlocks but made TestStateMachine flaky because
the shutdown order was no longer guaranteed.

In #18070, we updated (*ExecQueue).RunSync to allow shutting down
the queue without waiting for RunSync to return. With that change,
shutting down the control client while holding b.mu became safe.

Therefore, this PR updates (*LocalBackend).resetControlClientLocked
to shut down the old client synchronously during the reset, instead of
returning it and shifting that responsibility to the callers.

This fixes the flaky tests and simplifies the code.

Fixes #18052

Signed-off-by: Nick Khyl <nickk@tailscale.com>
main
Nick Khyl 16 hours ago committed by Nick Khyl
parent 7bc25f77f4
commit d199ecac80

@ -948,12 +948,8 @@ func (b *LocalBackend) pauseOrResumeControlClientLocked() {
// down, clients switch over to other replicas whilst the existing connections are kept alive for some period of time. // down, clients switch over to other replicas whilst the existing connections are kept alive for some period of time.
func (b *LocalBackend) DisconnectControl() { func (b *LocalBackend) DisconnectControl() {
b.mu.Lock() b.mu.Lock()
cc := b.resetControlClientLocked() defer b.mu.Unlock()
b.mu.Unlock() b.resetControlClientLocked()
if cc != nil {
cc.Shutdown()
}
} }
// linkChange is our network monitor callback, called whenever the network changes. // linkChange is our network monitor callback, called whenever the network changes.
@ -2419,14 +2415,6 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
logf := logger.WithPrefix(b.logf, "Start: ") logf := logger.WithPrefix(b.logf, "Start: ")
b.startOnce.Do(b.initOnce) b.startOnce.Do(b.initOnce)
var clientToShutdown controlclient.Client
defer func() {
if clientToShutdown != nil {
// Shutdown outside of b.mu to avoid deadlocks.
b.goTracker.Go(clientToShutdown.Shutdown)
}
}()
if opts.UpdatePrefs != nil { if opts.UpdatePrefs != nil {
if err := b.checkPrefsLocked(opts.UpdatePrefs); err != nil { if err := b.checkPrefsLocked(opts.UpdatePrefs); err != nil {
return err return err
@ -2469,7 +2457,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error {
// into sync with the minimal changes. But that's not how it // into sync with the minimal changes. But that's not how it
// is right now, which is a sign that the code is still too // is right now, which is a sign that the code is still too
// complicated. // complicated.
clientToShutdown = b.resetControlClientLocked() b.resetControlClientLocked()
httpTestClient := b.httpTestClient httpTestClient := b.httpTestClient
if b.hostinfo != nil { if b.hostinfo != nil {
@ -5810,13 +5798,12 @@ func (b *LocalBackend) setControlClientLocked(cc controlclient.Client) {
b.ignoreControlClientUpdates.Store(cc == nil) b.ignoreControlClientUpdates.Store(cc == nil)
} }
// resetControlClientLocked sets b.cc to nil and returns the old value. If the // resetControlClientLocked sets b.cc to nil and shuts down the previous
// returned value is non-nil, the caller must call Shutdown on it after // control client, if any.
// releasing b.mu. func (b *LocalBackend) resetControlClientLocked() {
func (b *LocalBackend) resetControlClientLocked() controlclient.Client {
syncs.RequiresMutex(&b.mu) syncs.RequiresMutex(&b.mu)
if b.cc == nil { if b.cc == nil {
return nil return
} }
b.resetAuthURLLocked() b.resetAuthURLLocked()
@ -5836,7 +5823,7 @@ func (b *LocalBackend) resetControlClientLocked() controlclient.Client {
} }
prev := b.cc prev := b.cc
b.setControlClientLocked(nil) b.setControlClientLocked(nil)
return prev prev.Shutdown()
} }
// resetAuthURLLocked resets authURL, canceling any pending interactive login. // resetAuthURLLocked resets authURL, canceling any pending interactive login.
@ -6930,10 +6917,7 @@ func (b *LocalBackend) resetForProfileChangeLocked() error {
b.updateFilterLocked(ipn.PrefsView{}) b.updateFilterLocked(ipn.PrefsView{})
// Reset the NetworkMap in the engine // Reset the NetworkMap in the engine
b.e.SetNetworkMap(new(netmap.NetworkMap)) b.e.SetNetworkMap(new(netmap.NetworkMap))
if prevCC := b.resetControlClientLocked(); prevCC != nil { b.resetControlClientLocked()
// Shutdown outside of b.mu to avoid deadlocks.
b.goTracker.Go(prevCC.Shutdown)
}
// TKA errors should not prevent resetting the backend state. // TKA errors should not prevent resetting the backend state.
// However, we should still return the error to the caller. // However, we should still return the error to the caller.
tkaErr := b.initTKALocked() tkaErr := b.initTKALocked()
@ -7012,10 +6996,7 @@ func (b *LocalBackend) ResetAuth() error {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
if prevCC := b.resetControlClientLocked(); prevCC != nil { b.resetControlClientLocked()
// Shutdown outside of b.mu to avoid deadlocks.
b.goTracker.Go(prevCC.Shutdown)
}
if err := b.clearMachineKeyLocked(); err != nil { if err := b.clearMachineKeyLocked(); err != nil {
return err return err
} }

Loading…
Cancel
Save