ipn/ipnlocal: fix deadlock in resetControlClientLocked

resetControlClientLocked is called while b.mu was held and
would call cc.Shutdown which would wait for the observer queue
to drain.
However, there may be active callbacks from cc already waiting for
b.mu resulting in a deadlock.

This makes it so that resetControlClientLocked does not call
Shutdown, and instead just returns the value.
It also makes it so that any status received from previous cc
are ignored.

Updates tailscale/corp#12827

Signed-off-by: Maisem Ali <maisem@tailscale.com>
pull/9211/head
Maisem Ali 1 year ago committed by Maisem Ali
parent c6fadd6d71
commit d06a75dcd0

@ -595,7 +595,7 @@ func (c *Auto) sendStatus(who string, err error, url string, nm *netmap.NetworkM
// Launch a new goroutine to avoid blocking the caller while the observer // Launch a new goroutine to avoid blocking the caller while the observer
// does its thing, which may result in a call back into the client. // does its thing, which may result in a call back into the client.
c.observerQueue.Add(func() { c.observerQueue.Add(func() {
c.observer.SetControlClientStatus(new) c.observer.SetControlClientStatus(c, new)
}) })
} }
@ -667,6 +667,7 @@ func (c *Auto) Shutdown() {
direct := c.direct direct := c.direct
if !closed { if !closed {
c.closed = true c.closed = true
c.observerQueue.shutdown()
c.cancelAuthCtxLocked() c.cancelAuthCtxLocked()
c.cancelMapCtxLocked() c.cancelMapCtxLocked()
for _, w := range c.unpauseWaiters { for _, w := range c.unpauseWaiters {

@ -25,6 +25,9 @@ const (
// Client represents a client connection to the control server. // Client represents a client connection to the control server.
// Currently this is done through a pair of polling https requests in // Currently this is done through a pair of polling https requests in
// the Auto client, but that might change eventually. // the Auto client, but that might change eventually.
//
// The Client must be comparable as it is used by the Observer to detect stale
// clients.
type Client interface { type Client interface {
// Shutdown closes this session, which should not be used any further // Shutdown closes this session, which should not be used any further
// afterwards. // afterwards.

@ -104,7 +104,11 @@ type Direct struct {
// Observer is implemented by users of the control client (such as LocalBackend) // Observer is implemented by users of the control client (such as LocalBackend)
// to get notified of changes in the control client's status. // to get notified of changes in the control client's status.
type Observer interface { type Observer interface {
SetControlClientStatus(Status) // SetControlClientStatus is called when the client has a new status to
// report. The Client is provided to allow the Observer to track which
// Client is reporting the status, allowing it to ignore stale status
// reports from previous Clients.
SetControlClientStatus(Client, Status)
} }
type Options struct { type Options struct {

@ -897,9 +897,16 @@ func (b *LocalBackend) SetDecompressor(fn func() (controlclient.Decompressor, er
// SetControlClientStatus is the callback invoked by the control client whenever it posts a new status. // SetControlClientStatus is the callback invoked by the control client whenever it posts a new status.
// Among other things, this is where we update the netmap, packet filters, DNS and DERP maps. // Among other things, this is where we update the netmap, packet filters, DNS and DERP maps.
func (b *LocalBackend) SetControlClientStatus(st controlclient.Status) { func (b *LocalBackend) SetControlClientStatus(c controlclient.Client, st controlclient.Status) {
b.mu.Lock()
if b.cc != c {
b.logf("Ignoring SetControlClientStatus from old client")
b.mu.Unlock()
return
}
// The following do not depend on any data for which we need to lock b. // The following do not depend on any data for which we need to lock b.
if st.Err != nil { if st.Err != nil {
b.mu.Unlock()
if errors.Is(st.Err, io.EOF) { if errors.Is(st.Err, io.EOF) {
b.logf("[v1] Received error: EOF") b.logf("[v1] Received error: EOF")
return return
@ -916,8 +923,6 @@ func (b *LocalBackend) SetControlClientStatus(st controlclient.Status) {
// Track the number of calls // Track the number of calls
currCall := b.numClientStatusCalls.Add(1) currCall := b.numClientStatusCalls.Add(1)
b.mu.Lock()
// Handle node expiry in the netmap // Handle node expiry in the netmap
if st.NetMap != nil { if st.NetMap != nil {
now := b.clock.Now() now := b.clock.Now()
@ -953,7 +958,7 @@ func (b *LocalBackend) SetControlClientStatus(st controlclient.Status) {
// Call ourselves with the current status again; the logic in // Call ourselves with the current status again; the logic in
// setClientStatus will take care of updating the expired field // setClientStatus will take care of updating the expired field
// of peers in the netmap. // of peers in the netmap.
b.SetControlClientStatus(st) b.SetControlClientStatus(c, st)
}) })
} }
} }
@ -1340,16 +1345,14 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
hostinfo.Userspace.Set(b.sys.IsNetstack()) hostinfo.Userspace.Set(b.sys.IsNetstack())
hostinfo.UserspaceRouter.Set(b.sys.IsNetstackRouter()) hostinfo.UserspaceRouter.Set(b.sys.IsNetstackRouter())
if b.cc != nil { // TODO(apenwarr): avoid the need to reinit controlclient.
// TODO(apenwarr): avoid the need to reinit controlclient. // This will trigger a full relogin/reconfigure cycle every
// This will trigger a full relogin/reconfigure cycle every // time a Handle reconnects to the backend. Ideally, we
// time a Handle reconnects to the backend. Ideally, we // would send the new Prefs and everything would get back
// would send the new Prefs and everything would get back // into sync with the minimal changes. But that's not how it
// into sync with the minimal changes. But that's not how it // is right now, which is a sign that the code is still too
// is right now, which is a sign that the code is still too // complicated.
// complicated. prevCC := b.resetControlClientLocked()
b.resetControlClientLockedAsync()
}
httpTestClient := b.httpTestClient httpTestClient := b.httpTestClient
if b.hostinfo != nil { if b.hostinfo != nil {
@ -1429,6 +1432,10 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
debugFlags = append([]string{"netstack"}, debugFlags...) debugFlags = append([]string{"netstack"}, debugFlags...)
} }
if prevCC != nil {
prevCC.Shutdown()
}
// TODO(apenwarr): The only way to change the ServerURL is to // TODO(apenwarr): The only way to change the ServerURL is to
// re-run b.Start(), because this is the only place we create a // re-run b.Start(), because this is the only place we create a
// new controlclient. SetPrefs() allows you to overwrite ServerURL, // new controlclient. SetPrefs() allows you to overwrite ServerURL,
@ -3847,12 +3854,12 @@ func (b *LocalBackend) requestEngineStatusAndWait() {
b.statusLock.Unlock() b.statusLock.Unlock()
} }
// resetControlClientLockedAsync sets b.cc to nil, and starts a // resetControlClientLocked sets b.cc to nil and returns the old value. If the
// goroutine to Shutdown the old client. It does not wait for the // returned value is non-nil, the caller must call Shutdown on it after
// shutdown to complete. // releasing b.mu.
func (b *LocalBackend) resetControlClientLockedAsync() { func (b *LocalBackend) resetControlClientLocked() controlclient.Client {
if b.cc == nil { if b.cc == nil {
return return nil
} }
// When we clear the control client, stop any outstanding netmap expiry // When we clear the control client, stop any outstanding netmap expiry
@ -3868,9 +3875,10 @@ func (b *LocalBackend) resetControlClientLockedAsync() {
// will abort. // will abort.
b.numClientStatusCalls.Add(1) b.numClientStatusCalls.Add(1)
} }
b.cc.Shutdown() prev := b.cc
b.cc = nil b.cc = nil
b.ccAuto = nil b.ccAuto = nil
return prev
} }
// ResetForClientDisconnect resets the backend for GUI clients running // ResetForClientDisconnect resets the backend for GUI clients running
@ -3881,10 +3889,16 @@ func (b *LocalBackend) resetControlClientLockedAsync() {
// when they restart the GUI. // when they restart the GUI.
func (b *LocalBackend) ResetForClientDisconnect() { func (b *LocalBackend) ResetForClientDisconnect() {
defer b.enterState(ipn.Stopped) defer b.enterState(ipn.Stopped)
b.logf("LocalBackend.ResetForClientDisconnect")
b.mu.Lock() b.mu.Lock()
prevCC := b.resetControlClientLocked()
if prevCC != nil {
// Needs to happen without b.mu held.
defer prevCC.Shutdown()
}
defer b.mu.Unlock() defer b.mu.Unlock()
b.logf("LocalBackend.ResetForClientDisconnect")
b.resetControlClientLockedAsync()
b.setNetMapLocked(nil) b.setNetMapLocked(nil)
b.pm.Reset() b.pm.Reset()
b.keyExpired = false b.keyExpired = false
@ -4997,7 +5011,10 @@ func (b *LocalBackend) ListProfiles() []ipn.LoginProfile {
// called to register it as new node. // called to register it as new node.
func (b *LocalBackend) ResetAuth() error { func (b *LocalBackend) ResetAuth() error {
b.mu.Lock() b.mu.Lock()
b.resetControlClientLockedAsync() prevCC := b.resetControlClientLocked()
if prevCC != nil {
defer prevCC.Shutdown() // call must happen after release b.mu
}
if err := b.clearMachineKeyLocked(); err != nil { if err := b.clearMachineKeyLocked(); err != nil {
b.mu.Unlock() b.mu.Unlock()
return err return err

@ -31,7 +31,7 @@ import (
type observerFunc func(controlclient.Status) type observerFunc func(controlclient.Status)
func (f observerFunc) SetControlClientStatus(s controlclient.Status) { func (f observerFunc) SetControlClientStatus(_ controlclient.Client, s controlclient.Status) {
f(s) f(s)
} }

@ -173,7 +173,7 @@ func (cc *mockControl) send(err error, url string, loginFinished bool, nm *netma
} else if url == "" && err == nil && nm == nil { } else if url == "" && err == nil && nm == nil {
s.SetStateForTest(controlclient.StateNotAuthenticated) s.SetStateForTest(controlclient.StateNotAuthenticated)
} }
cc.opts.Observer.SetControlClientStatus(s) cc.opts.Observer.SetControlClientStatus(cc, s)
} }
} }

Loading…
Cancel
Save