ipn/ipnlocal: do controlclient.Shutdown in a different goroutine

We do not need to wait for it to complete. And we might have to
call Shutdown from callback from the controlclient which might
already be holding a lock that Shutdown requires.

Updates #713

Signed-off-by: Maisem Ali <maisem@tailscale.com>
pull/6315/head
Maisem Ali 2 years ago committed by Maisem Ali
parent e0cadc5496
commit fe81ee62d7

@ -1141,9 +1141,7 @@ func (b *LocalBackend) Start(opts ipn.Options) error {
// into sync with the minimal changes. But that's not how it
// is right now, which is a sign that the code is still too
// complicated.
b.mu.Unlock()
b.cc.Shutdown()
b.mu.Lock()
b.resetControlClientLockedAsync()
}
httpTestClient := b.httpTestClient
@ -1956,7 +1954,7 @@ func (b *LocalBackend) InServerMode() bool {
}
// Login implements Backend.
// As of 2022-02-17, this is only exists for tests.
// As of 2022-11-15, this is only exists for Android.
func (b *LocalBackend) Login(token *tailcfg.Oauth2Token) {
b.mu.Lock()
b.assertClientLocked()
@ -3128,7 +3126,6 @@ func (b *LocalBackend) hasNodeKey() bool {
// its internal state.
func (b *LocalBackend) nextState() ipn.State {
b.mu.Lock()
b.assertClientLocked()
var (
cc = b.cc
netMap = b.netMap
@ -3150,7 +3147,7 @@ func (b *LocalBackend) nextState() ipn.State {
case !wantRunning && !loggedOut && !blocked && b.hasNodeKey():
return ipn.Stopped
case netMap == nil:
if cc.AuthCantContinue() || loggedOut {
if (cc != nil && cc.AuthCantContinue()) || loggedOut {
// Auth was interrupted or waiting for URL visit,
// so it won't proceed without human help.
return ipn.NeedsLogin
@ -3238,6 +3235,18 @@ func (b *LocalBackend) requestEngineStatusAndWait() {
b.statusLock.Unlock()
}
// resetControlClientLockedAsync sets b.cc to nil, and starts a
// goroutine to Shutdown the old client. It does not wait for the
// shutdown to complete.
func (b *LocalBackend) resetControlClientLockedAsync() {
if b.cc == nil {
return
}
go b.cc.Shutdown()
b.cc = nil
b.ccAuto = nil
}
// ResetForClientDisconnect resets the backend for GUI clients running
// in interactive (non-headless) mode. This is currently used only by
// Windows. This causes all state to be cleared, lest an unrelated user
@ -3249,11 +3258,7 @@ func (b *LocalBackend) ResetForClientDisconnect() {
b.mu.Lock()
defer b.mu.Unlock()
b.logf("LocalBackend.ResetForClientDisconnect")
if b.cc != nil {
go b.cc.Shutdown()
b.cc = nil
}
b.resetControlClientLockedAsync()
b.setNetMapLocked(nil)
b.pm.Reset()
b.keyExpired = false

@ -92,29 +92,42 @@ func (nt *notifyThrottler) drain(count int) []ipn.Notify {
// the state machine works as expected.
type mockControl struct {
tb testing.TB
logf logger.Logf
opts controlclient.Options
logfActual logger.Logf
statusFunc func(controlclient.Status)
paused atomic.Bool
mu sync.Mutex
machineKey key.MachinePrivate
persist *persist.Persist
calls []string
authBlocked bool
persist *persist.Persist
machineKey key.MachinePrivate
shutdown chan struct{}
}
func newMockControl(tb testing.TB) *mockControl {
func newClient(tb testing.TB, opts controlclient.Options) *mockControl {
return &mockControl{
tb: tb,
authBlocked: true,
logf: opts.Logf,
opts: opts,
shutdown: make(chan struct{}),
persist: opts.Persist.Clone(),
}
}
func (cc *mockControl) logf(format string, args ...any) {
if cc.logfActual == nil {
return
func (cc *mockControl) assertShutdown(wasPaused bool) {
cc.tb.Helper()
select {
case <-cc.shutdown:
// ok
case <-time.After(500 * time.Millisecond):
cc.tb.Fatalf("timed out waiting for shutdown")
}
if wasPaused {
cc.assertCalls("unpause", "Shutdown")
} else {
cc.assertCalls("Shutdown")
}
cc.logfActual(format, args...)
}
func (cc *mockControl) populateKeys() (newKeys bool) {
@ -143,7 +156,12 @@ func (cc *mockControl) populateKeys() (newKeys bool) {
// send publishes a controlclient.Status notification upstream.
// (In our tests here, upstream is the ipnlocal.Local instance.)
func (cc *mockControl) send(err error, url string, loginFinished bool, nm *netmap.NetworkMap) {
if cc.statusFunc != nil {
if loginFinished {
cc.mu.Lock()
cc.authBlocked = false
cc.mu.Unlock()
}
if cc.opts.Status != nil {
pv := cc.persist.View()
s := controlclient.Status{
URL: url,
@ -156,7 +174,7 @@ func (cc *mockControl) send(err error, url string, loginFinished bool, nm *netma
} else if url == "" && err == nil && nm == nil {
s.LogoutFinished = &empty.Message{}
}
cc.statusFunc(s)
cc.opts.Status(s)
}
}
@ -178,34 +196,16 @@ func (cc *mockControl) assertCalls(want ...string) {
cc.calls = nil
}
// setAuthBlocked changes the return value of AuthCantContinue.
// Auth is blocked if you haven't called Login, the control server hasn't
// provided an auth URL, or it has provided an auth URL and you haven't
// visited it yet.
func (cc *mockControl) setAuthBlocked(blocked bool) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.authBlocked = blocked
}
// Shutdown disconnects the client.
//
// Note that in a normal controlclient, Shutdown would be the last thing you
// do before discarding the object. In this mock, we don't actually discard
// the object, but if you see a call to Shutdown, you should always see a
// call to New right after it, if the object continues to be used.
// (Note that "New" is the ccGen function here; it means ipn.Backend wanted
// to create an entirely new controlclient.)
func (cc *mockControl) Shutdown() {
cc.logf("Shutdown")
cc.called("Shutdown")
close(cc.shutdown)
}
// Login starts a login process.
// Note that in this mock, we don't automatically generate notifications
// about the progress of the login operation. You have to call setAuthBlocked()
// and send() as required by the test.
// Login starts a login process. Note that in this mock, we don't automatically
// generate notifications about the progress of the login operation. You have to
// call send() as required by the test.
func (cc *mockControl) Login(t *tailcfg.Oauth2Token, flags controlclient.LoginFlags) {
cc.logf("Login token=%v flags=%v", t, flags)
cc.called("Login")
@ -213,7 +213,9 @@ func (cc *mockControl) Login(t *tailcfg.Oauth2Token, flags controlclient.LoginFl
interact := (flags & controlclient.LoginInteractive) != 0
cc.logf("Login: interact=%v newKeys=%v", interact, newKeys)
cc.setAuthBlocked(interact || newKeys)
cc.mu.Lock()
defer cc.mu.Unlock()
cc.authBlocked = interact || newKeys
}
func (cc *mockControl) StartLogout() {
@ -228,6 +230,10 @@ func (cc *mockControl) Logout(ctx context.Context) error {
}
func (cc *mockControl) SetPaused(paused bool) {
was := cc.paused.Swap(paused)
if was == paused {
return
}
cc.logf("SetPaused=%v", paused)
if paused {
cc.called("pause")
@ -303,16 +309,10 @@ func TestStateMachine(t *testing.T) {
t.Fatalf("NewLocalBackend: %v", err)
}
cc := newMockControl(t)
cc.statusFunc = b.setClientStatus
var cc, previousCC *mockControl
b.SetControlClientGetterForTesting(func(opts controlclient.Options) (controlclient.Client, error) {
cc.mu.Lock()
cc.opts = opts
cc.logfActual = opts.Logf
cc.authBlocked = true
cc.persist = cc.opts.Persist.Clone()
cc.mu.Unlock()
previousCC = cc
cc = newClient(t, opts)
t.Logf("ccGen: new mockControl.")
cc.called("New")
@ -336,7 +336,7 @@ func TestStateMachine(t *testing.T) {
// Check that it hasn't called us right away.
// The state machine should be idle until we call Start().
cc.assertCalls()
c.Assert(cc, qt.IsNil)
// Start the state machine.
// Since !WantRunning by default, it'll create a controlclient,
@ -346,7 +346,7 @@ func TestStateMachine(t *testing.T) {
c.Assert(b.Start(ipn.Options{}), qt.IsNil)
{
// BUG: strictly, it should pause, not unpause, here, since !WantRunning.
cc.assertCalls("New", "unpause")
cc.assertCalls("New")
nn := notifies.drain(2)
cc.assertCalls()
@ -370,8 +370,8 @@ func TestStateMachine(t *testing.T) {
notifies.expect(2)
c.Assert(b.Start(ipn.Options{}), qt.IsNil)
{
// BUG: strictly, it should pause, not unpause, here, since !WantRunning.
cc.assertCalls("Shutdown", "unpause", "New", "unpause")
previousCC.assertShutdown(false)
cc.assertCalls("New")
nn := notifies.drain(2)
cc.assertCalls()
@ -397,6 +397,7 @@ func TestStateMachine(t *testing.T) {
// (This behaviour is needed so that b.Login() won't
// start connecting to an old account right away, if one
// exists when you launch another login.)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Attempted non-interactive login with no key; indicate that
@ -406,7 +407,7 @@ func TestStateMachine(t *testing.T) {
url1 := "http://localhost:1/1"
cc.send(nil, url1, false, nil)
{
cc.assertCalls("unpause")
cc.assertCalls()
// ...but backend eats that notification, because the user
// didn't explicitly request interactive login yet, and
@ -416,6 +417,7 @@ func TestStateMachine(t *testing.T) {
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[0].Prefs.LoggedOut(), qt.IsFalse)
c.Assert(nn[0].Prefs.WantRunning(), qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Now we'll try an interactive login.
@ -427,9 +429,10 @@ func TestStateMachine(t *testing.T) {
b.StartLoginInteractive()
{
nn := notifies.drain(1)
cc.assertCalls("unpause")
cc.assertCalls()
c.Assert(nn[0].BrowseToURL, qt.IsNotNil)
c.Assert(url1, qt.Equals, *nn[0].BrowseToURL)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Sometimes users press the Login button again, in the middle of
@ -444,6 +447,7 @@ func TestStateMachine(t *testing.T) {
notifies.drain(0)
// backend asks control for another login sequence
cc.assertCalls("Login")
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Provide a new interactive login URL.
@ -452,13 +456,14 @@ func TestStateMachine(t *testing.T) {
url2 := "http://localhost:1/2"
cc.send(nil, url2, false, nil)
{
cc.assertCalls("unpause", "unpause")
cc.assertCalls()
// This time, backend should emit it to the UI right away,
// because the UI is anxiously awaiting a new URL to visit.
nn := notifies.drain(1)
c.Assert(nn[0].BrowseToURL, qt.IsNotNil)
c.Assert(url2, qt.Equals, *nn[0].BrowseToURL)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Pretend that the interactive login actually happened.
@ -467,7 +472,6 @@ func TestStateMachine(t *testing.T) {
// The backend should propagate this upward for the UI.
t.Logf("\n\nLoginFinished")
notifies.expect(3)
cc.setAuthBlocked(false)
cc.persist.LoginName = "user1"
cc.send(nil, "", true, &netmap.NetworkMap{})
{
@ -480,12 +484,13 @@ func TestStateMachine(t *testing.T) {
// wait until it gets into Starting.
// TODO: (Currently this test doesn't detect that bug, but
// it's visible in the logs)
cc.assertCalls("unpause", "unpause", "unpause")
cc.assertCalls()
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(nn[2].State, qt.IsNotNil)
c.Assert(nn[1].Prefs.Persist().LoginName, qt.Equals, "user1")
c.Assert(ipn.NeedsMachineAuth, qt.Equals, *nn[2].State)
c.Assert(ipn.NeedsMachineAuth, qt.Equals, b.State())
}
// Pretend that the administrator has authorized our machine.
@ -502,7 +507,7 @@ func TestStateMachine(t *testing.T) {
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause", "unpause")
cc.assertCalls()
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
}
@ -543,7 +548,7 @@ func TestStateMachine(t *testing.T) {
{
nn := notifies.drain(2)
// BUG: Login isn't needed here. We never logged out.
cc.assertCalls("Login", "unpause", "unpause")
cc.assertCalls("Login", "unpause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
@ -580,7 +585,7 @@ func TestStateMachine(t *testing.T) {
b.Logout()
{
nn := notifies.drain(2)
cc.assertCalls("pause", "StartLogout", "pause")
cc.assertCalls("pause", "StartLogout")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(ipn.Stopped, qt.Equals, *nn[0].State)
@ -593,11 +598,11 @@ func TestStateMachine(t *testing.T) {
// Let's make the logout succeed.
t.Logf("\n\nLogout (async) - succeed")
notifies.expect(3)
cc.setAuthBlocked(true)
cc.send(nil, "", false, nil)
{
previousCC.assertShutdown(true)
nn := notifies.drain(3)
cc.assertCalls("unpause", "unpause", "Shutdown", "unpause", "New", "unpause")
cc.assertCalls("New")
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(*nn[0].State, qt.Equals, ipn.NoState)
c.Assert(nn[1].Prefs, qt.IsNotNil) // emptyPrefs
@ -617,7 +622,7 @@ func TestStateMachine(t *testing.T) {
c.Assert(nn[0].Prefs, qt.IsNotNil) // emptyPrefs
// BUG: the backend has already called StartLogout, and we're
// still logged out. So it shouldn't call it again.
cc.assertCalls("StartLogout", "unpause")
cc.assertCalls("StartLogout")
cc.assertCalls()
c.Assert(b.Prefs().LoggedOut(), qt.IsTrue)
c.Assert(b.Prefs().WantRunning(), qt.IsFalse)
@ -627,7 +632,6 @@ func TestStateMachine(t *testing.T) {
// Let's acknowledge the second logout too.
t.Logf("\n\nLogout2 (async) - succeed")
notifies.expect(0)
cc.setAuthBlocked(true)
cc.send(nil, "", false, nil)
{
notifies.drain(0)
@ -645,7 +649,7 @@ func TestStateMachine(t *testing.T) {
// I guess, since that's supposed to be synchronous.
{
notifies.drain(0)
cc.assertCalls("Logout", "unpause")
cc.assertCalls("Logout")
c.Assert(b.Prefs().LoggedOut(), qt.IsTrue)
c.Assert(b.Prefs().WantRunning(), qt.IsFalse)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
@ -654,7 +658,6 @@ func TestStateMachine(t *testing.T) {
// Generate the third logout event.
t.Logf("\n\nLogout3 (sync) - succeed")
notifies.expect(0)
cc.setAuthBlocked(true)
cc.send(nil, "", false, nil)
{
notifies.drain(0)
@ -676,9 +679,10 @@ func TestStateMachine(t *testing.T) {
notifies.expect(2)
c.Assert(b.Start(ipn.Options{}), qt.IsNil)
{
previousCC.assertShutdown(false)
// BUG: We already called Shutdown(), no need to do it again.
// BUG: don't unpause because we're not logged in.
cc.assertCalls("Shutdown", "unpause", "New", "unpause")
cc.assertCalls("New")
nn := notifies.drain(2)
cc.assertCalls()
@ -693,14 +697,13 @@ func TestStateMachine(t *testing.T) {
b.Login(nil)
t.Logf("\n\nLoginFinished3")
notifies.expect(3)
cc.setAuthBlocked(false)
cc.persist.LoginName = "user2"
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(3)
cc.assertCalls("Login", "unpause", "unpause", "unpause")
cc.assertCalls("Login")
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(nn[1].Prefs.Persist(), qt.IsNotNil)
@ -736,12 +739,14 @@ func TestStateMachine(t *testing.T) {
{
// NOTE: cc.Shutdown() is correct here, since we didn't call
// b.Shutdown() explicitly ourselves.
previousCC.assertShutdown(false)
// Note: unpause happens because ipn needs to get at least one netmap
// on startup, otherwise UIs can't show the node list, login
// name, etc when in state ipn.Stopped.
// Arguably they shouldn't try. But they currently do.
nn := notifies.drain(2)
cc.assertCalls("Shutdown", "unpause", "New", "Login", "unpause")
cc.assertCalls("New", "Login")
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[1].State, qt.IsNotNil)
c.Assert(nn[0].Prefs.WantRunning(), qt.IsFalse)
@ -765,7 +770,7 @@ func TestStateMachine(t *testing.T) {
})
{
notifies.drain(0)
cc.assertCalls("pause", "pause")
cc.assertCalls("pause")
}
// Request connection.
@ -778,7 +783,7 @@ func TestStateMachine(t *testing.T) {
})
{
nn := notifies.drain(2)
cc.assertCalls("Login", "unpause", "unpause")
cc.assertCalls("Login", "unpause")
// BUG: I would expect Prefs to change first, and state after.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
@ -817,7 +822,7 @@ func TestStateMachine(t *testing.T) {
//
// Because the login hasn't yet completed, the old login
// is still valid, so it's correct that we stay paused.
cc.assertCalls("Login", "pause", "pause")
cc.assertCalls("Login")
c.Assert(nn[0].BrowseToURL, qt.IsNotNil)
c.Assert(*nn[0].BrowseToURL, qt.Equals, url3)
}
@ -839,7 +844,7 @@ func TestStateMachine(t *testing.T) {
// and !WantRunning. But since it's a fresh and successful
// new login, WantRunning is true, so there was never a
// reason to pause().
cc.assertCalls("pause", "unpause", "unpause")
cc.assertCalls("unpause")
c.Assert(nn[0].LoginFinished, qt.IsNotNil)
c.Assert(nn[1].Prefs, qt.IsNotNil)
c.Assert(nn[2].State, qt.IsNotNil)
@ -853,35 +858,35 @@ func TestStateMachine(t *testing.T) {
// The last test case is the most common one: restarting when both
// logged in and WantRunning.
t.Logf("\n\nStart5")
notifies.expect(1)
notifies.expect(2)
c.Assert(b.Start(ipn.Options{}), qt.IsNil)
{
// NOTE: cc.Shutdown() is correct here, since we didn't call
// b.Shutdown() ourselves.
cc.assertCalls("Shutdown", "unpause", "New", "Login", "unpause")
previousCC.assertShutdown(false)
cc.assertCalls("New", "Login")
nn := notifies.drain(1)
nn := notifies.drain(2)
cc.assertCalls()
c.Assert(nn[0].Prefs, qt.IsNotNil)
c.Assert(nn[0].Prefs.LoggedOut(), qt.IsFalse)
c.Assert(nn[0].Prefs.WantRunning(), qt.IsTrue)
c.Assert(ipn.NoState, qt.Equals, b.State())
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
}
// Control server accepts our valid key from before.
t.Logf("\n\nLoginFinished5")
notifies.expect(1)
cc.setAuthBlocked(false)
notifies.expect(2)
cc.send(nil, "", true, &netmap.NetworkMap{
MachineStatus: tailcfg.MachineAuthorized,
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause", "unpause")
nn := notifies.drain(2)
cc.assertCalls()
// NOTE: No LoginFinished message since no interactive
// login was needed.
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
c.Assert(nn[1].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[1].State)
// NOTE: No prefs change this time. WantRunning stays true.
// We were in Starting in the first place, so that doesn't
// change either.
@ -895,7 +900,7 @@ func TestStateMachine(t *testing.T) {
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause")
cc.assertCalls()
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.NeedsLogin, qt.Equals, *nn[0].State)
c.Assert(ipn.NeedsLogin, qt.Equals, b.State())
@ -910,7 +915,7 @@ func TestStateMachine(t *testing.T) {
})
{
nn := notifies.drain(1)
cc.assertCalls("unpause", "unpause", "unpause")
cc.assertCalls()
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Starting, qt.Equals, *nn[0].State)
c.Assert(ipn.Starting, qt.Equals, b.State())
@ -921,7 +926,7 @@ func TestStateMachine(t *testing.T) {
b.setWgengineStatus(&wgengine.Status{DERPs: 1, AsOf: time.Now()}, nil)
{
nn := notifies.drain(1)
cc.assertCalls("unpause")
cc.assertCalls()
c.Assert(nn[0].State, qt.IsNotNil)
c.Assert(ipn.Running, qt.Equals, *nn[0].State)
c.Assert(ipn.Running, qt.Equals, b.State())
@ -1017,11 +1022,9 @@ func TestWGEngineStatusRace(t *testing.T) {
b, err := NewLocalBackend(logf, "logid", new(mem.Store), "", nil, eng, 0)
c.Assert(err, qt.IsNil)
cc := newMockControl(t)
var cc *mockControl
b.SetControlClientGetterForTesting(func(opts controlclient.Options) (controlclient.Client, error) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.logfActual = opts.Logf
cc = newClient(t, opts)
return cc, nil
})

Loading…
Cancel
Save