diff --git a/ipn/ipnserver/server.go b/ipn/ipnserver/server.go index a69e43067..3d9c9e3d4 100644 --- a/ipn/ipnserver/server.go +++ b/ipn/ipnserver/server.go @@ -283,7 +283,18 @@ func (s *Server) blockWhileIdentityInUse(ctx context.Context, actor ipnauth.Acto for inUse() { // Check whenever the connection count drops down to zero. ready, cleanup := s.zeroReqWaiter.add(&s.mu, ctx) - <-ready + if inUse() { + // If the server was in use at the time of the initial check, + // but disconnected and was removed from the activeReqs map + // by the time we registered a waiter, the ready channel + // will never be closed, resulting in a deadlock. To avoid + // this, we can check again after registering the waiter. + // + // This method is planned for complete removal as part of the + // multi-user improvements in tailscale/corp#18342, + // and this approach should be fine as a temporary solution. + <-ready + } cleanup() if err := ctx.Err(); err != nil { return err diff --git a/ipn/ipnserver/server_test.go b/ipn/ipnserver/server_test.go index e77901c35..e56ae8dab 100644 --- a/ipn/ipnserver/server_test.go +++ b/ipn/ipnserver/server_test.go @@ -260,6 +260,52 @@ func TestConcurrentOSUserSwitchingOnWindows(t *testing.T) { } } +func TestBlockWhileIdentityInUse(t *testing.T) { + enableLogging := false + setGOOSForTest(t, "windows") + + ctx := context.Background() + server := startDefaultTestIPNServer(t, ctx, enableLogging) + + // connectWaitDisconnectAsUser connects as a user with the specified name + // and keeps the IPN bus watcher alive until the context is canceled. + // It returns a channel that is closed when done. + connectWaitDisconnectAsUser := func(ctx context.Context, name string) <-chan struct{} { + client := server.getClientAs(name) + watcher, cancelWatcher := client.WatchIPNBus(ctx, 0) + + done := make(chan struct{}) + go func() { + defer cancelWatcher() + defer close(done) + for { + _, err := watcher.Next() + if err != nil { + // There's either an error or the request has been canceled. + break + } + } + }() + return done + } + + for range 100 { + // Connect as UserA, and keep the connection alive + // until disconnectUserA is called. + userAContext, disconnectUserA := context.WithCancel(ctx) + userADone := connectWaitDisconnectAsUser(userAContext, "UserA") + disconnectUserA() + // Check if userB can connect. Calling it directly increases + // the likelihood of triggering a deadlock due to a race condition + // in blockWhileIdentityInUse. But the issue also occurs during + // the normal execution path when UserB connects to the IPN server + // while UserA is disconnecting. + userB := server.makeTestUser("UserB", "ClientB") + server.blockWhileIdentityInUse(ctx, userB) + <-userADone + } +} + func setGOOSForTest(tb testing.TB, goos string) { tb.Helper() envknob.Setenv("TS_DEBUG_FAKE_GOOS", goos)