From 41a2aaf1da9be6c939058bdd32e253ab35373c42 Mon Sep 17 00:00:00 2001 From: Alex Chan Date: Tue, 16 Sep 2025 11:22:47 +0100 Subject: [PATCH] cmd/tailscale/cli: fix race condition in `up --force-reauth` This commit fixes a race condition where `tailscale up --force-reauth` would exit prematurely on an already-logged in device. Previously, the CLI would wait for IPN to report the "Running" state and then exit. However, this could happen before the new auth URL was printed, leading to two distinct issues: * **Without seamless key renewal:** The CLI could exit immediately after the `StartLoginInteractive` call, before IPN has time to switch into the "Starting" state or send a new auth URL back to the CLI. * **With seamless key renewal:** IPN stays in the "Running" state throughout the process, so the CLI exits immediately without performing any reauthentication. The fix is to change the CLI's exit condition. Instead of waiting for the "Running" state, if we're doing a `--force-reauth` we now wait to see the node key change, which is a more reliable indicator that a successful authentication has occurred. Updates tailscale/corp#31476 Updates tailscale/tailscale#17108 Signed-off-by: Alex Chan --- cmd/tailscale/cli/up.go | 67 ++++++++++++++++---------- tstest/integration/integration_test.go | 8 --- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/cmd/tailscale/cli/up.go b/cmd/tailscale/cli/up.go index 0a15c8fb7..3c0883ec8 100644 --- a/cmd/tailscale/cli/up.go +++ b/cmd/tailscale/cli/up.go @@ -446,6 +446,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE return fixTailscaledConnectError(err) } origAuthURL := st.AuthURL + origNodeKey := st.Self.PublicKey // printAuthURL reports whether we should print out the // provided auth URL from an IPN notify. @@ -597,13 +598,24 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE } } - running := make(chan bool, 1) + upComplete := make(chan bool, 1) watchErr := make(chan error, 1) go func() { var printed bool // whether we've yet printed anything to stdout or stderr var lastURLPrinted string + // If we're doing a force-reauth, we need to get two notifications: + // + // 1. IPN is running + // 2. The node key has changed + // + // These two notifications arrive separately, and trying to combine them + // has caused unexpected issues elsewhere in `tailscale up`. For now, we + // track them separately. + ipnIsRunning := false + waitingForKeyChange := upArgs.forceReauth + for { n, err := watcher.Next() if err != nil { @@ -614,29 +626,34 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE msg := *n.ErrMessage fatalf("backend error: %v\n", msg) } + if s := n.State; s != nil && *s == ipn.NeedsMachineAuth { + printed = true + if env.upArgs.json { + printUpDoneJSON(ipn.NeedsMachineAuth, "") + } else { + fmt.Fprintf(Stderr, "\nTo approve your machine, visit (as admin):\n\n\t%s\n\n", prefs.AdminPageURL(policyclient.Get())) + } + } if s := n.State; s != nil { - switch *s { - case ipn.NeedsMachineAuth: - printed = true - if env.upArgs.json { - printUpDoneJSON(ipn.NeedsMachineAuth, "") - } else { - fmt.Fprintf(Stderr, "\nTo approve your machine, visit (as admin):\n\n\t%s\n\n", prefs.AdminPageURL(policyclient.Get())) - } - case ipn.Running: - // Done full authentication process - if env.upArgs.json { - printUpDoneJSON(ipn.Running, "") - } else if printed { - // Only need to print an update if we printed the "please click" message earlier. - fmt.Fprintf(Stderr, "Success.\n") - } - select { - case running <- true: - default: - } - cancelWatch() + ipnIsRunning = *s == ipn.Running + } + if n.NetMap != nil && n.NetMap.NodeKey != origNodeKey { + waitingForKeyChange = false + } + if ipnIsRunning && !waitingForKeyChange { + // Done full authentication process + if env.upArgs.json { + printUpDoneJSON(ipn.Running, "") + } else if printed { + // Only need to print an update if we printed the "please click" message earlier. + fmt.Fprintf(Stderr, "Success.\n") + } + select { + case upComplete <- true: + default: } + cancelWatch() + return } if url := n.BrowseToURL; url != nil { authURL := *url @@ -698,18 +715,18 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE timeoutCh = timeoutTimer.C } select { - case <-running: + case <-upComplete: return nil case <-watchCtx.Done(): select { - case <-running: + case <-upComplete: return nil default: } return watchCtx.Err() case err := <-watchErr: select { - case <-running: + case <-upComplete: return nil default: } diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 6e5022edb..fde4ff35a 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -337,14 +337,6 @@ func TestOneNodeUpAuth(t *testing.T) { t.Run(fmt.Sprintf("%s-seamless-%t", tt.name, useSeamlessKeyRenewal), func(t *testing.T) { tstest.Parallel(t) - // TODO(alexc): This test is failing because of a bug in `tailscale up` where - // it waits for ipn to enter the "Running" state. If we're already logged in - // and running, this completes immediately, before we've had a chance to show - // the user the auth URL. - if tt.name == "up-with-force-reauth-after-login" { - t.Skip() - } - env := NewTestEnv(t, ConfigureControl( func(control *testcontrol.Server) { if tt.authKey != "" {