From b7fe1cea9f17a05d5076c17b95c967013aa1c3d6 Mon Sep 17 00:00:00 2001 From: Alex Chan Date: Mon, 6 Oct 2025 17:17:52 +0100 Subject: [PATCH] cmd/tailscale/cli: only print authURLs and device approval URLs once This patch fixes several issues related to printing login and device approval URLs, especially when `tailscale up` is interrupted: 1. Only print a login URL that will cause `tailscale up` to complete. Don't print expired URLs or URLs from previous login attempts. 2. Print the device approval URL if you run `tailscale up` after previously completing a login, but before approving the device. 3. Use the correct control URL for device approval if you run a bare `tailscale up` after previously completing a login, but before approving the device. 4. Don't print the device approval URL more than once (or at least, not consecutively). Updates tailscale/corp#31476 Updates #17361 ## How these fixes work This patch went through a lot of trial and error, and there may still be bugs! These notes capture the different scenarios and considerations as we wrote it, which are also captured by integration tests. 1. We were getting stale login URLs from the initial IPN state notification. When the IPN watcher was moved to before Start() in c011369, we mistakenly continued to request the initial state. This is only necessary if you start watching after you call Start(), because you may have missed some notifications. By getting the initial state before calling Start(), we'd get a stale login URL. If you clicked that URL, you could complete the login in the control server (if it wasn't expired), but your instance of `tailscale up` would hang, because it's listening for login updates from a different login URL. In this patch, we no longer request the initial state, and so we don't print a stale URL. 2. Once you skip the initial state from IPN, the following sequence: * Run `tailscale up` * Log into a tailnet with device approval * ^C after the device approval URL is printed, but without approving * Run `tailscale up` again means that nothing would ever be printed. `tailscale up` would send tailscaled the pref `WantRunning: true`, but that was already the case so nothing changes. You never get any IPN notifications, and in particular you never get a state change to `NeedsMachineAuth`. This means we'd never print the device approval URL. In this patch, we add a hard-coded rule that if you're doing a simple up (which won't trigger any other IPN notifications) and you start in the `NeedsMachineAuth` state, we print the device approval message without waiting for an IPN notification. 3. Consider the following sequence: * Run `tailscale up --login-server=` * Log into a tailnet with device approval * ^C after the device approval URL is printed, but without approving * Run `tailscale up` again We'd print the device approval URL for the default control server, rather than the real control server, because we were using the `prefs` from the CLI arguments (which are all the defaults) rather than the `curPrefs` (which contain the custom login server). In this patch, we use the `prefs` if the user has specified any settings (and other code will ensure this is a complete set of settings) or `curPrefs` if it's a simple `tailscale up`. 4. Consider the following sequence: you've logged in, but not completed device approval, and you run `down` and `up` in quick succession. * `up`: sees state=NeedsMachineAuth * `up`: sends `{wantRunning: true}`, prints out the device approval URL * `down`: changes state to Stopped * `up`: changes state to Starting * tailscaled: changes state to NeedsMachineAuth * `up`: gets an IPN notification with the state change, and prints a second device approval URL Either URL works, but this is annoying for the user. In this patch, we track whether the last printed URL was the device approval URL, and if so, we skip printing it a second time. Signed-off-by: Alex Chan --- cmd/tailscale/cli/up.go | 48 ++++- tstest/integration/integration_test.go | 181 +++++++++++++++++- tstest/integration/testcontrol/testcontrol.go | 10 +- 3 files changed, 226 insertions(+), 13 deletions(-) diff --git a/cmd/tailscale/cli/up.go b/cmd/tailscale/cli/up.go index 90c9c23af..07e008aab 100644 --- a/cmd/tailscale/cli/up.go +++ b/cmd/tailscale/cli/up.go @@ -357,6 +357,13 @@ func netfilterModeFromFlag(v string) (_ preftype.NetfilterMode, warning string, // It returns simpleUp if we're running a simple "tailscale up" to // transition to running from a previously-logged-in but down state, // without changing any settings. +// +// Note this can also mutate prefs to add implicit preferences for the +// user operator. +// +// TODO(alexc): the name of this function is confusing, and perhaps a +// sign that it's doing too much. Consider refactoring this so it's just +// telling the caller what to do next, but not changing anything itself. func updatePrefs(prefs, curPrefs *ipn.Prefs, env upCheckEnv) (simpleUp bool, justEditMP *ipn.MaskedPrefs, err error) { if !env.upArgs.reset { applyImplicitPrefs(prefs, curPrefs, env) @@ -497,6 +504,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE if err != nil { return err } + effectivePrefs := curPrefs + if cmd == "up" { // "tailscale up" should not be able to change the // profile name. @@ -546,10 +555,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE // or we could miss IPN notifications. // // In particular, if we're doing a force-reauth, we could miss the - // notification with the auth URL we should print for the user. The - // initial state could contain the auth URL, but only if IPN is in the - // NeedsLogin state -- sometimes it's in Starting, and we don't get the URL. - watcher, err := localClient.WatchIPNBus(watchCtx, ipn.NotifyInitialState) + // notification with the auth URL we should print for the user. + watcher, err := localClient.WatchIPNBus(watchCtx, 0) if err != nil { return err } @@ -591,6 +598,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE if err != nil { return err } + effectivePrefs = prefs if upArgs.forceReauth || !st.HaveNodeKey { err := localClient.StartLoginInteractive(ctx) if err != nil { @@ -604,7 +612,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE go func() { var printed bool // whether we've yet printed anything to stdout or stderr - var lastURLPrinted string + lastURLPrinted := "" // If we're doing a force-reauth, we need to get two notifications: // @@ -617,6 +625,15 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE ipnIsRunning := false waitingForKeyChange := upArgs.forceReauth + // If we're doing a simple up (i.e. `tailscale up`, no flags) and + // the initial state is NeedsMachineAuth, then we never receive a + // state notification from ipn, so we print the device approval URL + // immediately. + if simpleUp && st.BackendState == ipn.NeedsMachineAuth.String() { + printed = true + printDeviceApprovalInfo(env.upArgs.json, effectivePrefs, &lastURLPrinted) + } + for { n, err := watcher.Next() if err != nil { @@ -629,11 +646,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE } if s := n.State; s != nil && *s == ipn.NeedsMachineAuth { printed = true - if env.upArgs.json { - printUpDoneJSON(ipn.NeedsMachineAuth, "") - } else { - fmt.Fprintf(Stderr, "\nTo approve your machine, visit (as admin):\n\n\t%s\n\n", prefs.AdminPageURL(policyclient.Get())) - } + printDeviceApprovalInfo(env.upArgs.json, effectivePrefs, &lastURLPrinted) } if s := n.State; s != nil { ipnIsRunning = *s == ipn.Running @@ -737,6 +750,21 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE } } +func printDeviceApprovalInfo(printJson bool, prefs *ipn.Prefs, lastURLPrinted *string) { + if printJson { + printUpDoneJSON(ipn.NeedsMachineAuth, "") + } else { + deviceApprovalURL := prefs.AdminPageURL(policyclient.Get()) + + if lastURLPrinted != nil && deviceApprovalURL == *lastURLPrinted { + return + } + + *lastURLPrinted = deviceApprovalURL + errf("\nTo approve your machine, visit (as admin):\n\n\t%s\n\n", deviceApprovalURL) + } +} + // upWorthWarning reports whether the health check message s is worth warning // about during "tailscale up". Many of the health checks are noisy or confusing // or very ephemeral and happen especially briefly at startup. diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 29a036cd6..2e85bc8be 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -294,13 +294,14 @@ func completeLogin(t *testing.T, control *testcontrol.Server, counter *atomic.In // This handler receives device approval URLs, and approves the device. // // It counts how many URLs it sees, and will fail the test if it -// sees multiple device approval URLs. +// sees multiple device approval URLs, or if you try to approve a device +// with the wrong control server. func completeDeviceApproval(t *testing.T, node *TestNode, counter *atomic.Int32) func(string) error { return func(urlStr string) error { control := node.env.Control nodeKey := node.MustStatus().Self.PublicKey t.Logf("saw device approval URL %q", urlStr) - if control.CompleteDeviceApproval(&nodeKey) { + if control.CompleteDeviceApproval(node.env.ControlURL(), urlStr, &nodeKey) { if counter.Add(1) > 1 { err := errors.New("completed multiple device approval URLs") t.Error(err) @@ -513,6 +514,182 @@ func TestOneNodeUpAuth(t *testing.T) { } } +// Returns true if the error returned by [exec.Run] fails with a non-zero +// exit code, false otherwise. +func isNonZeroExitCode(err error) bool { + if err == nil { + return false + } + + exitError, ok := err.(*exec.ExitError) + if !ok { + return false + } + + return exitError.ExitCode() != 0 +} + +// If we interrupt `tailscale up` and then run it again, we should only +// print a single auth URL. +func TestOneNodeUpInterruptedAuth(t *testing.T) { + tstest.Shard(t) + tstest.Parallel(t) + + env := NewTestEnv(t, ConfigureControl( + func(control *testcontrol.Server) { + control.RequireAuth = true + control.AllNodesSameUser = true + }, + )) + + n := NewTestNode(t, env) + d := n.StartDaemon() + defer d.MustCleanShutdown(t) + + cmdArgs := []string{"up", "--login-server=" + env.ControlURL()} + + // The first time we run the command, we wait for an auth URL to be + // printed, and then we cancel the command -- equivalent to ^C. + // + // At this point, we've connected to control to get an auth URL, + // and printed it in the CLI, but not clicked it. + t.Logf("Running command for the first time: %s", strings.Join(cmdArgs, " ")) + cmd1 := n.Tailscale(cmdArgs...) + + // This handler watches for auth URLs in stdout, then cancels the + // running `tailscale up` CLI command. + cmd1.Stdout = &authURLParserWriter{t: t, authURLFn: func(urlStr string) error { + t.Logf("saw auth URL %q", urlStr) + cmd1.Process.Kill() + return nil + }} + cmd1.Stderr = cmd1.Stdout + + if err := cmd1.Run(); !isNonZeroExitCode(err) { + t.Fatalf("Command did not fail with non-zero exit code: %q", err) + } + + // Because we didn't click the auth URL, we should still be in NeedsLogin. + n.AwaitBackendState("NeedsLogin") + + // The second time we run the command, we click the first auth URL we see + // and check that we log in correctly. + // + // In #17361, there was a bug where we'd print two auth URLs, and you could + // click either auth URL and log in to control, but logging in through the + // first URL would leave `tailscale up` hanging. + // + // Using `authURLHandler` ensures we only print the new, correct auth URL. + // + // If we print both URLs, it will throw an error because it only expects + // to log in with one auth URL. + // + // If we only print the stale auth URL, the test will timeout because + // `tailscale up` will never return. + t.Logf("Running command for the second time: %s", strings.Join(cmdArgs, " ")) + + var authURLCount atomic.Int32 + + cmd2 := n.Tailscale(cmdArgs...) + cmd2.Stdout = &authURLParserWriter{ + t: t, authURLFn: completeLogin(t, env.Control, &authURLCount), + } + cmd2.Stderr = cmd2.Stdout + + if err := cmd2.Run(); err != nil { + t.Fatalf("up: %v", err) + } + + if urls := authURLCount.Load(); urls != 1 { + t.Errorf("Auth URLs completed = %d; want %d", urls, 1) + } + + n.AwaitRunning() +} + +// If we interrupt `tailscale up` and login successfully, but don't +// complete the device approval, we should see the device approval URL +// when we run `tailscale up` a second time. +func TestOneNodeUpInterruptedDeviceApproval(t *testing.T) { + tstest.Shard(t) + tstest.Parallel(t) + + env := NewTestEnv(t, ConfigureControl( + func(control *testcontrol.Server) { + control.RequireAuth = true + control.RequireMachineAuth = true + control.AllNodesSameUser = true + }, + )) + + n := NewTestNode(t, env) + d := n.StartDaemon() + defer d.MustCleanShutdown(t) + + // The first time we run the command, we: + // + // * set a custom login URL + // * wait for an auth URL to be printed + // * click it to complete the login process + // * wait for a device approval URL to be printed + // * cancel the command, equivalent to ^C + // + // At this point, we've logged in to control, but our node isn't + // approved to connect to the tailnet. + cmd1Args := []string{"up", "--login-server=" + env.ControlURL()} + t.Logf("Running command: %s", strings.Join(cmd1Args, " ")) + cmd1 := n.Tailscale(cmd1Args...) + + handler1 := &authURLParserWriter{t: t, + authURLFn: completeLogin(t, env.Control, &atomic.Int32{}), + deviceApprovalURLFn: func(urlStr string) error { + t.Logf("saw device approval URL %q", urlStr) + cmd1.Process.Kill() + return nil + }, + } + cmd1.Stdout = handler1 + cmd1.Stderr = cmd1.Stdout + + if err := cmd1.Run(); !isNonZeroExitCode(err) { + t.Fatalf("Command did not fail with non-zero exit code: %q", err) + } + + // Because we logged in but we didn't complete the device approval, we + // should be in state NeedsMachineAuth. + n.AwaitBackendState("NeedsMachineAuth") + + // The second time we run the command, we expect not to get an auth URL + // and go straight to the device approval URL. We don't need to pass the + // login server, because `tailscale up` should remember our control URL. + cmd2Args := []string{"up"} + t.Logf("Running command: %s", strings.Join(cmd2Args, " ")) + + var deviceApprovalURLCount atomic.Int32 + + cmd2 := n.Tailscale(cmd2Args...) + cmd2.Stdout = &authURLParserWriter{t: t, + authURLFn: func(urlStr string) error { + t.Fatalf("got unexpected auth URL: %q", urlStr) + cmd2.Process.Kill() + return nil + }, + deviceApprovalURLFn: completeDeviceApproval(t, n, &deviceApprovalURLCount), + } + cmd2.Stderr = cmd2.Stdout + + if err := cmd2.Run(); err != nil { + t.Fatalf("up: %v", err) + } + + wantDeviceApprovalURLCount := int32(1) + if n := deviceApprovalURLCount.Load(); n != wantDeviceApprovalURLCount { + t.Errorf("Device approval URLs completed = %d; want %d", n, wantDeviceApprovalURLCount) + } + + n.AwaitRunning() +} + func TestConfigFileAuthKey(t *testing.T) { tstest.SkipOnUnshardedCI(t) tstest.Shard(t) diff --git a/tstest/integration/testcontrol/testcontrol.go b/tstest/integration/testcontrol/testcontrol.go index 58ca956ce..f9a33705b 100644 --- a/tstest/integration/testcontrol/testcontrol.go +++ b/tstest/integration/testcontrol/testcontrol.go @@ -687,7 +687,11 @@ func (s *Server) CompleteAuth(authPathOrURL string) bool { return true } -func (s *Server) CompleteDeviceApproval(nodeKey *key.NodePublic) bool { +// Complete the device approval for this node. +// +// This function returns false if the node does not exist, or you try to +// approve a device against a different control server. +func (s *Server) CompleteDeviceApproval(controlUrl string, urlStr string, nodeKey *key.NodePublic) bool { s.mu.Lock() defer s.mu.Unlock() @@ -696,6 +700,10 @@ func (s *Server) CompleteDeviceApproval(nodeKey *key.NodePublic) bool { return false } + if urlStr != controlUrl+"/admin" { + return false + } + sendUpdate(s.updates[node.ID], updateSelfChanged) node.MachineAuthorized = true