From e968b0ecd7b6bf30ea496cbbfbb3105237105542 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 6 May 2024 21:33:37 -0700 Subject: [PATCH] cmd/tailscale,controlclient,ipnlocal: fix 'up', deflake tests more The CLI's "up" is kinda chaotic and LocalBackend.Start is kinda chaotic and they both need to be redone/deleted (respectively), but this fixes some buggy behavior meanwhile. We were previously calling StartLoginInteractive (to start the controlclient's RegisterRequest) redundantly in some cases, causing test flakes depending on timing and up's weird state machine. We only need to call StartLoginInteractive in the client if Start itself doesn't. But Start doesn't tell us that. So cheat a bit and a put the information about whether there's a current NodeKey in the ipn.Status. It used to be accessible over LocalAPI via GetPrefs as a private key but we removed that for security. But a bool is fine. So then only call StartLoginInteractive if that bool is false and don't do it in the WatchIPNBus loop. Fixes #12028 Updates #12042 Change-Id: I0923c3f704a9d6afd825a858eb9a63ca7c1df294 Signed-off-by: Brad Fitzpatrick --- cmd/tailscale/cli/up.go | 31 +++++++++++++++++-------- control/controlclient/auto.go | 17 +++++++++++--- control/controlclient/direct.go | 18 ++++++++++++++- ipn/ipnlocal/local.go | 4 ++++ ipn/ipnstate/ipnstate.go | 3 +++ tstest/integration/integration_test.go | 32 ++++++++++++++++++++------ 6 files changed, 85 insertions(+), 20 deletions(-) diff --git a/cmd/tailscale/cli/up.go b/cmd/tailscale/cli/up.go index 18e932e8d..08b336478 100644 --- a/cmd/tailscale/cli/up.go +++ b/cmd/tailscale/cli/up.go @@ -410,6 +410,11 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE // printAuthURL reports whether we should print out the // provided auth URL from an IPN notify. printAuthURL := func(url string) bool { + if url == "" { + // Probably unnecessary but we used to have a bug where tailscaled + // could send an empty URL over the IPN bus. ~Harmless to keep. + return false + } if upArgs.authKeyOrFile != "" { // Issue 1755: when using an authkey, don't // show an authURL that might still be pending @@ -527,8 +532,11 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE if err != nil { return err } - if upArgs.forceReauth { - localClient.StartLoginInteractive(ctx) + if upArgs.forceReauth || !st.HaveNodeKey { + err := localClient.StartLoginInteractive(ctx) + if err != nil { + return err + } } } @@ -540,6 +548,8 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE go func() { var printed bool // whether we've yet printed anything to stdout or stderr + var lastURLPrinted string + for { n, err := watcher.Next() if err != nil { @@ -552,8 +562,6 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE } if s := n.State; s != nil { switch *s { - case ipn.NeedsLogin: - localClient.StartLoginInteractive(ctx) case ipn.NeedsMachineAuth: printed = true if env.upArgs.json { @@ -576,12 +584,17 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE cancelWatch() } } - if url := n.BrowseToURL; url != nil && printAuthURL(*url) { + if url := n.BrowseToURL; url != nil { + authURL := *url + if !printAuthURL(authURL) || authURL == lastURLPrinted { + continue + } printed = true + lastURLPrinted = authURL if upArgs.json { - js := &upOutputJSON{AuthURL: *url, BackendState: st.BackendState} + js := &upOutputJSON{AuthURL: authURL, BackendState: st.BackendState} - q, err := qrcode.New(*url, qrcode.Medium) + q, err := qrcode.New(authURL, qrcode.Medium) if err == nil { png, err := q.PNG(128) if err == nil { @@ -596,9 +609,9 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE outln(string(data)) } } else { - fmt.Fprintf(Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", *url) + fmt.Fprintf(Stderr, "\nTo authenticate, visit:\n\n\t%s\n\n", authURL) if upArgs.qr { - q, err := qrcode.New(*url, qrcode.Medium) + q, err := qrcode.New(authURL, qrcode.Medium) if err != nil { log.Printf("QR code error: %v", err) } else { diff --git a/control/controlclient/auto.go b/control/controlclient/auto.go index a87e96e6e..3c213de9c 100644 --- a/control/controlclient/auto.go +++ b/control/controlclient/auto.go @@ -348,9 +348,14 @@ func (c *Auto) authRoutine() { continue } if url != "" { - // goal.url ought to be empty here. - // However, not all control servers get this right, - // and logging about it here just generates noise. + // goal.url ought to be empty here. However, not all control servers + // get this right, and logging about it here just generates noise. + // + // TODO(bradfitz): I don't follow that comment. Our own testcontrol + // used by tstest/integration hits this path, in fact. + if c.direct.panicOnUse { + panic("tainted client") + } c.mu.Lock() c.urlToVisit = url c.loginGoal = &LoginGoal{ @@ -615,6 +620,9 @@ func (c *Auto) Login(t *tailcfg.Oauth2Token, flags LoginFlags) { if c.closed { return } + if c.direct != nil && c.direct.panicOnUse { + panic("tainted client") + } c.wantLoggedIn = true c.loginGoal = &LoginGoal{ token: t, @@ -632,6 +640,9 @@ func (c *Auto) Logout(ctx context.Context) error { c.wantLoggedIn = false c.loginGoal = nil closed := c.closed + if c.direct != nil && c.direct.panicOnUse { + panic("tainted client") + } c.mu.Unlock() if closed { diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index 99a0fda1e..95b698a14 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -80,6 +80,7 @@ type Direct struct { onClientVersion func(*tailcfg.ClientVersion) // or nil onControlTime func(time.Time) // or nil onTailnetDefaultAutoUpdate func(bool) // or nil + panicOnUse bool // if true, panic if client is used (for testing) dialPlan ControlDialPlanner // can be nil @@ -310,6 +311,9 @@ func NewDirect(opts Options) (*Direct, error) { } c.serverNoiseKey = key.NewMachine().Public() // prevent early error before hitting test client } + if strings.Contains(opts.ServerURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") { + c.panicOnUse = true + } return c, nil } @@ -399,7 +403,7 @@ func (c *Direct) TryLogout(ctx context.Context) error { func (c *Direct) TryLogin(ctx context.Context, t *tailcfg.Oauth2Token, flags LoginFlags) (url string, err error) { if strings.Contains(c.serverURL, "controlplane.tailscale.com") && envknob.Bool("TS_PANIC_IF_HIT_MAIN_CONTROL") { - panic("[unexpected] controlclient: TryLogin called on " + c.serverURL) + panic(fmt.Sprintf("[unexpected] controlclient: TryLogin called on %s; tainted=%v", c.serverURL, c.panicOnUse)) } c.logf("[v1] direct.TryLogin(token=%v, flags=%v)", t != nil, flags) return c.doLoginOrRegen(ctx, loginOpt{Token: t, Flags: flags}) @@ -462,6 +466,9 @@ func (c *Direct) hostInfoLocked() *tailcfg.Hostinfo { } func (c *Direct) doLogin(ctx context.Context, opt loginOpt) (mustRegen bool, newURL string, nks tkatype.MarshaledSignature, err error) { + if c.panicOnUse { + panic("tainted client") + } c.mu.Lock() persist := c.persist.AsStruct() tryingNewKey := c.tryingNewKey @@ -835,6 +842,9 @@ const watchdogTimeout = 120 * time.Second // // If nu is nil, OmitPeers will be set to true. func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu NetmapUpdater) error { + if c.panicOnUse { + panic("tainted client") + } if isStreaming && nu == nil { panic("cb must be non-nil if isStreaming is true") } @@ -1531,6 +1541,9 @@ func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err er } func (c *Direct) DoNoiseRequest(req *http.Request) (*http.Response, error) { + if c.panicOnUse { + panic("tainted client") + } nc, err := c.getNoiseClient() if err != nil { return nil, err @@ -1627,6 +1640,9 @@ func (c *Direct) ReportHealthChange(sys health.Subsystem, sysErr error) { if !ok { return } + if c.panicOnUse { + panic("tainted client") + } req := &tailcfg.HealthChangeRequest{ Subsys: string(sys), NodeKey: nodeKey, diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index b8e46619a..78c9e91a6 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -790,6 +790,7 @@ func (b *LocalBackend) UpdateStatus(sb *ipnstate.StatusBuilder) { s.ClientVersion = b.lastClientVersion } s.Health = b.health.AppendWarnings(s.Health) + s.HaveNodeKey = b.hasNodeKeyLocked() // TODO(bradfitz): move this health check into a health.Warnable // and remove from here. @@ -4605,6 +4606,9 @@ func (b *LocalBackend) resetControlClientLocked() controlclient.Client { return nil } + b.authURL = "" + b.authURLSticky = "" + // When we clear the control client, stop any outstanding netmap expiry // timer; synthesizing a new netmap while we don't have a control // client will break things. diff --git a/ipn/ipnstate/ipnstate.go b/ipn/ipnstate/ipnstate.go index d86bc1d26..869c4b8c6 100644 --- a/ipn/ipnstate/ipnstate.go +++ b/ipn/ipnstate/ipnstate.go @@ -41,6 +41,9 @@ type Status struct { // "Starting", "Running". BackendState string + // HaveNodeKey is whether the current profile has a node key configured. + HaveNodeKey bool `json:",omitempty"` + AuthURL string // current URL provided by control to authorize client TailscaleIPs []netip.Addr // Tailscale IP(s) assigned to this node Self *PeerStatus diff --git a/tstest/integration/integration_test.go b/tstest/integration/integration_test.go index 9e4472cac..1ec5e6390 100644 --- a/tstest/integration/integration_test.go +++ b/tstest/integration/integration_test.go @@ -259,7 +259,7 @@ func TestStateSavedOnStart(t *testing.T) { n1.MustDown() // And change the hostname to something: - if err := n1.Tailscale("up", "--login-server="+n1.env.ControlServer.URL, "--hostname=foo").Run(); err != nil { + if err := n1.Tailscale("up", "--login-server="+n1.env.controlURL(), "--hostname=foo").Run(); err != nil { t.Fatalf("up: %v", err) } @@ -289,9 +289,9 @@ func TestOneNodeUpAuth(t *testing.T) { st := n1.MustStatus() t.Logf("Status: %s", st.BackendState) - t.Logf("Running up --login-server=%s ...", env.ControlServer.URL) + t.Logf("Running up --login-server=%s ...", env.controlURL()) - cmd := n1.Tailscale("up", "--login-server="+env.ControlServer.URL) + cmd := n1.Tailscale("up", "--login-server="+env.controlURL()) var authCountAtomic int32 cmd.Stdout = &authURLParserWriter{fn: func(urlStr string) error { if env.Control.CompleteAuth(urlStr) { @@ -1069,7 +1069,7 @@ func TestAutoUpdateDefaults(t *testing.T) { // Should not be changed even if sent "true" later. sendAndCheckDefault(t, n, true, false) // But can be changed explicitly by the user. - if out, err := n.Tailscale("set", "--auto-update").CombinedOutput(); err != nil { + if out, err := n.TailscaleForOutput("set", "--auto-update").CombinedOutput(); err != nil { t.Fatalf("failed to enable auto-update on node: %v\noutput: %s", err, out) } sendAndCheckDefault(t, n, false, true) @@ -1083,7 +1083,7 @@ func TestAutoUpdateDefaults(t *testing.T) { // Should not be changed even if sent "false" later. sendAndCheckDefault(t, n, false, true) // But can be changed explicitly by the user. - if out, err := n.Tailscale("set", "--auto-update=false").CombinedOutput(); err != nil { + if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil { t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out) } sendAndCheckDefault(t, n, true, false) @@ -1093,7 +1093,7 @@ func TestAutoUpdateDefaults(t *testing.T) { desc: "user-sets-first", run: func(t *testing.T, n *testNode) { // User sets auto-update first, before receiving defaults. - if out, err := n.Tailscale("set", "--auto-update=false").CombinedOutput(); err != nil { + if out, err := n.TailscaleForOutput("set", "--auto-update=false").CombinedOutput(); err != nil { t.Fatalf("failed to disable auto-update on node: %v\noutput: %s", err, out) } // Defaults sent from control should be ignored. @@ -1135,6 +1135,16 @@ type testEnv struct { TrafficTrapServer *httptest.Server } +// controlURL returns e.ControlServer.URL, panicking if it's the empty string, +// which it should never be in tests. +func (e *testEnv) controlURL() string { + s := e.ControlServer.URL + if s == "" { + panic("control server not set") + } + return s +} + type testEnvOpt interface { modifyTestEnv(*testEnv) } @@ -1183,6 +1193,7 @@ func newTestEnv(t testing.TB, opts ...testEnvOpt) *testEnv { e.TrafficTrapServer.Close() e.ControlServer.Close() }) + t.Logf("control URL: %v", e.controlURL()) return e } @@ -1445,7 +1456,7 @@ func (n *testNode) MustUp(extraArgs ...string) { t.Helper() args := []string{ "up", - "--login-server=" + n.env.ControlServer.URL, + "--login-server=" + n.env.controlURL(), "--reset", } args = append(args, extraArgs...) @@ -1585,6 +1596,13 @@ func (n *testNode) AwaitNeedsLogin() { } } +func (n *testNode) TailscaleForOutput(arg ...string) *exec.Cmd { + cmd := n.Tailscale(arg...) + cmd.Stdout = nil + cmd.Stderr = nil + return cmd +} + // Tailscale returns a command that runs the tailscale CLI with the provided arguments. // It does not start the process. func (n *testNode) Tailscale(arg ...string) *exec.Cmd {