cmd/tailscale/cli: don't start WatchIPNBus until after up's initial Start

The CLI "up" command is a historical mess, both on the CLI side and
the LocalBackend side. We're getting closer to cleaning it up, but in
the meantime it was again implicated in flaky tests.

In this case, the background goroutine running WatchIPNBus was very
occasionally running enough to get to its StartLoginInteractive call
before the original goroutine did its Start call. That meant
integration tests were very rarely but sometimes logging in with the
default control plane URL out on the internet
(controlplane.tailscale.com) instead of the localhost control server
for tests.

This also might've affected new Headscale etc users on initial "up".

Fixes #11960
Fixes #11962

Change-Id: I36f8817b69267a99271b5ee78cb7dbf0fcc0bd34
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
pull/12024/head
Brad Fitzpatrick 7 months ago committed by Brad Fitzpatrick
parent aadb8d9d21
commit f3d2fd22ef

@ -20,7 +20,6 @@ import (
"sort" "sort"
"strconv" "strconv"
"strings" "strings"
"sync"
"syscall" "syscall"
"time" "time"
@ -477,11 +476,6 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
watchCtx, cancelWatch := context.WithCancel(ctx) watchCtx, cancelWatch := context.WithCancel(ctx)
defer cancelWatch() defer cancelWatch()
watcher, err := localClient.WatchIPNBus(watchCtx, 0)
if err != nil {
return err
}
defer watcher.Close()
go func() { go func() {
interrupt := make(chan os.Signal, 1) interrupt := make(chan os.Signal, 1)
@ -494,29 +488,57 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
}() }()
running := make(chan bool, 1) // gets value once in state ipn.Running running := make(chan bool, 1) // gets value once in state ipn.Running
pumpErr := make(chan error, 1) watchErr := make(chan error, 1)
// localAPIMu should be held while doing mutable LocalAPI calls // Special case: bare "tailscale up" means to just start
// to the backend. In particular, it prevents StartLoginInteractive from // running, if there's ever been a login.
// being called from the watcher goroutine while the Start call from if simpleUp {
// the other goroutine is in progress. _, err := localClient.EditPrefs(ctx, &ipn.MaskedPrefs{
// See https://github.com/tailscale/tailscale/issues/7036#issuecomment-2053771466 Prefs: ipn.Prefs{
// TODO(bradfitz): simplify this once #11649 is cleaned up and Start is WantRunning: true,
// hopefully removed. },
var localAPIMu sync.Mutex WantRunningSet: true,
})
startLoginInteractive := sync.OnceFunc(func() { if err != nil {
localAPIMu.Lock() return err
defer localAPIMu.Unlock() }
localClient.StartLoginInteractive(ctx) } else {
}) if err := localClient.CheckPrefs(ctx, prefs); err != nil {
return err
}
authKey, err := upArgs.getAuthKey()
if err != nil {
return err
}
authKey, err = resolveAuthKey(ctx, authKey, upArgs.advertiseTags)
if err != nil {
return err
}
err = localClient.Start(ctx, ipn.Options{
AuthKey: authKey,
UpdatePrefs: prefs,
})
if err != nil {
return err
}
if upArgs.forceReauth {
localClient.StartLoginInteractive(ctx)
}
}
watcher, err := localClient.WatchIPNBus(watchCtx, ipn.NotifyInitialState)
if err != nil {
return err
}
defer watcher.Close()
go func() { go func() {
var printed bool // whether we've yet printed anything to stdout or stderr var printed bool // whether we've yet printed anything to stdout or stderr
for { for {
n, err := watcher.Next() n, err := watcher.Next()
if err != nil { if err != nil {
pumpErr <- err watchErr <- err
return return
} }
if n.ErrMessage != nil { if n.ErrMessage != nil {
@ -526,7 +548,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
if s := n.State; s != nil { if s := n.State; s != nil {
switch *s { switch *s {
case ipn.NeedsLogin: case ipn.NeedsLogin:
startLoginInteractive() localClient.StartLoginInteractive(ctx)
case ipn.NeedsMachineAuth: case ipn.NeedsMachineAuth:
printed = true printed = true
if env.upArgs.json { if env.upArgs.json {
@ -583,47 +605,6 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
} }
}() }()
// Special case: bare "tailscale up" means to just start
// running, if there's ever been a login.
if simpleUp {
localAPIMu.Lock()
_, err := localClient.EditPrefs(ctx, &ipn.MaskedPrefs{
Prefs: ipn.Prefs{
WantRunning: true,
},
WantRunningSet: true,
})
localAPIMu.Unlock()
if err != nil {
return err
}
} else {
if err := localClient.CheckPrefs(ctx, prefs); err != nil {
return err
}
authKey, err := upArgs.getAuthKey()
if err != nil {
return err
}
authKey, err = resolveAuthKey(ctx, authKey, upArgs.advertiseTags)
if err != nil {
return err
}
localAPIMu.Lock()
err = localClient.Start(ctx, ipn.Options{
AuthKey: authKey,
UpdatePrefs: prefs,
})
localAPIMu.Unlock()
if err != nil {
return err
}
if upArgs.forceReauth {
startLoginInteractive()
}
}
// This whole 'up' mechanism is too complicated and results in // This whole 'up' mechanism is too complicated and results in
// hairy stuff like this select. We're ultimately waiting for // hairy stuff like this select. We're ultimately waiting for
// 'running' to be done, but even in the case where // 'running' to be done, but even in the case where
@ -647,7 +628,7 @@ func runUp(ctx context.Context, cmd string, args []string, upArgs upArgsT) (retE
default: default:
} }
return watchCtx.Err() return watchCtx.Err()
case err := <-pumpErr: case err := <-watchErr:
select { select {
case <-running: case <-running:
return nil return nil

Loading…
Cancel
Save