From 5994c749a09dd1d79112ee80f23975b43a69d1ea Mon Sep 17 00:00:00 2001 From: James Sanderson Date: Fri, 14 Nov 2025 14:29:45 +0000 Subject: [PATCH] ipn/ipnlocal: add peer API endpoints to Hostinfo on initial client creation Previously we only set this when it updated, which was fine for the first call to Start(), but after that point future updates would be skipped if nothing had changed. If Start() was called again, it would wipe the peer API endpoints and they wouldn't get added back again, breaking exit nodes (and anything else requiring peer API to be advertised). Updates tailscale/corp#27173 Signed-off-by: James Sanderson --- ipn/ipnlocal/local.go | 20 +++++++-- ipn/ipnlocal/state_test.go | 92 +++++++++++++++++++++++++++++++++++--- 2 files changed, 102 insertions(+), 10 deletions(-) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 24ab41735..b774ba8a1 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -2524,7 +2524,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error { if inServerMode := prefs.ForceDaemon(); inServerMode || runtime.GOOS == "windows" { logf("serverMode=%v", inServerMode) } - b.applyPrefsToHostinfoLocked(hostinfo, prefs) + b.applyPrefsToHostinfoLocked(b.hostinfo, prefs) b.updateWarnSync(prefs) persistv := prefs.Persist().AsStruct() @@ -2562,7 +2562,7 @@ func (b *LocalBackend) startLocked(opts ipn.Options) error { Persist: *persistv, ServerURL: serverURL, AuthKey: opts.AuthKey, - Hostinfo: hostinfo, + Hostinfo: b.hostInfoWithServicesLocked(), HTTPTestClient: httpTestClient, DiscoPublicKey: discoPublic, DebugFlags: debugFlags, @@ -4822,6 +4822,17 @@ func (b *LocalBackend) doSetHostinfoFilterServicesLocked() { b.logf("[unexpected] doSetHostinfoFilterServices with nil hostinfo") return } + + hi := b.hostInfoWithServicesLocked() + + cc.SetHostinfo(hi) +} + +// hostInfoWithServicesLocked returns a shallow clone of b.hostinfo with +// services added. +// +// b.mu must be held. +func (b *LocalBackend) hostInfoWithServicesLocked() *tailcfg.Hostinfo { peerAPIServices := b.peerAPIServicesLocked() if b.egg { peerAPIServices = append(peerAPIServices, tailcfg.Service{Proto: "egg", Port: 1}) @@ -4849,7 +4860,7 @@ func (b *LocalBackend) doSetHostinfoFilterServicesLocked() { b.logf("Hostinfo peerAPI ports changed: expected %v, got %v", expectedPorts, actualPorts) } - cc.SetHostinfo(&hi) + return &hi } type portPair struct { @@ -5248,6 +5259,9 @@ func (b *LocalBackend) initPeerAPIListenerLocked() { if allSame { // Nothing to do. b.logf("[v1] initPeerAPIListener: %d netmap addresses match existing listeners", addrs.Len()) + // TODO(zofrex): This is fragile. It doesn't check what's actually in hostinfo, and if + // peerAPIListeners gets out of sync with hostinfo.Services, we won't get back into a good + // state. E.G. see tailscale/corp#27173. return } } diff --git a/ipn/ipnlocal/state_test.go b/ipn/ipnlocal/state_test.go index 0c95ef4fc..d38bca570 100644 --- a/ipn/ipnlocal/state_test.go +++ b/ipn/ipnlocal/state_test.go @@ -136,10 +136,12 @@ type mockControl struct { calls []string authBlocked bool shutdown chan struct{} + + hi *tailcfg.Hostinfo } func newClient(tb testing.TB, opts controlclient.Options) *mockControl { - return &mockControl{ + cc := mockControl{ tb: tb, authBlocked: true, logf: opts.Logf, @@ -148,6 +150,10 @@ func newClient(tb testing.TB, opts controlclient.Options) *mockControl { persist: opts.Persist.Clone(), controlClientID: rand.Int64(), } + if opts.Hostinfo != nil { + cc.SetHostinfoDirect(opts.Hostinfo) + } + return &cc } func (cc *mockControl) assertShutdown(wasPaused bool) { @@ -298,6 +304,11 @@ func (cc *mockControl) AuthCantContinue() bool { func (cc *mockControl) SetHostinfo(hi *tailcfg.Hostinfo) { cc.logf("SetHostinfo: %v", *hi) cc.called("SetHostinfo") + cc.SetHostinfoDirect(hi) +} + +func (cc *mockControl) SetHostinfoDirect(hi *tailcfg.Hostinfo) { + cc.hi = hi } func (cc *mockControl) SetNetInfo(ni *tailcfg.NetInfo) { @@ -1621,7 +1632,7 @@ func runTestSendPreservesAuthURL(t *testing.T, seamless bool) { return cc }) - t.Logf("Start") + t.Log("Start") b.Start(ipn.Options{ UpdatePrefs: &ipn.Prefs{ WantRunning: true, @@ -1629,7 +1640,7 @@ func runTestSendPreservesAuthURL(t *testing.T, seamless bool) { }, }) - t.Logf("LoginFinished") + t.Log("LoginFinished") cc.persist.UserProfile.LoginName = "user1" cc.persist.NodeID = "node1" @@ -1641,13 +1652,13 @@ func runTestSendPreservesAuthURL(t *testing.T, seamless bool) { SelfNode: (&tailcfg.Node{MachineAuthorized: true}).View(), }}) - t.Logf("Running") + t.Log("Running") b.setWgengineStatus(&wgengine.Status{AsOf: time.Now(), DERPs: 1}, nil) - t.Logf("Re-auth (StartLoginInteractive)") + t.Log("Re-auth (StartLoginInteractive)") b.StartLoginInteractive(t.Context()) - t.Logf("Re-auth (receive URL)") + t.Log("Re-auth (receive URL)") url1 := "https://localhost:1/1" cc.send(sendOpt{url: url1}) @@ -1655,12 +1666,79 @@ func runTestSendPreservesAuthURL(t *testing.T, seamless bool) { // be set, and once .send has completed, any opportunities for a WG engine // status update to trample it have ended as well. if b.authURL == "" { - t.Fatalf("expected authURL to be set") + t.Fatal("expected authURL to be set") } else { t.Log("authURL was set") } } +func TestServicesNotClearedByStart(t *testing.T) { + connect := &ipn.MaskedPrefs{Prefs: ipn.Prefs{WantRunning: true}, WantRunningSet: true} + node1 := buildNetmapWithPeers( + makePeer(1, withName("node-1"), withAddresses(netip.MustParsePrefix("100.64.1.1/32"))), + ) + + var cc *mockControl + lb := newLocalBackendWithTestControl(t, true, func(tb testing.TB, opts controlclient.Options) controlclient.Client { + cc = newClient(t, opts) + return cc + }) + + mustDo(t)(lb.Start(ipn.Options{})) + mustDo2(t)(lb.EditPrefs(connect)) + cc.assertCalls("Login") + + // Simulate authentication and wait for goroutines to finish (so peer + // listeners have been set up and hostinfo updated) + cc.authenticated(node1) + waitForGoroutinesToStop(lb) + + if cc.hi == nil || len(cc.hi.Services) == 0 { + t.Fatal("test setup bug: services should be present") + } + + mustDo(t)(lb.Start(ipn.Options{})) + + if len(cc.hi.Services) == 0 { + t.Error("services should still be present in hostinfo after no-op Start") + } + + lb.initPeerAPIListenerLocked() + waitForGoroutinesToStop(lb) + + // Clearing out services on Start would be less of a problem if they would at + // least come back after authreconfig or any other change, but they don't if + // the addresses in the netmap haven't changed and still match the stored + // peerAPIListeners. + if len(cc.hi.Services) == 0 { + t.Error("services STILL not present after authreconfig") + } +} + +func waitForGoroutinesToStop(lb *LocalBackend) { + goroutineDone := make(chan struct{}) + removeTrackerCallback := lb.goTracker.AddDoneCallback(func() { + select { + case goroutineDone <- struct{}{}: + default: + } + }) + defer removeTrackerCallback() + + for { + if lb.goTracker.RunningGoroutines() == 0 { + return + } + + select { + case <-time.Tick(1 * time.Second): + continue + case <-goroutineDone: + continue + } + } +} + func buildNetmapWithPeers(self tailcfg.NodeView, peers ...tailcfg.NodeView) *netmap.NetworkMap { const ( firstAutoUserID = tailcfg.UserID(10000)