From c18ba4470b452112b83975f042705e950ef7d232 Mon Sep 17 00:00:00 2001 From: Simon Law Date: Thu, 10 Jul 2025 22:15:55 -0700 Subject: [PATCH] ipn/ipnlocal: add traffic steering support to exit-node suggestions (#16527) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When `tailscale exit-node suggest` contacts the LocalAPI for a suggested exit node, the client consults its netmap for peers that contain the `suggest-exit-node` peercap. It currently uses a series of heuristics to determine the exit node to suggest. When the `traffic-steering` feature flag is enabled on its tailnet, the client will defer to Control’s priority scores for a particular peer. These scores, in `tailcfg.Hostinfo.Location.Priority`, were historically only used for Mullvad exit nodes, but they have now been extended to score any peer that could host a redundant resource. Client capability version 119 is the earliest client that understands these traffic steering scores. Control tells the client to switch to rely on these scores by adding `tailcfg.NodeAttrTrafficSteering` to its `AllCaps`. Updates tailscale/corp#29966 Signed-off-by: Simon Law --- ipn/ipnlocal/local.go | 134 +++++++++++- ipn/ipnlocal/local_test.go | 417 +++++++++++++++++++++++++++++++++++++ tailcfg/tailcfg.go | 3 +- 3 files changed, 546 insertions(+), 8 deletions(-) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 48eceb36c..4ed012f2e 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -7675,13 +7675,10 @@ func allowedAutoRoute(ipp netip.Prefix) bool { var ErrNoPreferredDERP = errors.New("no preferred DERP, try again later") -// suggestExitNodeLocked computes a suggestion based on the current netmap and last netcheck report. If -// there are multiple equally good options, one is selected at random, so the result is not stable. To be -// eligible for consideration, the peer must have NodeAttrSuggestExitNode in its CapMap. 
-// -// Currently, peers with a DERP home are preferred over those without (typically this means Mullvad). -// Peers are selected based on having a DERP home that is the lowest latency to this device. For peers -// without a DERP home, we look for geographic proximity to this device's DERP home. +// suggestExitNodeLocked computes a suggestion based on the current netmap and +// other optional factors. If there are multiple equally good options, one may +// be selected at random, so the result is not stable. To be eligible for +// consideration, the peer must have NodeAttrSuggestExitNode in its CapMap. // // b.mu.lock() must be held. func (b *LocalBackend) suggestExitNodeLocked() (response apitype.ExitNodeSuggestionResponse, err error) { @@ -7743,7 +7740,32 @@ func fillAllowedSuggestions() set.Set[tailcfg.StableNodeID] { return s } +// suggestExitNode returns a suggestion for a reasonably good exit node based on +// the current netmap and the previous suggestion. func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) { + switch { + case nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering): + // The traffic-steering feature flag is enabled on this tailnet. + return suggestExitNodeUsingTrafficSteering(nb, prevSuggestion, allowList) + default: + return suggestExitNodeUsingDERP(report, nb, prevSuggestion, selectRegion, selectNode, allowList) + } +} + +// suggestExitNodeUsingDERP is the classic algorithm used to suggest exit nodes, +// before traffic steering was implemented. This handles the plain failover +// case, in addition to the optional Regional Routing. +// +// It computes a suggestion based on the current netmap and last netcheck +// report. If there are multiple equally good options, one is selected at +// random, so the result is not stable. 
To be eligible for consideration, the +// peer must have NodeAttrSuggestExitNode in its CapMap. +// +// Currently, peers with a DERP home are preferred over those without (typically +// this means Mullvad). Peers are selected based on having a DERP home that is +// the lowest latency to this device. For peers without a DERP home, we look for +// geographic proximity to this device's DERP home. +func suggestExitNodeUsingDERP(report *netcheck.Report, nb *nodeBackend, prevSuggestion tailcfg.StableNodeID, selectRegion selectRegionFunc, selectNode selectNodeFunc, allowList set.Set[tailcfg.StableNodeID]) (res apitype.ExitNodeSuggestionResponse, err error) { netMap := nb.NetMap() if report == nil || report.PreferredDERP == 0 || netMap == nil || netMap.DERPMap == nil { return res, ErrNoPreferredDERP @@ -7864,6 +7886,104 @@ func suggestExitNode(report *netcheck.Report, nb *nodeBackend, prevSuggestion ta return res, nil } +var ErrNoNetMap = errors.New("no network map, try again later") + +// suggestExitNodeUsingTrafficSteering uses traffic steering priority scores to +// pick one of the best exit nodes. These priorities are provided by Control in +// the node’s [tailcfg.Location]. To be eligible for consideration, the node +// must have NodeAttrSuggestExitNode in its CapMap. 
+func suggestExitNodeUsingTrafficSteering(nb *nodeBackend, prev tailcfg.StableNodeID, allowed set.Set[tailcfg.StableNodeID]) (apitype.ExitNodeSuggestionResponse, error) { + nm := nb.NetMap() + if nm == nil { + return apitype.ExitNodeSuggestionResponse{}, ErrNoNetMap + } + + if !nb.SelfHasCap(tailcfg.NodeAttrTrafficSteering) { + panic("missing traffic-steering capability") + } + + peers := nm.Peers + nodes := make([]tailcfg.NodeView, 0, len(peers)) + + for _, p := range peers { + if !p.Valid() { + continue + } + if allowed != nil && !allowed.Contains(p.StableID()) { + continue + } + if !p.CapMap().Contains(tailcfg.NodeAttrSuggestExitNode) { + continue + } + if !tsaddr.ContainsExitRoutes(p.AllowedIPs()) { + continue + } + if p.StableID() == prev { + // Prevent flapping: since prev is a valid suggestion, + // force prev to be the only valid pick. + nodes = []tailcfg.NodeView{p} + break + } + nodes = append(nodes, p) + } + + var pick tailcfg.NodeView + + scores := make(map[tailcfg.NodeID]int, len(nodes)) + score := func(n tailcfg.NodeView) int { + id := n.ID() + s, ok := scores[id] + if !ok { + s = 0 // score of zero means incomparable + if hi := n.Hostinfo(); hi.Valid() { + if loc := hi.Location(); loc.Valid() { + s = loc.Priority() + } + } + scores[id] = s + } + return s + } + + if len(nodes) > 0 { + // Find the highest scoring exit nodes. + slices.SortFunc(nodes, func(a, b tailcfg.NodeView) int { + return cmp.Compare(score(b), score(a)) // reverse sort + }) + + // Find the top exit nodes, which all have the same score. + topI := len(nodes) + ts := score(nodes[0]) + for i, n := range nodes[1:] { + if score(n) < ts { + // n is the first node with a lower score. + // Set topI so that nodes[:topI] holds the top exit nodes. + topI = i + 1 + break + } + } + + // TODO(sfllaw): add a temperature knob so that this client has + // a chance of picking the next best option. 
+ randSeed := uint64(nm.SelfNode.ID()) + pick = nodes[rands.IntN(randSeed, topI)] + } + + if !pick.Valid() { + return apitype.ExitNodeSuggestionResponse{}, nil + } + res := apitype.ExitNodeSuggestionResponse{ + ID: pick.StableID(), + Name: pick.Name(), + } + if hi := pick.Hostinfo(); hi.Valid() { + if loc := hi.Location(); loc.Valid() { + res.Location = loc + } + } + return res, nil +} + // pickWeighted chooses the node with highest priority given a list of mullvad nodes. func pickWeighted(candidates []tailcfg.NodeView) []tailcfg.NodeView { maxWeight := 0 diff --git a/ipn/ipnlocal/local_test.go b/ipn/ipnlocal/local_test.go index bb7f433c0..0b39c45c2 100644 --- a/ipn/ipnlocal/local_test.go +++ b/ipn/ipnlocal/local_test.go @@ -4229,6 +4229,23 @@ func withLocation(loc tailcfg.LocationView) peerOptFunc { } } +func withLocationPriority(pri int) peerOptFunc { + return func(n *tailcfg.Node) { + var hi *tailcfg.Hostinfo + if n.Hostinfo.Valid() { + hi = n.Hostinfo.AsStruct() + } else { + hi = new(tailcfg.Hostinfo) + } + if hi.Location == nil { + hi.Location = new(tailcfg.Location) + } + hi.Location.Priority = pri + + n.Hostinfo = hi.View() + } +} + func withExitRoutes() peerOptFunc { return func(n *tailcfg.Node) { n.AllowedIPs = append(n.AllowedIPs, tsaddr.ExitRoutes()...) 
@@ -4895,6 +4912,406 @@ func TestSuggestExitNodeLongLatDistance(t *testing.T) { } } +func TestSuggestExitNodeTrafficSteering(t *testing.T) { + city := &tailcfg.Location{ + Country: "Canada", + CountryCode: "CA", + City: "Montreal", + CityCode: "MTR", + Latitude: 45.5053, + Longitude: -73.5525, + } + noLatLng := &tailcfg.Location{ + Country: "Canada", + CountryCode: "CA", + City: "Montreal", + CityCode: "MTR", + } + + selfNode := tailcfg.Node{ + ID: 0, // randomness is seeded off NetMap.SelfNode.ID + Addresses: []netip.Prefix{ + netip.MustParsePrefix("100.64.1.1/32"), + netip.MustParsePrefix("fe70::1/128"), + }, + CapMap: tailcfg.NodeCapMap{ + tailcfg.NodeAttrTrafficSteering: []tailcfg.RawMessage{}, + }, + } + + for _, tt := range []struct { + name string + + netMap *netmap.NetworkMap + lastExit tailcfg.StableNodeID + allowPolicy []tailcfg.StableNodeID + + wantID tailcfg.StableNodeID + wantName string + wantLoc *tailcfg.Location + wantPri int + + wantErr error + }{ + { + name: "no-netmap", + netMap: nil, + wantErr: ErrNoNetMap, + }, + { + name: "no-nodes", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{}, + }, + wantID: "", + }, + { + name: "no-exit-nodes", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1), + }, + }, + wantID: "", + }, + { + name: "exit-node-without-suggestion", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes()), + }, + }, + wantID: "", + }, + { + name: "suggested-exit-node-without-routes", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withSuggest()), + }, + }, + wantID: "", + }, + { + name: "suggested-exit-node", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest()), + }, + }, + wantID: "stable1", + wantName: "peer1", + }, + { + name: 
"many-suggested-exit-nodes", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest()), + makePeer(2, + withExitRoutes(), + withSuggest()), + makePeer(3, + withExitRoutes(), + withSuggest()), + makePeer(4, + withExitRoutes(), + withSuggest()), + }, + }, + wantID: "stable3", + wantName: "peer3", + }, + { + name: "suggested-exit-node-was-last-suggested", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest()), + makePeer(2, + withExitRoutes(), + withSuggest()), + makePeer(3, + withExitRoutes(), + withSuggest()), + makePeer(4, + withExitRoutes(), + withSuggest()), + }, + }, + lastExit: "stable2", // overrides many-suggested-exit-nodes + wantID: "stable2", + wantName: "peer2", + }, + { + name: "suggested-exit-node-was-never-suggested", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest()), + makePeer(2, + withExitRoutes(), + withSuggest()), + makePeer(3, + withExitRoutes(), + withSuggest()), + makePeer(4, + withExitRoutes(), + withSuggest()), + }, + }, + lastExit: "stable10", + wantID: "stable3", // matches many-suggested-exit-nodes + wantName: "peer3", + }, + { + name: "exit-nodes-with-and-without-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocationPriority(1)), + makePeer(2, + withExitRoutes(), + withSuggest()), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantPri: 1, + }, + { + name: "exit-nodes-without-and-with-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest()), + makePeer(2, + withExitRoutes(), + withSuggest(), + withLocationPriority(1)), + }, + }, + wantID: "stable2", + wantName: "peer2", + wantPri: 1, 
+ }, + { + name: "exit-nodes-with-negative-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocationPriority(-1)), + makePeer(2, + withExitRoutes(), + withSuggest(), + withLocationPriority(-2)), + makePeer(3, + withExitRoutes(), + withSuggest(), + withLocationPriority(-3)), + makePeer(4, + withExitRoutes(), + withSuggest(), + withLocationPriority(-4)), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantPri: -1, + }, + { + name: "exit-nodes-no-priority-beats-negative-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocationPriority(-1)), + makePeer(2, + withExitRoutes(), + withSuggest(), + withLocationPriority(-2)), + makePeer(3, + withExitRoutes(), + withSuggest()), + }, + }, + wantID: "stable3", + wantName: "peer3", + }, + { + name: "exit-nodes-same-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocationPriority(1)), + makePeer(2, + withExitRoutes(), + withSuggest(), + withLocationPriority(2)), // top + makePeer(3, + withExitRoutes(), + withSuggest(), + withLocationPriority(1)), + makePeer(4, + withExitRoutes(), + withSuggest(), + withLocationPriority(2)), // top + makePeer(5, + withExitRoutes(), + withSuggest(), + withLocationPriority(2)), // top + makePeer(6, + withExitRoutes(), + withSuggest()), + makePeer(7, + withExitRoutes(), + withSuggest(), + withLocationPriority(2)), // top + }, + }, + wantID: "stable5", + wantName: "peer5", + wantPri: 2, + }, + { + name: "suggested-exit-node-with-city", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocation(city.View())), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantLoc: city, + }, + { + 
name: "suggested-exit-node-with-city-and-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocation(city.View()), + withLocationPriority(1)), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantLoc: city, + wantPri: 1, + }, + { + name: "suggested-exit-node-without-latlng", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocation(noLatLng.View())), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantLoc: noLatLng, + }, + { + name: "suggested-exit-node-without-latlng-with-priority", + netMap: &netmap.NetworkMap{ + SelfNode: selfNode.View(), + Peers: []tailcfg.NodeView{ + makePeer(1, + withExitRoutes(), + withSuggest(), + withLocation(noLatLng.View()), + withLocationPriority(1)), + }, + }, + wantID: "stable1", + wantName: "peer1", + wantLoc: noLatLng, + wantPri: 1, + }, + } { + t.Run(tt.name, func(t *testing.T) { + var allowList set.Set[tailcfg.StableNodeID] + if tt.allowPolicy != nil { + allowList = set.SetOf(tt.allowPolicy) + } + + // HACK: NetMap.AllCaps is populated by Control: + if tt.netMap != nil { + caps := maps.Keys(tt.netMap.SelfNode.CapMap().AsMap()) + tt.netMap.AllCaps = set.SetOf(slices.Collect(caps)) + } + + nb := newNodeBackend(t.Context(), eventbus.New()) + defer nb.shutdown(errShutdown) + nb.SetNetMap(tt.netMap) + + got, err := suggestExitNodeUsingTrafficSteering(nb, tt.lastExit, allowList) + if tt.wantErr == nil && err != nil { + t.Fatalf("err=%v, want nil", err) + } + if tt.wantErr != nil && !errors.Is(err, tt.wantErr) { + t.Fatalf("err=%v, want %v", err, tt.wantErr) + } + + if got.Name != tt.wantName { + t.Errorf("name=%q, want %q", got.Name, tt.wantName) + } + + if got.ID != tt.wantID { + t.Errorf("ID=%q, want %q", got.ID, tt.wantID) + } + + wantLoc := tt.wantLoc + if tt.wantPri != 0 { + if wantLoc == nil { + wantLoc = 
new(tailcfg.Location) + } + wantLoc.Priority = tt.wantPri + } + if diff := cmp.Diff(got.Location.AsStruct(), wantLoc); diff != "" { + t.Errorf("location mismatch (+want -got)\n%s", diff) + } + }) + } +} + func TestMinLatencyDERPregion(t *testing.T) { tests := []struct { name string diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index ab8add5b8..53c4683c1 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -163,7 +163,8 @@ type CapabilityVersion int // - 116: 2025-05-05: Client serves MagicDNS "AAAA" if NodeAttrMagicDNSPeerAAAA set on self node // - 117: 2025-05-28: Client understands DisplayMessages (structured health messages), but not necessarily PrimaryAction. // - 118: 2025-07-01: Client sends Hostinfo.StateEncrypted to report whether the state file is encrypted at rest (#15830) -const CurrentCapabilityVersion CapabilityVersion = 118 +// - 119: 2025-07-10: Client uses Hostinfo.Location.Priority to prioritize one route over another. +const CurrentCapabilityVersion CapabilityVersion = 119 // ID is an integer ID for a user, node, or login allocated by the // control plane.