From f072d017bd8241675aa946a27fc1827f570435cb Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Fri, 8 Mar 2024 12:32:15 -0500 Subject: [PATCH] wgengine/magicsock: don't change DERP home when not connected to control This pretty much always results in an outage because peers won't discover our new home region and thus won't be able to establish connectivity. Updates tailscale/corp#18095 Signed-off-by: Andrew Dunham Change-Id: Ic0d09133f198b528dd40c6383b16d7663d9d37a7 --- wgengine/magicsock/derp.go | 55 ++++++++++++- wgengine/magicsock/magicsock.go | 11 +-- wgengine/magicsock/magicsock_test.go | 117 +++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 11 deletions(-) diff --git a/wgengine/magicsock/derp.go b/wgengine/magicsock/derp.go index 947466d2c..9ef728873 100644 --- a/wgengine/magicsock/derp.go +++ b/wgengine/magicsock/derp.go @@ -23,6 +23,7 @@ import ( "tailscale.com/health" "tailscale.com/logtail/backoff" "tailscale.com/net/dnscache" + "tailscale.com/net/netcheck" "tailscale.com/net/tsaddr" "tailscale.com/syncs" "tailscale.com/tailcfg" @@ -31,6 +32,7 @@ import ( "tailscale.com/types/logger" "tailscale.com/util/mak" "tailscale.com/util/sysresources" + "tailscale.com/util/testenv" ) // useDerpRoute reports whether magicsock should enable the DERP @@ -85,7 +87,10 @@ type activeDerp struct { createTime time.Time } -var processStartUnixNano = time.Now().UnixNano() +var ( + processStartUnixNano = time.Now().UnixNano() + pickDERPFallbackForTests func() int +) // pickDERPFallback returns a non-zero but deterministic DERP node to // connect to. This is only used if netcheck couldn't find the @@ -121,11 +126,59 @@ func (c *Conn) pickDERPFallback() int { return c.myDerp } + if pickDERPFallbackForTests != nil { + return pickDERPFallbackForTests() + } + h := fnv.New64() fmt.Fprintf(h, "%p/%d", c, processStartUnixNano) // arbitrary return ids[rand.New(rand.NewSource(int64(h.Sum64()))).Intn(len(ids))] } +// This allows existing tests to pass, but allows us to still test the +// behaviour during tests. +var checkControlHealthDuringNearestDERPInTests = false + +// maybeSetNearestDERP selects and changes the nearest/preferred DERP server +// based on the netcheck report and other heuristics. It returns the DERP +// region that it selected and set (via setNearestDERP). +// +// c.mu must NOT be held. +func (c *Conn) maybeSetNearestDERP(report *netcheck.Report) (preferredDERP int) { + // Don't change our PreferredDERP if we don't have a connection to + // control; if we don't, then we can't inform peers about a DERP home + // change, which breaks all connectivity. Even if this DERP region is + // down, changing our home DERP isn't correct since peers can't + // discover that change. + // + // See https://github.com/tailscale/corp/issues/18095 + // + // For tests, always assume we're connected to control unless we're + // explicitly testing this behaviour. + var connectedToControl bool + if testenv.InTest() && !checkControlHealthDuringNearestDERPInTests { + connectedToControl = true + } else { + connectedToControl = health.GetInPollNetMap() + } + if !connectedToControl { + c.mu.Lock() + defer c.mu.Unlock() + return c.myDerp + } + + preferredDERP = report.PreferredDERP + if preferredDERP == 0 { + // Perhaps UDP is blocked. Pick a deterministic but arbitrary + // one. + preferredDERP = c.pickDERPFallback() + } + if !c.setNearestDERP(preferredDERP) { + preferredDERP = 0 + } + return +} + func (c *Conn) derpRegionCodeLocked(regionID int) string { if c.derpMap == nil { return "" diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index db95ec453..39a8ce528 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -682,16 +682,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) { ni.OSHasIPv6.Set(report.OSHasIPv6) ni.WorkingUDP.Set(report.UDP) ni.WorkingICMPv4.Set(report.ICMPv4) - ni.PreferredDERP = report.PreferredDERP - - if ni.PreferredDERP == 0 { - // Perhaps UDP is blocked. Pick a deterministic but arbitrary - // one. - ni.PreferredDERP = c.pickDERPFallback() - } - if !c.setNearestDERP(ni.PreferredDERP) { - ni.PreferredDERP = 0 - } + ni.PreferredDERP = c.maybeSetNearestDERP(report) ni.FirewallMode = hostinfo.FirewallMode() c.callNetInfoCallback(ni) diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 762ac7d57..a4ed26d09 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -41,9 +41,11 @@ import ( "tailscale.com/derp/derphttp" "tailscale.com/disco" "tailscale.com/envknob" + "tailscale.com/health" "tailscale.com/ipn/ipnstate" "tailscale.com/net/connstats" "tailscale.com/net/netaddr" + "tailscale.com/net/netcheck" "tailscale.com/net/packet" "tailscale.com/net/ping" "tailscale.com/net/stun/stuntest" @@ -3017,3 +3019,118 @@ func TestAddrForPingSizeLocked(t *testing.T) { }) } } + +func TestMaybeSetNearestDERP(t *testing.T) { + derpMap := &tailcfg.DERPMap{ + Regions: map[int]*tailcfg.DERPRegion{ + 1: { + RegionID: 1, + RegionCode: "test", + Nodes: []*tailcfg.DERPNode{ + { + Name: "t1", + RegionID: 1, + HostName: "test-node.unused", + IPv4: "127.0.0.1", + IPv6: "none", + }, + }, + }, + 21: { + RegionID: 21, + RegionCode: "tor", + Nodes: []*tailcfg.DERPNode{ + { + Name: "21b", + RegionID: 21, + HostName: "tor.test-node.unused", + IPv4: "127.0.0.1", + IPv6: "none", + }, + }, + }, + 31: { + RegionID: 31, + RegionCode: "fallback", + Nodes: []*tailcfg.DERPNode{ + { + Name: "31b", + RegionID: 31, + HostName: "fallback.test-node.unused", + IPv4: "127.0.0.1", + IPv6: "none", + }, + }, + }, + }, + } + + // Ensure that our fallback code always picks a deterministic value. + tstest.Replace(t, &pickDERPFallbackForTests, func() int { return 31 }) + + // Actually test this code path. + tstest.Replace(t, &checkControlHealthDuringNearestDERPInTests, true) + + testCases := []struct { + name string + old int + reportDERP int + connectedToControl bool + want int + }{ + { + name: "connected_with_report_derp", + old: 1, + reportDERP: 21, + connectedToControl: true, + want: 21, + }, + { + name: "not_connected_with_report_derp", + old: 1, + reportDERP: 21, + connectedToControl: false, + want: 1, // no change + }, + { + name: "connected_no_derp", + old: 1, + reportDERP: 0, + connectedToControl: true, + want: 1, // no change + }, + { + name: "connected_no_derp_fallback", + old: 0, + reportDERP: 0, + connectedToControl: true, + want: 31, // deterministic fallback + }, + } + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + c := newConn() + c.logf = t.Logf + c.myDerp = tt.old + c.derpMap = derpMap + + report := &netcheck.Report{PreferredDERP: tt.reportDERP} + + oldConnected := health.GetInPollNetMap() + if tt.connectedToControl != oldConnected { + if tt.connectedToControl { + health.GotStreamedMapResponse() + t.Cleanup(health.SetOutOfPollNetMap) + } else { + health.SetOutOfPollNetMap() + t.Cleanup(health.GotStreamedMapResponse) + } + } + + got := c.maybeSetNearestDERP(report) + if got != tt.want { + t.Errorf("got new DERP region %d, want %d", got, tt.want) + } + }) + } +}