From 0d4a0bf60e07a342f5779e669df8541af28f6580 Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Thu, 7 Oct 2021 09:43:37 +0900 Subject: [PATCH] magicsock: if STUN failed to send before, rebind before STUNning again. On iOS (and possibly other platforms), sometimes our UDP socket would get stuck in a state where it was bound to an invalid interface (or no interface) after a network reconfiguration. We can detect this by actually checking the error codes from sending our STUN packets. If we completely fail to send any STUN packets, we know something is very broken. So on the next STUN attempt, let's rebind the UDP socket to try to correct any problems. This fixes a problem where iOS would sometimes get stuck using DERP instead of direct connections until the backend was restarted. Fixes #2994 Signed-off-by: Avery Pennarun --- net/netcheck/netcheck.go | 33 ++++++++++++++++++++++++++------- net/netcheck/netcheck_test.go | 9 ++++++++- wgengine/magicsock/magicsock.go | 10 ++++++++++ wgengine/userspace.go | 3 ++- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/net/netcheck/netcheck.go b/net/netcheck/netcheck.go index 12b7b9ce7..2516e2e6f 100644 --- a/net/netcheck/netcheck.go +++ b/net/netcheck/netcheck.go @@ -69,11 +69,20 @@ const ( ) type Report struct { - UDP bool // UDP works - IPv6 bool // IPv6 works - IPv4 bool // IPv4 works - MappingVariesByDestIP opt.Bool // for IPv4 - HairPinning opt.Bool // for IPv4 + UDP bool // a UDP STUN round trip completed + IPv6 bool // an IPv6 STUN round trip completed + IPv4 bool // an IPv4 STUN round trip completed + IPv6CanSend bool // an IPv6 packet was able to be sent + IPv4CanSend bool // an IPv4 packet was able to be sent + + // MappingVariesByDestIP is whether STUN results depend which + // STUN server you're talking to (on IPv4). + MappingVariesByDestIP opt.Bool + + // HairPinning is whether the router supports communicating + // between two local devices through the NATted public IP address + // (on IPv4). + HairPinning opt.Bool // UPnP is whether UPnP appears present on the LAN. // Empty means not checked. @@ -1142,9 +1151,19 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe switch probe.proto { case probeIPv4: - rs.pc4.WriteTo(req, addr) + n, err := rs.pc4.WriteTo(req, addr) + if n == len(req) && err == nil { + rs.mu.Lock() + rs.report.IPv4CanSend = true + rs.mu.Unlock() + } case probeIPv6: - rs.pc6.WriteTo(req, addr) + n, err := rs.pc6.WriteTo(req, addr) + if n == len(req) && err == nil { + rs.mu.Lock() + rs.report.IPv6CanSend = true + rs.mu.Unlock() + } default: panic("bad probe proto " + fmt.Sprint(probe.proto)) } diff --git a/net/netcheck/netcheck_test.go b/net/netcheck/netcheck_test.go index 90993d622..c99aad2cd 100644 --- a/net/netcheck/netcheck_test.go +++ b/net/netcheck/netcheck_test.go @@ -100,11 +100,18 @@ func TestWorksWhenUDPBlocked(t *testing.T) { if err != nil { t.Fatal(err) } - want := newReport() r.UPnP = "" r.PMP = "" r.PCP = "" + want := newReport() + + // The IPv4CanSend flag gets set differently across platforms. + // On Windows this test detects false, while on Linux detects true. + // That's not relevant to this test, so just accept what we're + // given. + want.IPv4CanSend = r.IPv4CanSend + if !reflect.DeepEqual(r, want) { t.Errorf("mismatch\n got: %+v\nwant: %+v\n", r, want) } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index d901c8e09..fd2bb9e35 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -263,6 +263,11 @@ type Conn struct { // logging. noV4, noV6 syncs.AtomicBool + // noV4Send is whether IPv4 UDP is known to be unable to transmit + // at all. This could happen if the socket is in an invalid state + // (as can happen on darwin after a network link status change). + noV4Send syncs.AtomicBool + // networkUp is whether the network is up (some interface is up // with IPv4 or IPv6). It's used to suppress log spam and prevent // new connection that'll fail. @@ -603,6 +608,10 @@ func (c *Conn) updateEndpoints(why string) { c.muCond.Broadcast() }() c.logf("[v1] magicsock: starting endpoint update (%s)", why) + if c.noV4Send.Get() { + c.logf("magicsock: last netcheck reported send error. Rebinding.") + c.Rebind() + } endpoints, err := c.determineEndpoints(c.connCtx) if err != nil { @@ -697,6 +706,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) { c.noV4.Set(!report.IPv4) c.noV6.Set(!report.IPv6) + c.noV4Send.Set(!report.IPv4CanSend) ni := &tailcfg.NetInfo{ DERPLatency: map[string]float64{}, diff --git a/wgengine/userspace.go b/wgengine/userspace.go index b3847134b..0bb1e319b 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -1143,7 +1143,8 @@ func (e *userspaceEngine) GetLinkMonitor() *monitor.Mon { } // LinkChange signals a network change event. It's currently -// (2021-03-03) only called on Android. +// (2021-03-03) only called on Android. On other platforms, linkMon +// generates link change events for us. func (e *userspaceEngine) LinkChange(_ bool) { e.linkMon.InjectEvent() }