magicsock: if STUN failed to send before, rebind before STUNning again.

On iOS (and possibly other platforms), sometimes our UDP socket would
get stuck in a state where it was bound to an invalid interface (or no
interface) after a network reconfiguration. We can detect this by
actually checking the error codes from sending our STUN packets.

If we completely fail to send any STUN packets, we know something is
very broken. So on the next STUN attempt, let's rebind the UDP socket
to try to correct any problems.

This fixes a problem where iOS would sometimes get stuck using DERP
instead of direct connections until the backend was restarted.

Fixes #2994

Signed-off-by: Avery Pennarun <apenwarr@tailscale.com>
pull/3019/head
Avery Pennarun 3 years ago committed by apenwarr
parent 52be1c0c78
commit 0d4a0bf60e

@ -69,11 +69,20 @@ const (
)
type Report struct {
UDP bool // UDP works
IPv6 bool // IPv6 works
IPv4 bool // IPv4 works
MappingVariesByDestIP opt.Bool // for IPv4
HairPinning opt.Bool // for IPv4
UDP bool // a UDP STUN round trip completed
IPv6 bool // an IPv6 STUN round trip completed
IPv4 bool // an IPv4 STUN round trip completed
IPv6CanSend bool // an IPv6 packet was able to be sent
IPv4CanSend bool // an IPv4 packet was able to be sent
// MappingVariesByDestIP is whether STUN results depend which
// STUN server you're talking to (on IPv4).
MappingVariesByDestIP opt.Bool
// HairPinning is whether the router supports communicating
// between two local devices through the NATted public IP address
// (on IPv4).
HairPinning opt.Bool
// UPnP is whether UPnP appears present on the LAN.
// Empty means not checked.
@ -1142,9 +1151,19 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
switch probe.proto {
case probeIPv4:
rs.pc4.WriteTo(req, addr)
n, err := rs.pc4.WriteTo(req, addr)
if n == len(req) && err == nil {
rs.mu.Lock()
rs.report.IPv4CanSend = true
rs.mu.Unlock()
}
case probeIPv6:
rs.pc6.WriteTo(req, addr)
n, err := rs.pc6.WriteTo(req, addr)
if n == len(req) && err == nil {
rs.mu.Lock()
rs.report.IPv6CanSend = true
rs.mu.Unlock()
}
default:
panic("bad probe proto " + fmt.Sprint(probe.proto))
}

@ -100,11 +100,18 @@ func TestWorksWhenUDPBlocked(t *testing.T) {
if err != nil {
t.Fatal(err)
}
want := newReport()
r.UPnP = ""
r.PMP = ""
r.PCP = ""
want := newReport()
// The IPv4CanSend flag gets set differently across platforms.
// On Windows this test detects false, while on Linux detects true.
// That's not relevant to this test, so just accept what we're
// given.
want.IPv4CanSend = r.IPv4CanSend
if !reflect.DeepEqual(r, want) {
t.Errorf("mismatch\n got: %+v\nwant: %+v\n", r, want)
}

@ -263,6 +263,11 @@ type Conn struct {
// logging.
noV4, noV6 syncs.AtomicBool
// noV4Send is whether IPv4 UDP is known to be unable to transmit
// at all. This could happen if the socket is in an invalid state
// (as can happen on darwin after a network link status change).
noV4Send syncs.AtomicBool
// networkUp is whether the network is up (some interface is up
// with IPv4 or IPv6). It's used to suppress log spam and prevent
// new connection that'll fail.
@ -603,6 +608,10 @@ func (c *Conn) updateEndpoints(why string) {
c.muCond.Broadcast()
}()
c.logf("[v1] magicsock: starting endpoint update (%s)", why)
if c.noV4Send.Get() {
c.logf("magicsock: last netcheck reported send error. Rebinding.")
c.Rebind()
}
endpoints, err := c.determineEndpoints(c.connCtx)
if err != nil {
@ -697,6 +706,7 @@ func (c *Conn) updateNetInfo(ctx context.Context) (*netcheck.Report, error) {
c.noV4.Set(!report.IPv4)
c.noV6.Set(!report.IPv6)
c.noV4Send.Set(!report.IPv4CanSend)
ni := &tailcfg.NetInfo{
DERPLatency: map[string]float64{},

@ -1143,7 +1143,8 @@ func (e *userspaceEngine) GetLinkMonitor() *monitor.Mon {
}
// LinkChange signals a network change event. It's currently
// (2021-03-03) only called on Android.
// (2021-03-03) only called on Android. On other platforms, linkMon
// generates link change events for us.
func (e *userspaceEngine) LinkChange(_ bool) {
e.linkMon.InjectEvent()
}

Loading…
Cancel
Save