diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 6268500ec..fb730eb56 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -21,6 +21,8 @@ import ( "golang.org/x/crypto/poly1305" xmaps "golang.org/x/exp/maps" + "golang.org/x/net/ipv4" + "golang.org/x/net/ipv6" "tailscale.com/disco" "tailscale.com/ipn/ipnstate" "tailscale.com/net/stun" @@ -93,6 +95,7 @@ type sentPing struct { at mono.Time timer *time.Timer // timeout timer purpose discoPingPurpose + size int // size of the disco message res *ipnstate.PingResult // nil unless CLI ping cb func(*ipnstate.PingResult) // nil unless CLI ping } @@ -273,10 +276,12 @@ func (de *endpoint) DstToBytes() []byte { return packIPPort(de.fakeWGAddr) } // addrForSendLocked returns the address(es) that should be used for // sending the next packet. Zero, one, or both of UDP address and DERP // addr may be non-zero. If the endpoint is WireGuard only and does not have -// latency information, a bool is returned to indiciate that the +// latency information, a bool is returned to indicate that the // WireGuard latency discovery pings should be sent. // // de.mu must be held. +// +// TODO(val): Rewrite the addrFor*Locked() variations to share code. func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.AddrPort, sendWGPing bool) { udpAddr = de.bestAddr.AddrPort @@ -353,6 +358,41 @@ func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.Add return udpAddr, needPing } +// addrForPingSizeLocked returns the address(es) that should be used for sending +// the next ping. It will only return addrs with a large enough path MTU to +// permit a ping payload of size bytes to be delivered (DERP is always one such +// addr as it is a TCP connection). If it returns a zero-value udpAddr, then we +// should continue probing the MTU of all paths to this endpoint. Zero, one, or +// both of the returned UDP address and DERP address may be non-zero. +// +// de.mu must be held. +func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, derpAddr netip.AddrPort) { + if size == 0 { + udpAddr, derpAddr, _ = de.addrForSendLocked(now) + return + } + + udpAddr = de.bestAddr.AddrPort + pathMTU := de.bestAddr.wireMTU + requestedMTU := pingSizeToPktLen(size, udpAddr.Addr()) + mtuOk := requestedMTU <= pathMTU + + if udpAddr.IsValid() && mtuOk { + if !now.After(de.trustBestAddrUntil) { + return udpAddr, netip.AddrPort{} + } + // We had a bestAddr with large enough MTU but it expired, so + // send both to it and DERP. + return udpAddr, de.derpAddr + } + + // The UDP address isn't valid or it doesn't have a path MTU big enough + // for the packet. Return a zero-value udpAddr to signal that we should + // keep probing the path MTU to all addresses for this endpoint, and a + // valid DERP addr to signal that we should also send via DERP. + return netip.AddrPort{}, de.derpAddr +} + // heartbeat is called every heartbeatInterval to keep the best UDP path alive, // or kick off discovery of other paths. func (de *endpoint) heartbeat() { @@ -443,7 +483,7 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat } now := mono.Now() - udpAddr, derpAddr, _ := de.addrForSendLocked(now) + udpAddr, derpAddr := de.addrForPingSizeLocked(now, size) if derpAddr.IsValid() { de.startDiscoPingLocked(derpAddr, now, pingCLI, size, res, cb) @@ -939,6 +979,41 @@ func (de *endpoint) noteConnectivityChange() { } } +// pingSizeToPktLen calculates the minimum path MTU that would permit +// a disco ping message of length size to reach its target at +// addr. size is the length of the entire disco message including +// disco headers. If size is zero, assume it is the default MTU. +func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU { + if size == 0 { + return tstun.DefaultWireMTU() + } + headerLen := ipv4.HeaderLen + if addr.Is6() { + headerLen = ipv6.HeaderLen + } + headerLen += 8 // UDP header length + return tstun.WireMTU(size + headerLen) +} + +// pktLenToPingSize calculates the ping payload size that would +// create a disco ping message whose on-the-wire length is exactly mtu +// bytes long. If mtu is zero or less than the minimum ping size, then +// no MTU probe is desired and return zero for an unpadded ping. +func pktLenToPingSize(mtu tstun.WireMTU, addr netip.Addr) int { + if mtu == 0 { + return 0 + } + headerLen := ipv4.HeaderLen + if addr.Is6() { + headerLen = ipv6.HeaderLen + } + headerLen += 8 // UDP header length + if mtu < tstun.WireMTU(headerLen) { + return 0 + } + return int(mtu) - headerLen +} + // handlePongConnLocked handles a Pong message (a reply to an earlier ping). // It should be called with the Conn.mu held. // @@ -978,7 +1053,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip } if sp.purpose != pingHeartbeat { - de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { + de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { if sp.to != src { fmt.Fprintf(bw, " ping.to=%v", sp.to) } @@ -996,9 +1071,9 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip // Promote this pong response to our current best address if it's lower latency. // TODO(bradfitz): decide how latency vs. preference order affects decision if !isDerp { - thisPong := addrQuality{sp.to, latency, 0} + thisPong := addrQuality{sp.to, latency, pingSizeToPktLen(sp.size, sp.to.Addr())} if betterAddr(thisPong, de.bestAddr) { - de.c.logf("magicsock: disco: node %v %v now using %v", de.publicKey.ShortString(), de.discoShort(), sp.to) + de.c.logf("magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU) de.debugUpdates.Add(EndpointChange{ When: time.Now(), What: "handlePingLocked-bestAddr-update", diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index eaeb28c01..570fca35b 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -2893,3 +2893,112 @@ func TestAddrForSendLockedForWireGuardOnly(t *testing.T) { }) } } + +func TestAddrForPingSizeLocked(t *testing.T) { + testTime := mono.Now() + + validUdpAddr := netip.MustParseAddrPort("1.1.1.1:111") + validDerpAddr := netip.MustParseAddrPort("2.2.2.2:222") + + pingTests := []struct { + desc string + size int // size of ping payload + mtu tstun.WireMTU // The MTU of the path to bestAddr, if any + bestAddr bool // If the endpoint should have a valid bestAddr + bestAddrTrusted bool // If the bestAddr has not yet expired + wantUDP bool // Non-zero UDP addr means send to UDP; zero means start discovery + wantDERP bool // Non-zero DERP addr means send to DERP + }{ + { + desc: "ping_size_0_and_invalid_UDP_addr_should_start_discovery_and_send_to_DERP", + size: 0, + bestAddr: false, + bestAddrTrusted: false, + wantUDP: false, + wantDERP: true, + }, + { + desc: "ping_size_0_and_valid_trusted_UDP_addr_should_send_to_UDP_and_not_send_to_DERP", + size: 0, + bestAddr: true, + bestAddrTrusted: true, + wantUDP: true, + wantDERP: false, + }, + { + desc: "ping_size_0_and_valid_but_expired_UDP_addr_should_send_to_both_UDP_and_DERP", + size: 0, + bestAddr: true, + bestAddrTrusted: false, + wantUDP: true, + wantDERP: true, + }, + { + desc: "ping_size_too_big_for_trusted_UDP_addr_should_start_discovery_and_send_to_DERP", + size: pktLenToPingSize(1501, validUdpAddr.Addr()), + mtu: 1500, + bestAddr: true, + bestAddrTrusted: true, + wantUDP: false, + wantDERP: true, + }, + { + desc: "ping_size_too_big_for_untrusted_UDP_addr_should_start_discovery_and_send_to_DERP", + size: pktLenToPingSize(1501, validUdpAddr.Addr()), + mtu: 1500, + bestAddr: true, + bestAddrTrusted: false, + wantUDP: false, + wantDERP: true, + }, + { + desc: "ping_size_small_enough_for_trusted_UDP_addr_should_send_to_UDP_and_not_DERP", + size: pktLenToPingSize(1500, validUdpAddr.Addr()), + mtu: 1500, + bestAddr: true, + bestAddrTrusted: true, + wantUDP: true, + wantDERP: false, + }, + { + desc: "ping_size_small_enough_for_untrusted_UDP_addr_should_send_to_UDP_and_DERP", + size: pktLenToPingSize(1500, validUdpAddr.Addr()), + mtu: 1500, + bestAddr: true, + bestAddrTrusted: false, + wantUDP: true, + wantDERP: true, + }, + } + + for _, test := range pingTests { + t.Run(test.desc, func(t *testing.T) { + bestAddr := addrQuality{wireMTU: test.mtu} + if test.bestAddr { + bestAddr.AddrPort = validUdpAddr + } + ep := &endpoint{ + derpAddr: validDerpAddr, + bestAddr: bestAddr, + } + if test.bestAddrTrusted { + ep.trustBestAddrUntil = testTime.Add(1 * time.Second) + } + + udpAddr, derpAddr := ep.addrForPingSizeLocked(testTime, test.size) + + if test.wantUDP && !udpAddr.IsValid() { + t.Errorf("%s: udpAddr returned is not valid, won't be sent to UDP address", test.desc) + } + if !test.wantUDP && udpAddr.IsValid() { + t.Errorf("%s: udpAddr returned is valid, discovery will not start", test.desc) + } + if test.wantDERP && !derpAddr.IsValid() { + t.Errorf("%s: derpAddr returned is not valid, won't be sent to DERP", test.desc) + } + if !test.wantDERP && derpAddr.IsValid() { + t.Errorf("%s: derpAddr returned is valid, will be sent to DERP", test.desc) + } + }) + } +}