disco,net/tstun,wgengine/magicsock: probe peer MTU

Automatically probe the path MTU to a peer when peer MTU is enabled, but do not
use the MTU information for anything yet.

Updates #311

Signed-off-by: Val <valerie@tailscale.com>
pull/9631/head
Val 1 year ago committed by valscale
parent b4e587c3bd
commit 893bdd729c

@ -261,7 +261,7 @@ func parsePong(ver uint8, p []byte) (m *Pong, err error) {
func MessageSummary(m Message) string { func MessageSummary(m Message) string {
switch m := m.(type) { switch m := m.(type) {
case *Ping: case *Ping:
return fmt.Sprintf("ping tx=%x", m.TxID[:6]) return fmt.Sprintf("ping tx=%x padding=%v", m.TxID[:6], m.Padding)
case *Pong: case *Pong:
return fmt.Sprintf("pong tx=%x", m.TxID[:6]) return fmt.Sprintf("pong tx=%x", m.TxID[:6])
case *CallMeMaybe: case *CallMeMaybe:

@ -79,14 +79,16 @@ const (
safeTUNMTU TUNMTU = 1280 safeTUNMTU TUNMTU = 1280
) )
// MaxProbedWireMTU is the largest MTU we will test for path MTU // WireMTUsToProbe is a list of the on-the-wire MTUs we want to probe. Each time
// discovery. // magicsock discovery begins, it will send a set of pings, one of each size
var MaxProbedWireMTU WireMTU = 9000 // listed below.
var WireMTUsToProbe = []WireMTU{
func init() { WireMTU(safeTUNMTU), // Tailscale over Tailscale :)
if MaxProbedWireMTU > WireMTU(maxTUNMTU) { TUNToWireMTU(safeTUNMTU), // Smallest MTU allowed for IPv6, current default
MaxProbedWireMTU = WireMTU(maxTUNMTU) 1400, // Most common MTU minus a few bytes for tunnels
} 1500, // Most common MTU
8000, // Should fit inside all jumbo frame sizes
9000, // Most jumbo frames are this size or larger
} }
// wgHeaderLen is the length of all the headers Wireguard adds to a packet // wgHeaderLen is the length of all the headers Wireguard adds to a packet
@ -125,7 +127,7 @@ func WireToTUNMTU(w WireMTU) TUNMTU {
// MTU. It is also the path MTU that we default to if we have no // MTU. It is also the path MTU that we default to if we have no
// information about the path to a peer. // information about the path to a peer.
// //
// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTunMTU // 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTUNMTU
// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead // 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead
// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU // 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
func DefaultTUNMTU() TUNMTU { func DefaultTUNMTU() TUNMTU {
@ -135,12 +137,23 @@ func DefaultTUNMTU() TUNMTU {
debugPMTUD, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD") debugPMTUD, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD")
if debugPMTUD { if debugPMTUD {
return WireToTUNMTU(MaxProbedWireMTU) // TODO: While we are just probing MTU but not generating PTB,
// this has to continue to return the safe MTU. When we add the
// code to generate PTB, this will be:
//
// return WireToTUNMTU(maxProbedWireMTU)
return safeTUNMTU
} }
return safeTUNMTU return safeTUNMTU
} }
// SafeWireMTU returns the wire MTU that is safe to use if we have no
// information about the path MTU to this peer.
func SafeWireMTU() WireMTU {
return TUNToWireMTU(safeTUNMTU)
}
// DefaultWireMTU returns the default TUN MTU, adjusted for wireguard // DefaultWireMTU returns the default TUN MTU, adjusted for wireguard
// overhead. // overhead.
func DefaultWireMTU() WireMTU { func DefaultWireMTU() WireMTU {

@ -39,15 +39,18 @@ func TestDefaultTunMTU(t *testing.T) {
t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU) t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU)
} }
// If PMTUD is enabled, the MTU should default to the largest probed // If PMTUD is enabled, the MTU should default to the safe MTU, but only
// MTU, but only if the user hasn't requested a specific MTU. // if the user hasn't requested a specific MTU.
//
// TODO: When PMTUD is generating PTB responses, this will become the
// largest MTU we probe.
os.Setenv("TS_DEBUG_MTU", "") os.Setenv("TS_DEBUG_MTU", "")
os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true") os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true")
if DefaultTUNMTU() != WireToTUNMTU(MaxProbedWireMTU) { if DefaultTUNMTU() != safeTUNMTU {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), WireToTUNMTU(MaxProbedWireMTU)) t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), safeTUNMTU)
} }
// TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD. // TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD.
mtu = WireToTUNMTU(MaxProbedWireMTU - 1) mtu = WireToTUNMTU(MaxPacketSize - 1)
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu))) os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != mtu { if DefaultTUNMTU() != mtu {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu) t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu)

@ -35,6 +35,16 @@ import (
"tailscale.com/util/ringbuffer" "tailscale.com/util/ringbuffer"
) )
var mtuProbePingSizesV4 []int
var mtuProbePingSizesV6 []int
func init() {
for _, m := range tstun.WireMTUsToProbe {
mtuProbePingSizesV4 = append(mtuProbePingSizesV4, pktLenToPingSize(m, false))
mtuProbePingSizesV6 = append(mtuProbePingSizesV6, pktLenToPingSize(m, true))
}
}
// endpoint is a wireguard/conn.Endpoint. In wireguard-go and kernel WireGuard // endpoint is a wireguard/conn.Endpoint. In wireguard-go and kernel WireGuard
// there is only one endpoint for a peer, but in Tailscale we distribute a // there is only one endpoint for a peer, but in Tailscale we distribute a
// number of possible endpoints for a peer which would include the all the // number of possible endpoints for a peer which would include the all the
@ -374,7 +384,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der
udpAddr = de.bestAddr.AddrPort udpAddr = de.bestAddr.AddrPort
pathMTU := de.bestAddr.wireMTU pathMTU := de.bestAddr.wireMTU
requestedMTU := pingSizeToPktLen(size, udpAddr.Addr()) requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
mtuOk := requestedMTU <= pathMTU mtuOk := requestedMTU <= pathMTU
if udpAddr.IsValid() && mtuOk { if udpAddr.IsValid() && mtuOk {
@ -666,21 +676,41 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo
st.lastPing = now st.lastPing = now
} }
txid := stun.NewTxID() // If we are doing a discovery ping or a CLI ping with no specified size
de.sentPing[txid] = sentPing{ // to a non DERP address, then probe the MTU. Otherwise just send the
to: ep, // one specified ping.
at: now,
timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }), // Default to sending a single ping of the specified size
purpose: purpose, sizes := []int{size}
res: res, if de.c.PeerMTUEnabled() {
cb: cb, isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
de.c.dlogf("[v1] magicsock: starting MTU probe")
sizes = mtuProbePingSizesV4
if ep.Addr().Is6() {
sizes = mtuProbePingSizesV6
}
}
} }
logLevel := discoLog logLevel := discoLog
if purpose == pingHeartbeat { if purpose == pingHeartbeat {
logLevel = discoVerboseLog logLevel = discoVerboseLog
} }
go de.sendDiscoPing(ep, epDisco.key, txid, size, logLevel) for _, s := range sizes {
txid := stun.NewTxID()
de.sentPing[txid] = sentPing{
to: ep,
at: now,
timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
purpose: purpose,
res: res,
cb: cb,
size: s,
}
go de.sendDiscoPing(ep, epDisco.key, txid, s, logLevel)
}
} }
// sendDiscoPingsLocked starts pinging all of ep's endpoints. // sendDiscoPingsLocked starts pinging all of ep's endpoints.
@ -982,13 +1012,13 @@ func (de *endpoint) noteConnectivityChange() {
// pingSizeToPktLen calculates the minimum path MTU that would permit // pingSizeToPktLen calculates the minimum path MTU that would permit
// a disco ping message of length size to reach its target at // a disco ping message of length size to reach its target at
// addr. size is the length of the entire disco message including // addr. size is the length of the entire disco message including
// disco headers. If size is zero, assume it is the default MTU. // disco headers. If size is zero, assume it is the safe wire MTU.
func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU { func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
if size == 0 { if size == 0 {
return tstun.DefaultWireMTU() return tstun.SafeWireMTU()
} }
headerLen := ipv4.HeaderLen headerLen := ipv4.HeaderLen
if addr.Is6() { if is6 {
headerLen = ipv6.HeaderLen headerLen = ipv6.HeaderLen
} }
headerLen += 8 // UDP header length headerLen += 8 // UDP header length
@ -999,12 +1029,12 @@ func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU {
// create a disco ping message whose on-the-wire length is exactly mtu // create a disco ping message whose on-the-wire length is exactly mtu
// bytes long. If mtu is zero or less than the minimum ping size, then // bytes long. If mtu is zero or less than the minimum ping size, then
// no MTU probe is desired and return zero for an unpadded ping. // no MTU probe is desired and return zero for an unpadded ping.
func pktLenToPingSize(mtu tstun.WireMTU, addr netip.Addr) int { func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
if mtu == 0 { if mtu == 0 {
return 0 return 0
} }
headerLen := ipv4.HeaderLen headerLen := ipv4.HeaderLen
if addr.Is6() { if is6 {
headerLen = ipv6.HeaderLen headerLen = ipv6.HeaderLen
} }
headerLen += 8 // UDP header length headerLen += 8 // UDP header length
@ -1053,7 +1083,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
} }
if sp.purpose != pingHeartbeat { if sp.purpose != pingHeartbeat {
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr().Is6()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
if sp.to != src { if sp.to != src {
fmt.Fprintf(bw, " ping.to=%v", sp.to) fmt.Fprintf(bw, " ping.to=%v", sp.to)
} }
@ -1071,9 +1101,9 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip
// Promote this pong response to our current best address if it's lower latency. // Promote this pong response to our current best address if it's lower latency.
// TODO(bradfitz): decide how latency vs. preference order affects decision // TODO(bradfitz): decide how latency vs. preference order affects decision
if !isDerp { if !isDerp {
thisPong := addrQuality{sp.to, latency, pingSizeToPktLen(sp.size, sp.to.Addr())} thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
if betterAddr(thisPong, de.bestAddr) { if betterAddr(thisPong, de.bestAddr) {
de.c.logf("magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU) de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
de.debugUpdates.Add(EndpointChange{ de.debugUpdates.Add(EndpointChange{
When: time.Now(), When: time.Now(),
What: "handlePingLocked-bestAddr-update", What: "handlePingLocked-bestAddr-update",

@ -1567,7 +1567,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf
if numNodes > 1 { if numNodes > 1 {
pingNodeSrcStr = "[one-of-multi]" pingNodeSrcStr = "[one-of-multi]"
} }
c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got ping tx=%x", c.discoShort, di.discoShort, pingNodeSrcStr, src, dm.TxID[:6]) c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got ping tx=%x padding=%v", c.discoShort, di.discoShort, pingNodeSrcStr, src, dm.TxID[:6], dm.Padding)
} }
ipDst := src ipDst := src

@ -2936,7 +2936,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
}, },
{ {
desc: "ping_size_too_big_for_trusted_UDP_addr_should_start_discovery_and_send_to_DERP", desc: "ping_size_too_big_for_trusted_UDP_addr_should_start_discovery_and_send_to_DERP",
size: pktLenToPingSize(1501, validUdpAddr.Addr()), size: pktLenToPingSize(1501, validUdpAddr.Addr().Is6()),
mtu: 1500, mtu: 1500,
bestAddr: true, bestAddr: true,
bestAddrTrusted: true, bestAddrTrusted: true,
@ -2945,7 +2945,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
}, },
{ {
desc: "ping_size_too_big_for_untrusted_UDP_addr_should_start_discovery_and_send_to_DERP", desc: "ping_size_too_big_for_untrusted_UDP_addr_should_start_discovery_and_send_to_DERP",
size: pktLenToPingSize(1501, validUdpAddr.Addr()), size: pktLenToPingSize(1501, validUdpAddr.Addr().Is6()),
mtu: 1500, mtu: 1500,
bestAddr: true, bestAddr: true,
bestAddrTrusted: false, bestAddrTrusted: false,
@ -2954,7 +2954,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
}, },
{ {
desc: "ping_size_small_enough_for_trusted_UDP_addr_should_send_to_UDP_and_not_DERP", desc: "ping_size_small_enough_for_trusted_UDP_addr_should_send_to_UDP_and_not_DERP",
size: pktLenToPingSize(1500, validUdpAddr.Addr()), size: pktLenToPingSize(1500, validUdpAddr.Addr().Is6()),
mtu: 1500, mtu: 1500,
bestAddr: true, bestAddr: true,
bestAddrTrusted: true, bestAddrTrusted: true,
@ -2963,7 +2963,7 @@ func TestAddrForPingSizeLocked(t *testing.T) {
}, },
{ {
desc: "ping_size_small_enough_for_untrusted_UDP_addr_should_send_to_UDP_and_DERP", desc: "ping_size_small_enough_for_untrusted_UDP_addr_should_send_to_UDP_and_DERP",
size: pktLenToPingSize(1500, validUdpAddr.Addr()), size: pktLenToPingSize(1500, validUdpAddr.Addr().Is6()),
mtu: 1500, mtu: 1500,
bestAddr: true, bestAddr: true,
bestAddrTrusted: false, bestAddrTrusted: false,

@ -5,6 +5,8 @@
package magicsock package magicsock
import "tailscale.com/net/tstun"
// Peer path MTU routines shared by platforms that implement it. // Peer path MTU routines shared by platforms that implement it.
// DontFragSetting returns true if at least one of the underlying sockets of // DontFragSetting returns true if at least one of the underlying sockets of
@ -102,6 +104,9 @@ func (c *Conn) UpdatePMTUD() {
_ = c.setDontFragment("udp6", false) _ = c.setDontFragment("udp6", false)
newStatus = false newStatus = false
} }
if debugPMTUD() {
c.logf("magicsock: peermtu: peer MTU probes are %v", tstun.WireMTUsToProbe)
}
c.peerMTUEnabled.Store(newStatus) c.peerMTUEnabled.Store(newStatus)
c.resetEndpointStates() c.resetEndpointStates()
} }

Loading…
Cancel
Save