From 893bdd729c836d278228b7ba3d5eb211a6d957a6 Mon Sep 17 00:00:00 2001 From: Val Date: Thu, 5 Oct 2023 20:05:19 +0200 Subject: [PATCH] disco,net/tstun,wgengine/magicsock: probe peer MTU Automatically probe the path MTU to a peer when peer MTU is enabled, but do not use the MTU information for anything yet. Updates #311 Signed-off-by: Val --- disco/disco.go | 2 +- net/tstun/mtu.go | 33 ++++++++++---- net/tstun/mtu_test.go | 13 ++++-- wgengine/magicsock/endpoint.go | 68 ++++++++++++++++++++-------- wgengine/magicsock/magicsock.go | 2 +- wgengine/magicsock/magicsock_test.go | 8 ++-- wgengine/magicsock/peermtu.go | 5 ++ 7 files changed, 91 insertions(+), 40 deletions(-) diff --git a/disco/disco.go b/disco/disco.go index 46379b9d2..8c83480df 100644 --- a/disco/disco.go +++ b/disco/disco.go @@ -261,7 +261,7 @@ func parsePong(ver uint8, p []byte) (m *Pong, err error) { func MessageSummary(m Message) string { switch m := m.(type) { case *Ping: - return fmt.Sprintf("ping tx=%x", m.TxID[:6]) + return fmt.Sprintf("ping tx=%x padding=%v", m.TxID[:6], m.Padding) case *Pong: return fmt.Sprintf("pong tx=%x", m.TxID[:6]) case *CallMeMaybe: diff --git a/net/tstun/mtu.go b/net/tstun/mtu.go index 07b6ee424..004529c20 100644 --- a/net/tstun/mtu.go +++ b/net/tstun/mtu.go @@ -79,14 +79,16 @@ const ( safeTUNMTU TUNMTU = 1280 ) -// MaxProbedWireMTU is the largest MTU we will test for path MTU -// discovery. -var MaxProbedWireMTU WireMTU = 9000 - -func init() { - if MaxProbedWireMTU > WireMTU(maxTUNMTU) { - MaxProbedWireMTU = WireMTU(maxTUNMTU) - } +// WireMTUsToProbe is a list of the on-the-wire MTUs we want to probe. Each time +// magicsock discovery begins, it will send a set of pings, one of each size +// listed below. +var WireMTUsToProbe = []WireMTU{ + WireMTU(safeTUNMTU), // Tailscale over Tailscale :) + TUNToWireMTU(safeTUNMTU), // Smallest MTU allowed for IPv6, current default + 1400, // Most common MTU minus a few bytes for tunnels + 1500, // Most common MTU + 8000, // Should fit inside all jumbo frame sizes + 9000, // Most jumbo frames are this size or larger } // wgHeaderLen is the length of all the headers Wireguard adds to a packet @@ -125,7 +127,7 @@ func WireToTUNMTU(w WireMTU) TUNMTU { // MTU. It is also the path MTU that we default to if we have no // information about the path to a peer. // -// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTunMTU +// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of MaxTUNMTU // 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead // 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU func DefaultTUNMTU() TUNMTU { @@ -135,12 +137,23 @@ func DefaultTUNMTU() TUNMTU { debugPMTUD, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD") if debugPMTUD { - return WireToTUNMTU(MaxProbedWireMTU) + // TODO: While we are just probing MTU but not generating PTB, + // this has to continue to return the safe MTU. When we add the + // code to generate PTB, this will be: + // + // return WireToTUNMTU(maxProbedWireMTU) + return safeTUNMTU } return safeTUNMTU } +// SafeWireMTU returns the wire MTU that is safe to use if we have no +// information about the path MTU to this peer. +func SafeWireMTU() WireMTU { + return TUNToWireMTU(safeTUNMTU) +} + // DefaultWireMTU returns the default TUN MTU, adjusted for wireguard // overhead. func DefaultWireMTU() WireMTU { diff --git a/net/tstun/mtu_test.go b/net/tstun/mtu_test.go index 3708a91f4..8d165bfd3 100644 --- a/net/tstun/mtu_test.go +++ b/net/tstun/mtu_test.go @@ -39,15 +39,18 @@ func TestDefaultTunMTU(t *testing.T) { t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU) } - // If PMTUD is enabled, the MTU should default to the largest probed - // MTU, but only if the user hasn't requested a specific MTU. + // If PMTUD is enabled, the MTU should default to the safe MTU, but only + // if the user hasn't requested a specific MTU. + // + // TODO: When PMTUD is generating PTB responses, this will become the + // largest MTU we probe. os.Setenv("TS_DEBUG_MTU", "") os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true") - if DefaultTUNMTU() != WireToTUNMTU(MaxProbedWireMTU) { - t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), WireToTUNMTU(MaxProbedWireMTU)) + if DefaultTUNMTU() != safeTUNMTU { + t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), safeTUNMTU) } // TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD. - mtu = WireToTUNMTU(MaxProbedWireMTU - 1) + mtu = WireToTUNMTU(MaxPacketSize - 1) os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu))) if DefaultTUNMTU() != mtu { t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index fb730eb56..03f1e0ccc 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -35,6 +35,16 @@ import ( "tailscale.com/util/ringbuffer" ) +var mtuProbePingSizesV4 []int +var mtuProbePingSizesV6 []int + +func init() { + for _, m := range tstun.WireMTUsToProbe { + mtuProbePingSizesV4 = append(mtuProbePingSizesV4, pktLenToPingSize(m, false)) + mtuProbePingSizesV6 = append(mtuProbePingSizesV6, pktLenToPingSize(m, true)) + } +} + // endpoint is a wireguard/conn.Endpoint. In wireguard-go and kernel WireGuard // there is only one endpoint for a peer, but in Tailscale we distribute a // number of possible endpoints for a peer which would include the all the @@ -374,7 +384,7 @@ func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, der udpAddr = de.bestAddr.AddrPort pathMTU := de.bestAddr.wireMTU - requestedMTU := pingSizeToPktLen(size, udpAddr.Addr()) + requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6()) mtuOk := requestedMTU <= pathMTU if udpAddr.IsValid() && mtuOk { @@ -666,21 +676,41 @@ func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpo st.lastPing = now } - txid := stun.NewTxID() - de.sentPing[txid] = sentPing{ - to: ep, - at: now, - timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }), - purpose: purpose, - res: res, - cb: cb, + // If we are doing a discovery ping or a CLI ping with no specified size + // to a non DERP address, then probe the MTU. Otherwise just send the + // one specified ping. + + // Default to sending a single ping of the specified size + sizes := []int{size} + if de.c.PeerMTUEnabled() { + isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr + if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) { + de.c.dlogf("[v1] magicsock: starting MTU probe") + sizes = mtuProbePingSizesV4 + if ep.Addr().Is6() { + sizes = mtuProbePingSizesV6 + } + } } logLevel := discoLog if purpose == pingHeartbeat { logLevel = discoVerboseLog } - go de.sendDiscoPing(ep, epDisco.key, txid, size, logLevel) + for _, s := range sizes { + txid := stun.NewTxID() + de.sentPing[txid] = sentPing{ + to: ep, + at: now, + timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }), + purpose: purpose, + res: res, + cb: cb, + size: s, + } + go de.sendDiscoPing(ep, epDisco.key, txid, s, logLevel) + } + } // sendDiscoPingsLocked starts pinging all of ep's endpoints. @@ -982,13 +1012,13 @@ func (de *endpoint) noteConnectivityChange() { // pingSizeToPktLen calculates the minimum path MTU that would permit // a disco ping message of length size to reach its target at // addr. size is the length of the entire disco message including -// disco headers. If size is zero, assume it is the default MTU. -func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU { +// disco headers. If size is zero, assume it is the safe wire MTU. +func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU { if size == 0 { - return tstun.DefaultWireMTU() + return tstun.SafeWireMTU() } headerLen := ipv4.HeaderLen - if addr.Is6() { + if is6 { headerLen = ipv6.HeaderLen } headerLen += 8 // UDP header length @@ -999,12 +1029,12 @@ func pingSizeToPktLen(size int, addr netip.Addr) tstun.WireMTU { // create a disco ping message whose on-the-wire length is exactly mtu // bytes long. If mtu is zero or less than the minimum ping size, then // no MTU probe is desired and return zero for an unpadded ping. -func pktLenToPingSize(mtu tstun.WireMTU, addr netip.Addr) int { +func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int { if mtu == 0 { return 0 } headerLen := ipv4.HeaderLen - if addr.Is6() { + if is6 { headerLen = ipv6.HeaderLen } headerLen += 8 // UDP header length @@ -1053,7 +1083,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip } if sp.purpose != pingHeartbeat { - de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { + de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr().Is6()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { if sp.to != src { fmt.Fprintf(bw, " ping.to=%v", sp.to) } @@ -1071,9 +1101,9 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip // Promote this pong response to our current best address if it's lower latency. // TODO(bradfitz): decide how latency vs. preference order affects decision if !isDerp { - thisPong := addrQuality{sp.to, latency, pingSizeToPktLen(sp.size, sp.to.Addr())} + thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))} if betterAddr(thisPong, de.bestAddr) { - de.c.logf("magicsock: disco: node %v %v now using %v mtu %v", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU) + de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6]) de.debugUpdates.Add(EndpointChange{ When: time.Now(), What: "handlePingLocked-bestAddr-update", diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 4e03f014a..667032c17 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1567,7 +1567,7 @@ func (c *Conn) handlePingLocked(dm *disco.Ping, src netip.AddrPort, di *discoInf if numNodes > 1 { pingNodeSrcStr = "[one-of-multi]" } - c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got ping tx=%x", c.discoShort, di.discoShort, pingNodeSrcStr, src, dm.TxID[:6]) + c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got ping tx=%x padding=%v", c.discoShort, di.discoShort, pingNodeSrcStr, src, dm.TxID[:6], dm.Padding) } ipDst := src diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 987b398f4..923eb36eb 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -2936,7 +2936,7 @@ func TestAddrForPingSizeLocked(t *testing.T) { }, { desc: "ping_size_too_big_for_trusted_UDP_addr_should_start_discovery_and_send_to_DERP", - size: pktLenToPingSize(1501, validUdpAddr.Addr()), + size: pktLenToPingSize(1501, validUdpAddr.Addr().Is6()), mtu: 1500, bestAddr: true, bestAddrTrusted: true, @@ -2945,7 +2945,7 @@ func TestAddrForPingSizeLocked(t *testing.T) { }, { desc: "ping_size_too_big_for_untrusted_UDP_addr_should_start_discovery_and_send_to_DERP", - size: pktLenToPingSize(1501, validUdpAddr.Addr()), + size: pktLenToPingSize(1501, validUdpAddr.Addr().Is6()), mtu: 1500, bestAddr: true, bestAddrTrusted: false, @@ -2954,7 +2954,7 @@ func TestAddrForPingSizeLocked(t *testing.T) { }, { desc: "ping_size_small_enough_for_trusted_UDP_addr_should_send_to_UDP_and_not_DERP", - size: pktLenToPingSize(1500, validUdpAddr.Addr()), + size: pktLenToPingSize(1500, validUdpAddr.Addr().Is6()), mtu: 1500, bestAddr: true, bestAddrTrusted: true, @@ -2963,7 +2963,7 @@ func TestAddrForPingSizeLocked(t *testing.T) { }, { desc: "ping_size_small_enough_for_untrusted_UDP_addr_should_send_to_UDP_and_DERP", - size: pktLenToPingSize(1500, validUdpAddr.Addr()), + size: pktLenToPingSize(1500, validUdpAddr.Addr().Is6()), mtu: 1500, bestAddr: true, bestAddrTrusted: false, diff --git a/wgengine/magicsock/peermtu.go b/wgengine/magicsock/peermtu.go index 8013aa5ea..199585323 100644 --- a/wgengine/magicsock/peermtu.go +++ b/wgengine/magicsock/peermtu.go @@ -5,6 +5,8 @@ package magicsock +import "tailscale.com/net/tstun" + // Peer path MTU routines shared by platforms that implement it. // DontFragSetting returns true if at least one of the underlying sockets of @@ -102,6 +104,9 @@ func (c *Conn) UpdatePMTUD() { _ = c.setDontFragment("udp6", false) newStatus = false } + if debugPMTUD() { + c.logf("magicsock: peermtu: peer MTU probes are %v", tstun.WireMTUsToProbe) + } c.peerMTUEnabled.Store(newStatus) c.resetEndpointStates() }