wgengine,net,ipn,disco: split up and define different types of MTU

Prepare for path MTU discovery by splitting up the concept of
DefaultMTU() into the concepts of the Tailscale TUN MTU, MTUs of
underlying network interfaces, minimum "safe" TUN MTU, user configured
TUN MTU, probed path MTU to a peer, and maximum probed MTU. Add a set
of likely MTUs to probe.

Updates #311

Signed-off-by: Val <valerie@tailscale.com>
pull/9517/head
Val 1 year ago committed by valscale
parent fb2f3e4741
commit 059051c58a

@ -36,7 +36,6 @@ import (
"tailscale.com/net/netmon" "tailscale.com/net/netmon"
"tailscale.com/net/netutil" "tailscale.com/net/netutil"
"tailscale.com/net/portmapper" "tailscale.com/net/portmapper"
"tailscale.com/net/tstun"
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/tka" "tailscale.com/tka"
"tailscale.com/tstime" "tailscale.com/tstime"
@ -51,6 +50,7 @@ import (
"tailscale.com/util/osdiag" "tailscale.com/util/osdiag"
"tailscale.com/util/rands" "tailscale.com/util/rands"
"tailscale.com/version" "tailscale.com/version"
"tailscale.com/wgengine/magicsock"
) )
type localAPIHandler func(*Handler, http.ResponseWriter, *http.Request) type localAPIHandler func(*Handler, http.ResponseWriter, *http.Request)
@ -1380,8 +1380,8 @@ func (h *Handler) servePing(w http.ResponseWriter, r *http.Request) {
http.Error(w, "'size' parameter is only supported with disco pings", 400) http.Error(w, "'size' parameter is only supported with disco pings", 400)
return return
} }
if size > int(tstun.DefaultMTU()) { if size > magicsock.MaxDiscoPingSize {
http.Error(w, fmt.Sprintf("maximum value for 'size' is %v", tstun.DefaultMTU()), 400) http.Error(w, fmt.Sprintf("maximum value for 'size' is %v", magicsock.MaxDiscoPingSize), 400)
return return
} }
} }

@ -1,33 +1,154 @@
// Copyright (c) Tailscale Inc & AUTHORS // Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause // SPDX-License-Identifier: BSD-3-Clause
package tstun package tstun
import "tailscale.com/envknob" import (
"tailscale.com/envknob"
)
// The MTU (Maximum Transmission Unit) of a network interface is the largest
// packet that can be sent or received through that interface, including all
// headers above the link layer (e.g. IP headers, UDP headers, Wireguard
// headers, etc.). We have to think about several different values of MTU:
//
// Wire MTU: The MTU of an interface underneath the tailscale TUN, e.g. an
// Ethernet network card will default to a 1500 byte MTU. The user may change
// this MTU at any time.
//
// TUN MTU: The current MTU of the tailscale TUN. This MTU is adjusted downward
// to make room for the wireguard/tailscale headers. For example, if the
// underlying network interface's MTU is 1500 bytes, the maximum size of a
// packet entering the tailscale TUN is 1420 bytes. The user may change this MTU
// at any time via the OS's tools (ifconfig, ip, etc.).
//
// User configured initial MTU: The MTU the tailscale TUN should be created
// with, set by the user via TS_DEBUG_MTU. It should be adjusted down from the
// underlying interface MTU by 80 bytes to make room for the wireguard
// headers. This envknob is mostly for debugging. This value is used once at TUN
// creation and ignored thereafter.
//
// User configured current MTU: The MTU set via the OS's tools (ifconfig, ip,
// etc.). This MTU can change at any time. Setting the MTU this way goes through
// the MTU() method of tailscale's TUN wrapper.
//
// Maximum probed MTU: This is the largest MTU size that we send probe packets
// for.
//
// Safe MTU: If the tailscale TUN MTU is set to this value, almost all packets
// will get to their destination. Tailscale defaults to this MTU in the absence
// of path MTU probe information or user MTU configuration. We may occasionally
// find a path that needs a smaller MTU but it is very rare.
//
// Peer MTU: This is the path MTU to a peer's current best endpoint. It defaults
// to the Safe MTU unless we have path MTU probe results that tell us otherwise.
//
// Initial MTU: This is the MTU tailscaled creates the TUN with. In order of
// priority, it is:
//
// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of maxTUNMTU
// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg
// overhead
// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
//
// Current MTU: This is the MTU of the tailscale TUN at any given moment
// after TUN creation. In order of priority, it is:
//
// 1. The MTU set by the user via the OS, if it has ever been set
// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg
// overhead
// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
// TUNMTU is the MTU for the tailscale TUN, i.e. the largest packet that can
// enter the TUN before the wireguard headers are added. It is a distinct
// named type, so converting to or from a WireMTU requires an explicit
// conversion; use TUNToWireMTU and WireToTUNMTU to account for the headers.
type TUNMTU uint32
// WireMTU is the MTU for the underlying network devices, i.e. the MTU of an
// interface underneath the tailscale TUN. Packets counted against it include
// the wireguard headers; use WireToTUNMTU to derive the corresponding TUN MTU.
type WireMTU uint32
const ( const (
maxMTU uint32 = 65536 // maxTUNMTU is the largest MTU we will consider for the Tailscale
defaultMTU uint32 = 1280 // TUN. This is inherited from wireguard-go and can be surprisingly
// small; on Windows it is currently 2048 - 32 bytes and on iOS it is 1700
// - 32 bytes.
// TODO(val,raggi): On Windows this seems to derive from RIO driver
// constraints in Wireguard but we don't use RIO so could probably make
// this bigger.
maxTUNMTU TUNMTU = TUNMTU(MaxPacketSize)
// safeTUNMTU is the default "safe" MTU for the Tailscale TUN that we
// use in the absence of other information such as path MTU probes.
safeTUNMTU TUNMTU = 1280
) )
// DefaultMTU returns either the constant default MTU of 1280, or the value set // MaxProbedWireMTU is the largest MTU we will test for path MTU
// in TS_DEBUG_MTU clamped to a maximum of 65536. // discovery.
func DefaultMTU() uint32 { var MaxProbedWireMTU WireMTU = 9000
// DefaultMTU is the Tailscale default MTU for now.
// func init() {
// wireguard-go defaults to 1420 bytes, which only works if the if MaxProbedWireMTU > WireMTU(maxTUNMTU) {
// "outer" MTU is 1500 bytes. This breaks on DSL connections MaxProbedWireMTU = WireMTU(maxTUNMTU)
// (typically 1492 MTU) and on GCE (1460 MTU?!). }
// }
// 1280 is the smallest MTU allowed for IPv6, which is a sensible
// "probably works everywhere" setting until we develop proper PMTU // wgHeaderLen is the length of all the headers Wireguard adds to a packet
// discovery. // in the worst case (IPv6). This constant is for use when we can't or
tunMTU := defaultMTU // shouldn't use information about the IP version of a specific packet
if mtu, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok { // (e.g., calculating the MTU for the Tailscale interface).
mtu := uint32(mtu) //
if mtu > maxMTU { // A Wireguard header includes:
mtu = maxMTU //
} // - 20-byte IPv4 header or 40-byte IPv6 header
tunMTU = mtu // - 8-byte UDP header
// - 4-byte type
// - 4-byte key index
// - 8-byte nonce
// - 16-byte authentication tag
const wgHeaderLen = 40 + 8 + 4 + 4 + 8 + 16
// TUNToWireMTU converts the MTU that the Tailscale TUN presents to the user
// into the on-the-wire MTU needed to carry the largest packet that fits
// through the TUN, by adding the worst-case wireguard header overhead
// (wgHeaderLen).
func TUNToWireMTU(t TUNMTU) WireMTU {
	wire := WireMTU(t)
	wire += wgHeaderLen
	return wire
}
// WireToTUNMTU converts the MTU of an underlying network device into the
// largest MTU a Tailscale TUN running on top of that device can offer, by
// subtracting the worst-case wireguard header overhead (wgHeaderLen). A wire
// MTU smaller than the header overhead yields 0 instead of wrapping around.
func WireToTUNMTU(w WireMTU) TUNMTU {
	if w >= wgHeaderLen {
		return TUNMTU(w - wgHeaderLen)
	}
	return 0
}
// DefaultTUNMTU returns the MTU we use to set the Tailscale TUN
// MTU. It is also the path MTU that we default to if we have no
// information about the path to a peer.
//
// 1. If set, the value of TS_DEBUG_MTU clamped to a maximum of maxTUNMTU
// 2. If TS_DEBUG_ENABLE_PMTUD is set, the maximum size MTU we probe, minus wg overhead
// 3. If TS_DEBUG_ENABLE_PMTUD is not set, the Safe MTU
func DefaultTUNMTU() TUNMTU {
if m, ok := envknob.LookupUintSized("TS_DEBUG_MTU", 10, 32); ok {
return min(TUNMTU(m), maxTUNMTU)
}
debugPMTUD, _ := envknob.LookupBool("TS_DEBUG_ENABLE_PMTUD")
if debugPMTUD {
return WireToTUNMTU(MaxProbedWireMTU)
} }
return tunMTU
return safeTUNMTU
}
// DefaultMTU returns the value of DefaultTUNMTU as a plain uint32.
//
// It is a temporary workaround for code on corp that uses this function name.
// TODO(val): Remove as soon as corp OSS is updated.
func DefaultMTU() uint32 {
	mtu := DefaultTUNMTU()
	return uint32(mtu)
}
// DefaultWireMTU returns the default TUN MTU, adjusted for wireguard
// overhead.
func DefaultWireMTU() WireMTU {
return TUNToWireMTU(DefaultTUNMTU())
} }

@ -4,25 +4,93 @@ package tstun
import ( import (
"os" "os"
"strconv"
"testing" "testing"
) )
func TestDefaultMTU(t *testing.T) { // Test the default MTU in the presence of various envknobs.
orig := os.Getenv("TS_DEBUG_MTU") func TestDefaultTunMTU(t *testing.T) {
defer os.Setenv("TS_DEBUG_MTU", orig) // Save and restore the envknobs we will be changing.
// TS_DEBUG_MTU sets the MTU to a specific value.
defer os.Setenv("TS_DEBUG_MTU", os.Getenv("TS_DEBUG_MTU"))
os.Setenv("TS_DEBUG_MTU", "") os.Setenv("TS_DEBUG_MTU", "")
if DefaultMTU() != 1280 {
t.Errorf("DefaultMTU() = %d, want 1280", DefaultMTU()) // TS_DEBUG_ENABLE_PMTUD enables path MTU discovery.
defer os.Setenv("TS_DEBUG_ENABLE_PMTUD", os.Getenv("TS_DEBUG_ENABLE_PMTUD"))
os.Setenv("TS_DEBUG_ENABLE_PMTUD", "")
// With no MTU envknobs set, we should get the conservative MTU.
if DefaultTUNMTU() != safeTUNMTU {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), safeTUNMTU)
}
// If set, TS_DEBUG_MTU should set the MTU.
mtu := maxTUNMTU - 1
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != mtu {
t.Errorf("default TUN MTU = %d, want %d, TS_DEBUG_MTU ignored", DefaultTUNMTU(), mtu)
}
// MTU should be clamped to maxTUNMTU.
mtu = maxTUNMTU + 1
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != maxTUNMTU {
t.Errorf("default TUN MTU = %d, want %d, clamping failed", DefaultTUNMTU(), maxTUNMTU)
}
// If PMTUD is enabled, the MTU should default to the largest probed
// MTU, but only if the user hasn't requested a specific MTU.
os.Setenv("TS_DEBUG_MTU", "")
os.Setenv("TS_DEBUG_ENABLE_PMTUD", "true")
if DefaultTUNMTU() != WireToTUNMTU(MaxProbedWireMTU) {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), WireToTUNMTU(MaxProbedWireMTU))
}
// TS_DEBUG_MTU should take precedence over TS_DEBUG_ENABLE_PMTUD.
mtu = WireToTUNMTU(MaxProbedWireMTU - 1)
os.Setenv("TS_DEBUG_MTU", strconv.Itoa(int(mtu)))
if DefaultTUNMTU() != mtu {
t.Errorf("default TUN MTU = %d, want %d", DefaultTUNMTU(), mtu)
}
}
// Test the conversion of wire MTU to/from Tailscale TUN MTU corner cases.
func TestMTUConversion(t *testing.T) {
tests := []struct {
w WireMTU
t TUNMTU
}{
{w: 0, t: 0},
{w: wgHeaderLen - 1, t: 0},
{w: wgHeaderLen, t: 0},
{w: wgHeaderLen + 1, t: 1},
{w: 1360, t: 1280},
{w: 1500, t: 1420},
{w: 9000, t: 8920},
}
for _, tt := range tests {
m := WireToTUNMTU(tt.w)
if m != tt.t {
t.Errorf("conversion of wire MTU %v to TUN MTU = %v, want %v", tt.w, m, tt.t)
}
} }
os.Setenv("TS_DEBUG_MTU", "9000") tests2 := []struct {
if DefaultMTU() != 9000 { t TUNMTU
t.Errorf("DefaultMTU() = %d, want 9000", DefaultMTU()) w WireMTU
}{
{t: 0, w: wgHeaderLen},
{t: 1, w: wgHeaderLen + 1},
{t: 1280, w: 1360},
{t: 1420, w: 1500},
{t: 8920, w: 9000},
} }
os.Setenv("TS_DEBUG_MTU", "123456789") for _, tt := range tests2 {
if DefaultMTU() != maxMTU { m := TUNToWireMTU(tt.t)
t.Errorf("DefaultMTU() = %d, want %d", DefaultMTU(), maxMTU) if m != tt.w {
t.Errorf("conversion of TUN MTU %v to wire MTU = %v, want %v", tt.t, m, tt.w)
}
} }
} }

@ -44,7 +44,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
} }
dev, err = createTAP(tapName, bridgeName) dev, err = createTAP(tapName, bridgeName)
} else { } else {
dev, err = tun.CreateTUN(tunName, int(DefaultMTU())) dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU()))
} }
if err != nil { if err != nil {
return nil, "", err return nil, "", err

@ -422,6 +422,10 @@ func (de *endpoint) noteActiveLocked() {
} }
} }
// MaxDiscoPingSize is the largest useful ping message size that we
// can send - the maximum packet size (tstun.MaxPacketSize) minus the
// 20-byte IPv4 header and the 8-byte UDP header.
var MaxDiscoPingSize = tstun.MaxPacketSize - 20 - 8
// cliPing starts a ping for the "tailscale ping" command. res is value to call cb with, // cliPing starts a ping for the "tailscale ping" command. res is value to call cb with,
// already partially filled. // already partially filled.
func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) { func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) {
@ -433,6 +437,11 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat
cb(res) cb(res)
return return
} }
if size > MaxDiscoPingSize {
res.Err = errPingTooBig.Error()
cb(res)
return
}
now := mono.Now() now := mono.Now()
udpAddr, derpAddr, _ := de.addrForSendLocked(now) udpAddr, derpAddr, _ := de.addrForSendLocked(now)
@ -457,6 +466,7 @@ func (de *endpoint) cliPing(res *ipnstate.PingResult, size int, cb func(*ipnstat
var ( var (
errExpired = errors.New("peer's node key has expired") errExpired = errors.New("peer's node key has expired")
errNoUDPOrDERP = errors.New("no UDP or DERP addr") errNoUDPOrDERP = errors.New("no UDP or DERP addr")
errPingTooBig = errors.New("ping size too big")
) )
func (de *endpoint) send(buffs [][]byte) error { func (de *endpoint) send(buffs [][]byte) error {
@ -564,13 +574,9 @@ const discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen
// The caller should use de.discoKey as the discoKey argument. // The caller should use de.discoKey as the discoKey argument.
// It is passed in so that sendDiscoPing doesn't need to lock de.mu. // It is passed in so that sendDiscoPing doesn't need to lock de.mu.
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) { func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
padding := 0 size = min(size, MaxDiscoPingSize)
if size > int(tstun.DefaultMTU()) { padding := max(size-discoPingSize, 0)
size = int(tstun.DefaultMTU())
}
if size-discoPingSize > 0 {
padding = size - discoPingSize
}
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{ sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
TxID: [12]byte(txid), TxID: [12]byte(txid),
NodeKey: de.c.publicKeyAtomic.Load(), NodeKey: de.c.publicKeyAtomic.Load(),

@ -150,10 +150,11 @@ type Impl struct {
const nicID = 1 const nicID = 1
// maxUDPPacketSize is the maximum size of a UDP packet we copy in startPacketCopy // maxUDPPacketSize is the maximum size of a UDP packet we copy in
// when relaying UDP packets. We don't use the 'mtu' const in anticipation of // startPacketCopy when relaying UDP packets. The user can configure
// one day making the MTU more dynamic. // the tailscale MTU to anything up to this size so we can potentially
const maxUDPPacketSize = 1500 // have a UDP packet as big as the MTU.
const maxUDPPacketSize = tstun.MaxPacketSize
// Create creates and populates a new Impl. // Create creates and populates a new Impl.
func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magicsock.Conn, dialer *tsdial.Dialer, dns *dns.Manager, pm *proxymap.Mapper) (*Impl, error) { func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magicsock.Conn, dialer *tsdial.Dialer, dns *dns.Manager, pm *proxymap.Mapper) (*Impl, error) {
@ -184,7 +185,7 @@ func Create(logf logger.Logf, tundev *tstun.Wrapper, e wgengine.Engine, mc *magi
if tcpipErr != nil { if tcpipErr != nil {
return nil, fmt.Errorf("could not enable TCP SACK: %v", tcpipErr) return nil, fmt.Errorf("could not enable TCP SACK: %v", tcpipErr)
} }
linkEP := channel.New(512, tstun.DefaultMTU(), "") linkEP := channel.New(512, uint32(tstun.DefaultTUNMTU()), "")
if tcpipProblem := ipstack.CreateNIC(nicID, linkEP); tcpipProblem != nil { if tcpipProblem := ipstack.CreateNIC(nicID, linkEP); tcpipProblem != nil {
return nil, fmt.Errorf("could not create netstack NIC: %v", tcpipProblem) return nil, fmt.Errorf("could not create netstack NIC: %v", tcpipProblem)
} }
@ -1059,7 +1060,9 @@ func (ns *Impl) acceptUDP(r *udp.ForwarderRequest) {
go ns.forwardUDP(c, srcAddr, dstAddr) go ns.forwardUDP(c, srcAddr, dstAddr)
} }
// Buffer pool for forwarding UDP packets. // Buffer pool for forwarding UDP packets. Implementations are advised not to
// exceed 512 bytes per DNS request due to fragmenting but in reality can and do
// send much larger packets, so use the maximum possible UDP packet size.
var udpBufPool = &sync.Pool{ var udpBufPool = &sync.Pool{
New: func() any { New: func() any {
b := make([]byte, maxUDPPacketSize) b := make([]byte, maxUDPPacketSize)

@ -241,7 +241,7 @@ func interfaceFromLUID(luid winipcfg.LUID, flags winipcfg.GAAFlags) (*winipcfg.I
var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy")) var networkCategoryWarning = health.NewWarnable(health.WithMapDebugFlag("warn-network-category-unhealthy"))
func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) { func configureInterface(cfg *Config, tun *tun.NativeTun) (retErr error) {
var mtu = tstun.DefaultMTU() var mtu = tstun.DefaultTUNMTU()
luid := winipcfg.LUID(tun.LUID()) luid := winipcfg.LUID(tun.LUID())
iface, err := interfaceFromLUID(luid, iface, err := interfaceFromLUID(luid,
// Issue 474: on early boot, when the network is still // Issue 474: on early boot, when the network is still

Loading…
Cancel
Save