From a5ae21a832b7f051a1fabbffb1766ab81e26ee0e Mon Sep 17 00:00:00 2001 From: Val Date: Mon, 18 Sep 2023 20:23:27 +0200 Subject: [PATCH] wgengine/magicsock: improve don't fragment bit set/get support Add an enable/disable argument to setDontFragment() in preparation for dynamic enable/disable of peer path MTU discovery. Add getDontFragment() to get the status of the don't fragment bit from a socket. Updates #311 Co-authored-by: James Tucker Signed-off-by: Val --- wgengine/magicsock/batching_conn.go | 9 +++++ wgengine/magicsock/blockforever_conn.go | 8 ++-- wgengine/magicsock/magicsock.go | 4 +- wgengine/magicsock/peermtu_darwin.go | 50 +++++++++++++++--------- wgengine/magicsock/peermtu_linux.go | 51 ++++++++++++++++--------- wgengine/magicsock/peermtu_stubs.go | 26 +++++++++---- wgengine/magicsock/peermtu_unix.go | 46 ++++++++++++++++++++++ wgengine/magicsock/rebinding_conn.go | 11 ++++++ 8 files changed, 159 insertions(+), 46 deletions(-) create mode 100644 wgengine/magicsock/peermtu_unix.go diff --git a/wgengine/magicsock/batching_conn.go b/wgengine/magicsock/batching_conn.go index 69fcf7d09..242f31c37 100644 --- a/wgengine/magicsock/batching_conn.go +++ b/wgengine/magicsock/batching_conn.go @@ -9,6 +9,7 @@ import ( "net/netip" "sync" "sync/atomic" + "syscall" "time" "golang.org/x/net/ipv6" @@ -192,3 +193,11 @@ retry: } return err } + +func (c *batchingUDPConn) SyscallConn() (syscall.RawConn, error) { + sc, ok := c.pc.(syscall.Conn) + if !ok { + return nil, errUnsupportedConnType + } + return sc.SyscallConn() +} diff --git a/wgengine/magicsock/blockforever_conn.go b/wgengine/magicsock/blockforever_conn.go index 42b94bbd4..f2e85dcd5 100644 --- a/wgengine/magicsock/blockforever_conn.go +++ b/wgengine/magicsock/blockforever_conn.go @@ -8,6 +8,7 @@ import ( "net" "net/netip" "sync" + "syscall" "time" ) @@ -48,6 +49,7 @@ func (c *blockForeverConn) Close() error { return nil } -func (c *blockForeverConn) SetDeadline(t time.Time) error { return 
errors.New("unimplemented") } -func (c *blockForeverConn) SetReadDeadline(t time.Time) error { return errors.New("unimplemented") } -func (c *blockForeverConn) SetWriteDeadline(t time.Time) error { return errors.New("unimplemented") } +func (c *blockForeverConn) SetDeadline(t time.Time) error { return errors.New("unimplemented") } +func (c *blockForeverConn) SetReadDeadline(t time.Time) error { return errors.New("unimplemented") } +func (c *blockForeverConn) SetWriteDeadline(t time.Time) error { return errors.New("unimplemented") } +func (c *blockForeverConn) SyscallConn() (syscall.RawConn, error) { return nil, errUnsupportedConnType } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index ad646fd99..f6f5d85d6 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -983,6 +983,8 @@ var errDropDerpPacket = errors.New("too many DERP packets queued; dropping") var errNoUDP = errors.New("no UDP available on platform") +var errUnsupportedConnType = errors.New("unsupported connection type") + var ( // This acts as a compile-time check for our usage of ipv6.Message in // batchingUDPConn for both IPv6 and IPv4 operations. @@ -2309,7 +2311,7 @@ func (c *Conn) bindSocket(ruc *RebindingUDPConn, network string, curPortFate cur trySetSocketBuffer(pconn, c.logf) if CanPMTUD() { - err = setDontFragment(pconn, network) + err = c.setDontFragment(network, true) if err != nil { c.logf("magicsock: set dontfragment failed for %v port %d: %v", network, port, err) // TODO disable PMTUD in this case. 
We don't expect the setsockopt to fail on diff --git a/wgengine/magicsock/peermtu_darwin.go b/wgengine/magicsock/peermtu_darwin.go index 7d626aefc..a0a1aacb5 100644 --- a/wgengine/magicsock/peermtu_darwin.go +++ b/wgengine/magicsock/peermtu_darwin.go @@ -6,30 +6,46 @@ package magicsock import ( - "net" "syscall" "golang.org/x/sys/unix" - "tailscale.com/types/nettype" ) -func setDontFragment(pconn nettype.PacketConn, network string) (err error) { - if c, ok := pconn.(*net.UDPConn); ok { - rc, err := c.SyscallConn() - if err == nil { - rc.Control(func(fd uintptr) { - if network == "udp4" { - err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, unix.IP_DONTFRAG, 1) - } - if network == "udp6" { - err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, unix.IPV6_DONTFRAG, 1) - } - }) - } +func getDontFragOpt(network string) int { + if network == "udp4" { + return unix.IP_DONTFRAG + } + return unix.IPV6_DONTFRAG +} + +func (c *Conn) setDontFragment(network string, enable bool) error { + optArg := 1 + if enable == false { + optArg = 0 + } + var err error + rcErr := c.connControl(network, func(fd uintptr) { + err = syscall.SetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network), optArg) + }) + + if rcErr != nil { + return rcErr } return err } -func CanPMTUD() bool { - return debugEnablePMTUD() // only if the envknob is for now. 
+func (c *Conn) getDontFragment(network string) (bool, error) { + var v int + var err error + rcErr := c.connControl(network, func(fd uintptr) { + v, err = syscall.GetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network)) + }) + + if rcErr != nil { + return false, rcErr + } + if v == 1 { + return true, err + } + return false, err } diff --git a/wgengine/magicsock/peermtu_linux.go b/wgengine/magicsock/peermtu_linux.go index 2f971e9c9..b76f30f08 100644 --- a/wgengine/magicsock/peermtu_linux.go +++ b/wgengine/magicsock/peermtu_linux.go @@ -6,29 +6,44 @@ package magicsock import ( - "net" "syscall" - - "tailscale.com/types/nettype" ) -func setDontFragment(pconn nettype.PacketConn, network string) (err error) { - if c, ok := pconn.(*net.UDPConn); ok { - rc, err := c.SyscallConn() - if err == nil { - rc.Control(func(fd uintptr) { - if network == "udp4" { - err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IP, syscall.IP_MTU_DISCOVER, syscall.IP_PMTUDISC_DO) - } - if network == "udp6" { - err = syscall.SetsockoptInt(int(fd), syscall.IPPROTO_IPV6, syscall.IPV6_MTU_DISCOVER, syscall.IP_PMTUDISC_DO) - } - }) - } +func getDontFragOpt(network string) int { + if network == "udp4" { + return syscall.IP_MTU_DISCOVER + } + return syscall.IPV6_MTU_DISCOVER +} + +func (c *Conn) setDontFragment(network string, enable bool) error { + optArg := syscall.IP_PMTUDISC_DO + if enable == false { + optArg = syscall.IP_PMTUDISC_DONT + } + var err error + rcErr := c.connControl(network, func(fd uintptr) { + err = syscall.SetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network), optArg) + }) + + if rcErr != nil { + return rcErr } return err } -func CanPMTUD() bool { - return debugEnablePMTUD() // only if the envknob is enabled, for now. 
+func (c *Conn) getDontFragment(network string) (bool, error) { + var v int + var err error + rcErr := c.connControl(network, func(fd uintptr) { + v, err = syscall.GetsockoptInt(int(fd), getIPProto(network), getDontFragOpt(network)) + }) + + if rcErr != nil { + return false, rcErr + } + if v == syscall.IP_PMTUDISC_DO { + return true, err + } + return false, err } diff --git a/wgengine/magicsock/peermtu_stubs.go b/wgengine/magicsock/peermtu_stubs.go index 77a521e08..60c619f29 100644 --- a/wgengine/magicsock/peermtu_stubs.go +++ b/wgengine/magicsock/peermtu_stubs.go @@ -7,15 +7,27 @@ package magicsock import ( "errors" - - "tailscale.com/types/nettype" ) -// setDontFragment sets the dontfragment sockopt on pconn on the platforms that support it, -// for both IPv4 and IPv6. -// (C.f. https://datatracker.ietf.org/doc/html/rfc3542#section-11.2 for IPv6 fragmentation) -func setDontFragment(pconn nettype.PacketConn, network string) (err error) { - return errors.New("setting don't fragment bit not supported on this OS") +// setDontFragment sets the don't fragment sockopt on the underlying connection +// specified by network, which must be "udp4" or "udp6". See +// https://datatracker.ietf.org/doc/html/rfc3542#section-11.2 for details on +// IPv6 fragmentation. +// +// Return values: +// - an error if peer MTU is not supported on this OS +// - errUnsupportedConnType if the underlying connection does not implement syscall.Conn +// - otherwise, the result of setting the don't fragment bit +func (c *Conn) setDontFragment(network string, enable bool) error { + return errors.New("peer path MTU discovery not supported on this OS") +} + +// getDontFragment gets the don't fragment setting on the underlying connection +// specified by network, which must be "udp4" or "udp6". Returns true if the +// underlying connection is UDP and the don't fragment bit is set, otherwise +// false. 
+func (c *Conn) getDontFragment(network string) (bool, error) { + return false, nil } // CanPMTUD returns whether this platform supports performing peet path MTU discovery. diff --git a/wgengine/magicsock/peermtu_unix.go b/wgengine/magicsock/peermtu_unix.go new file mode 100644 index 000000000..6b89ef93a --- /dev/null +++ b/wgengine/magicsock/peermtu_unix.go @@ -0,0 +1,46 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +//go:build (darwin && !ios) || (linux && !android) + +package magicsock + +import ( + "syscall" +) + +// getIPProto returns the value of the get/setsockopt proto argument necessary +// to set an IP sockopt that corresponds with the string network, which must be +// "udp4" or "udp6". +func getIPProto(network string) int { + if network == "udp4" { + return syscall.IPPROTO_IP + } + return syscall.IPPROTO_IPV6 +} + +// connControl allows the caller to run a system call on the socket underlying +// Conn specified by the string network, which must be "udp4" or "udp6". If the +// pconn type implements the syscall method, this function returns the value of +// the system call fn called with the fd of the socket as its arg (or the +// error from rc.Control() if that fails). Otherwise it returns the error +// errUnsupportedConnType. 
+func (c *Conn) connControl(network string, fn func(fd uintptr)) error { + pconn := c.pconn4.pconn + if network == "udp6" { + pconn = c.pconn6.pconn + } + sc, ok := pconn.(syscall.Conn) + if !ok { + return errUnsupportedConnType + } + rc, err := sc.SyscallConn() + if err != nil { + return err + } + return rc.Control(fn) +} + +func CanPMTUD() bool { + return debugEnablePMTUD() +} diff --git a/wgengine/magicsock/rebinding_conn.go b/wgengine/magicsock/rebinding_conn.go index 161f52062..f1e47f3a8 100644 --- a/wgengine/magicsock/rebinding_conn.go +++ b/wgengine/magicsock/rebinding_conn.go @@ -9,6 +9,7 @@ import ( "net/netip" "sync" "sync/atomic" + "syscall" "golang.org/x/net/ipv6" "tailscale.com/net/netaddr" @@ -166,3 +167,13 @@ func (c *RebindingUDPConn) writeToUDPAddrPortWithInitPconn(pconn nettype.PacketC func (c *RebindingUDPConn) WriteToUDPAddrPort(b []byte, addr netip.AddrPort) (int, error) { return c.writeToUDPAddrPortWithInitPconn(*c.pconnAtomic.Load(), b, addr) } + +func (c *RebindingUDPConn) SyscallConn() (syscall.RawConn, error) { + c.mu.Lock() + defer c.mu.Unlock() + sc, ok := c.pconn.(syscall.Conn) + if !ok { + return nil, errUnsupportedConnType + } + return sc.SyscallConn() +}