net/tstun: restore tap mode functionality

TAP mode had bit-rotted, most likely during the transition to vectorized
I/O in 76389d8baf. Tested on Ubuntu 24.04 by creating a netns and
completing a DHCP exchange over the TAP device to obtain an IP address.

Updates #2589

Signed-off-by: Maisem Ali <maisem@tailscale.com>
pull/13876/head
Maisem Ali 3 days ago committed by Maisem Ali
parent 0f4c9c0ecb
commit d4d21a0bbf

@ -310,7 +310,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+ gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+
gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+ gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+
gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+ gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+
💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+ 💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+

@ -221,7 +221,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/internal/ip from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+ gvisor.dev/gvisor/pkg/tcpip/network/internal/multicast from gvisor.dev/gvisor/pkg/tcpip/network/ipv4+
gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+ gvisor.dev/gvisor/pkg/tcpip/network/ipv4 from tailscale.com/net/tstun+
gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack gvisor.dev/gvisor/pkg/tcpip/network/ipv6 from tailscale.com/wgengine/netstack+
gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+ gvisor.dev/gvisor/pkg/tcpip/ports from gvisor.dev/gvisor/pkg/tcpip/stack+
gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+ gvisor.dev/gvisor/pkg/tcpip/seqnum from gvisor.dev/gvisor/pkg/tcpip/header+
💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+ 💣 gvisor.dev/gvisor/pkg/tcpip/stack from gvisor.dev/gvisor/pkg/tcpip/adapters/gonet+

@ -6,6 +6,7 @@
package tstun package tstun
import ( import (
"bytes"
"fmt" "fmt"
"net" "net"
"net/netip" "net/netip"
@ -20,10 +21,13 @@ import (
"gvisor.dev/gvisor/pkg/tcpip/checksum" "gvisor.dev/gvisor/pkg/tcpip/checksum"
"gvisor.dev/gvisor/pkg/tcpip/header" "gvisor.dev/gvisor/pkg/tcpip/header"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4" "gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
"gvisor.dev/gvisor/pkg/tcpip/transport/udp" "gvisor.dev/gvisor/pkg/tcpip/transport/udp"
"tailscale.com/net/netaddr" "tailscale.com/net/netaddr"
"tailscale.com/net/packet" "tailscale.com/net/packet"
"tailscale.com/syncs"
"tailscale.com/types/ipproto" "tailscale.com/types/ipproto"
"tailscale.com/types/logger"
"tailscale.com/util/multierr" "tailscale.com/util/multierr"
) )
@ -35,13 +39,13 @@ var ourMAC = net.HardwareAddr{0x30, 0x2D, 0x66, 0xEC, 0x7A, 0x93}
func init() { createTAP = createTAPLinux } func init() { createTAP = createTAPLinux }
func createTAPLinux(tapName, bridgeName string) (tun.Device, error) { func createTAPLinux(logf logger.Logf, tapName, bridgeName string) (tun.Device, error) {
fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0) fd, err := unix.Open("/dev/net/tun", unix.O_RDWR, 0)
if err != nil { if err != nil {
return nil, err return nil, err
} }
dev, err := openDevice(fd, tapName, bridgeName) dev, err := openDevice(logf, fd, tapName, bridgeName)
if err != nil { if err != nil {
unix.Close(fd) unix.Close(fd)
return nil, err return nil, err
@ -50,7 +54,7 @@ func createTAPLinux(tapName, bridgeName string) (tun.Device, error) {
return dev, nil return dev, nil
} }
func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) { func openDevice(logf logger.Logf, fd int, tapName, bridgeName string) (tun.Device, error) {
ifr, err := unix.NewIfreq(tapName) ifr, err := unix.NewIfreq(tapName)
if err != nil { if err != nil {
return nil, err return nil, err
@ -71,7 +75,7 @@ func openDevice(fd int, tapName, bridgeName string) (tun.Device, error) {
} }
} }
return newTAPDevice(fd, tapName) return newTAPDevice(logf, fd, tapName)
} }
type etherType [2]byte type etherType [2]byte
@ -91,7 +95,7 @@ const (
// handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether // handleTAPFrame handles receiving a raw TAP ethernet frame and reports whether
// it's been handled (that is, whether it should NOT be passed to wireguard). // it's been handled (that is, whether it should NOT be passed to wireguard).
func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool { func (t *tapDevice) handleTAPFrame(ethBuf []byte) bool {
if len(ethBuf) < ethernetFrameSize { if len(ethBuf) < ethernetFrameSize {
// Corrupt. Ignore. // Corrupt. Ignore.
@ -164,8 +168,7 @@ func (t *Wrapper) handleTAPFrame(ethBuf []byte) bool {
copy(res.HardwareAddressTarget(), req.HardwareAddressSender()) copy(res.HardwareAddressTarget(), req.HardwareAddressSender())
copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender()) copy(res.ProtocolAddressTarget(), req.ProtocolAddressSender())
// TODO(raggi): reduce allocs! n, err := t.WriteEthernet(buf)
n, err := t.tdev.Write([][]byte{buf}, 0)
if tapDebug { if tapDebug {
t.logf("tap: wrote ARP reply %v, %v", n, err) t.logf("tap: wrote ARP reply %v, %v", n, err)
} }
@ -182,7 +185,7 @@ const routerIP = "100.70.145.1" // must be in same netmask (currently hack at
// handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether // handleDHCPRequest handles receiving a raw TAP ethernet frame and reports whether
// it's been handled as a DHCP request. That is, it reports whether the frame should // it's been handled as a DHCP request. That is, it reports whether the frame should
// be ignored by the caller and not passed on. // be ignored by the caller and not passed on.
func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool { func (t *tapDevice) handleDHCPRequest(ethBuf []byte) bool {
const udpHeader = 8 const udpHeader = 8
if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader { if len(ethBuf) < ethernetFrameSize+ipv4HeaderLen+udpHeader {
if tapDebug { if tapDebug {
@ -207,7 +210,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 { if p.IPProto != ipproto.UDP || p.Src.Port() != 68 || p.Dst.Port() != 67 {
// Not a DHCP request. // Not a DHCP request.
if tapDebug { if tapDebug {
t.logf("tap: DHCP wrong meta") t.logf("tap: DHCP wrong meta: %+v", p)
} }
return passOnPacket return passOnPacket
} }
@ -250,8 +253,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
) )
// TODO(raggi): reduce allocs! n, err := t.WriteEthernet(pkt)
n, err := t.tdev.Write([][]byte{pkt}, 0)
if tapDebug { if tapDebug {
t.logf("tap: wrote DHCP OFFER %v, %v", n, err) t.logf("tap: wrote DHCP OFFER %v, %v", n, err)
} }
@ -278,8 +280,7 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src netip.AddrPortFrom(netaddr.IPv4(100, 100, 100, 100), 67), // src
netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst netip.AddrPortFrom(netaddr.IPv4(255, 255, 255, 255), 68), // dst
) )
// TODO(raggi): reduce allocs! n, err := t.WriteEthernet(pkt)
n, err := t.tdev.Write([][]byte{pkt}, 0)
if tapDebug { if tapDebug {
t.logf("tap: wrote DHCP ACK %v, %v", n, err) t.logf("tap: wrote DHCP ACK %v, %v", n, err)
} }
@ -291,6 +292,16 @@ func (t *Wrapper) handleDHCPRequest(ethBuf []byte) bool {
return consumePacket return consumePacket
} }
// writeEthernetFrame encodes an ethernet header at the start of buf, using
// srcMAC and dstMAC as the link addresses and proto as the EtherType.
//
// Only the header is written here; nothing in this function touches the payload.
// NOTE(review): buf is presumably required to be at least
// header.EthernetMinimumSize bytes long — confirm against header.Ethernet.Encode.
func writeEthernetFrame(buf []byte, srcMAC, dstMAC net.HardwareAddr, proto tcpip.NetworkProtocolNumber) {
	fields := header.EthernetFields{
		SrcAddr: tcpip.LinkAddress(srcMAC),
		DstAddr: tcpip.LinkAddress(dstMAC),
		Type:    proto,
	}
	header.Ethernet(buf).Encode(&fields)
}
func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netip.AddrPort) []byte { func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst netip.AddrPort) []byte {
buf := make([]byte, header.EthernetMinimumSize+header.UDPMinimumSize+header.IPv4MinimumSize+len(payload)) buf := make([]byte, header.EthernetMinimumSize+header.UDPMinimumSize+header.IPv4MinimumSize+len(payload))
payloadStart := len(buf) - len(payload) payloadStart := len(buf) - len(payload)
@ -300,12 +311,7 @@ func packLayer2UDP(payload []byte, srcMAC, dstMAC net.HardwareAddr, src, dst net
dstB := dst.Addr().As4() dstB := dst.Addr().As4()
dstIP := tcpip.AddrFromSlice(dstB[:]) dstIP := tcpip.AddrFromSlice(dstB[:])
// Ethernet header // Ethernet header
eth := header.Ethernet(buf) writeEthernetFrame(buf, srcMAC, dstMAC, ipv4.ProtocolNumber)
eth.Encode(&header.EthernetFields{
SrcAddr: tcpip.LinkAddress(srcMAC),
DstAddr: tcpip.LinkAddress(dstMAC),
Type: ipv4.ProtocolNumber,
})
// IP header // IP header
ipbuf := buf[header.EthernetMinimumSize:] ipbuf := buf[header.EthernetMinimumSize:]
ip := header.IPv4(ipbuf) ip := header.IPv4(ipbuf)
@ -342,17 +348,18 @@ func run(prog string, args ...string) error {
return nil return nil
} }
func (t *Wrapper) destMAC() [6]byte { func (t *tapDevice) destMAC() [6]byte {
return t.destMACAtomic.Load() return t.destMACAtomic.Load()
} }
func newTAPDevice(fd int, tapName string) (tun.Device, error) { func newTAPDevice(logf logger.Logf, fd int, tapName string) (tun.Device, error) {
err := unix.SetNonblock(fd, true) err := unix.SetNonblock(fd, true)
if err != nil { if err != nil {
return nil, err return nil, err
} }
file := os.NewFile(uintptr(fd), "/dev/tap") file := os.NewFile(uintptr(fd), "/dev/tap")
d := &tapDevice{ d := &tapDevice{
logf: logf,
file: file, file: file,
events: make(chan tun.Event), events: make(chan tun.Event),
name: tapName, name: tapName,
@ -360,20 +367,14 @@ func newTAPDevice(fd int, tapName string) (tun.Device, error) {
return d, nil return d, nil
} }
var (
_ setWrapperer = &tapDevice{}
)
type tapDevice struct { type tapDevice struct {
file *os.File file *os.File
logf func(format string, args ...any)
events chan tun.Event events chan tun.Event
name string name string
wrapper *Wrapper
closeOnce sync.Once closeOnce sync.Once
}
func (t *tapDevice) setWrapper(wrapper *Wrapper) { destMACAtomic syncs.AtomicValue[[6]byte]
t.wrapper = wrapper
} }
func (t *tapDevice) File() *os.File { func (t *tapDevice) File() *os.File {
@ -384,36 +385,63 @@ func (t *tapDevice) Name() (string, error) {
return t.name, nil return t.name, nil
} }
// Read reads an IP packet from the TAP device. It strips the ethernet frame header.
func (t *tapDevice) Read(buffs [][]byte, sizes []int, offset int) (int, error) { func (t *tapDevice) Read(buffs [][]byte, sizes []int, offset int) (int, error) {
n, err := t.ReadEthernet(buffs, sizes, offset)
if err != nil || n == 0 {
return n, err
}
// Strip the ethernet frame header.
copy(buffs[0][offset:], buffs[0][offset+ethernetFrameSize:offset+sizes[0]])
sizes[0] -= ethernetFrameSize
return 1, nil
}
// ReadEthernet reads a raw ethernet frame from the TAP device.
func (t *tapDevice) ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error) {
n, err := t.file.Read(buffs[0][offset:]) n, err := t.file.Read(buffs[0][offset:])
if err != nil { if err != nil {
return 0, err return 0, err
} }
if t.handleTAPFrame(buffs[0][offset : offset+n]) {
return 0, nil
}
sizes[0] = n sizes[0] = n
return 1, nil return 1, nil
} }
// WriteEthernet writes buf to the TAP device as-is. The caller supplies the
// complete raw ethernet frame (header included); no framing is added here.
// It returns whatever the underlying file write returns.
func (t *tapDevice) WriteEthernet(buf []byte) (int, error) {
	n, err := t.file.Write(buf)
	return n, err
}
// ethBufPool holds a pool of bytes.Buffers for use in [tapDevice.Write],
// which uses one as scratch space to assemble the ethernet header followed
// by the IP packet before writing the frame. Buffers come from the pool in
// an unspecified state, so Write resets the buffer before each use.
var ethBufPool = syncs.Pool[*bytes.Buffer]{New: func() *bytes.Buffer { return new(bytes.Buffer) }}
// Write writes a raw IP packet to the TAP device. It adds the ethernet frame header.
func (t *tapDevice) Write(buffs [][]byte, offset int) (int, error) { func (t *tapDevice) Write(buffs [][]byte, offset int) (int, error) {
errs := make([]error, 0) errs := make([]error, 0)
wrote := 0 wrote := 0
m := t.destMAC()
dstMac := net.HardwareAddr(m[:])
buf := ethBufPool.Get()
defer ethBufPool.Put(buf)
for _, buff := range buffs { for _, buff := range buffs {
if offset < ethernetFrameSize { buf.Reset()
errs = append(errs, fmt.Errorf("[unexpected] weird offset %d for TAP write", offset)) buf.Grow(header.EthernetMinimumSize + len(buff) - offset)
return 0, multierr.New(errs...)
} var ebuf [14]byte
eth := buff[offset-ethernetFrameSize:] switch buff[offset] >> 4 {
dst := t.wrapper.destMAC() case 4:
copy(eth[:6], dst[:]) writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv4.ProtocolNumber)
copy(eth[6:12], ourMAC[:]) case 6:
et := etherTypeIPv4 writeEthernetFrame(ebuf[:], ourMAC, dstMac, ipv6.ProtocolNumber)
if buff[offset]>>4 == 6 { default:
et = etherTypeIPv6 continue
} }
eth[12], eth[13] = et[0], et[1] buf.Write(ebuf[:])
if tapDebug { buf.Write(buff[offset:])
t.wrapper.logf("tap: tapWrite off=%v % x", offset, buff) _, err := t.WriteEthernet(buf.Bytes())
}
_, err := t.file.Write(buff[offset-ethernetFrameSize:])
if err != nil { if err != nil {
errs = append(errs, err) errs = append(errs, err)
} else { } else {
@ -428,8 +456,7 @@ func (t *tapDevice) MTU() (int, error) {
if err != nil { if err != nil {
return 0, err return 0, err
} }
err = unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr) if err := unix.IoctlIfreq(int(t.file.Fd()), unix.SIOCGIFMTU, ifr); err != nil {
if err != nil {
return 0, err return 0, err
} }
return int(ifr.Uint32()), nil return int(ifr.Uint32()), nil

@ -1,8 +0,0 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build !linux || ts_omit_tap
package tstun
func (*Wrapper) handleTAPFrame([]byte) bool { panic("unreachable") }

@ -18,7 +18,7 @@ import (
) )
// createTAP is non-nil on Linux. // createTAP is non-nil on Linux.
var createTAP func(tapName, bridgeName string) (tun.Device, error) var createTAP func(logf logger.Logf, tapName, bridgeName string) (tun.Device, error)
// New returns a tun.Device for the requested device name, along with // New returns a tun.Device for the requested device name, along with
// the OS-dependent name that was allocated to the device. // the OS-dependent name that was allocated to the device.
@ -42,7 +42,7 @@ func New(logf logger.Logf, tunName string) (tun.Device, string, error) {
default: default:
return nil, "", errors.New("bogus tap argument") return nil, "", errors.New("bogus tap argument")
} }
dev, err = createTAP(tapName, bridgeName) dev, err = createTAP(logf, tapName, bridgeName)
} else { } else {
dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU())) dev, err = tun.CreateTUN(tunName, int(DefaultTUNMTU()))
} }

@ -109,9 +109,7 @@ type Wrapper struct {
lastActivityAtomic mono.Time // time of last send or receive lastActivityAtomic mono.Time // time of last send or receive
destIPActivity syncs.AtomicValue[map[netip.Addr]func()] destIPActivity syncs.AtomicValue[map[netip.Addr]func()]
//lint:ignore U1000 used in tap_linux.go discoKey syncs.AtomicValue[key.DiscoPublic]
destMACAtomic syncs.AtomicValue[[6]byte]
discoKey syncs.AtomicValue[key.DiscoPublic]
// timeNow, if non-nil, will be used to obtain the current time. // timeNow, if non-nil, will be used to obtain the current time.
timeNow func() time.Time timeNow func() time.Time
@ -257,12 +255,6 @@ type tunVectorReadResult struct {
dataOffset int dataOffset int
} }
type setWrapperer interface {
// setWrapper enables the underlying TUN/TAP to have access to the Wrapper.
// It MUST be called only once during initialization, other usage is unsafe.
setWrapper(*Wrapper)
}
// Start unblocks any Wrapper.Read calls that have already started // Start unblocks any Wrapper.Read calls that have already started
// and makes the Wrapper functional. // and makes the Wrapper functional.
// //
@ -313,10 +305,6 @@ func wrap(logf logger.Logf, tdev tun.Device, isTAP bool, m *usermetric.Registry)
w.bufferConsumed <- struct{}{} w.bufferConsumed <- struct{}{}
w.noteActivity() w.noteActivity()
if sw, ok := w.tdev.(setWrapperer); ok {
sw.setWrapper(w)
}
return w return w
} }
@ -459,12 +447,18 @@ const ethernetFrameSize = 14 // 2 six byte MACs, 2 bytes ethertype
func (t *Wrapper) pollVector() { func (t *Wrapper) pollVector() {
sizes := make([]int, len(t.vectorBuffer)) sizes := make([]int, len(t.vectorBuffer))
readOffset := PacketStartOffset readOffset := PacketStartOffset
reader := t.tdev.Read
if t.isTAP { if t.isTAP {
readOffset = PacketStartOffset - ethernetFrameSize type tapReader interface {
ReadEthernet(buffs [][]byte, sizes []int, offset int) (int, error)
}
if r, ok := t.tdev.(tapReader); ok {
readOffset = PacketStartOffset - ethernetFrameSize
reader = r.ReadEthernet
}
} }
for range t.bufferConsumed { for range t.bufferConsumed {
DoRead:
for i := range t.vectorBuffer { for i := range t.vectorBuffer {
t.vectorBuffer[i] = t.vectorBuffer[i][:cap(t.vectorBuffer[i])] t.vectorBuffer[i] = t.vectorBuffer[i][:cap(t.vectorBuffer[i])]
} }
@ -474,7 +468,7 @@ func (t *Wrapper) pollVector() {
if t.isClosed() { if t.isClosed() {
return return
} }
n, err = t.tdev.Read(t.vectorBuffer[:], sizes, readOffset) n, err = reader(t.vectorBuffer[:], sizes, readOffset)
if t.isTAP && tapDebug { if t.isTAP && tapDebug {
s := fmt.Sprintf("% x", t.vectorBuffer[0][:]) s := fmt.Sprintf("% x", t.vectorBuffer[0][:])
for strings.HasSuffix(s, " 00") { for strings.HasSuffix(s, " 00") {
@ -486,21 +480,6 @@ func (t *Wrapper) pollVector() {
for i := range sizes[:n] { for i := range sizes[:n] {
t.vectorBuffer[i] = t.vectorBuffer[i][:readOffset+sizes[i]] t.vectorBuffer[i] = t.vectorBuffer[i][:readOffset+sizes[i]]
} }
if t.isTAP {
if err == nil {
ethernetFrame := t.vectorBuffer[0][readOffset:]
if t.handleTAPFrame(ethernetFrame) {
goto DoRead
}
}
// Fall through. We got an IP packet.
if sizes[0] >= ethernetFrameSize {
t.vectorBuffer[0] = t.vectorBuffer[0][:readOffset+sizes[0]-ethernetFrameSize]
}
if tapDebug {
t.logf("tap regular frame: %x", t.vectorBuffer[0][PacketStartOffset:PacketStartOffset+sizes[0]])
}
}
t.sendVectorOutbound(tunVectorReadResult{ t.sendVectorOutbound(tunVectorReadResult{
data: t.vectorBuffer[:n], data: t.vectorBuffer[:n],
dataOffset: PacketStartOffset, dataOffset: PacketStartOffset,

Loading…
Cancel
Save