mirror of https://github.com/tailscale/tailscale/
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2243 lines
59 KiB
Go
2243 lines
59 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
// Package vnet simulates a virtual Internet containing a set of networks with various
|
|
// NAT behaviors. You can then plug VMs into the virtual internet at different points
|
|
// to test Tailscale working end-to-end in various network conditions.
|
|
//
|
|
// See https://github.com/tailscale/tailscale/issues/13038
|
|
package vnet
|
|
|
|
// TODO:
|
|
// - [ ] tests for NAT tables
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/tls"
|
|
"encoding/binary"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"iter"
|
|
"log"
|
|
"maps"
|
|
"math/rand/v2"
|
|
"net"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"net/netip"
|
|
"os/exec"
|
|
"strconv"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/gaissmai/bart"
|
|
"github.com/google/gopacket"
|
|
"github.com/google/gopacket/layers"
|
|
"go4.org/mem"
|
|
"gvisor.dev/gvisor/pkg/buffer"
|
|
"gvisor.dev/gvisor/pkg/tcpip"
|
|
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
|
|
"gvisor.dev/gvisor/pkg/tcpip/header"
|
|
"gvisor.dev/gvisor/pkg/tcpip/link/channel"
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/arp"
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/ipv4"
|
|
"gvisor.dev/gvisor/pkg/tcpip/network/ipv6"
|
|
"gvisor.dev/gvisor/pkg/tcpip/stack"
|
|
"gvisor.dev/gvisor/pkg/tcpip/transport/icmp"
|
|
"gvisor.dev/gvisor/pkg/tcpip/transport/tcp"
|
|
"gvisor.dev/gvisor/pkg/waiter"
|
|
"tailscale.com/client/tailscale"
|
|
"tailscale.com/derp"
|
|
"tailscale.com/derp/derphttp"
|
|
"tailscale.com/net/netutil"
|
|
"tailscale.com/net/stun"
|
|
"tailscale.com/syncs"
|
|
"tailscale.com/tailcfg"
|
|
"tailscale.com/tstest/integration/testcontrol"
|
|
"tailscale.com/types/key"
|
|
"tailscale.com/types/logger"
|
|
"tailscale.com/util/mak"
|
|
"tailscale.com/util/must"
|
|
"tailscale.com/util/set"
|
|
"tailscale.com/util/zstdframe"
|
|
)
|
|
|
|
const nicID = 1
|
|
|
|
const (
|
|
stunPort = 3478
|
|
pcpPort = 5351
|
|
ssdpPort = 1900
|
|
)
|
|
|
|
func (s *Server) PopulateDERPMapIPs() error {
|
|
out, err := exec.Command("tailscale", "debug", "derp-map").Output()
|
|
if err != nil {
|
|
return fmt.Errorf("tailscale debug derp-map: %v", err)
|
|
}
|
|
var dm tailcfg.DERPMap
|
|
if err := json.Unmarshal(out, &dm); err != nil {
|
|
return fmt.Errorf("unmarshal DERPMap: %v", err)
|
|
}
|
|
for _, r := range dm.Regions {
|
|
for _, n := range r.Nodes {
|
|
if n.IPv4 != "" {
|
|
s.derpIPs.Add(netip.MustParseAddr(n.IPv4))
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (n *network) InitNAT(natType NAT) error {
|
|
ctor, ok := natTypes[natType]
|
|
if !ok {
|
|
return fmt.Errorf("unknown NAT type %q", natType)
|
|
}
|
|
t, err := ctor(n)
|
|
if err != nil {
|
|
return fmt.Errorf("error creating NAT type %q for network %v: %w", natType, n.wanIP4, err)
|
|
}
|
|
n.setNATTable(t)
|
|
n.natStyle.Store(natType)
|
|
return nil
|
|
}
|
|
|
|
func (n *network) setNATTable(nt NATTable) {
|
|
n.natMu.Lock()
|
|
defer n.natMu.Unlock()
|
|
n.natTable = nt
|
|
}
|
|
|
|
// SoleLANIP implements [IPPool].
|
|
func (n *network) SoleLANIP() (netip.Addr, bool) {
|
|
if len(n.nodesByIP4) != 1 {
|
|
return netip.Addr{}, false
|
|
}
|
|
for ip := range n.nodesByIP4 {
|
|
return ip, true
|
|
}
|
|
return netip.Addr{}, false
|
|
}
|
|
|
|
// WANIP implements [IPPool].
|
|
func (n *network) WANIP() netip.Addr { return n.wanIP4 }
|
|
|
|
func (n *network) initStack() error {
|
|
n.ns = stack.New(stack.Options{
|
|
NetworkProtocols: []stack.NetworkProtocolFactory{
|
|
ipv4.NewProtocol,
|
|
ipv6.NewProtocol,
|
|
arp.NewProtocol,
|
|
},
|
|
TransportProtocols: []stack.TransportProtocolFactory{
|
|
tcp.NewProtocol,
|
|
icmp.NewProtocol4,
|
|
},
|
|
})
|
|
sackEnabledOpt := tcpip.TCPSACKEnabled(true) // TCP SACK is disabled by default
|
|
tcpipErr := n.ns.SetTransportProtocolOption(tcp.ProtocolNumber, &sackEnabledOpt)
|
|
if tcpipErr != nil {
|
|
return fmt.Errorf("SetTransportProtocolOption SACK: %v", tcpipErr)
|
|
}
|
|
n.linkEP = channel.New(512, 1500, tcpip.LinkAddress(n.mac.HWAddr()))
|
|
if tcpipProblem := n.ns.CreateNIC(nicID, n.linkEP); tcpipProblem != nil {
|
|
return fmt.Errorf("CreateNIC: %v", tcpipProblem)
|
|
}
|
|
n.ns.SetPromiscuousMode(nicID, true)
|
|
n.ns.SetSpoofing(nicID, true)
|
|
|
|
var routes []tcpip.Route
|
|
|
|
if n.v4 {
|
|
prefix := tcpip.AddrFrom4Slice(n.lanIP4.Addr().AsSlice()).WithPrefix()
|
|
prefix.PrefixLen = n.lanIP4.Bits()
|
|
if tcpProb := n.ns.AddProtocolAddress(nicID, tcpip.ProtocolAddress{
|
|
Protocol: ipv4.ProtocolNumber,
|
|
AddressWithPrefix: prefix,
|
|
}, stack.AddressProperties{}); tcpProb != nil {
|
|
return errors.New(tcpProb.String())
|
|
}
|
|
|
|
ipv4Subnet, err := tcpip.NewSubnet(tcpip.AddrFromSlice(make([]byte, 4)), tcpip.MaskFromBytes(make([]byte, 4)))
|
|
if err != nil {
|
|
return fmt.Errorf("could not create IPv4 subnet: %v", err)
|
|
}
|
|
routes = append(routes, tcpip.Route{
|
|
Destination: ipv4Subnet,
|
|
NIC: nicID,
|
|
})
|
|
}
|
|
if n.v6 {
|
|
prefix := tcpip.AddrFrom16(n.wanIP6.Addr().As16()).WithPrefix()
|
|
prefix.PrefixLen = n.wanIP6.Bits()
|
|
if tcpProb := n.ns.AddProtocolAddress(nicID, tcpip.ProtocolAddress{
|
|
Protocol: ipv6.ProtocolNumber,
|
|
AddressWithPrefix: prefix,
|
|
}, stack.AddressProperties{}); tcpProb != nil {
|
|
return errors.New(tcpProb.String())
|
|
}
|
|
|
|
ipv6Subnet, err := tcpip.NewSubnet(tcpip.AddrFromSlice(make([]byte, 16)), tcpip.MaskFromBytes(make([]byte, 16)))
|
|
if err != nil {
|
|
return fmt.Errorf("could not create IPv6 subnet: %v", err)
|
|
}
|
|
routes = append(routes, tcpip.Route{
|
|
Destination: ipv6Subnet,
|
|
NIC: nicID,
|
|
})
|
|
}
|
|
|
|
n.ns.SetRouteTable(routes)
|
|
|
|
const tcpReceiveBufferSize = 0 // default
|
|
const maxInFlightConnectionAttempts = 8192
|
|
tcpFwd := tcp.NewForwarder(n.ns, tcpReceiveBufferSize, maxInFlightConnectionAttempts, n.acceptTCP)
|
|
n.ns.SetTransportProtocolHandler(tcp.ProtocolNumber, func(tei stack.TransportEndpointID, pb *stack.PacketBuffer) (handled bool) {
|
|
return tcpFwd.HandlePacket(tei, pb)
|
|
})
|
|
|
|
go func() {
|
|
for {
|
|
pkt := n.linkEP.ReadContext(n.s.shutdownCtx)
|
|
if pkt == nil {
|
|
if n.s.shutdownCtx.Err() != nil {
|
|
// Return without logging.
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
n.handleIPPacketFromGvisor(pkt.ToView().AsSlice())
|
|
}
|
|
}()
|
|
return nil
|
|
}
|
|
|
|
func (n *network) handleIPPacketFromGvisor(ipRaw []byte) {
|
|
if len(ipRaw) == 0 {
|
|
panic("empty packet from gvisor")
|
|
}
|
|
var goPkt gopacket.Packet
|
|
ipVer := ipRaw[0] >> 4 // 4 or 6
|
|
switch ipVer {
|
|
case 4:
|
|
goPkt = gopacket.NewPacket(
|
|
ipRaw,
|
|
layers.LayerTypeIPv4, gopacket.Lazy)
|
|
case 6:
|
|
goPkt = gopacket.NewPacket(
|
|
ipRaw,
|
|
layers.LayerTypeIPv6, gopacket.Lazy)
|
|
default:
|
|
panic(fmt.Sprintf("unexpected IP packet version %v", ipVer))
|
|
}
|
|
flow, ok := flow(goPkt)
|
|
if !ok {
|
|
panic("unexpected gvisor packet")
|
|
}
|
|
node, ok := n.nodeByIP(flow.dst)
|
|
if !ok {
|
|
n.logf("no node for netstack dest IP %v", flow.dst)
|
|
return
|
|
}
|
|
eth := &layers.Ethernet{
|
|
SrcMAC: n.mac.HWAddr(),
|
|
DstMAC: node.mac.HWAddr(),
|
|
}
|
|
sls := []gopacket.SerializableLayer{
|
|
eth,
|
|
}
|
|
for _, layer := range goPkt.Layers() {
|
|
sl, ok := layer.(gopacket.SerializableLayer)
|
|
if !ok {
|
|
log.Fatalf("layer %s is not serializable", layer.LayerType().String())
|
|
}
|
|
sls = append(sls, sl)
|
|
}
|
|
|
|
resPkt, err := mkPacket(sls...)
|
|
if err != nil {
|
|
n.logf("gvisor: serialize error: %v", err)
|
|
return
|
|
}
|
|
if nw, ok := n.writers.Load(node.mac); ok {
|
|
nw.write(resPkt)
|
|
} else {
|
|
n.logf("gvisor write: no writeFunc for %v", node.mac)
|
|
}
|
|
}
|
|
|
|
func netaddrIPFromNetstackIP(s tcpip.Address) netip.Addr {
|
|
switch s.Len() {
|
|
case 4:
|
|
return netip.AddrFrom4(s.As4())
|
|
case 16:
|
|
return netip.AddrFrom16(s.As16()).Unmap()
|
|
}
|
|
return netip.Addr{}
|
|
}
|
|
|
|
func stringifyTEI(tei stack.TransportEndpointID) string {
|
|
localHostPort := net.JoinHostPort(tei.LocalAddress.String(), strconv.Itoa(int(tei.LocalPort)))
|
|
remoteHostPort := net.JoinHostPort(tei.RemoteAddress.String(), strconv.Itoa(int(tei.RemotePort)))
|
|
return fmt.Sprintf("%s -> %s", remoteHostPort, localHostPort)
|
|
}
|
|
|
|
func (n *network) acceptTCP(r *tcp.ForwarderRequest) {
|
|
reqDetails := r.ID()
|
|
|
|
clientRemoteIP := netaddrIPFromNetstackIP(reqDetails.RemoteAddress)
|
|
destIP := netaddrIPFromNetstackIP(reqDetails.LocalAddress)
|
|
destPort := reqDetails.LocalPort
|
|
if !clientRemoteIP.IsValid() {
|
|
r.Complete(true) // sends a RST
|
|
return
|
|
}
|
|
|
|
log.Printf("vnet-AcceptTCP: %v", stringifyTEI(reqDetails))
|
|
|
|
var wq waiter.Queue
|
|
ep, err := r.CreateEndpoint(&wq)
|
|
if err != nil {
|
|
log.Printf("CreateEndpoint error for %s: %v", stringifyTEI(reqDetails), err)
|
|
r.Complete(true) // sends a RST
|
|
return
|
|
}
|
|
ep.SocketOptions().SetKeepAlive(true)
|
|
|
|
if destPort == 123 {
|
|
r.Complete(false)
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
io.WriteString(tc, "Hello from Go\nGoodbye.\n")
|
|
tc.Close()
|
|
return
|
|
}
|
|
|
|
if destPort == 8008 && fakeTestAgent.Match(destIP) {
|
|
node, ok := n.nodeByIP(clientRemoteIP)
|
|
if !ok {
|
|
n.logf("unknown client IP %v trying to connect to test driver", clientRemoteIP)
|
|
r.Complete(true)
|
|
return
|
|
}
|
|
r.Complete(false)
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
ac := &agentConn{node, tc}
|
|
n.s.addIdleAgentConn(ac)
|
|
return
|
|
}
|
|
|
|
if destPort == 80 && fakeControl.Match(destIP) {
|
|
r.Complete(false)
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
hs := &http.Server{Handler: n.s.control}
|
|
go hs.Serve(netutil.NewOneConnListener(tc, nil))
|
|
return
|
|
}
|
|
|
|
if fakeDERP1.Match(destIP) || fakeDERP2.Match(destIP) {
|
|
if destPort == 443 {
|
|
ds := n.s.derps[0]
|
|
if fakeDERP2.Match(destIP) {
|
|
ds = n.s.derps[1]
|
|
}
|
|
|
|
r.Complete(false)
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
tlsConn := tls.Server(tc, ds.tlsConfig)
|
|
hs := &http.Server{Handler: ds.handler}
|
|
go hs.Serve(netutil.NewOneConnListener(tlsConn, nil))
|
|
return
|
|
}
|
|
if destPort == 80 {
|
|
r.Complete(false)
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
hs := &http.Server{Handler: n.s.derps[0].handler}
|
|
go hs.Serve(netutil.NewOneConnListener(tc, nil))
|
|
return
|
|
}
|
|
}
|
|
if destPort == 443 && fakeLogCatcher.Match(destIP) {
|
|
r.Complete(false)
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
go n.serveLogCatcherConn(clientRemoteIP, tc)
|
|
return
|
|
}
|
|
|
|
var targetDial string
|
|
if n.s.derpIPs.Contains(destIP) {
|
|
targetDial = destIP.String() + ":" + strconv.Itoa(int(destPort))
|
|
} else if fakeProxyControlplane.Match(destIP) {
|
|
targetDial = "controlplane.tailscale.com:" + strconv.Itoa(int(destPort))
|
|
}
|
|
if targetDial != "" {
|
|
c, err := net.Dial("tcp", targetDial)
|
|
if err != nil {
|
|
r.Complete(true)
|
|
log.Printf("Dial controlplane: %v", err)
|
|
return
|
|
}
|
|
defer c.Close()
|
|
tc := gonet.NewTCPConn(&wq, ep)
|
|
defer tc.Close()
|
|
r.Complete(false)
|
|
errc := make(chan error, 2)
|
|
go func() { _, err := io.Copy(tc, c); errc <- err }()
|
|
go func() { _, err := io.Copy(c, tc); errc <- err }()
|
|
<-errc
|
|
} else {
|
|
r.Complete(true) // sends a RST
|
|
}
|
|
}
|
|
|
|
// serveLogCatchConn serves a TCP connection to "log.tailscale.io", speaking the
|
|
// logtail/logcatcher protocol.
|
|
//
|
|
// We terminate TLS with an arbitrary cert; the client is configured to not
|
|
// validate TLS certs for this hostname when running under these integration
|
|
// tests.
|
|
func (n *network) serveLogCatcherConn(clientRemoteIP netip.Addr, c net.Conn) {
|
|
tlsConfig := n.s.derps[0].tlsConfig // self-signed (stealing DERP's); test client configure to not check
|
|
tlsConn := tls.Server(c, tlsConfig)
|
|
var handler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
all, _ := io.ReadAll(r.Body)
|
|
if r.Header.Get("Content-Encoding") == "zstd" {
|
|
var err error
|
|
all, err = zstdframe.AppendDecode(nil, all)
|
|
if err != nil {
|
|
log.Printf("LOGS DECODE ERROR zstd decode: %v", err)
|
|
http.Error(w, "zstd decode error", http.StatusBadRequest)
|
|
return
|
|
}
|
|
}
|
|
var logs []struct {
|
|
Logtail struct {
|
|
Client_Time time.Time
|
|
}
|
|
Text string
|
|
}
|
|
if err := json.Unmarshal(all, &logs); err != nil {
|
|
log.Printf("Logs decode error: %v", err)
|
|
return
|
|
}
|
|
node := n.nodesByIP4[clientRemoteIP]
|
|
if node != nil {
|
|
node.logMu.Lock()
|
|
defer node.logMu.Unlock()
|
|
node.logCatcherWrites++
|
|
for _, lg := range logs {
|
|
tStr := lg.Logtail.Client_Time.Round(time.Millisecond).Format(time.RFC3339Nano)
|
|
fmt.Fprintf(&node.logBuf, "[%v] %s\n", tStr, lg.Text)
|
|
}
|
|
}
|
|
})
|
|
hs := &http.Server{Handler: handler}
|
|
hs.Serve(netutil.NewOneConnListener(tlsConn, nil))
|
|
}
|
|
|
|
type EthernetPacket struct {
|
|
le *layers.Ethernet
|
|
gp gopacket.Packet
|
|
}
|
|
|
|
func (ep EthernetPacket) SrcMAC() MAC {
|
|
return MAC(ep.le.SrcMAC)
|
|
}
|
|
|
|
func (ep EthernetPacket) DstMAC() MAC {
|
|
return MAC(ep.le.DstMAC)
|
|
}
|
|
|
|
type MAC [6]byte
|
|
|
|
func (m MAC) IsBroadcast() bool {
|
|
return m == MAC{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
|
}
|
|
|
|
// IsIPv6Multicast reports whether m is an IPv6 multicast MAC address,
|
|
// typically one containing a solicited-node multicast address.
|
|
func (m MAC) IsIPv6Multicast() bool {
|
|
return m[0] == 0x33 && m[1] == 0x33
|
|
}
|
|
|
|
func macOf(hwa net.HardwareAddr) (_ MAC, ok bool) {
|
|
if len(hwa) != 6 {
|
|
return MAC{}, false
|
|
}
|
|
return MAC(hwa), true
|
|
}
|
|
|
|
func (m MAC) HWAddr() net.HardwareAddr {
|
|
return net.HardwareAddr(m[:])
|
|
}
|
|
|
|
func (m MAC) String() string {
|
|
return fmt.Sprintf("%02x:%02x:%02x:%02x:%02x:%02x", m[0], m[1], m[2], m[3], m[4], m[5])
|
|
}
|
|
|
|
type portMapping struct {
|
|
dst netip.AddrPort // LAN IP:port
|
|
expiry time.Time
|
|
}
|
|
|
|
// writerFunc is a function that writes an Ethernet frame to a connected client.
|
|
//
|
|
// ethFrame is the Ethernet frame to write.
|
|
//
|
|
// interfaceIndexID is the interface ID for the pcap file.
|
|
type writerFunc func(dst vmClient, ethFrame []byte, interfaceIndexID int)
|
|
|
|
// networkWriter are the arguments to a writerFunc and the writerFunc.
|
|
type networkWriter struct {
|
|
writer writerFunc // Function to write packets to the network
|
|
c vmClient
|
|
interfaceID int // The interface ID of the src node (for writing pcaps)
|
|
}
|
|
|
|
func (nw networkWriter) write(b []byte) {
|
|
nw.writer(nw.c, b, nw.interfaceID)
|
|
}
|
|
|
|
type network struct {
|
|
s *Server
|
|
num int // 1-based
|
|
mac MAC // of router
|
|
portmap bool
|
|
lanInterfaceID int
|
|
wanInterfaceID int
|
|
v4 bool // network supports IPv4
|
|
v6 bool // network support IPv6
|
|
wanIP6 netip.Prefix // router's WAN IPv6, if any, as a /64.
|
|
wanIP4 netip.Addr // router's LAN IPv4, if any
|
|
lanIP4 netip.Prefix // router's LAN IP + CIDR (e.g. 192.168.2.1/24)
|
|
breakWAN4 bool // break WAN IPv4 connectivity
|
|
latency time.Duration // latency applied to interface writes
|
|
lossRate float64 // probability of dropping a packet (0.0 to 1.0)
|
|
nodesByIP4 map[netip.Addr]*node // by LAN IPv4
|
|
nodesByMAC map[MAC]*node
|
|
logf func(format string, args ...any)
|
|
|
|
ns *stack.Stack
|
|
linkEP *channel.Endpoint
|
|
|
|
natStyle syncs.AtomicValue[NAT]
|
|
natMu sync.Mutex // held while using + changing natTable
|
|
natTable NATTable
|
|
portMap map[netip.AddrPort]portMapping // WAN ip:port -> LAN ip:port
|
|
portMapFlow map[portmapFlowKey]netip.AddrPort // (lanAP, peerWANAP) -> portmapped wanAP
|
|
|
|
macMu sync.Mutex
|
|
macOfIPv6 map[netip.Addr]MAC // IPv6 source IP -> MAC
|
|
|
|
// writers is a map of MAC -> networkWriters to write packets to that MAC.
|
|
// It contains entries for connected nodes only.
|
|
writers syncs.Map[MAC, networkWriter] // MAC -> to networkWriter for that MAC
|
|
}
|
|
|
|
// registerWriter registers a client address with a MAC address.
|
|
func (n *network) registerWriter(mac MAC, c vmClient) {
|
|
nw := networkWriter{
|
|
writer: n.s.writeEthernetFrameToVM,
|
|
c: c,
|
|
}
|
|
if node, ok := n.s.nodeByMAC[mac]; ok {
|
|
nw.interfaceID = node.interfaceID
|
|
}
|
|
n.writers.Store(mac, nw)
|
|
}
|
|
|
|
func (n *network) unregisterWriter(mac MAC) {
|
|
n.writers.Delete(mac)
|
|
}
|
|
|
|
// RegisteredWritersForTest returns the number of registered connections (VM
|
|
// guests with a known MAC to whom a packet can be sent) there are to the
|
|
// server. It exists for testing.
|
|
func (s *Server) RegisteredWritersForTest() int {
|
|
num := 0
|
|
for n := range s.networks {
|
|
num += n.writers.Len()
|
|
}
|
|
return num
|
|
}
|
|
|
|
func (n *network) MACOfIP(ip netip.Addr) (_ MAC, ok bool) {
|
|
if n.lanIP4.Addr() == ip {
|
|
return n.mac, true
|
|
}
|
|
if n, ok := n.nodesByIP4[ip]; ok {
|
|
return n.mac, true
|
|
}
|
|
return MAC{}, false
|
|
}
|
|
|
|
type node struct {
|
|
mac MAC
|
|
num int // 1-based node number
|
|
interfaceID int
|
|
net *network
|
|
lanIP netip.Addr // must be in net.lanIP prefix + unique in net
|
|
verboseSyslog bool
|
|
|
|
// logMu guards logBuf.
|
|
// TODO(bradfitz): conditionally write these out to separate files at the end?
|
|
// Currently they only hold logcatcher logs.
|
|
logMu sync.Mutex
|
|
logBuf bytes.Buffer
|
|
logCatcherWrites int
|
|
}
|
|
|
|
// String returns the string "nodeN" where N is the 1-based node number.
|
|
func (n *node) String() string {
|
|
return fmt.Sprintf("node%d", n.num)
|
|
}
|
|
|
|
type derpServer struct {
|
|
srv *derp.Server
|
|
handler http.Handler
|
|
tlsConfig *tls.Config
|
|
}
|
|
|
|
func newDERPServer() *derpServer {
|
|
// Just to get a self-signed TLS cert:
|
|
ts := httptest.NewTLSServer(nil)
|
|
ts.Close()
|
|
|
|
ds := &derpServer{
|
|
srv: derp.NewServer(key.NewNode(), logger.Discard),
|
|
tlsConfig: ts.TLS, // self-signed; test client configure to not check
|
|
}
|
|
var mux http.ServeMux
|
|
mux.Handle("/derp", derphttp.Handler(ds.srv))
|
|
mux.HandleFunc("/generate_204", derphttp.ServeNoContent)
|
|
|
|
ds.handler = &mux
|
|
return ds
|
|
}
|
|
|
|
type Server struct {
|
|
shutdownCtx context.Context
|
|
shutdownCancel context.CancelFunc
|
|
shuttingDown atomic.Bool
|
|
wg sync.WaitGroup
|
|
blendReality bool
|
|
|
|
optLogf func(format string, args ...any) // or nil to use log.Printf
|
|
|
|
derpIPs set.Set[netip.Addr]
|
|
|
|
nodes []*node
|
|
nodeByMAC map[MAC]*node
|
|
networks set.Set[*network]
|
|
networkByWAN *bart.Table[*network]
|
|
|
|
control *testcontrol.Server
|
|
derps []*derpServer
|
|
pcapWriter *pcapWriter
|
|
|
|
// writeMu serializes all writes to VM clients.
|
|
writeMu sync.Mutex
|
|
scratch []byte
|
|
|
|
mu sync.Mutex
|
|
agentConnWaiter map[*node]chan<- struct{} // signaled after added to set
|
|
agentConns set.Set[*agentConn] // not keyed by node; should be small/cheap enough to scan all
|
|
agentDialer map[*node]DialFunc
|
|
}
|
|
|
|
func (s *Server) logf(format string, args ...any) {
|
|
if s.optLogf != nil {
|
|
s.optLogf(format, args...)
|
|
} else {
|
|
log.Printf(format, args...)
|
|
}
|
|
}
|
|
|
|
func (s *Server) SetLoggerForTest(logf func(format string, args ...any)) {
|
|
s.optLogf = logf
|
|
}
|
|
|
|
type DialFunc func(ctx context.Context, network, address string) (net.Conn, error)
|
|
|
|
var derpMap = &tailcfg.DERPMap{
|
|
Regions: map[int]*tailcfg.DERPRegion{
|
|
1: {
|
|
RegionID: 1,
|
|
RegionCode: "atlantis",
|
|
RegionName: "Atlantis",
|
|
Nodes: []*tailcfg.DERPNode{
|
|
{
|
|
Name: "1a",
|
|
RegionID: 1,
|
|
HostName: "derp1.tailscale",
|
|
IPv4: fakeDERP1.v4.String(),
|
|
IPv6: fakeDERP1.v6.String(),
|
|
InsecureForTests: true,
|
|
CanPort80: true,
|
|
},
|
|
},
|
|
},
|
|
2: {
|
|
RegionID: 2,
|
|
RegionCode: "northpole",
|
|
RegionName: "North Pole",
|
|
Nodes: []*tailcfg.DERPNode{
|
|
{
|
|
Name: "2a",
|
|
RegionID: 2,
|
|
HostName: "derp2.tailscale",
|
|
IPv4: fakeDERP2.v4.String(),
|
|
IPv6: fakeDERP2.v6.String(),
|
|
InsecureForTests: true,
|
|
CanPort80: true,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
func New(c *Config) (*Server, error) {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
s := &Server{
|
|
shutdownCtx: ctx,
|
|
shutdownCancel: cancel,
|
|
|
|
control: &testcontrol.Server{
|
|
DERPMap: derpMap,
|
|
ExplicitBaseURL: "http://control.tailscale",
|
|
},
|
|
|
|
blendReality: c.blendReality,
|
|
derpIPs: set.Of[netip.Addr](),
|
|
|
|
nodeByMAC: map[MAC]*node{},
|
|
networkByWAN: &bart.Table[*network]{},
|
|
networks: set.Of[*network](),
|
|
}
|
|
for range 2 {
|
|
s.derps = append(s.derps, newDERPServer())
|
|
}
|
|
if err := s.initFromConfig(c); err != nil {
|
|
return nil, err
|
|
}
|
|
for n := range s.networks {
|
|
if err := n.initStack(); err != nil {
|
|
return nil, fmt.Errorf("newServer: initStack: %v", err)
|
|
}
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
func (s *Server) Close() {
|
|
if shutdown := s.shuttingDown.Swap(true); !shutdown {
|
|
s.shutdownCancel()
|
|
s.pcapWriter.Close()
|
|
}
|
|
s.wg.Wait()
|
|
}
|
|
|
|
// MACs returns the MAC addresses of the configured nodes.
|
|
func (s *Server) MACs() iter.Seq[MAC] {
|
|
return maps.Keys(s.nodeByMAC)
|
|
}
|
|
|
|
func (s *Server) RegisterSinkForTest(mac MAC, fn func(eth []byte)) {
|
|
n, ok := s.nodeByMAC[mac]
|
|
if !ok {
|
|
log.Fatalf("RegisterSinkForTest: unknown MAC %v", mac)
|
|
}
|
|
n.net.writers.Store(mac, networkWriter{
|
|
writer: func(_ vmClient, eth []byte, _ int) {
|
|
fn(eth)
|
|
},
|
|
})
|
|
}
|
|
|
|
func (s *Server) HWAddr(mac MAC) net.HardwareAddr {
|
|
// TODO: cache
|
|
return net.HardwareAddr(mac[:])
|
|
}
|
|
|
|
type Protocol int
|
|
|
|
const (
|
|
ProtocolQEMU = Protocol(iota + 1)
|
|
ProtocolUnixDGRAM // for macOS Virtualization.Framework and VZFileHandleNetworkDeviceAttachment
|
|
)
|
|
|
|
func (s *Server) writeEthernetFrameToVM(c vmClient, ethPkt []byte, interfaceID int) {
|
|
s.writeMu.Lock()
|
|
defer s.writeMu.Unlock()
|
|
|
|
if ethPkt == nil {
|
|
return
|
|
}
|
|
switch c.proto() {
|
|
case ProtocolQEMU:
|
|
s.scratch = binary.BigEndian.AppendUint32(s.scratch[:0], uint32(len(ethPkt)))
|
|
s.scratch = append(s.scratch, ethPkt...)
|
|
if _, err := c.uc.Write(s.scratch); err != nil {
|
|
s.logf("Write pkt: %v", err)
|
|
}
|
|
|
|
case ProtocolUnixDGRAM:
|
|
if _, err := c.uc.WriteToUnix(ethPkt, c.raddr); err != nil {
|
|
s.logf("Write pkt : %v", err)
|
|
return
|
|
}
|
|
}
|
|
|
|
must.Do(s.pcapWriter.WritePacket(gopacket.CaptureInfo{
|
|
Timestamp: time.Now(),
|
|
CaptureLength: len(ethPkt),
|
|
Length: len(ethPkt),
|
|
InterfaceIndex: interfaceID,
|
|
}, ethPkt))
|
|
}
|
|
|
|
// vmClient is a comparable value representing a connection from a VM, either a
|
|
// QEMU-style client (with streams over a Unix socket) or a datagram based
|
|
// client (such as macOS Virtualization.framework clients).
|
|
type vmClient struct {
|
|
uc *net.UnixConn
|
|
raddr *net.UnixAddr // nil for QEMU-style clients using streams; else datagram source
|
|
}
|
|
|
|
func (c vmClient) proto() Protocol {
|
|
if c.raddr == nil {
|
|
return ProtocolQEMU
|
|
}
|
|
return ProtocolUnixDGRAM
|
|
}
|
|
|
|
func parseEthernet(pkt []byte) (dst, src MAC, ethType layers.EthernetType, payload []byte, ok bool) {
|
|
// headerLen is the length of an Ethernet header:
|
|
// 6 bytes of destination MAC, 6 bytes of source MAC, 2 bytes of EtherType.
|
|
const headerLen = 14
|
|
if len(pkt) < headerLen {
|
|
return
|
|
}
|
|
dst = MAC(pkt[0:6])
|
|
src = MAC(pkt[6:12])
|
|
ethType = layers.EthernetType(binary.BigEndian.Uint16(pkt[12:14]))
|
|
payload = pkt[headerLen:]
|
|
ok = true
|
|
return
|
|
}
|
|
|
|
// Handles a single connection from a QEMU-style client or muxd connections for dgram mode
|
|
func (s *Server) ServeUnixConn(uc *net.UnixConn, proto Protocol) {
|
|
if s.shuttingDown.Load() {
|
|
return
|
|
}
|
|
s.wg.Add(1)
|
|
defer s.wg.Done()
|
|
context.AfterFunc(s.shutdownCtx, func() {
|
|
uc.SetDeadline(time.Now())
|
|
})
|
|
s.logf("Got conn %T %p", uc, uc)
|
|
defer uc.Close()
|
|
|
|
buf := make([]byte, 16<<10)
|
|
didReg := map[MAC]bool{}
|
|
for {
|
|
var packetRaw []byte
|
|
var raddr *net.UnixAddr
|
|
|
|
switch proto {
|
|
case ProtocolUnixDGRAM:
|
|
n, addr, err := uc.ReadFromUnix(buf)
|
|
raddr = addr
|
|
if err != nil {
|
|
if s.shutdownCtx.Err() != nil {
|
|
// Return without logging.
|
|
return
|
|
}
|
|
s.logf("ReadFromUnix: %#v", err)
|
|
continue
|
|
}
|
|
packetRaw = buf[:n]
|
|
case ProtocolQEMU:
|
|
if _, err := io.ReadFull(uc, buf[:4]); err != nil {
|
|
if s.shutdownCtx.Err() != nil {
|
|
// Return without logging.
|
|
return
|
|
}
|
|
s.logf("ReadFull header: %v", err)
|
|
return
|
|
}
|
|
n := binary.BigEndian.Uint32(buf[:4])
|
|
|
|
if _, err := io.ReadFull(uc, buf[4:4+n]); err != nil {
|
|
if s.shutdownCtx.Err() != nil {
|
|
// Return without logging.
|
|
return
|
|
}
|
|
s.logf("ReadFull pkt: %v", err)
|
|
return
|
|
}
|
|
packetRaw = buf[4 : 4+n] // raw ethernet frame
|
|
}
|
|
c := vmClient{uc, raddr}
|
|
|
|
// For the first packet from a MAC, register a writerFunc to write to the VM.
|
|
_, srcMAC, _, _, ok := parseEthernet(packetRaw)
|
|
if !ok {
|
|
continue
|
|
}
|
|
srcNode, ok := s.nodeByMAC[srcMAC]
|
|
if !ok {
|
|
s.logf("[conn %p] got frame from unknown MAC %v", c.uc, srcMAC)
|
|
continue
|
|
}
|
|
if !didReg[srcMAC] {
|
|
didReg[srcMAC] = true
|
|
s.logf("[conn %p] Registering writer for MAC %v, node %v", c.uc, srcMAC, srcNode.lanIP)
|
|
srcNode.net.registerWriter(srcMAC, c)
|
|
defer srcNode.net.unregisterWriter(srcMAC)
|
|
}
|
|
|
|
if err := s.handleEthernetFrameFromVM(packetRaw); err != nil {
|
|
srcNode.net.logf("handleEthernetFrameFromVM: [conn %p], %v", c.uc, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Server) handleEthernetFrameFromVM(packetRaw []byte) error {
|
|
packet := gopacket.NewPacket(packetRaw, layers.LayerTypeEthernet, gopacket.Lazy)
|
|
le, ok := packet.LinkLayer().(*layers.Ethernet)
|
|
if !ok || len(le.SrcMAC) != 6 || len(le.DstMAC) != 6 {
|
|
return fmt.Errorf("ignoring non-Ethernet packet: % 02x", packetRaw)
|
|
}
|
|
ep := EthernetPacket{le, packet}
|
|
|
|
srcMAC := ep.SrcMAC()
|
|
srcNode, ok := s.nodeByMAC[srcMAC]
|
|
if !ok {
|
|
return fmt.Errorf("got frame from unknown MAC %v", srcMAC)
|
|
}
|
|
|
|
must.Do(s.pcapWriter.WritePacket(gopacket.CaptureInfo{
|
|
Timestamp: time.Now(),
|
|
CaptureLength: len(packetRaw),
|
|
Length: len(packetRaw),
|
|
InterfaceIndex: srcNode.interfaceID,
|
|
}, packetRaw))
|
|
srcNode.net.HandleEthernetPacket(ep)
|
|
return nil
|
|
}
|
|
|
|
func (s *Server) routeUDPPacket(up UDPPacket) {
|
|
// Find which network owns this based on the destination IP
|
|
// and all the known networks' wan IPs.
|
|
|
|
// But certain things (like STUN) we do in-process.
|
|
if up.Dst.Port() == stunPort {
|
|
// TODO(bradfitz): fake latency; time.AfterFunc the response
|
|
if res, ok := makeSTUNReply(up); ok {
|
|
//log.Printf("STUN reply: %+v", res)
|
|
s.routeUDPPacket(res)
|
|
} else {
|
|
log.Printf("weird: STUN packet not handled")
|
|
}
|
|
return
|
|
}
|
|
|
|
dstIP := up.Dst.Addr()
|
|
netw, ok := s.networkByWAN.Lookup(dstIP)
|
|
if !ok {
|
|
if dstIP.IsPrivate() {
|
|
// Not worth spamming logs. RFC 1918 space doesn't route.
|
|
return
|
|
}
|
|
log.Printf("no network to route UDP packet for %v", up.Dst)
|
|
return
|
|
}
|
|
netw.HandleUDPPacket(up)
|
|
}
|
|
|
|
// writeEth writes a raw Ethernet frame to all (0, 1, or multiple) connected
|
|
// clients on the network.
|
|
//
|
|
// This only delivers to client devices and not the virtual router/gateway
|
|
// device.
|
|
//
|
|
// It reports whether a packet was written to any clients.
|
|
func (n *network) writeEth(res []byte) bool {
|
|
dstMAC, srcMAC, etherType, _, ok := parseEthernet(res)
|
|
if !ok {
|
|
return false
|
|
}
|
|
|
|
if dstMAC.IsBroadcast() || (n.v6 && etherType == layers.EthernetTypeIPv6 && dstMAC == macAllNodes) {
|
|
num := 0
|
|
for mac, nw := range n.writers.All() {
|
|
if mac != srcMAC {
|
|
num++
|
|
n.conditionedWrite(nw, res)
|
|
}
|
|
}
|
|
return num > 0
|
|
}
|
|
if srcMAC == dstMAC {
|
|
n.logf("dropping write of packet from %v to itself", srcMAC)
|
|
return false
|
|
}
|
|
if nw, ok := n.writers.Load(dstMAC); ok {
|
|
n.conditionedWrite(nw, res)
|
|
return true
|
|
}
|
|
|
|
const debugMiss = false
|
|
if debugMiss {
|
|
gp := gopacket.NewPacket(res, layers.LayerTypeEthernet, gopacket.Lazy)
|
|
n.logf("no writeFunc for dst %v from src %v; pkt=%v", dstMAC, srcMAC, gp)
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
func (n *network) conditionedWrite(nw networkWriter, packet []byte) {
|
|
if n.lossRate > 0 && rand.Float64() < n.lossRate {
|
|
// packet lost
|
|
return
|
|
}
|
|
if n.latency > 0 {
|
|
// copy the packet as there's no guarantee packet is owned long enough.
|
|
// TODO(raggi): this could be optimized substantially if necessary,
|
|
// a pool of buffers and a cheaper delay mechanism are both obvious improvements.
|
|
var pkt = make([]byte, len(packet))
|
|
copy(pkt, packet)
|
|
time.AfterFunc(n.latency, func() { nw.write(pkt) })
|
|
} else {
|
|
nw.write(packet)
|
|
}
|
|
}
|
|
|
|
var (
|
|
macAllNodes = MAC{0: 0x33, 1: 0x33, 5: 0x01}
|
|
macAllRouters = MAC{0: 0x33, 1: 0x33, 5: 0x02}
|
|
macBroadcast = MAC{0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
|
|
)
|
|
|
|
const (
|
|
testingEthertype layers.EthernetType = 0x1234
|
|
)
|
|
|
|
func (n *network) HandleEthernetPacket(ep EthernetPacket) {
|
|
packet := ep.gp
|
|
dstMAC := ep.DstMAC()
|
|
isBroadcast := dstMAC.IsBroadcast() || (n.v6 && ep.le.EthernetType == layers.EthernetTypeIPv6 && dstMAC == macAllNodes)
|
|
isV6SpecialMAC := dstMAC[0] == 0x33 && dstMAC[1] == 0x33
|
|
|
|
// forRouter is whether the packet is destined for the router itself
|
|
// or if it's a special thing (like V6 NDP) that the router should handle.
|
|
forRouter := dstMAC == n.mac || isBroadcast || isV6SpecialMAC
|
|
|
|
const debug = false
|
|
if debug {
|
|
n.logf("HandleEthernetPacket: %v => %v; type %v, bcast=%v, forRouter=%v", ep.SrcMAC(), ep.DstMAC(), ep.le.EthernetType, isBroadcast, forRouter)
|
|
}
|
|
|
|
switch ep.le.EthernetType {
|
|
default:
|
|
n.logf("Dropping non-IP packet: %v", ep.le.EthernetType)
|
|
return
|
|
case 0x1234:
|
|
// Permitted for testing. Not a real ethertype.
|
|
case layers.EthernetTypeARP:
|
|
res, err := n.createARPResponse(packet)
|
|
if err != nil {
|
|
n.logf("createARPResponse: %v", err)
|
|
} else {
|
|
n.writeEth(res)
|
|
}
|
|
return
|
|
case layers.EthernetTypeIPv6:
|
|
if !n.v6 {
|
|
n.logf("dropping IPv6 packet on v4-only network")
|
|
return
|
|
}
|
|
if dstMAC == macAllRouters {
|
|
if rs, ok := ep.gp.Layer(layers.LayerTypeICMPv6RouterSolicitation).(*layers.ICMPv6RouterSolicitation); ok {
|
|
n.handleIPv6RouterSolicitation(ep, rs)
|
|
} else {
|
|
n.logf("unexpected IPv6 packet to all-routers: %v", ep.gp)
|
|
}
|
|
return
|
|
}
|
|
isMcast := dstMAC.IsIPv6Multicast()
|
|
if isMcast || dstMAC == n.mac {
|
|
if ns, ok := ep.gp.Layer(layers.LayerTypeICMPv6NeighborSolicitation).(*layers.ICMPv6NeighborSolicitation); ok {
|
|
n.handleIPv6NeighborSolicitation(ep, ns)
|
|
return
|
|
}
|
|
if ep.gp.Layer(layers.LayerTypeMLDv2MulticastListenerReport) != nil {
|
|
// We don't care about these (yet?) and Linux spams a bunch
|
|
// a bunch of them out, so explicitly ignore them to prevent
|
|
// log spam when verbose logging is enabled.
|
|
return
|
|
}
|
|
if isMcast && !isBroadcast {
|
|
return
|
|
}
|
|
}
|
|
|
|
// TODO(bradfitz): handle packets to e.g. [fe80::50cc:ccff:fecc:cc01]:43619
|
|
// and don't fall through to the router below.
|
|
|
|
case layers.EthernetTypeIPv4:
|
|
// Below
|
|
}
|
|
|
|
// Send ethernet broadcasts and unicast ethernet frames to peers
|
|
// on the same network. This is all LAN traffic that isn't meant
|
|
// for the router/gw itself:
|
|
if isBroadcast || !forRouter {
|
|
n.writeEth(ep.gp.Data())
|
|
}
|
|
|
|
if forRouter {
|
|
n.HandleEthernetPacketForRouter(ep)
|
|
}
|
|
}
|
|
|
|
// HandleUDPPacket handles a UDP packet arriving from the internet,
|
|
// addressed to the router's WAN IP. It is then NATed back to a
|
|
// LAN IP here and wrapped in an ethernet layer and delivered
|
|
// to the network.
|
|
func (n *network) HandleUDPPacket(p UDPPacket) {
|
|
buf, err := n.serializedUDPPacket(p.Src, p.Dst, p.Payload, nil)
|
|
if err != nil {
|
|
n.logf("serializing UDP packet: %v", err)
|
|
return
|
|
}
|
|
n.s.pcapWriter.WritePacket(gopacket.CaptureInfo{
|
|
Timestamp: time.Now(),
|
|
CaptureLength: len(buf),
|
|
Length: len(buf),
|
|
InterfaceIndex: n.wanInterfaceID,
|
|
}, buf)
|
|
if p.Dst.Addr().Is4() && n.breakWAN4 {
|
|
// Blackhole the packet.
|
|
return
|
|
}
|
|
dst := n.doNATIn(p.Src, p.Dst)
|
|
if !dst.IsValid() {
|
|
n.logf("Warning: NAT dropped packet; no mapping for %v=>%v", p.Src, p.Dst)
|
|
return
|
|
}
|
|
p.Dst = dst
|
|
buf, err = n.serializedUDPPacket(p.Src, p.Dst, p.Payload, nil)
|
|
if err != nil {
|
|
n.logf("serializing UDP packet: %v", err)
|
|
return
|
|
}
|
|
n.s.pcapWriter.WritePacket(gopacket.CaptureInfo{
|
|
Timestamp: time.Now(),
|
|
CaptureLength: len(buf),
|
|
Length: len(buf),
|
|
InterfaceIndex: n.lanInterfaceID,
|
|
}, buf)
|
|
n.WriteUDPPacketNoNAT(p)
|
|
}
|
|
|
|
func (n *network) nodeByIP(ip netip.Addr) (node *node, ok bool) {
|
|
if ip.Is4() {
|
|
node, ok = n.nodesByIP4[ip]
|
|
}
|
|
if !ok && ip.Is6() {
|
|
var mac MAC
|
|
n.macMu.Lock()
|
|
mac, ok = n.macOfIPv6[ip]
|
|
n.macMu.Unlock()
|
|
if !ok {
|
|
log.Printf("warning: no known MAC for IPv6 %v", ip)
|
|
return nil, false
|
|
}
|
|
node, ok = n.nodesByMAC[mac]
|
|
if !ok {
|
|
log.Printf("warning: no known node for MAC %v (IP %v)", mac, ip)
|
|
}
|
|
}
|
|
return node, ok
|
|
}
|
|
|
|
// WriteUDPPacketNoNAT writes a UDP packet to the network, without
|
|
// doing any NAT translation.
|
|
//
|
|
// The packet will always have the ethernet src MAC of the router
|
|
// so this should not be used for packets between clients on the
|
|
// same ethernet segment.
|
|
func (n *network) WriteUDPPacketNoNAT(p UDPPacket) {
|
|
src, dst := p.Src, p.Dst
|
|
node, ok := n.nodeByIP(dst.Addr())
|
|
if !ok {
|
|
n.logf("no node for dest IP %v in UDP packet %v=>%v", dst.Addr(), p.Src, p.Dst)
|
|
return
|
|
}
|
|
|
|
eth := &layers.Ethernet{
|
|
SrcMAC: n.mac.HWAddr(), // of gateway
|
|
DstMAC: node.mac.HWAddr(),
|
|
}
|
|
ethRaw, err := n.serializedUDPPacket(src, dst, p.Payload, eth)
|
|
if err != nil {
|
|
n.logf("serializing UDP packet: %v", err)
|
|
return
|
|
}
|
|
n.writeEth(ethRaw)
|
|
}
|
|
|
|
type serializableNetworkLayer interface {
|
|
gopacket.SerializableLayer
|
|
gopacket.NetworkLayer
|
|
}
|
|
|
|
func mkIPLayer(proto layers.IPProtocol, src, dst netip.Addr) serializableNetworkLayer {
|
|
if src.Is4() {
|
|
return &layers.IPv4{
|
|
Protocol: proto,
|
|
SrcIP: src.AsSlice(),
|
|
DstIP: dst.AsSlice(),
|
|
}
|
|
}
|
|
if src.Is6() {
|
|
return &layers.IPv6{
|
|
NextHeader: proto,
|
|
SrcIP: src.AsSlice(),
|
|
DstIP: dst.AsSlice(),
|
|
}
|
|
}
|
|
panic("invalid src IP")
|
|
}
|
|
|
|
// serializedUDPPacket serializes a UDP packet with the given source and
|
|
// destination IP:port pairs, and payload.
|
|
//
|
|
// If eth is non-nil, it will be used as the Ethernet layer, otherwise the
|
|
// Ethernet layer will be omitted from the serialization.
|
|
func (n *network) serializedUDPPacket(src, dst netip.AddrPort, payload []byte, eth *layers.Ethernet) ([]byte, error) {
|
|
ip := mkIPLayer(layers.IPProtocolUDP, src.Addr(), dst.Addr())
|
|
udp := &layers.UDP{
|
|
SrcPort: layers.UDPPort(src.Port()),
|
|
DstPort: layers.UDPPort(dst.Port()),
|
|
}
|
|
if eth == nil {
|
|
return mkPacket(ip, udp, gopacket.Payload(payload))
|
|
} else {
|
|
return mkPacket(eth, ip, udp, gopacket.Payload(payload))
|
|
}
|
|
}
|
|
|
|
// HandleEthernetPacketForRouter handles a packet that is
|
|
// directed to the router/gateway itself. The packet may be to the
|
|
// broadcast MAC address, or to the router's MAC address. The target
|
|
// IP may be the router's IP, or an internet (routed) IP.
|
|
func (n *network) HandleEthernetPacketForRouter(ep EthernetPacket) {
|
|
packet := ep.gp
|
|
flow, ok := flow(packet)
|
|
if !ok {
|
|
n.logf("dropping non-IP packet: %v", packet)
|
|
return
|
|
}
|
|
dstIP := flow.dst
|
|
toForward := dstIP != n.lanIP4.Addr() && dstIP != netip.IPv4Unspecified() && !dstIP.IsLinkLocalUnicast()
|
|
|
|
// Pre-NAT mapping, for DNS/etc responses:
|
|
if flow.src.Is6() {
|
|
n.macMu.Lock()
|
|
mak.Set(&n.macOfIPv6, flow.src, ep.SrcMAC())
|
|
n.macMu.Unlock()
|
|
}
|
|
|
|
if udp, ok := packet.Layer(layers.LayerTypeUDP).(*layers.UDP); ok {
|
|
n.handleUDPPacketForRouter(ep, udp, toForward, flow)
|
|
return
|
|
}
|
|
|
|
if toForward && n.s.shouldInterceptTCP(packet) {
|
|
if flow.dst.Is4() && n.breakWAN4 {
|
|
// Blackhole the packet.
|
|
return
|
|
}
|
|
var base *layers.BaseLayer
|
|
proto := header.IPv4ProtocolNumber
|
|
if v4, ok := packet.Layer(layers.LayerTypeIPv4).(*layers.IPv4); ok {
|
|
base = &v4.BaseLayer
|
|
} else if v6, ok := packet.Layer(layers.LayerTypeIPv6).(*layers.IPv6); ok {
|
|
base = &v6.BaseLayer
|
|
proto = header.IPv6ProtocolNumber
|
|
} else {
|
|
panic("not v4, not v6")
|
|
}
|
|
pktCopy := make([]byte, 0, len(base.Contents)+len(base.Payload))
|
|
pktCopy = append(pktCopy, base.Contents...)
|
|
pktCopy = append(pktCopy, base.Payload...)
|
|
packetBuf := stack.NewPacketBuffer(stack.PacketBufferOptions{
|
|
Payload: buffer.MakeWithData(pktCopy),
|
|
})
|
|
n.linkEP.InjectInbound(proto, packetBuf)
|
|
packetBuf.DecRef()
|
|
return
|
|
}
|
|
|
|
if flow.src.Is6() && flow.src.IsLinkLocalUnicast() && !flow.dst.IsLinkLocalUnicast() {
|
|
// Don't log.
|
|
return
|
|
}
|
|
|
|
n.logf("router got unknown packet: %v", packet)
|
|
}
|
|
|
|
func (n *network) handleUDPPacketForRouter(ep EthernetPacket, udp *layers.UDP, toForward bool, flow ipSrcDst) {
|
|
packet := ep.gp
|
|
srcIP, dstIP := flow.src, flow.dst
|
|
|
|
if isDHCPRequest(packet) {
|
|
if !n.v4 {
|
|
n.logf("dropping DHCPv4 packet on v6-only network")
|
|
return
|
|
}
|
|
res, err := n.s.createDHCPResponse(packet)
|
|
if err != nil {
|
|
n.logf("createDHCPResponse: %v", err)
|
|
return
|
|
}
|
|
n.writeEth(res)
|
|
return
|
|
}
|
|
|
|
if isMDNSQuery(packet) || isIGMP(packet) {
|
|
// Don't log. Spammy for now.
|
|
return
|
|
}
|
|
|
|
if isDNSRequest(packet) {
|
|
res, err := n.s.createDNSResponse(packet)
|
|
if err != nil {
|
|
n.logf("createDNSResponse: %v", err)
|
|
return
|
|
}
|
|
n.writeEth(res)
|
|
return
|
|
}
|
|
|
|
if fakeSyslog.Match(dstIP) {
|
|
node, ok := n.nodeByIP(srcIP)
|
|
if !ok {
|
|
return
|
|
}
|
|
if node.verboseSyslog {
|
|
// TODO(bradfitz): parse this and capture it, structured, into
|
|
// node's log buffer.
|
|
n.logf("syslog from %v: %s", node, udp.Payload)
|
|
}
|
|
return
|
|
}
|
|
|
|
if dstIP == n.lanIP4.Addr() && isNATPMP(udp) {
|
|
n.handleNATPMPRequest(UDPPacket{
|
|
Src: netip.AddrPortFrom(srcIP, uint16(udp.SrcPort)),
|
|
Dst: netip.AddrPortFrom(dstIP, uint16(udp.DstPort)),
|
|
Payload: udp.Payload,
|
|
})
|
|
return
|
|
}
|
|
|
|
if toForward {
|
|
if dstIP.Is4() && n.breakWAN4 {
|
|
// Blackhole the packet.
|
|
return
|
|
}
|
|
src := netip.AddrPortFrom(srcIP, uint16(udp.SrcPort))
|
|
dst := netip.AddrPortFrom(dstIP, uint16(udp.DstPort))
|
|
buf, err := n.serializedUDPPacket(src, dst, udp.Payload, nil)
|
|
if err != nil {
|
|
n.logf("serializing UDP packet: %v", err)
|
|
return
|
|
}
|
|
n.s.pcapWriter.WritePacket(gopacket.CaptureInfo{
|
|
Timestamp: time.Now(),
|
|
CaptureLength: len(buf),
|
|
Length: len(buf),
|
|
InterfaceIndex: n.lanInterfaceID,
|
|
}, buf)
|
|
|
|
lanSrc := src // the original src, before NAT (for logging only)
|
|
src = n.doNATOut(src, dst)
|
|
if !src.IsValid() {
|
|
n.logf("warning: NAT dropped packet; no NAT out mapping for %v=>%v", lanSrc, dst)
|
|
return
|
|
}
|
|
buf, err = n.serializedUDPPacket(src, dst, udp.Payload, nil)
|
|
if err != nil {
|
|
n.logf("serializing UDP packet: %v", err)
|
|
return
|
|
}
|
|
n.s.pcapWriter.WritePacket(gopacket.CaptureInfo{
|
|
Timestamp: time.Now(),
|
|
CaptureLength: len(buf),
|
|
Length: len(buf),
|
|
InterfaceIndex: n.wanInterfaceID,
|
|
}, buf)
|
|
|
|
if src.Addr().Is6() {
|
|
n.macMu.Lock()
|
|
mak.Set(&n.macOfIPv6, src.Addr(), ep.SrcMAC())
|
|
n.macMu.Unlock()
|
|
}
|
|
|
|
n.s.routeUDPPacket(UDPPacket{
|
|
Src: src,
|
|
Dst: dst,
|
|
Payload: udp.Payload,
|
|
})
|
|
return
|
|
}
|
|
|
|
if udp.DstPort == pcpPort || udp.DstPort == ssdpPort {
|
|
// We handle NAT-PMP, but not these yet.
|
|
// TODO(bradfitz): handle? marginal utility so far.
|
|
// Don't log about them being unknown.
|
|
return
|
|
}
|
|
|
|
n.logf("router got unknown UDP packet: %v", packet)
|
|
}
|
|
|
|
func (n *network) handleIPv6RouterSolicitation(ep EthernetPacket, rs *layers.ICMPv6RouterSolicitation) {
|
|
v6 := ep.gp.Layer(layers.LayerTypeIPv6).(*layers.IPv6)
|
|
|
|
// Send a router advertisement back.
|
|
eth := &layers.Ethernet{
|
|
SrcMAC: n.mac.HWAddr(),
|
|
DstMAC: ep.SrcMAC().HWAddr(),
|
|
EthernetType: layers.EthernetTypeIPv6,
|
|
}
|
|
n.logf("sending IPv6 router advertisement to %v from %v", eth.DstMAC, eth.SrcMAC)
|
|
ip := &layers.IPv6{
|
|
NextHeader: layers.IPProtocolICMPv6,
|
|
HopLimit: 255, // per RFC 4861, 7.1.1 etc (all NDP messages); don't use mkPacket's default of 64
|
|
SrcIP: net.ParseIP("fe80::1"),
|
|
DstIP: v6.SrcIP,
|
|
}
|
|
icmp := &layers.ICMPv6{
|
|
TypeCode: layers.CreateICMPv6TypeCode(layers.ICMPv6TypeRouterAdvertisement, 0),
|
|
}
|
|
pfx := make([]byte, 0, 30) // it's 32 on the wire, once gopacket adds two byte header
|
|
pfx = append(pfx, byte(64)) // CIDR length
|
|
pfx = append(pfx, byte(0xc0)) // flags: On-Link, Autonomous
|
|
pfx = binary.BigEndian.AppendUint32(pfx, 86400) // valid lifetime
|
|
pfx = binary.BigEndian.AppendUint32(pfx, 14400) // preferred lifetime
|
|
pfx = binary.BigEndian.AppendUint32(pfx, 0) // reserved
|
|
wanIP := n.wanIP6.Addr().As16()
|
|
pfx = append(pfx, wanIP[:]...)
|
|
|
|
ra := &layers.ICMPv6RouterAdvertisement{
|
|
RouterLifetime: 1800,
|
|
Options: []layers.ICMPv6Option{
|
|
{
|
|
Type: layers.ICMPv6OptPrefixInfo,
|
|
Data: pfx,
|
|
},
|
|
},
|
|
}
|
|
pkt, err := mkPacket(eth, ip, icmp, ra)
|
|
if err != nil {
|
|
n.logf("serializing ICMPv6 RA: %v", err)
|
|
return
|
|
}
|
|
n.writeEth(pkt)
|
|
}
|
|
|
|
func (n *network) handleIPv6NeighborSolicitation(ep EthernetPacket, ns *layers.ICMPv6NeighborSolicitation) {
|
|
v6 := ep.gp.Layer(layers.LayerTypeIPv6).(*layers.IPv6)
|
|
|
|
targetIP, ok := netip.AddrFromSlice(ns.TargetAddress)
|
|
if !ok {
|
|
return
|
|
}
|
|
var srcMAC MAC
|
|
if targetIP == netip.MustParseAddr("fe80::1") {
|
|
srcMAC = n.mac
|
|
} else {
|
|
n.logf("Ignoring IPv6 NS request from %v for target %v", ep.SrcMAC(), targetIP)
|
|
return
|
|
}
|
|
n.logf("replying to IPv6 NS %v->%v about target %v (replySrc=%v)", ep.SrcMAC(), ep.DstMAC(), targetIP, srcMAC)
|
|
|
|
// Send a neighbor advertisement back.
|
|
eth := &layers.Ethernet{
|
|
SrcMAC: srcMAC.HWAddr(),
|
|
DstMAC: ep.SrcMAC().HWAddr(),
|
|
EthernetType: layers.EthernetTypeIPv6,
|
|
}
|
|
ip := &layers.IPv6{
|
|
HopLimit: 255, // per RFC 4861, 7.1.1 etc (all NDP messages); don't use mkPacket's default of 64
|
|
NextHeader: layers.IPProtocolICMPv6,
|
|
SrcIP: ns.TargetAddress,
|
|
DstIP: v6.SrcIP,
|
|
}
|
|
icmp := &layers.ICMPv6{
|
|
TypeCode: layers.CreateICMPv6TypeCode(layers.ICMPv6TypeNeighborAdvertisement, 0),
|
|
}
|
|
var flags uint8 = 0x40 // solicited
|
|
if srcMAC == n.mac {
|
|
flags |= 0x80 // router
|
|
}
|
|
flags |= 0x20 // override
|
|
|
|
na := &layers.ICMPv6NeighborAdvertisement{
|
|
TargetAddress: ns.TargetAddress,
|
|
Flags: flags,
|
|
}
|
|
na.Options = append(na.Options, layers.ICMPv6Option{
|
|
Type: layers.ICMPv6OptTargetAddress,
|
|
Data: srcMAC.HWAddr(),
|
|
})
|
|
pkt, err := mkPacket(eth, ip, icmp, na)
|
|
if err != nil {
|
|
n.logf("serializing ICMPv6 NA: %v", err)
|
|
}
|
|
if !n.writeEth(pkt) {
|
|
n.logf("failed to writeEth for IPv6 NA reply for %v", targetIP)
|
|
}
|
|
}
|
|
|
|
// createDHCPResponse creates a DHCPv4 response for the given DHCPv4 request.
|
|
func (s *Server) createDHCPResponse(request gopacket.Packet) ([]byte, error) {
|
|
ethLayer := request.Layer(layers.LayerTypeEthernet).(*layers.Ethernet)
|
|
srcMAC, ok := macOf(ethLayer.SrcMAC)
|
|
if !ok {
|
|
return nil, nil
|
|
}
|
|
node, ok := s.nodeByMAC[srcMAC]
|
|
if !ok {
|
|
log.Printf("DHCP request from unknown node %v; ignoring", srcMAC)
|
|
return nil, nil
|
|
}
|
|
gwIP := node.net.lanIP4.Addr()
|
|
|
|
ipLayer := request.Layer(layers.LayerTypeIPv4).(*layers.IPv4)
|
|
udpLayer := request.Layer(layers.LayerTypeUDP).(*layers.UDP)
|
|
dhcpLayer := request.Layer(layers.LayerTypeDHCPv4).(*layers.DHCPv4)
|
|
|
|
response := &layers.DHCPv4{
|
|
Operation: layers.DHCPOpReply,
|
|
HardwareType: layers.LinkTypeEthernet,
|
|
HardwareLen: 6,
|
|
Xid: dhcpLayer.Xid,
|
|
ClientHWAddr: dhcpLayer.ClientHWAddr,
|
|
Flags: dhcpLayer.Flags,
|
|
YourClientIP: node.lanIP.AsSlice(),
|
|
Options: []layers.DHCPOption{
|
|
{
|
|
Type: layers.DHCPOptServerID,
|
|
Data: gwIP.AsSlice(), // DHCP server's IP
|
|
Length: 4,
|
|
},
|
|
},
|
|
}
|
|
|
|
var msgType layers.DHCPMsgType
|
|
for _, opt := range dhcpLayer.Options {
|
|
if opt.Type == layers.DHCPOptMessageType && opt.Length > 0 {
|
|
msgType = layers.DHCPMsgType(opt.Data[0])
|
|
}
|
|
}
|
|
switch msgType {
|
|
case layers.DHCPMsgTypeDiscover:
|
|
response.Options = append(response.Options, layers.DHCPOption{
|
|
Type: layers.DHCPOptMessageType,
|
|
Data: []byte{byte(layers.DHCPMsgTypeOffer)},
|
|
Length: 1,
|
|
})
|
|
case layers.DHCPMsgTypeRequest:
|
|
response.Options = append(response.Options,
|
|
layers.DHCPOption{
|
|
Type: layers.DHCPOptMessageType,
|
|
Data: []byte{byte(layers.DHCPMsgTypeAck)},
|
|
Length: 1,
|
|
},
|
|
layers.DHCPOption{
|
|
Type: layers.DHCPOptLeaseTime,
|
|
Data: binary.BigEndian.AppendUint32(nil, 3600), // hour? sure.
|
|
Length: 4,
|
|
},
|
|
layers.DHCPOption{
|
|
Type: layers.DHCPOptRouter,
|
|
Data: gwIP.AsSlice(),
|
|
Length: 4,
|
|
},
|
|
layers.DHCPOption{
|
|
Type: layers.DHCPOptDNS,
|
|
Data: fakeDNS.v4.AsSlice(),
|
|
Length: 4,
|
|
},
|
|
layers.DHCPOption{
|
|
Type: layers.DHCPOptSubnetMask,
|
|
Data: net.CIDRMask(node.net.lanIP4.Bits(), 32),
|
|
Length: 4,
|
|
},
|
|
)
|
|
}
|
|
|
|
eth := &layers.Ethernet{
|
|
SrcMAC: node.net.mac.HWAddr(),
|
|
DstMAC: ethLayer.SrcMAC,
|
|
EthernetType: layers.EthernetTypeIPv4, // never IPv6 for DHCP
|
|
}
|
|
ip := &layers.IPv4{
|
|
Protocol: layers.IPProtocolUDP,
|
|
SrcIP: ipLayer.DstIP,
|
|
DstIP: ipLayer.SrcIP,
|
|
}
|
|
udp := &layers.UDP{
|
|
SrcPort: udpLayer.DstPort,
|
|
DstPort: udpLayer.SrcPort,
|
|
}
|
|
return mkPacket(eth, ip, udp, response)
|
|
}
|
|
|
|
// isDHCPRequest reports whether pkt is a DHCPv4 request.
|
|
func isDHCPRequest(pkt gopacket.Packet) bool {
|
|
v4, ok := pkt.Layer(layers.LayerTypeIPv4).(*layers.IPv4)
|
|
if !ok || v4.Protocol != layers.IPProtocolUDP {
|
|
return false
|
|
}
|
|
udp, ok := pkt.Layer(layers.LayerTypeUDP).(*layers.UDP)
|
|
return ok && udp.DstPort == 67 && udp.SrcPort == 68
|
|
}
|
|
|
|
func isIGMP(pkt gopacket.Packet) bool {
|
|
return pkt.Layer(layers.LayerTypeIGMP) != nil
|
|
}
|
|
|
|
func isMDNSQuery(pkt gopacket.Packet) bool {
|
|
udp, ok := pkt.Layer(layers.LayerTypeUDP).(*layers.UDP)
|
|
// TODO(bradfitz): also check IPv4 DstIP=224.0.0.251 (or whatever)
|
|
return ok && udp.SrcPort == 5353 && udp.DstPort == 5353
|
|
}
|
|
|
|
func (s *Server) shouldInterceptTCP(pkt gopacket.Packet) bool {
|
|
tcp, ok := pkt.Layer(layers.LayerTypeTCP).(*layers.TCP)
|
|
if !ok {
|
|
return false
|
|
}
|
|
if tcp.DstPort == 123 {
|
|
// Test port for TCP interception. Not really useful, but cute for
|
|
// demos.
|
|
return true
|
|
}
|
|
flow, ok := flow(pkt)
|
|
if !ok {
|
|
return false
|
|
}
|
|
if flow.src.Is6() && flow.src.IsLinkLocalUnicast() {
|
|
return false
|
|
}
|
|
|
|
if tcp.DstPort == 80 || tcp.DstPort == 443 {
|
|
for _, v := range []virtualIP{fakeControl, fakeDERP1, fakeDERP2, fakeLogCatcher} {
|
|
if v.Match(flow.dst) {
|
|
return true
|
|
}
|
|
}
|
|
if fakeProxyControlplane.Match(flow.dst) {
|
|
return s.blendReality
|
|
}
|
|
if s.derpIPs.Contains(flow.dst) {
|
|
return true
|
|
}
|
|
}
|
|
if tcp.DstPort == 8008 && fakeTestAgent.Match(flow.dst) {
|
|
// Connection from cmd/tta.
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
type ipSrcDst struct {
|
|
src netip.Addr
|
|
dst netip.Addr
|
|
}
|
|
|
|
func flow(gp gopacket.Packet) (f ipSrcDst, ok bool) {
|
|
if gp == nil {
|
|
return f, false
|
|
}
|
|
n := gp.NetworkLayer()
|
|
if n == nil {
|
|
return f, false
|
|
}
|
|
sb, db := n.NetworkFlow().Endpoints()
|
|
src, _ := netip.AddrFromSlice(sb.Raw())
|
|
dst, _ := netip.AddrFromSlice(db.Raw())
|
|
return ipSrcDst{src: src, dst: dst}, src.IsValid() && dst.IsValid()
|
|
}
|
|
|
|
// isDNSRequest reports whether pkt is a DNS request to the fake DNS server.
|
|
func isDNSRequest(pkt gopacket.Packet) bool {
|
|
udp, ok := pkt.Layer(layers.LayerTypeUDP).(*layers.UDP)
|
|
if !ok || udp.DstPort != 53 {
|
|
return false
|
|
}
|
|
f, ok := flow(pkt)
|
|
if !ok {
|
|
return false
|
|
}
|
|
if !fakeDNS.Match(f.dst) {
|
|
// TODO(bradfitz): maybe support configs where DNS is local in the LAN
|
|
return false
|
|
}
|
|
dns, ok := pkt.Layer(layers.LayerTypeDNS).(*layers.DNS)
|
|
return ok && dns.QR == false && len(dns.Questions) > 0
|
|
}
|
|
|
|
func isNATPMP(udp *layers.UDP) bool {
|
|
return udp.DstPort == 5351 && len(udp.Payload) > 0 && udp.Payload[0] == 0 // version 0, not 2 for PCP
|
|
}
|
|
|
|
func makeSTUNReply(req UDPPacket) (res UDPPacket, ok bool) {
|
|
txid, err := stun.ParseBindingRequest(req.Payload)
|
|
if err != nil {
|
|
log.Printf("invalid STUN request: %v", err)
|
|
return res, false
|
|
}
|
|
return UDPPacket{
|
|
Src: req.Dst,
|
|
Dst: req.Src,
|
|
Payload: stun.Response(txid, req.Src),
|
|
}, true
|
|
}
|
|
|
|
func (s *Server) createDNSResponse(pkt gopacket.Packet) ([]byte, error) {
|
|
flow, ok := flow(pkt)
|
|
if !ok {
|
|
return nil, nil
|
|
}
|
|
ethLayer := pkt.Layer(layers.LayerTypeEthernet).(*layers.Ethernet)
|
|
udpLayer := pkt.Layer(layers.LayerTypeUDP).(*layers.UDP)
|
|
dnsLayer := pkt.Layer(layers.LayerTypeDNS).(*layers.DNS)
|
|
|
|
if dnsLayer.OpCode != layers.DNSOpCodeQuery || dnsLayer.QR || len(dnsLayer.Questions) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
response := &layers.DNS{
|
|
ID: dnsLayer.ID,
|
|
QR: true,
|
|
AA: true,
|
|
TC: false,
|
|
RD: dnsLayer.RD,
|
|
RA: true,
|
|
OpCode: layers.DNSOpCodeQuery,
|
|
ResponseCode: layers.DNSResponseCodeNoErr,
|
|
}
|
|
|
|
var names []string
|
|
for _, q := range dnsLayer.Questions {
|
|
response.QDCount++
|
|
response.Questions = append(response.Questions, q)
|
|
|
|
if mem.HasSuffix(mem.B(q.Name), mem.S(".pool.ntp.org")) {
|
|
// Just drop DNS queries for NTP servers. For Debian/etc guests used
|
|
// during development. Not needed. Assume VM guests get correct time
|
|
// via their hypervisor.
|
|
return nil, nil
|
|
}
|
|
|
|
names = append(names, q.Type.String()+"/"+string(q.Name))
|
|
if q.Class != layers.DNSClassIN {
|
|
continue
|
|
}
|
|
|
|
if q.Type == layers.DNSTypeA || q.Type == layers.DNSTypeAAAA {
|
|
if v, ok := vips[string(q.Name)]; ok {
|
|
ip := v.v4
|
|
if q.Type == layers.DNSTypeAAAA {
|
|
ip = v.v6
|
|
}
|
|
response.ANCount++
|
|
response.Answers = append(response.Answers, layers.DNSResourceRecord{
|
|
Name: q.Name,
|
|
Type: q.Type,
|
|
Class: q.Class,
|
|
IP: ip.AsSlice(),
|
|
TTL: 60,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make reply layers, all reversed.
|
|
eth2 := &layers.Ethernet{
|
|
SrcMAC: ethLayer.DstMAC,
|
|
DstMAC: ethLayer.SrcMAC,
|
|
}
|
|
ip2 := mkIPLayer(layers.IPProtocolUDP, flow.dst, flow.src)
|
|
udp2 := &layers.UDP{
|
|
SrcPort: udpLayer.DstPort,
|
|
DstPort: udpLayer.SrcPort,
|
|
}
|
|
|
|
resPkt, err := mkPacket(eth2, ip2, udp2, response)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
const debugDNS = false
|
|
if debugDNS {
|
|
if len(response.Answers) > 0 {
|
|
back := gopacket.NewPacket(resPkt, layers.LayerTypeEthernet, gopacket.Lazy)
|
|
log.Printf("createDNSResponse generated answers: %v", back)
|
|
} else {
|
|
log.Printf("made empty response for %q", names)
|
|
}
|
|
}
|
|
|
|
return resPkt, nil
|
|
}
|
|
|
|
// doNATOut performs NAT on an outgoing packet from src to dst, where
|
|
// src is a LAN IP and dst is a WAN IP.
|
|
//
|
|
// It returns the source WAN ip:port to use.
|
|
//
|
|
// If newSrc is invalid, the packet should be dropped.
|
|
func (n *network) doNATOut(src, dst netip.AddrPort) (newSrc netip.AddrPort) {
|
|
if src.Addr().Is6() {
|
|
// TODO(bradfitz): IPv6 NAT? For now, normal IPv6 only.
|
|
return src
|
|
}
|
|
|
|
n.natMu.Lock()
|
|
defer n.natMu.Unlock()
|
|
|
|
// First see if there's a port mapping, before doing NAT.
|
|
if wanAP, ok := n.portMapFlow[portmapFlowKey{
|
|
peerWAN: dst,
|
|
lanAP: src,
|
|
}]; ok {
|
|
return wanAP
|
|
}
|
|
|
|
return n.natTable.PickOutgoingSrc(src, dst, time.Now())
|
|
}
|
|
|
|
type portmapFlowKey struct {
|
|
peerWAN netip.AddrPort // the peer's WAN ip:port
|
|
lanAP netip.AddrPort
|
|
}
|
|
|
|
// doNATIn performs NAT on an incoming packet from WAN src to WAN dst, returning
|
|
// a new destination LAN ip:port to use.
|
|
//
|
|
// If newDst is invalid, the packet should be dropped.
|
|
func (n *network) doNATIn(src, dst netip.AddrPort) (newDst netip.AddrPort) {
|
|
if dst.Addr().Is6() {
|
|
// TODO(bradfitz): IPv6 NAT? For now, normal IPv6 only.
|
|
return dst
|
|
}
|
|
|
|
n.natMu.Lock()
|
|
defer n.natMu.Unlock()
|
|
|
|
now := time.Now()
|
|
|
|
// First see if there's a port mapping, before doing NAT.
|
|
if lanAP, ok := n.portMap[dst]; ok {
|
|
if now.Before(lanAP.expiry) {
|
|
mak.Set(&n.portMapFlow, portmapFlowKey{
|
|
peerWAN: src,
|
|
lanAP: lanAP.dst,
|
|
}, dst)
|
|
//n.logf("NAT: doNatIn: port mapping %v=>%v", dst, lanAP.dst)
|
|
return lanAP.dst
|
|
}
|
|
n.logf("NAT: doNatIn: port mapping EXPIRED for %v=>%v", dst, lanAP.dst)
|
|
delete(n.portMap, dst)
|
|
return netip.AddrPort{}
|
|
}
|
|
|
|
return n.natTable.PickIncomingDst(src, dst, now)
|
|
}
|
|
|
|
// IsPublicPortUsed reports whether the given public port is currently in use.
|
|
//
|
|
// n.natMu must be held by the caller. (It's only called by nat implementations
|
|
// which are always called with natMu held))
|
|
func (n *network) IsPublicPortUsed(ap netip.AddrPort) bool {
|
|
_, ok := n.portMap[ap]
|
|
return ok
|
|
}
|
|
|
|
func (n *network) doPortMap(src netip.Addr, dstLANPort, wantExtPort uint16, sec int) (gotPort uint16, ok bool) {
|
|
n.natMu.Lock()
|
|
defer n.natMu.Unlock()
|
|
|
|
if !n.portmap {
|
|
return 0, false
|
|
}
|
|
|
|
wanAP := netip.AddrPortFrom(n.wanIP4, wantExtPort)
|
|
dst := netip.AddrPortFrom(src, dstLANPort)
|
|
|
|
if sec == 0 {
|
|
lanAP, ok := n.portMap[wanAP]
|
|
if ok && lanAP.dst.Addr() == src {
|
|
delete(n.portMap, wanAP)
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
// See if they already have a mapping and extend expiry if so.
|
|
for k, v := range n.portMap {
|
|
if v.dst == dst {
|
|
n.portMap[k] = portMapping{
|
|
dst: dst,
|
|
expiry: time.Now().Add(time.Duration(sec) * time.Second),
|
|
}
|
|
return k.Port(), true
|
|
}
|
|
}
|
|
|
|
for try := 0; try < 20_000; try++ {
|
|
if wanAP.Port() > 0 && !n.natTable.IsPublicPortUsed(wanAP) {
|
|
mak.Set(&n.portMap, wanAP, portMapping{
|
|
dst: dst,
|
|
expiry: time.Now().Add(time.Duration(sec) * time.Second),
|
|
})
|
|
n.logf("vnet: allocated NAT mapping from %v to %v", wanAP, dst)
|
|
return wanAP.Port(), true
|
|
}
|
|
wantExtPort = rand.N(uint16(32<<10)) + 32<<10
|
|
wanAP = netip.AddrPortFrom(n.wanIP4, wantExtPort)
|
|
}
|
|
return 0, false
|
|
}
|
|
|
|
func (n *network) createARPResponse(pkt gopacket.Packet) ([]byte, error) {
|
|
ethLayer, ok := pkt.Layer(layers.LayerTypeEthernet).(*layers.Ethernet)
|
|
if !ok {
|
|
return nil, nil
|
|
}
|
|
arpLayer, ok := pkt.Layer(layers.LayerTypeARP).(*layers.ARP)
|
|
if !ok ||
|
|
arpLayer.Operation != layers.ARPRequest ||
|
|
arpLayer.AddrType != layers.LinkTypeEthernet ||
|
|
arpLayer.Protocol != layers.EthernetTypeIPv4 ||
|
|
arpLayer.HwAddressSize != 6 ||
|
|
arpLayer.ProtAddressSize != 4 ||
|
|
len(arpLayer.DstProtAddress) != 4 {
|
|
return nil, nil
|
|
}
|
|
|
|
wantIP := netip.AddrFrom4([4]byte(arpLayer.DstProtAddress))
|
|
foundMAC, ok := n.MACOfIP(wantIP)
|
|
if !ok {
|
|
return nil, nil
|
|
}
|
|
|
|
eth := &layers.Ethernet{
|
|
SrcMAC: foundMAC.HWAddr(),
|
|
DstMAC: ethLayer.SrcMAC,
|
|
EthernetType: layers.EthernetTypeARP,
|
|
}
|
|
|
|
a2 := &layers.ARP{
|
|
AddrType: layers.LinkTypeEthernet,
|
|
Protocol: layers.EthernetTypeIPv4, // never IPv6; IPv6 equivalent of ARP is handleIPv6NeighborSolicitation
|
|
HwAddressSize: 6,
|
|
ProtAddressSize: 4,
|
|
Operation: layers.ARPReply,
|
|
SourceHwAddress: foundMAC.HWAddr(),
|
|
SourceProtAddress: arpLayer.DstProtAddress,
|
|
DstHwAddress: ethLayer.SrcMAC,
|
|
DstProtAddress: arpLayer.SourceProtAddress,
|
|
}
|
|
|
|
buffer := gopacket.NewSerializeBuffer()
|
|
options := gopacket.SerializeOptions{FixLengths: true, ComputeChecksums: true}
|
|
if err := gopacket.SerializeLayers(buffer, options, eth, a2); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return buffer.Bytes(), nil
|
|
}
|
|
|
|
func (n *network) handleNATPMPRequest(req UDPPacket) {
|
|
if !n.portmap {
|
|
return
|
|
}
|
|
if string(req.Payload) == "\x00\x00" {
|
|
// https://www.rfc-editor.org/rfc/rfc6886#section-3.2
|
|
|
|
res := make([]byte, 0, 12)
|
|
res = append(res,
|
|
0, // version 0 (NAT-PMP)
|
|
128, // response to op 0 (128+0)
|
|
0, 0, // result code success
|
|
)
|
|
res = binary.BigEndian.AppendUint32(res, uint32(time.Now().Unix()))
|
|
wan4 := n.wanIP4.As4()
|
|
res = append(res, wan4[:]...)
|
|
n.WriteUDPPacketNoNAT(UDPPacket{
|
|
Src: req.Dst,
|
|
Dst: req.Src,
|
|
Payload: res,
|
|
})
|
|
return
|
|
}
|
|
|
|
// Map UDP request
|
|
if len(req.Payload) == 12 && req.Payload[0] == 0 && req.Payload[1] == 1 {
|
|
// https://www.rfc-editor.org/rfc/rfc6886#section-3.3
|
|
// "00 01 00 00 ed 40 00 00 00 00 1c 20" =>
|
|
// 00 ver
|
|
// 01 op=map UDP
|
|
// 00 00 reserved (0 in request; in response, this is the result code)
|
|
// ed 40 internal port 60736
|
|
// 00 00 suggested external port
|
|
// 00 00 1c 20 suggested lifetime in seconds (7200 sec = 2 hours)
|
|
internalPort := binary.BigEndian.Uint16(req.Payload[4:6])
|
|
wantExtPort := binary.BigEndian.Uint16(req.Payload[6:8])
|
|
lifetimeSec := binary.BigEndian.Uint32(req.Payload[8:12])
|
|
gotPort, ok := n.doPortMap(req.Src.Addr(), internalPort, wantExtPort, int(lifetimeSec))
|
|
if !ok {
|
|
n.logf("NAT-PMP map request for %v:%d failed", req.Src.Addr(), internalPort)
|
|
return
|
|
}
|
|
res := make([]byte, 0, 16)
|
|
res = append(res,
|
|
0, // version 0 (NAT-PMP)
|
|
1+128, // response to op 1
|
|
0, 0, // result code success
|
|
)
|
|
res = binary.BigEndian.AppendUint32(res, uint32(time.Now().Unix()))
|
|
res = binary.BigEndian.AppendUint16(res, internalPort)
|
|
res = binary.BigEndian.AppendUint16(res, gotPort)
|
|
res = binary.BigEndian.AppendUint32(res, lifetimeSec)
|
|
n.WriteUDPPacketNoNAT(UDPPacket{
|
|
Src: req.Dst,
|
|
Dst: req.Src,
|
|
Payload: res,
|
|
})
|
|
return
|
|
}
|
|
|
|
n.logf("TODO: handle NAT-PMP packet % 02x", req.Payload)
|
|
}
|
|
|
|
// UDPPacket is a UDP packet.
|
|
//
|
|
// For the purposes of this project, a UDP packet
|
|
// (not a general IP packet) is the unit to be NAT'ed,
|
|
// as that's all that Tailscale uses.
|
|
type UDPPacket struct {
|
|
Src netip.AddrPort
|
|
Dst netip.AddrPort
|
|
Payload []byte // everything after UDP header
|
|
}
|
|
|
|
func (s *Server) WriteStartingBanner(w io.Writer) {
|
|
fmt.Fprintf(w, "vnet serving clients:\n")
|
|
|
|
for _, n := range s.nodes {
|
|
fmt.Fprintf(w, " %v %15v (%v, %v)\n", n.mac, n.lanIP, n.net.wanIP4, n.net.natStyle.Load())
|
|
}
|
|
}
|
|
|
|
type agentConn struct {
|
|
node *node
|
|
tc *gonet.TCPConn
|
|
}
|
|
|
|
func (s *Server) addIdleAgentConn(ac *agentConn) {
|
|
//log.Printf("got agent conn from %v", ac.node.mac)
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
s.agentConns.Make()
|
|
s.agentConns.Add(ac)
|
|
|
|
if waiter, ok := s.agentConnWaiter[ac.node]; ok {
|
|
select {
|
|
case waiter <- struct{}{}:
|
|
default:
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Server) takeAgentConn(ctx context.Context, n *node) (_ *agentConn, ok bool) {
|
|
const debug = false
|
|
for {
|
|
ac, ok := s.takeAgentConnOne(n)
|
|
if ok {
|
|
if debug {
|
|
log.Printf("takeAgentConn: got agent conn for %v", n.mac)
|
|
}
|
|
return ac, true
|
|
}
|
|
s.mu.Lock()
|
|
ready := make(chan struct{})
|
|
mak.Set(&s.agentConnWaiter, n, ready)
|
|
s.mu.Unlock()
|
|
|
|
if debug {
|
|
log.Printf("takeAgentConn: waiting for agent conn for %v", n.mac)
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return nil, false
|
|
case <-ready:
|
|
case <-time.After(time.Second):
|
|
// Try again regularly anyway, in case we have multiple clients
|
|
// trying to hit the same node, or if a race means we weren't in the
|
|
// select by the time addIdleAgentConn tried to signal us.
|
|
}
|
|
}
|
|
}
|
|
|
|
func (s *Server) takeAgentConnOne(n *node) (_ *agentConn, ok bool) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
miss := 0
|
|
for ac := range s.agentConns {
|
|
if ac.node == n {
|
|
s.agentConns.Delete(ac)
|
|
return ac, true
|
|
}
|
|
miss++
|
|
}
|
|
if miss > 0 {
|
|
log.Printf("takeAgentConnOne: missed %d times for %v", miss, n.mac)
|
|
}
|
|
return nil, false
|
|
}
|
|
|
|
type NodeAgentClient struct {
|
|
*tailscale.LocalClient
|
|
HTTPClient *http.Client
|
|
}
|
|
|
|
func (s *Server) NodeAgentDialer(n *Node) DialFunc {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if d, ok := s.agentDialer[n.n]; ok {
|
|
return d
|
|
}
|
|
d := func(ctx context.Context, network, addr string) (net.Conn, error) {
|
|
ac, ok := s.takeAgentConn(ctx, n.n)
|
|
if !ok {
|
|
return nil, ctx.Err()
|
|
}
|
|
return ac.tc, nil
|
|
}
|
|
mak.Set(&s.agentDialer, n.n, d)
|
|
return d
|
|
}
|
|
|
|
func (s *Server) NodeAgentClient(n *Node) *NodeAgentClient {
|
|
d := s.NodeAgentDialer(n)
|
|
return &NodeAgentClient{
|
|
LocalClient: &tailscale.LocalClient{
|
|
UseSocketOnly: true,
|
|
OmitAuth: true,
|
|
Dial: d,
|
|
},
|
|
HTTPClient: &http.Client{
|
|
Transport: &http.Transport{
|
|
DialContext: d,
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
// EnableHostFirewall enables the host's stateful firewall.
|
|
func (c *NodeAgentClient) EnableHostFirewall(ctx context.Context) error {
|
|
req, err := http.NewRequestWithContext(ctx, "GET", "http://unused/fw", nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
res, err := c.HTTPClient.Do(req)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer res.Body.Close()
|
|
all, _ := io.ReadAll(res.Body)
|
|
if res.StatusCode != 200 {
|
|
return fmt.Errorf("unexpected status code %v: %s", res.Status, all)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// mkPacket is a serializes a number of layers into a packet.
|
|
//
|
|
// It's a convenience wrapper around gopacket.SerializeLayers
|
|
// that does some things automatically:
|
|
//
|
|
// * layers.Ethernet.EthernetType is set to IPv4 or IPv6 if not already set
|
|
// * layers.IPv4/IPv6 Version is set to 4/6 if not already set
|
|
// * layers.IPv4/IPv6 TTL/HopLimit is set to 64 if not already set
|
|
// * the TCP/UDP/ICMPv6 checksum is set based on the network layer
|
|
//
|
|
// The provided layers in ll must be sorted from lowest (e.g. *layers.Ethernet)
|
|
// to highest. (Depending on the need, the first layer will be either *layers.Ethernet
|
|
// or *layers.IPv4/IPv6).
|
|
func mkPacket(ll ...gopacket.SerializableLayer) ([]byte, error) {
|
|
var el *layers.Ethernet
|
|
var nl gopacket.NetworkLayer
|
|
for _, la := range ll {
|
|
switch la := la.(type) {
|
|
case *layers.IPv4:
|
|
nl = la
|
|
if el != nil && el.EthernetType == 0 {
|
|
el.EthernetType = layers.EthernetTypeIPv4
|
|
}
|
|
if la.Version == 0 {
|
|
la.Version = 4
|
|
}
|
|
if la.TTL == 0 {
|
|
la.TTL = 64
|
|
}
|
|
case *layers.IPv6:
|
|
nl = la
|
|
if el != nil && el.EthernetType == 0 {
|
|
el.EthernetType = layers.EthernetTypeIPv6
|
|
}
|
|
if la.Version == 0 {
|
|
la.Version = 6
|
|
}
|
|
if la.HopLimit == 0 {
|
|
la.HopLimit = 64
|
|
}
|
|
case *layers.Ethernet:
|
|
el = la
|
|
}
|
|
}
|
|
for _, la := range ll {
|
|
switch la := la.(type) {
|
|
case *layers.TCP:
|
|
la.SetNetworkLayerForChecksum(nl)
|
|
case *layers.UDP:
|
|
la.SetNetworkLayerForChecksum(nl)
|
|
case *layers.ICMPv6:
|
|
la.SetNetworkLayerForChecksum(nl)
|
|
}
|
|
}
|
|
buf := gopacket.NewSerializeBuffer()
|
|
opts := gopacket.SerializeOptions{FixLengths: true, ComputeChecksums: true}
|
|
if err := gopacket.SerializeLayers(buf, opts, ll...); err != nil {
|
|
return nil, fmt.Errorf("serializing packet: %v", err)
|
|
}
|
|
return buf.Bytes(), nil
|
|
}
|