diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 09dc5f146..09f4a8198 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -84,10 +84,19 @@ import ( "golang.org/x/sys/unix" "tailscale.com/client/tailscale" "tailscale.com/ipn" + "tailscale.com/types/logger" "tailscale.com/types/ptr" "tailscale.com/util/deephash" + "tailscale.com/util/linuxfw" ) +func newNetfilterRunner(logf logger.Logf) (linuxfw.NetfilterRunner, error) { + if defaultBool("TS_TEST_FAKE_NETFILTER", false) { + return linuxfw.NewFakeIPTablesRunner(), nil + } + return linuxfw.New(logf) +} + func main() { log.SetPrefix("boot: ") tailscale.I_Acknowledge_This_API_Is_Unstable = true @@ -295,6 +304,13 @@ authLoop: if cfg.ServeConfigPath != "" { go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client) } + var nfr linuxfw.NetfilterRunner + if wantProxy { + nfr, err = newNetfilterRunner(log.Printf) + if err != nil { + log.Fatalf("error creating new netfilter runner: %v", err) + } + } for { n, err := w.Next() if err != nil { @@ -315,7 +331,7 @@ authLoop: ipsHaveChanged := newCurrentIPs != currentIPs if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged { log.Printf("Installing proxy rules") - if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs); err != nil { + if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil { log.Fatalf("installing ingress proxy rules: %v", err) } } @@ -330,7 +346,7 @@ authLoop: } } if cfg.TailnetTargetIP != "" && ipsHaveChanged && len(addrs) > 0 { - if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs); err != nil { + if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs, nfr); err != nil { log.Fatalf("installing egress proxy rules: %v", err) } } @@ -662,16 +678,12 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, routes string return nil } -func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs 
[]netip.Prefix) error { +func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { dst, err := netip.ParseAddr(dstStr) if err != nil { return err } - argv0 := "iptables" - if dst.Is6() { - argv0 = "ip6tables" - } - var local string + var local netip.Addr for _, pfx := range tsIPs { if !pfx.IsSingleIP() { continue @@ -679,52 +691,30 @@ func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []net if pfx.Addr().Is4() != dst.Is4() { continue } - local = pfx.Addr().String() + local = pfx.Addr() break } - if local == "" { + if !local.IsValid() { return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) } - // Technically, if the control server ever changes the IPs assigned to this - // node, we'll slowly accumulate iptables rules. This shouldn't happen, so - // for now we'll live with it. - // Set up a rule that ensures that all packets - // except for those received on tailscale0 interface is forwarded to - // destination address - cmdDNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "!", "-i", "tailscale0", "-j", "DNAT", "--to-destination", dstStr) - cmdDNAT.Stdout = os.Stdout - cmdDNAT.Stderr = os.Stderr - if err := cmdDNAT.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) - } - // Set up a rule that ensures that all packets sent to the destination - // address will have the proxy's IP set as source IP - cmdSNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "POSTROUTING", "1", "--destination", dstStr, "-j", "SNAT", "--to-source", local) - cmdSNAT.Stdout = os.Stdout - cmdSNAT.Stderr = os.Stderr - if err := cmdSNAT.Run(); err != nil { - return fmt.Errorf("setting up SNAT via iptables failed: %w", err) - } - - cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", 
"--clamp-mss-to-pmtu") - cmdClamp.Stdout = os.Stdout - cmdClamp.Stderr = os.Stderr - if err := cmdClamp.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err := nfr.DNATNonTailscaleTraffic("tailscale0", dst); err != nil { + return fmt.Errorf("adding DNAT rule for non-tailscale traffic: %w", err) + } + if err := nfr.AddSNATRuleForDst(local, dst); err != nil { + return fmt.Errorf("adding SNAT rule: %w", err) + } + if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { + return fmt.Errorf("clamping MSS to PMTU: %w", err) } return nil } -func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error { +func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { dst, err := netip.ParseAddr(dstStr) if err != nil { return err } - argv0 := "iptables" - if dst.Is6() { - argv0 = "ip6tables" - } - var local string + var local netip.Addr for _, pfx := range tsIPs { if !pfx.IsSingleIP() { continue @@ -732,26 +722,17 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne if pfx.Addr().Is4() != dst.Is4() { continue } - local = pfx.Addr().String() + local = pfx.Addr() break } - if local == "" { + if !local.IsValid() { return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) } - // Technically, if the control server ever changes the IPs assigned to this - // node, we'll slowly accumulate iptables rules. This shouldn't happen, so - // for now we'll live with it. 
- cmd := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "-d", local, "-j", "DNAT", "--to-destination", dstStr) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err := nfr.AddDNATRule(local, dst); err != nil { + return fmt.Errorf("adding DNAT rule: %w", err) } - cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu") - cmdClamp.Stdout = os.Stdout - cmdClamp.Stderr = os.Stderr - if err := cmdClamp.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { + return fmt.Errorf("clamping MSS to PMTU: %w", err) } return nil } diff --git a/cmd/containerboot/main_test.go b/cmd/containerboot/main_test.go index b7147b75f..7a5280b6e 100644 --- a/cmd/containerboot/main_test.go +++ b/cmd/containerboot/main_test.go @@ -340,8 +340,6 @@ func TestContainerBoot(t *testing.T) { Notify: runningNotify, WantCmds: []string{ "/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false", - "/usr/bin/iptables -t nat -I PREROUTING 1 -d 100.64.0.1 -j DNAT --to-destination 1.2.3.4", - "/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu", }, }, }, @@ -365,9 +363,6 @@ func TestContainerBoot(t *testing.T) { Notify: runningNotify, WantCmds: []string{ "/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false", - "/usr/bin/iptables -t nat -I PREROUTING 1 ! 
-i tailscale0 -j DNAT --to-destination 100.99.99.99", - "/usr/bin/iptables -t nat -I POSTROUTING 1 --destination 100.99.99.99 -j SNAT --to-source 100.64.0.1", - "/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu", }, }, }, @@ -694,6 +689,7 @@ func TestContainerBoot(t *testing.T) { fmt.Sprintf("TS_TEST_SOCKET=%s", lapi.Path), fmt.Sprintf("TS_SOCKET=%s", runningSockPath), fmt.Sprintf("TS_TEST_ONLY_ROOT=%s", d), + "TS_TEST_FAKE_NETFILTER=true", } for k, v := range test.Env { cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v)) diff --git a/util/linuxfw/iptables_runner.go b/util/linuxfw/iptables_runner.go index d703190bc..f7fe2f0f4 100644 --- a/util/linuxfw/iptables_runner.go +++ b/util/linuxfw/iptables_runner.go @@ -291,6 +291,26 @@ func (i *iptablesRunner) addBase4(tunname string) error { return nil } +func (i *iptablesRunner) AddDNATRule(origDst, dst netip.Addr) error { + table := i.getIPTByAddr(dst) + return table.Insert("nat", "PREROUTING", 1, "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dst.String()) +} + +func (i *iptablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { + table := i.getIPTByAddr(dst) + return table.Insert("nat", "POSTROUTING", 1, "--destination", dst.String(), "-j", "SNAT", "--to-source", src.String()) +} + +func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) error { + table := i.getIPTByAddr(dst) + return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String()) +} + +func (i *iptablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { + table := i.getIPTByAddr(addr) + return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu") +} + // addBase6 adds some basic IPv4 processing rules to be // supplemented by later calls to other helpers. 
func (i *iptablesRunner) addBase6(tunname string) error { diff --git a/util/linuxfw/nftables_runner.go b/util/linuxfw/nftables_runner.go index d87610dda..0d438d9f5 100644 --- a/util/linuxfw/nftables_runner.go +++ b/util/linuxfw/nftables_runner.go @@ -17,6 +17,7 @@ import ( "github.com/google/nftables" "github.com/google/nftables/expr" + "golang.org/x/sys/unix" "tailscale.com/net/tsaddr" "tailscale.com/types/logger" ) @@ -69,6 +70,252 @@ type nftablesRunner struct { v6NATAvailable bool } +func (n *nftablesRunner) ensurePreroutingChain(dst netip.Addr) (*nftables.Table, *nftables.Chain, error) { + polAccept := nftables.ChainPolicyAccept + table := n.getNFTByAddr(dst) + nat, err := createTableIfNotExist(n.conn, table.Proto, "nat") + if err != nil { + return nil, nil, fmt.Errorf("error ensuring nat table: %w", err) + } + + // ensure prerouting chain exists + preroutingCh, err := getOrCreateChain(n.conn, chainInfo{ + table: nat, + name: "PREROUTING", + chainType: nftables.ChainTypeNAT, + chainHook: nftables.ChainHookPrerouting, + chainPriority: nftables.ChainPriorityNATDest, + chainPolicy: &polAccept, + }) + if err != nil { + return nil, nil, fmt.Errorf("error ensuring prerouting chain: %w", err) + } + return nat, preroutingCh, nil +} + +func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error { + nat, preroutingCh, err := n.ensurePreroutingChain(dst) + if err != nil { + return err + } + var daddrOffset, fam, daddrLen uint32 + if origDst.Is4() { + daddrOffset = 16 + daddrLen = 4 + fam = unix.NFPROTO_IPV4 + } else { + daddrOffset = 24 + daddrLen = 16 + fam = unix.NFPROTO_IPV6 + } + + dnatRule := &nftables.Rule{ + Table: nat, + Chain: preroutingCh, + Exprs: []expr.Any{ + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseNetworkHeader, + Offset: daddrOffset, + Len: daddrLen, + }, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: origDst.AsSlice(), + }, + &expr.Immediate{ + Register: 1, + Data: dst.AsSlice(), + }, + &expr.NAT{ + Type: 
expr.NATTypeDestNAT, + Family: fam, + RegAddrMin: 1, + }, + }, + } + n.conn.InsertRule(dnatRule) + return n.conn.Flush() +} + +func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error { + nat, preroutingCh, err := n.ensurePreroutingChain(dst) + if err != nil { + return err + } + var famConst uint32 + if dst.Is4() { + famConst = unix.NFPROTO_IPV4 + } else { + famConst = unix.NFPROTO_IPV6 + } + + dnatRule := &nftables.Rule{ + Table: nat, + Chain: preroutingCh, + Exprs: []expr.Any{ + &expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1}, + &expr.Cmp{ + Op: expr.CmpOpNeq, + Register: 1, + Data: []byte(tunname), + }, + &expr.Immediate{ + Register: 1, + Data: dst.AsSlice(), + }, + &expr.NAT{ + Type: expr.NATTypeDestNAT, + Family: famConst, + RegAddrMin: 1, + }, + }, + } + n.conn.AddRule(dnatRule) + return n.conn.Flush() +} + +func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { + polAccept := nftables.ChainPolicyAccept + table := n.getNFTByAddr(dst) + nat, err := createTableIfNotExist(n.conn, table.Proto, "nat") + if err != nil { + return fmt.Errorf("error ensuring nat table exists: %w", err) + } + + // ensure postrouting chain exists + postRoutingCh, err := getOrCreateChain(n.conn, chainInfo{ + table: nat, + name: "POSTROUTING", + chainType: nftables.ChainTypeNAT, + chainHook: nftables.ChainHookPostrouting, + chainPriority: nftables.ChainPriorityNATSource, + chainPolicy: &polAccept, + }) + if err != nil { + return fmt.Errorf("error ensuring postrouting chain: %w", err) + } + var daddrOffset, fam, daddrLen uint32 + if dst.Is4() { + daddrOffset = 16 + daddrLen = 4 + fam = unix.NFPROTO_IPV4 + } else { + daddrOffset = 24 + daddrLen = 16 + fam = unix.NFPROTO_IPV6 + } + + snatRule := &nftables.Rule{ + Table: nat, + Chain: postRoutingCh, + Exprs: []expr.Any{ + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseNetworkHeader, + Offset: daddrOffset, + Len: daddrLen, + }, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: 
dst.AsSlice(), + }, + &expr.Immediate{ + Register: 1, + Data: src.AsSlice(), + }, + &expr.NAT{ + Type: expr.NATTypeSourceNAT, + Family: fam, + RegAddrMin: 1, + }, + }, + } + n.conn.AddRule(snatRule) + return n.conn.Flush() +} + +func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { + polAccept := nftables.ChainPolicyAccept + table := n.getNFTByAddr(addr) + filterTable, err := createTableIfNotExist(n.conn, table.Proto, "filter") + if err != nil { + return fmt.Errorf("error ensuring filter table: %w", err) + } + + // Ensure the filter table's FORWARD chain exists. NOTE(review): the iptables runner clamps in mangle/FORWARD; confirm filter is intended here. + fwChain, err := getOrCreateChain(n.conn, chainInfo{ + table: filterTable, + name: "FORWARD", + chainType: nftables.ChainTypeFilter, + chainHook: nftables.ChainHookForward, + chainPriority: nftables.ChainPriorityFilter, + chainPolicy: &polAccept, + }) + if err != nil { + return fmt.Errorf("error ensuring forward chain: %w", err) + } + + clampRule := &nftables.Rule{ + Table: filterTable, + Chain: fwChain, + Exprs: []expr.Any{ + &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1}, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: []byte(tun), + }, + &expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1}, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: []byte{unix.IPPROTO_TCP}, + }, + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseTransportHeader, + Offset: 13, + Len: 1, + }, + &expr.Bitwise{ + DestRegister: 1, + SourceRegister: 1, + Len: 1, + Mask: []byte{0x02}, + Xor: []byte{0x00}, + }, + &expr.Cmp{ + Op: expr.CmpOpNeq, + Register: 1, + Data: []byte{0x00}, + }, + &expr.Rt{ + Register: 1, + Key: expr.RtTCPMSS, + }, + &expr.Byteorder{ + DestRegister: 1, + SourceRegister: 1, + Op: expr.ByteorderHton, + Len: 2, + Size: 2, + }, + &expr.Exthdr{ + SourceRegister: 1, + Type: 2, + Offset: 2, + Len: 2, + Op: expr.ExthdrOpTcpopt, + }, + }, + } + n.conn.AddRule(clampRule) + return n.conn.Flush() +} + // createTableIfNotExist creates a nftables table via connection c if it does not exist within 
the given family. func createTableIfNotExist(c *nftables.Conn, family nftables.TableFamily, name string) (*nftables.Table, error) { tables, err := c.ListTables() @@ -145,18 +392,23 @@ func isTSChain(name string) bool { // createChainIfNotExist creates a chain with the given name in the given table // if it does not exist. func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error { + _, err := getOrCreateChain(c, cinfo) + return err +} + +func getOrCreateChain(c *nftables.Conn, cinfo chainInfo) (*nftables.Chain, error) { chain, err := getChainFromTable(c, cinfo.table, cinfo.name) if err != nil && !errors.Is(err, errorChainNotFound{cinfo.table.Name, cinfo.name}) { - return fmt.Errorf("get chain: %w", err) + return nil, fmt.Errorf("get chain: %w", err) } else if err == nil { // The chain already exists. If it is a TS chain, check the // type/hook/priority, but for "conventional chains" assume they're what // we expect (in case iptables-nft/ufw make minor behavior changes in // the future). if isTSChain(chain.Name) && (chain.Type != cinfo.chainType || chain.Hooknum != cinfo.chainHook || chain.Priority != cinfo.chainPriority) { - return fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name) + return nil, fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name) } - return nil + return chain, nil } - _ = c.AddChain(&nftables.Chain{ + chain = c.AddChain(&nftables.Chain{ @@ -169,10 +421,10 @@ func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error { }) if err := c.Flush(); err != nil { - return fmt.Errorf("add chain: %w", err) + return nil, fmt.Errorf("add chain: %w", err) } - return nil + return chain, nil } // NetfilterRunner abstracts helpers to run netfilter commands. It is @@ -217,6 +469,28 @@ type NetfilterRunner interface { // HasIPV6NAT reports true if the system supports IPv6 NAT. 
HasIPV6NAT() bool + + // AddDNATRule adds a rule to the nat/PREROUTING chain to DNAT traffic + // destined for the given original destination to the given new destination. + // This is used to forward all traffic destined for the Tailscale interface + // to the provided destination, as used in the Kubernetes ingress proxies. + AddDNATRule(origDst, dst netip.Addr) error + + // AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT + // traffic destined for dst to src. + // This is used to forward traffic destined for the local machine over + // the Tailscale interface, as used in the Kubernetes egress proxies. + AddSNATRuleForDst(src, dst netip.Addr) error + + // DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT + // all traffic inbound from any interface except exemptInterface to dst. + // This is used to forward traffic destined for the local machine over + // the Tailscale interface, as used in the Kubernetes egress proxies. + DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error + + // ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for + // traffic destined for the provided tun interface. + ClampMSSToPMTU(tun string, addr netip.Addr) error } // New creates a NetfilterRunner using either nftables or iptables. 
diff --git a/wgengine/router/router_linux_test.go b/wgengine/router/router_linux_test.go index d77708f51..d16c72cdf 100644 --- a/wgengine/router/router_linux_test.go +++ b/wgengine/router/router_linux_test.go @@ -465,6 +465,22 @@ func (n *fakeIPTablesRunner) AddBase(tunname string) error { return nil } +func (n *fakeIPTablesRunner) AddDNATRule(origDst, dst netip.Addr) error { + return errors.New("not implemented") +} + +func (n *fakeIPTablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { + return errors.New("not implemented") +} + +func (n *fakeIPTablesRunner) DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error { + return errors.New("not implemented") +} + +func (n *fakeIPTablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { + return errors.New("not implemented") +} + func (n *fakeIPTablesRunner) addBase4(tunname string) error { curIPT := n.ipt4 newRules := []struct{ chain, rule string }{