From fbfee6a8c0ea33373d033d6d63a8542d3023edbb Mon Sep 17 00:00:00 2001 From: Maisem Ali Date: Wed, 11 Oct 2023 07:26:40 -0700 Subject: [PATCH] cmd/containerboot: use linuxfw.NetfilterRunner This migrates containerboot to reuse the NetfilterRunner used by tailscaled instead of manipulating iptables rule itself. This has the added advantage of now working with nftables and we can potentially drop the `iptables` command from the container image in the future. Updates #9310 Co-authored-by: Irbe Krumina Signed-off-by: Maisem Ali --- cmd/containerboot/main.go | 95 ++++----- cmd/containerboot/main_test.go | 6 +- util/linuxfw/iptables_runner.go | 20 ++ util/linuxfw/nftables_runner.go | 284 ++++++++++++++++++++++++++- wgengine/router/router_linux_test.go | 16 ++ 5 files changed, 354 insertions(+), 67 deletions(-) diff --git a/cmd/containerboot/main.go b/cmd/containerboot/main.go index 09dc5f146..09f4a8198 100644 --- a/cmd/containerboot/main.go +++ b/cmd/containerboot/main.go @@ -84,10 +84,19 @@ import ( "golang.org/x/sys/unix" "tailscale.com/client/tailscale" "tailscale.com/ipn" + "tailscale.com/types/logger" "tailscale.com/types/ptr" "tailscale.com/util/deephash" + "tailscale.com/util/linuxfw" ) +func newNetfilterRunner(logf logger.Logf) (linuxfw.NetfilterRunner, error) { + if defaultBool("TS_TEST_FAKE_NETFILTER", false) { + return linuxfw.NewFakeIPTablesRunner(), nil + } + return linuxfw.New(logf) +} + func main() { log.SetPrefix("boot: ") tailscale.I_Acknowledge_This_API_Is_Unstable = true @@ -295,6 +304,13 @@ authLoop: if cfg.ServeConfigPath != "" { go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client) } + var nfr linuxfw.NetfilterRunner + if wantProxy { + nfr, err = newNetfilterRunner(log.Printf) + if err != nil { + log.Fatalf("error creating new netfilter runner: %v", err) + } + } for { n, err := w.Next() if err != nil { @@ -315,7 +331,7 @@ authLoop: ipsHaveChanged := newCurrentIPs != currentIPs if cfg.ProxyTo != "" && len(addrs) 
> 0 && ipsHaveChanged { log.Printf("Installing proxy rules") - if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs); err != nil { + if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil { log.Fatalf("installing ingress proxy rules: %v", err) } } @@ -330,7 +346,7 @@ authLoop: } } if cfg.TailnetTargetIP != "" && ipsHaveChanged && len(addrs) > 0 { - if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs); err != nil { + if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs, nfr); err != nil { log.Fatalf("installing egress proxy rules: %v", err) } } @@ -662,16 +678,12 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, routes string return nil } -func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error { +func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { dst, err := netip.ParseAddr(dstStr) if err != nil { return err } - argv0 := "iptables" - if dst.Is6() { - argv0 = "ip6tables" - } - var local string + var local netip.Addr for _, pfx := range tsIPs { if !pfx.IsSingleIP() { continue @@ -679,52 +691,30 @@ func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []net if pfx.Addr().Is4() != dst.Is4() { continue } - local = pfx.Addr().String() + local = pfx.Addr() break } - if local == "" { + if !local.IsValid() { return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) } - // Technically, if the control server ever changes the IPs assigned to this - // node, we'll slowly accumulate iptables rules. This shouldn't happen, so - // for now we'll live with it. 
- // Set up a rule that ensures that all packets - // except for those received on tailscale0 interface is forwarded to - // destination address - cmdDNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "!", "-i", "tailscale0", "-j", "DNAT", "--to-destination", dstStr) - cmdDNAT.Stdout = os.Stdout - cmdDNAT.Stderr = os.Stderr - if err := cmdDNAT.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) - } - // Set up a rule that ensures that all packets sent to the destination - // address will have the proxy's IP set as source IP - cmdSNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "POSTROUTING", "1", "--destination", dstStr, "-j", "SNAT", "--to-source", local) - cmdSNAT.Stdout = os.Stdout - cmdSNAT.Stderr = os.Stderr - if err := cmdSNAT.Run(); err != nil { - return fmt.Errorf("setting up SNAT via iptables failed: %w", err) - } - - cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu") - cmdClamp.Stdout = os.Stdout - cmdClamp.Stderr = os.Stderr - if err := cmdClamp.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err := nfr.DNATNonTailscaleTraffic("tailscale0", dst); err != nil { + return fmt.Errorf("installing egress proxy rules: %w", err) + } + if err := nfr.AddSNATRuleForDst(local, dst); err != nil { + return fmt.Errorf("installing egress proxy rules: %w", err) + } + if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { + return fmt.Errorf("installing egress proxy rules: %w", err) } return nil } -func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error { +func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error { dst, err := netip.ParseAddr(dstStr) if err != nil { return err } - argv0 := "iptables" - if dst.Is6() { - 
argv0 = "ip6tables" - } - var local string + var local netip.Addr for _, pfx := range tsIPs { if !pfx.IsSingleIP() { continue @@ -732,26 +722,17 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne if pfx.Addr().Is4() != dst.Is4() { continue } - local = pfx.Addr().String() + local = pfx.Addr() break } - if local == "" { + if !local.IsValid() { return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs) } - // Technically, if the control server ever changes the IPs assigned to this - // node, we'll slowly accumulate iptables rules. This shouldn't happen, so - // for now we'll live with it. - cmd := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "-d", local, "-j", "DNAT", "--to-destination", dstStr) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - if err := cmd.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err := nfr.AddDNATRule(local, dst); err != nil { + return fmt.Errorf("installing ingress proxy rules: %w", err) } - cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu") - cmdClamp.Stdout = os.Stdout - cmdClamp.Stderr = os.Stderr - if err := cmdClamp.Run(); err != nil { - return fmt.Errorf("executing iptables failed: %w", err) + if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil { + return fmt.Errorf("installing ingress proxy rules: %w", err) } return nil } diff --git a/cmd/containerboot/main_test.go b/cmd/containerboot/main_test.go index b7147b75f..7a5280b6e 100644 --- a/cmd/containerboot/main_test.go +++ b/cmd/containerboot/main_test.go @@ -340,8 +340,6 @@ func TestContainerBoot(t *testing.T) { Notify: runningNotify, WantCmds: []string{ "/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false", - "/usr/bin/iptables -t nat -I PREROUTING 1 -d 100.64.0.1 -j DNAT --to-destination 
1.2.3.4", - "/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu", }, }, }, @@ -365,9 +363,6 @@ func TestContainerBoot(t *testing.T) { Notify: runningNotify, WantCmds: []string{ "/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false", - "/usr/bin/iptables -t nat -I PREROUTING 1 ! -i tailscale0 -j DNAT --to-destination 100.99.99.99", - "/usr/bin/iptables -t nat -I POSTROUTING 1 --destination 100.99.99.99 -j SNAT --to-source 100.64.0.1", - "/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu", }, }, }, @@ -694,6 +689,7 @@ func TestContainerBoot(t *testing.T) { fmt.Sprintf("TS_TEST_SOCKET=%s", lapi.Path), fmt.Sprintf("TS_SOCKET=%s", runningSockPath), fmt.Sprintf("TS_TEST_ONLY_ROOT=%s", d), + "TS_TEST_FAKE_NETFILTER=true", } for k, v := range test.Env { cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v)) diff --git a/util/linuxfw/iptables_runner.go b/util/linuxfw/iptables_runner.go index d703190bc..f7fe2f0f4 100644 --- a/util/linuxfw/iptables_runner.go +++ b/util/linuxfw/iptables_runner.go @@ -291,6 +291,26 @@ func (i *iptablesRunner) addBase4(tunname string) error { return nil } +func (i *iptablesRunner) AddDNATRule(origDst, dst netip.Addr) error { + table := i.getIPTByAddr(dst) + return table.Insert("nat", "PREROUTING", 1, "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dst.String()) +} + +func (i *iptablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { + table := i.getIPTByAddr(dst) + return table.Insert("nat", "POSTROUTING", 1, "--destination", dst.String(), "-j", "SNAT", "--to-source", src.String()) +} + +func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) error { + table := i.getIPTByAddr(dst) + return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String()) +} + +func (i *iptablesRunner) 
ClampMSSToPMTU(tun string, addr netip.Addr) error { + table := i.getIPTByAddr(addr) + return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu") +} + // addBase6 adds some basic IPv4 processing rules to be // supplemented by later calls to other helpers. func (i *iptablesRunner) addBase6(tunname string) error { diff --git a/util/linuxfw/nftables_runner.go b/util/linuxfw/nftables_runner.go index d87610dda..0d438d9f5 100644 --- a/util/linuxfw/nftables_runner.go +++ b/util/linuxfw/nftables_runner.go @@ -17,6 +17,7 @@ import ( "github.com/google/nftables" "github.com/google/nftables/expr" + "golang.org/x/sys/unix" "tailscale.com/net/tsaddr" "tailscale.com/types/logger" ) @@ -69,6 +70,252 @@ type nftablesRunner struct { v6NATAvailable bool } +func (n *nftablesRunner) ensurePreroutingChain(dst netip.Addr) (*nftables.Table, *nftables.Chain, error) { + polAccept := nftables.ChainPolicyAccept + table := n.getNFTByAddr(dst) + nat, err := createTableIfNotExist(n.conn, table.Proto, "nat") + if err != nil { + return nil, nil, fmt.Errorf("error ensuring nat table: %w", err) + } + + // ensure prerouting chain exists + preroutingCh, err := getOrCreateChain(n.conn, chainInfo{ + table: nat, + name: "PREROUTING", + chainType: nftables.ChainTypeNAT, + chainHook: nftables.ChainHookPrerouting, + chainPriority: nftables.ChainPriorityNATDest, + chainPolicy: &polAccept, + }) + if err != nil { + return nil, nil, fmt.Errorf("error ensuring prerouting chain: %w", err) + } + return nat, preroutingCh, nil +} + +func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error { + nat, preroutingCh, err := n.ensurePreroutingChain(dst) + if err != nil { + return err + } + var daddrOffset, fam, dadderLen uint32 + if origDst.Is4() { + daddrOffset = 16 + dadderLen = 4 + fam = unix.NFPROTO_IPV4 + } else { + daddrOffset = 24 + dadderLen = 16 + fam = unix.NFPROTO_IPV6 + } + + dnatRule := &nftables.Rule{ + Table: 
nat, + Chain: preroutingCh, + Exprs: []expr.Any{ + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseNetworkHeader, + Offset: daddrOffset, + Len: dadderLen, + }, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: origDst.AsSlice(), + }, + &expr.Immediate{ + Register: 1, + Data: dst.AsSlice(), + }, + &expr.NAT{ + Type: expr.NATTypeDestNAT, + Family: fam, + RegAddrMin: 1, + }, + }, + } + n.conn.InsertRule(dnatRule) + return n.conn.Flush() +} + +func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error { + nat, preroutingCh, err := n.ensurePreroutingChain(dst) + if err != nil { + return err + } + var famConst uint32 + if dst.Is4() { + famConst = unix.NFPROTO_IPV4 + } else { + famConst = unix.NFPROTO_IPV6 + } + + dnatRule := &nftables.Rule{ + Table: nat, + Chain: preroutingCh, + Exprs: []expr.Any{ + &expr.Meta{Key: expr.MetaKeyIIFNAME, Register: 1}, + &expr.Cmp{ + Op: expr.CmpOpNeq, + Register: 1, + Data: []byte(tunname), + }, + &expr.Immediate{ + Register: 1, + Data: dst.AsSlice(), + }, + &expr.NAT{ + Type: expr.NATTypeDestNAT, + Family: famConst, + RegAddrMin: 1, + }, + }, + } + n.conn.AddRule(dnatRule) + return n.conn.Flush() +} + +func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { + polAccept := nftables.ChainPolicyAccept + table := n.getNFTByAddr(dst) + nat, err := createTableIfNotExist(n.conn, table.Proto, "nat") + if err != nil { + return fmt.Errorf("error ensuring nat table exists: %w", err) + } + + // ensure postrouting chain exists + postRoutingCh, err := getOrCreateChain(n.conn, chainInfo{ + table: nat, + name: "POSTROUTING", + chainType: nftables.ChainTypeNAT, + chainHook: nftables.ChainHookPostrouting, + chainPriority: nftables.ChainPriorityNATSource, + chainPolicy: &polAccept, + }) + if err != nil { + return fmt.Errorf("error ensuring postrouting chain: %w", err) + } + var daddrOffset, fam, daddrLen uint32 + if dst.Is4() { + daddrOffset = 16 + daddrLen = 4 + fam = unix.NFPROTO_IPV4 + } else { 
+ daddrOffset = 24 + daddrLen = 16 + fam = unix.NFPROTO_IPV6 + } + + snatRule := &nftables.Rule{ + Table: nat, + Chain: postRoutingCh, + Exprs: []expr.Any{ + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseNetworkHeader, + Offset: daddrOffset, + Len: daddrLen, + }, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: dst.AsSlice(), + }, + &expr.Immediate{ + Register: 1, + Data: src.AsSlice(), + }, + &expr.NAT{ + Type: expr.NATTypeSourceNAT, + Family: fam, + RegAddrMin: 1, + }, + }, + } + n.conn.AddRule(snatRule) + return n.conn.Flush() +} + +func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { + polAccept := nftables.ChainPolicyAccept + table := n.getNFTByAddr(addr) + filterTable, err := createTableIfNotExist(n.conn, table.Proto, "filter") + if err != nil { + return fmt.Errorf("error ensuring filter table: %w", err) + } + + // ensure forwarding chain exists + fwChain, err := getOrCreateChain(n.conn, chainInfo{ + table: filterTable, + name: "FORWARD", + chainType: nftables.ChainTypeFilter, + chainHook: nftables.ChainHookForward, + chainPriority: nftables.ChainPriorityFilter, + chainPolicy: &polAccept, + }) + if err != nil { + return fmt.Errorf("error ensuring forward chain: %w", err) + } + + clampRule := &nftables.Rule{ + Table: filterTable, + Chain: fwChain, + Exprs: []expr.Any{ + &expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1}, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: []byte(tun), + }, + &expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1}, + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: []byte{unix.IPPROTO_TCP}, + }, + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseTransportHeader, + Offset: 13, + Len: 1, + }, + &expr.Bitwise{ + DestRegister: 1, + SourceRegister: 1, + Len: 1, + Mask: []byte{0x02}, + Xor: []byte{0x00}, + }, + &expr.Cmp{ + Op: expr.CmpOpNeq, + Register: 1, + Data: []byte{0x00}, + }, + &expr.Rt{ + Register: 1, + Key: expr.RtTCPMSS, + }, + &expr.Byteorder{ + DestRegister: 1, + 
SourceRegister: 1, + Op: expr.ByteorderHton, + Len: 2, + Size: 2, + }, + &expr.Exthdr{ + SourceRegister: 1, + Type: 2, + Offset: 2, + Len: 2, + Op: expr.ExthdrOpTcpopt, + }, + }, + } + n.conn.AddRule(clampRule) + return n.conn.Flush() +} + // createTableIfNotExist creates a nftables table via connection c if it does not exist within the given family. func createTableIfNotExist(c *nftables.Conn, family nftables.TableFamily, name string) (*nftables.Table, error) { tables, err := c.ListTables() @@ -145,18 +392,23 @@ func isTSChain(name string) bool { // createChainIfNotExist creates a chain with the given name in the given table // if it does not exist. func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error { + _, err := getOrCreateChain(c, cinfo) + return err +} + +func getOrCreateChain(c *nftables.Conn, cinfo chainInfo) (*nftables.Chain, error) { chain, err := getChainFromTable(c, cinfo.table, cinfo.name) if err != nil && !errors.Is(err, errorChainNotFound{cinfo.table.Name, cinfo.name}) { - return fmt.Errorf("get chain: %w", err) + return nil, fmt.Errorf("get chain: %w", err) } else if err == nil { // The chain already exists. If it is a TS chain, check the // type/hook/priority, but for "conventional chains" assume they're what // we expect (in case iptables-nft/ufw make minor behavior changes in // the future). 
if isTSChain(chain.Name) && (chain.Type != cinfo.chainType || chain.Hooknum != cinfo.chainHook || chain.Priority != cinfo.chainPriority) { - return fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name) + return nil, fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name) } - return nil + return chain, nil } - _ = c.AddChain(&nftables.Chain{ + chain = c.AddChain(&nftables.Chain{ @@ -169,10 +421,10 @@ func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error { }) if err := c.Flush(); err != nil { - return fmt.Errorf("add chain: %w", err) + return nil, fmt.Errorf("add chain: %w", err) } - return nil + return chain, nil } // NetfilterRunner abstracts helpers to run netfilter commands. It is @@ -217,6 +469,28 @@ type NetfilterRunner interface { // HasIPV6NAT reports true if the system supports IPv6 NAT. HasIPV6NAT() bool + + // AddDNATRule adds a rule to the nat/PREROUTING chain to DNAT traffic + // destined for the given original destination to the given new destination. + // This is used to forward all traffic destined for the Tailscale interface + // to the provided destination, as used in the Kubernetes ingress proxies. + AddDNATRule(origDst, dst netip.Addr) error + + // AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT + // traffic destined for dst to src. + // This is used to forward traffic destined for the local machine over + // the Tailscale interface, as used in the Kubernetes egress proxies. + AddSNATRuleForDst(src, dst netip.Addr) error + + // DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT + // all traffic inbound from any interface except exemptInterface to dst. 
+ // This is used to forward traffic destined for the local machine over + // the Tailscale interface, as used in the Kubernetes egress proxies. + DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error + + // ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for + // traffic destined for the provided tun interface. + ClampMSSToPMTU(tun string, addr netip.Addr) error } // New creates a NetfilterRunner using either nftables or iptables. diff --git a/wgengine/router/router_linux_test.go b/wgengine/router/router_linux_test.go index d77708f51..d16c72cdf 100644 --- a/wgengine/router/router_linux_test.go +++ b/wgengine/router/router_linux_test.go @@ -465,6 +465,22 @@ func (n *fakeIPTablesRunner) AddBase(tunname string) error { return nil } +func (n *fakeIPTablesRunner) AddDNATRule(origDst, dst netip.Addr) error { + return errors.New("not implemented") +} + +func (n *fakeIPTablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { + return errors.New("not implemented") +} + +func (n *fakeIPTablesRunner) DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error { + return errors.New("not implemented") +} + +func (n *fakeIPTablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { + return errors.New("not implemented") +} + func (n *fakeIPTablesRunner) addBase4(tunname string) error { curIPT := n.ipt4 newRules := []struct{ chain, rule string }{