cmd/containerboot: use linuxfw.NetfilterRunner

This migrates containerboot to reuse the NetfilterRunner used
by tailscaled instead of manipulating iptables rule itself.
This has the added advantage of now working with nftables and
we can potentially drop the `iptables` command from the container
image in the future.

Updates #9310

Co-authored-by: Irbe Krumina <irbe@tailscale.com>
Signed-off-by: Maisem Ali <maisem@tailscale.com>
pull/9752/head
Maisem Ali 1 year ago committed by Maisem Ali
parent 7a0de2997e
commit fbfee6a8c0

@ -84,10 +84,19 @@ import (
"golang.org/x/sys/unix"
"tailscale.com/client/tailscale"
"tailscale.com/ipn"
"tailscale.com/types/logger"
"tailscale.com/types/ptr"
"tailscale.com/util/deephash"
"tailscale.com/util/linuxfw"
)
func newNetfilterRunner(logf logger.Logf) (linuxfw.NetfilterRunner, error) {
if defaultBool("TS_TEST_FAKE_NETFILTER", false) {
return linuxfw.NewFakeIPTablesRunner(), nil
}
return linuxfw.New(logf)
}
func main() {
log.SetPrefix("boot: ")
tailscale.I_Acknowledge_This_API_Is_Unstable = true
@ -295,6 +304,13 @@ authLoop:
if cfg.ServeConfigPath != "" {
go watchServeConfigChanges(ctx, cfg.ServeConfigPath, certDomainChanged, certDomain, client)
}
var nfr linuxfw.NetfilterRunner
if wantProxy {
nfr, err = newNetfilterRunner(log.Printf)
if err != nil {
log.Fatalf("error creating new netfilter runner: %v", err)
}
}
for {
n, err := w.Next()
if err != nil {
@ -315,7 +331,7 @@ authLoop:
ipsHaveChanged := newCurrentIPs != currentIPs
if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged {
log.Printf("Installing proxy rules")
if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs); err != nil {
if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil {
log.Fatalf("installing ingress proxy rules: %v", err)
}
}
@ -330,7 +346,7 @@ authLoop:
}
}
if cfg.TailnetTargetIP != "" && ipsHaveChanged && len(addrs) > 0 {
if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs); err != nil {
if err := installEgressForwardingRule(ctx, cfg.TailnetTargetIP, addrs, nfr); err != nil {
log.Fatalf("installing egress proxy rules: %v", err)
}
}
@ -662,16 +678,12 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, routes string
return nil
}
func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error {
func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
dst, err := netip.ParseAddr(dstStr)
if err != nil {
return err
}
argv0 := "iptables"
if dst.Is6() {
argv0 = "ip6tables"
}
var local string
var local netip.Addr
for _, pfx := range tsIPs {
if !pfx.IsSingleIP() {
continue
@ -679,52 +691,30 @@ func installEgressForwardingRule(ctx context.Context, dstStr string, tsIPs []net
if pfx.Addr().Is4() != dst.Is4() {
continue
}
local = pfx.Addr().String()
local = pfx.Addr()
break
}
if local == "" {
if !local.IsValid() {
return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs)
}
// Technically, if the control server ever changes the IPs assigned to this
// node, we'll slowly accumulate iptables rules. This shouldn't happen, so
// for now we'll live with it.
// Set up a rule that ensures that all packets
// except for those received on tailscale0 interface is forwarded to
// destination address
cmdDNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "!", "-i", "tailscale0", "-j", "DNAT", "--to-destination", dstStr)
cmdDNAT.Stdout = os.Stdout
cmdDNAT.Stderr = os.Stderr
if err := cmdDNAT.Run(); err != nil {
return fmt.Errorf("executing iptables failed: %w", err)
}
// Set up a rule that ensures that all packets sent to the destination
// address will have the proxy's IP set as source IP
cmdSNAT := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "POSTROUTING", "1", "--destination", dstStr, "-j", "SNAT", "--to-source", local)
cmdSNAT.Stdout = os.Stdout
cmdSNAT.Stderr = os.Stderr
if err := cmdSNAT.Run(); err != nil {
return fmt.Errorf("setting up SNAT via iptables failed: %w", err)
}
cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
cmdClamp.Stdout = os.Stdout
cmdClamp.Stderr = os.Stderr
if err := cmdClamp.Run(); err != nil {
return fmt.Errorf("executing iptables failed: %w", err)
if err := nfr.DNATNonTailscaleTraffic("tailscale0", dst); err != nil {
return fmt.Errorf("installing egress proxy rules: %w", err)
}
if err := nfr.AddSNATRuleForDst(local, dst); err != nil {
return fmt.Errorf("installing egress proxy rules: %w", err)
}
if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
return fmt.Errorf("installing egress proxy rules: %w", err)
}
return nil
}
func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix) error {
func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
dst, err := netip.ParseAddr(dstStr)
if err != nil {
return err
}
argv0 := "iptables"
if dst.Is6() {
argv0 = "ip6tables"
}
var local string
var local netip.Addr
for _, pfx := range tsIPs {
if !pfx.IsSingleIP() {
continue
@ -732,26 +722,17 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
if pfx.Addr().Is4() != dst.Is4() {
continue
}
local = pfx.Addr().String()
local = pfx.Addr()
break
}
if local == "" {
if !local.IsValid() {
return fmt.Errorf("no tailscale IP matching family of %s found in %v", dstStr, tsIPs)
}
// Technically, if the control server ever changes the IPs assigned to this
// node, we'll slowly accumulate iptables rules. This shouldn't happen, so
// for now we'll live with it.
cmd := exec.CommandContext(ctx, argv0, "-t", "nat", "-I", "PREROUTING", "1", "-d", local, "-j", "DNAT", "--to-destination", dstStr)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Run(); err != nil {
return fmt.Errorf("executing iptables failed: %w", err)
if err := nfr.AddDNATRule(local, dst); err != nil {
return fmt.Errorf("installing ingress proxy rules: %w", err)
}
cmdClamp := exec.CommandContext(ctx, argv0, "-t", "mangle", "-A", "FORWARD", "-o", "tailscale0", "-p", "tcp", "-m", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
cmdClamp.Stdout = os.Stdout
cmdClamp.Stderr = os.Stderr
if err := cmdClamp.Run(); err != nil {
return fmt.Errorf("executing iptables failed: %w", err)
if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
return fmt.Errorf("installing ingress proxy rules: %w", err)
}
return nil
}

@ -340,8 +340,6 @@ func TestContainerBoot(t *testing.T) {
Notify: runningNotify,
WantCmds: []string{
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false",
"/usr/bin/iptables -t nat -I PREROUTING 1 -d 100.64.0.1 -j DNAT --to-destination 1.2.3.4",
"/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu",
},
},
},
@ -365,9 +363,6 @@ func TestContainerBoot(t *testing.T) {
Notify: runningNotify,
WantCmds: []string{
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false",
"/usr/bin/iptables -t nat -I PREROUTING 1 ! -i tailscale0 -j DNAT --to-destination 100.99.99.99",
"/usr/bin/iptables -t nat -I POSTROUTING 1 --destination 100.99.99.99 -j SNAT --to-source 100.64.0.1",
"/usr/bin/iptables -t mangle -A FORWARD -o tailscale0 -p tcp -m tcp --tcp-flags SYN,RST SYN -j TCPMSS --clamp-mss-to-pmtu",
},
},
},
@ -694,6 +689,7 @@ func TestContainerBoot(t *testing.T) {
fmt.Sprintf("TS_TEST_SOCKET=%s", lapi.Path),
fmt.Sprintf("TS_SOCKET=%s", runningSockPath),
fmt.Sprintf("TS_TEST_ONLY_ROOT=%s", d),
fmt.Sprint("TS_TEST_FAKE_NETFILTER=true"),
}
for k, v := range test.Env {
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))

@ -291,6 +291,26 @@ func (i *iptablesRunner) addBase4(tunname string) error {
return nil
}
func (i *iptablesRunner) AddDNATRule(origDst, dst netip.Addr) error {
table := i.getIPTByAddr(dst)
return table.Insert("nat", "PREROUTING", 1, "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dst.String())
}
func (i *iptablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error {
table := i.getIPTByAddr(dst)
return table.Insert("nat", "POSTROUTING", 1, "--destination", dst.String(), "-j", "SNAT", "--to-source", src.String())
}
func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) error {
table := i.getIPTByAddr(dst)
return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String())
}
func (i *iptablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
table := i.getIPTByAddr(addr)
return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")
}
// addBase6 adds some basic IPv4 processing rules to be
// supplemented by later calls to other helpers.
func (i *iptablesRunner) addBase6(tunname string) error {

@ -17,6 +17,7 @@ import (
"github.com/google/nftables"
"github.com/google/nftables/expr"
"golang.org/x/sys/unix"
"tailscale.com/net/tsaddr"
"tailscale.com/types/logger"
)
@ -69,6 +70,252 @@ type nftablesRunner struct {
v6NATAvailable bool
}
func (n *nftablesRunner) ensurePreroutingChain(dst netip.Addr) (*nftables.Table, *nftables.Chain, error) {
polAccept := nftables.ChainPolicyAccept
table := n.getNFTByAddr(dst)
nat, err := createTableIfNotExist(n.conn, table.Proto, "nat")
if err != nil {
return nil, nil, fmt.Errorf("error ensuring nat table: %w", err)
}
// ensure prerouting chain exists
preroutingCh, err := getOrCreateChain(n.conn, chainInfo{
table: nat,
name: "PREROUTING",
chainType: nftables.ChainTypeNAT,
chainHook: nftables.ChainHookPrerouting,
chainPriority: nftables.ChainPriorityNATDest,
chainPolicy: &polAccept,
})
if err != nil {
return nil, nil, fmt.Errorf("error ensuring prerouting chain: %w", err)
}
return nat, preroutingCh, nil
}
func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
nat, preroutingCh, err := n.ensurePreroutingChain(dst)
if err != nil {
return err
}
var daddrOffset, fam, dadderLen uint32
if origDst.Is4() {
daddrOffset = 16
dadderLen = 4
fam = unix.NFPROTO_IPV4
} else {
daddrOffset = 24
dadderLen = 16
fam = unix.NFPROTO_IPV6
}
dnatRule := &nftables.Rule{
Table: nat,
Chain: preroutingCh,
Exprs: []expr.Any{
&expr.Payload{
DestRegister: 1,
Base: expr.PayloadBaseNetworkHeader,
Offset: daddrOffset,
Len: dadderLen,
},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: origDst.AsSlice(),
},
&expr.Immediate{
Register: 1,
Data: dst.AsSlice(),
},
&expr.NAT{
Type: expr.NATTypeDestNAT,
Family: fam,
RegAddrMin: 1,
},
},
}
n.conn.InsertRule(dnatRule)
return n.conn.Flush()
}
func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error {
nat, preroutingCh, err := n.ensurePreroutingChain(dst)
if err != nil {
return err
}
var famConst uint32
if dst.Is4() {
famConst = unix.NFPROTO_IPV4
} else {
famConst = unix.NFPROTO_IPV6
}
dnatRule := &nftables.Rule{
Table: nat,
Chain: preroutingCh,
Exprs: []expr.Any{
&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
&expr.Cmp{
Op: expr.CmpOpNeq,
Register: 1,
Data: []byte(tunname),
},
&expr.Immediate{
Register: 1,
Data: dst.AsSlice(),
},
&expr.NAT{
Type: expr.NATTypeDestNAT,
Family: famConst,
RegAddrMin: 1,
},
},
}
n.conn.AddRule(dnatRule)
return n.conn.Flush()
}
func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error {
polAccept := nftables.ChainPolicyAccept
table := n.getNFTByAddr(dst)
nat, err := createTableIfNotExist(n.conn, table.Proto, "nat")
if err != nil {
return fmt.Errorf("error ensuring nat table exists: %w", err)
}
// ensure postrouting chain exists
postRoutingCh, err := getOrCreateChain(n.conn, chainInfo{
table: nat,
name: "POSTROUTING",
chainType: nftables.ChainTypeNAT,
chainHook: nftables.ChainHookPostrouting,
chainPriority: nftables.ChainPriorityNATSource,
chainPolicy: &polAccept,
})
if err != nil {
return fmt.Errorf("error ensuring postrouting chain: %w", err)
}
var daddrOffset, fam, daddrLen uint32
if dst.Is4() {
daddrOffset = 16
daddrLen = 4
fam = unix.NFPROTO_IPV4
} else {
daddrOffset = 24
daddrLen = 16
fam = unix.NFPROTO_IPV6
}
snatRule := &nftables.Rule{
Table: nat,
Chain: postRoutingCh,
Exprs: []expr.Any{
&expr.Payload{
DestRegister: 1,
Base: expr.PayloadBaseNetworkHeader,
Offset: daddrOffset,
Len: daddrLen,
},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: dst.AsSlice(),
},
&expr.Immediate{
Register: 1,
Data: src.AsSlice(),
},
&expr.NAT{
Type: expr.NATTypeSourceNAT,
Family: fam,
RegAddrMin: 1,
},
},
}
n.conn.AddRule(snatRule)
return n.conn.Flush()
}
func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
polAccept := nftables.ChainPolicyAccept
table := n.getNFTByAddr(addr)
filterTable, err := createTableIfNotExist(n.conn, table.Proto, "filter")
if err != nil {
return fmt.Errorf("error ensuring filter table: %w", err)
}
// ensure forwarding chain exists
fwChain, err := getOrCreateChain(n.conn, chainInfo{
table: filterTable,
name: "FORWARD",
chainType: nftables.ChainTypeFilter,
chainHook: nftables.ChainHookForward,
chainPriority: nftables.ChainPriorityFilter,
chainPolicy: &polAccept,
})
if err != nil {
return fmt.Errorf("error ensuring forward chain: %w", err)
}
clampRule := &nftables.Rule{
Table: filterTable,
Chain: fwChain,
Exprs: []expr.Any{
&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: []byte(tun),
},
&expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: []byte{unix.IPPROTO_TCP},
},
&expr.Payload{
DestRegister: 1,
Base: expr.PayloadBaseTransportHeader,
Offset: 13,
Len: 1,
},
&expr.Bitwise{
DestRegister: 1,
SourceRegister: 1,
Len: 1,
Mask: []byte{0x02},
Xor: []byte{0x00},
},
&expr.Cmp{
Op: expr.CmpOpNeq,
Register: 1,
Data: []byte{0x00},
},
&expr.Rt{
Register: 1,
Key: expr.RtTCPMSS,
},
&expr.Byteorder{
DestRegister: 1,
SourceRegister: 1,
Op: expr.ByteorderHton,
Len: 2,
Size: 2,
},
&expr.Exthdr{
SourceRegister: 1,
Type: 2,
Offset: 2,
Len: 2,
Op: expr.ExthdrOpTcpopt,
},
},
}
n.conn.AddRule(clampRule)
return n.conn.Flush()
}
// createTableIfNotExist creates a nftables table via connection c if it does not exist within the given family.
func createTableIfNotExist(c *nftables.Conn, family nftables.TableFamily, name string) (*nftables.Table, error) {
tables, err := c.ListTables()
@ -145,18 +392,23 @@ func isTSChain(name string) bool {
// createChainIfNotExist creates a chain with the given name in the given table
// if it does not exist.
func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error {
_, err := getOrCreateChain(c, cinfo)
return err
}
func getOrCreateChain(c *nftables.Conn, cinfo chainInfo) (*nftables.Chain, error) {
chain, err := getChainFromTable(c, cinfo.table, cinfo.name)
if err != nil && !errors.Is(err, errorChainNotFound{cinfo.table.Name, cinfo.name}) {
return fmt.Errorf("get chain: %w", err)
return nil, fmt.Errorf("get chain: %w", err)
} else if err == nil {
// The chain already exists. If it is a TS chain, check the
// type/hook/priority, but for "conventional chains" assume they're what
// we expect (in case iptables-nft/ufw make minor behavior changes in
// the future).
if isTSChain(chain.Name) && (chain.Type != cinfo.chainType || chain.Hooknum != cinfo.chainHook || chain.Priority != cinfo.chainPriority) {
return fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name)
return nil, fmt.Errorf("chain %s already exists with different type/hook/priority", cinfo.name)
}
return nil
return chain, nil
}
_ = c.AddChain(&nftables.Chain{
@ -169,10 +421,10 @@ func createChainIfNotExist(c *nftables.Conn, cinfo chainInfo) error {
})
if err := c.Flush(); err != nil {
return fmt.Errorf("add chain: %w", err)
return nil, fmt.Errorf("add chain: %w", err)
}
return nil
return chain, nil
}
// NetfilterRunner abstracts helpers to run netfilter commands. It is
@ -217,6 +469,28 @@ type NetfilterRunner interface {
// HasIPV6NAT reports true if the system supports IPv6 NAT.
HasIPV6NAT() bool
// AddDNATRule adds a rule to the nat/PREROUTING chain to DNAT traffic
// destined for the given original destination to the given new destination.
// This is used to forward all traffic destined for the Tailscale interface
// to the provided destination, as used in the Kubernetes ingress proxies.
AddDNATRule(origDst, dst netip.Addr) error
// AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT
// traffic destined for dst to src.
// This is used to forward traffic destined for the local machine over
// the Tailscale interface, as used in the Kubernetes egress proxies.
AddSNATRuleForDst(src, dst netip.Addr) error
// DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT
// all traffic inbound from any interface except exemptInterface to dst.
// This is used to forward traffic destined for the local machine over
// the Tailscale interface, as used in the Kubernetes egress proxies.//
DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error
// ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for
// traffic destined for the provided tun interface.
ClampMSSToPMTU(tun string, addr netip.Addr) error
}
// New creates a NetfilterRunner using either nftables or iptables.

@ -465,6 +465,22 @@ func (n *fakeIPTablesRunner) AddBase(tunname string) error {
return nil
}
func (n *fakeIPTablesRunner) AddDNATRule(origDst, dst netip.Addr) error {
return errors.New("not implemented")
}
func (n *fakeIPTablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error {
return errors.New("not implemented")
}
func (n *fakeIPTablesRunner) DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error {
return errors.New("not implemented")
}
func (n *fakeIPTablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
return errors.New("not implemented")
}
func (n *fakeIPTablesRunner) addBase4(tunname string) error {
curIPT := n.ipt4
newRules := []struct{ chain, rule string }{

Loading…
Cancel
Save