diff --git a/util/linuxfw/nftables_runner.go b/util/linuxfw/nftables_runner.go index ec9ef1ae5..7cfd0c5c7 100644 --- a/util/linuxfw/nftables_runner.go +++ b/util/linuxfw/nftables_runner.go @@ -62,6 +62,11 @@ type nftable struct { // - The table and chain conventions followed here are those used by // `iptables-nft` and `ufw`, so that those tools co-exist and do not // negatively affect Tailscale function. +// - Be mindful that 1) all chains attached to a given hook (e.g. the forward hook) +// will be processed in priority order till either a rule in one of the chains issues a drop verdict +// or there are no more chains for that hook +// 2) processing of individual rules within a chain will stop once one of them issues a final verdict (accept, drop). +// https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains type nftablesRunner struct { conn *nftables.Conn nft4 *nftable @@ -238,6 +243,25 @@ func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error { return n.conn.Flush() } +// ClampMSSToPMTU ensures that all packets with TCP flags (SYN, ACK, RST) set +// being forwarded via the given interface (tun) have MSS set to the MTU - 40 (IP and TCP headers). This can be useful if this tailscale +// instance is expected to run as a forwarding proxy, forwarding packets from an +// endpoint with higher MTU in an environment where path MTU discovery is +// expected to not work (such as the proxies created by the Tailscale Kubernetes +// operator). ClampMSSToPMTU creates a new base-chain ts-clamp in the filter +// table with accept policy and priority -150. In practice, this means that for +// SYN packets the clamp rule in this chain will likely run first and accept the +// packet. 
This is fine because 1) nftables runs ALL chains with the same hook +// type unless a rule in one of them drops the packet and 2) this chain does not +// have functionality to drop the packet - so in practice a matching clamp rule +// will always be followed by the custom tailscale filtering rules in the other +// chains attached to the filter hook (FORWARD, ts-forward). +// We do not want to place the clamping rule into FORWARD/ts-forward chains +// because wgengine populates those chains with rules that contain accept +// verdicts that would cause no further processing within that chain. This +// functionality is currently invoked from outside wgengine (containerboot), so +// we don't want to race with wgengine for rule ordering within chains. func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { polAccept := nftables.ChainPolicyAccept table := n.getNFTByAddr(addr) @@ -246,13 +270,13 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { return fmt.Errorf("error ensuring filter table: %w", err) } - // ensure forwarding chain exists + // ensure ts-clamp chain exists fwChain, err := getOrCreateChain(n.conn, chainInfo{ table: filterTable, - name: "FORWARD", + name: "ts-clamp", chainType: nftables.ChainTypeFilter, chainHook: nftables.ChainHookForward, - chainPriority: nftables.ChainPriorityFilter, + chainPriority: nftables.ChainPriorityMangle, chainPolicy: &polAccept, }) if err != nil { @@ -289,7 +313,7 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error { Xor: []byte{0x00}, }, &expr.Cmp{ - Op: expr.CmpOpNeq, + Op: expr.CmpOpNeq, // match any packet with a TCP flag set (SYN, ACK, RST) Register: 1, Data: []byte{0x00}, },