@ -62,6 +62,11 @@ type nftable struct {
// - The table and chain conventions followed here are those used by
// - The table and chain conventions followed here are those used by
// `iptables-nft` and `ufw`, so that those tools co-exist and do not
// `iptables-nft` and `ufw`, so that those tools co-exist and do not
// negatively affect Tailscale function.
// negatively affect Tailscale function.
// - Be mindful that 1) all chains attached to a given hook (e.g. the forward hook)
// will be processed in priority order until either a rule in one of the chains issues a drop verdict
// or there are no more chains for that hook, and
// 2) processing of individual rules within a chain will stop once one of them issues a final verdict (accept, drop).
// https://wiki.nftables.org/wiki-nftables/index.php/Configuring_chains
type nftablesRunner struct {
type nftablesRunner struct {
conn * nftables . Conn
conn * nftables . Conn
nft4 * nftable
nft4 * nftable
@ -238,6 +243,25 @@ func (n *nftablesRunner) AddSNATRuleForDst(src, dst netip.Addr) error {
return n . conn . Flush ( )
return n . conn . Flush ( )
}
}
// ClampMSSToPMTU ensures that all packets with TCP flags (SYN, ACK, RST) set
// being forwarded via the given interface (tun) have MSS set to <MTU of the
// interface> - 40 (IP and TCP headers). This can be useful if this tailscale
// instance is expected to run as a forwarding proxy, forwarding packets from an
// endpoint with higher MTU in an environment where path MTU discovery is
// expected to not work (such as the proxies created by the Tailscale Kubernetes
// operator). ClampMSSToPMTU creates a new base-chain ts-clamp in the filter
// table with accept policy and priority -150. In practice, this means that for
// SYN packets the clamp rule in this chain will likely run first and accept the
// packet. This is fine because 1) nftables runs ALL chains with the same hook
// type unless a rule in one of them drops the packet and 2) this chain does not
// have functionality to drop the packet, so in practice a matching clamp rule
// will always be followed by the custom tailscale filtering rules in the other
// chains attached to the filter hook (FORWARD, ts-forward).
// We do not want to place the clamping rule into FORWARD/ts-forward chains
// because wgengine populates those chains with rules that contain accept
// verdicts that would cause no further processing within that chain. This
// functionality is currently invoked from outside wgengine (containerboot), so
// we don't want to race with wgengine for rule ordering within chains.
func ( n * nftablesRunner ) ClampMSSToPMTU ( tun string , addr netip . Addr ) error {
func ( n * nftablesRunner ) ClampMSSToPMTU ( tun string , addr netip . Addr ) error {
polAccept := nftables . ChainPolicyAccept
polAccept := nftables . ChainPolicyAccept
table := n . getNFTByAddr ( addr )
table := n . getNFTByAddr ( addr )
@ -246,13 +270,13 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
return fmt . Errorf ( "error ensuring filter table: %w" , err )
return fmt . Errorf ( "error ensuring filter table: %w" , err )
}
}
// ensure forwarding chain exists
// ensure ts-clamp chain exists
fwChain , err := getOrCreateChain ( n . conn , chainInfo {
fwChain , err := getOrCreateChain ( n . conn , chainInfo {
table : filterTable ,
table : filterTable ,
name : " FORWARD ",
name : " ts-clamp ",
chainType : nftables . ChainTypeFilter ,
chainType : nftables . ChainTypeFilter ,
chainHook : nftables . ChainHookForward ,
chainHook : nftables . ChainHookForward ,
chainPriority : nftables . ChainPriority Filter ,
chainPriority : nftables . ChainPriority Mangle ,
chainPolicy : & polAccept ,
chainPolicy : & polAccept ,
} )
} )
if err != nil {
if err != nil {
@ -289,7 +313,7 @@ func (n *nftablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
Xor : [ ] byte { 0x00 } ,
Xor : [ ] byte { 0x00 } ,
} ,
} ,
& expr . Cmp {
& expr . Cmp {
Op : expr . CmpOpNeq ,
Op : expr . CmpOpNeq , // match any packet with a TCP flag set (SYN, ACK, RST)
Register : 1 ,
Register : 1 ,
Data : [ ] byte { 0x00 } ,
Data : [ ] byte { 0x00 } ,
} ,
} ,