You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
tailscale/util/linuxfw/nftables_for_svcs.go

246 lines
7.6 KiB
Go

cmd/containerboot,kube,util/linuxfw: configure kube egress proxies to route to 1+ tailnet targets (#13531) * cmd/containerboot,kube,util/linuxfw: configure kube egress proxies to route to 1+ tailnet targets This commit is first part of the work to allow running multiple replicas of the Kubernetes operator egress proxies per tailnet service + to allow exposing multiple tailnet services via each proxy replica. This expands the existing iptables/nftables-based proxy configuration mechanism. A proxy can now be configured to route to one or more tailnet targets via a (mounted) config file that, for each tailnet target, specifies: - the target's tailnet IP or FQDN - mappings of container ports to which cluster workloads will send traffic to tailnet target ports where the traffic should be forwarded. Example configfile contents: { "some-svc": {"tailnetTarget":{"fqdn":"foo.tailnetxyz.ts.net","ports"{"tcp:4006:80":{"protocol":"tcp","matchPort":4006,"targetPort":80},"tcp:4007:443":{"protocol":"tcp","matchPort":4007,"targetPort":443}}}} } A proxy that is configured with this config file will configure firewall rules to route cluster traffic to the tailnet targets. It will then watch the config file for updates as well as monitor relevant netmap updates and reconfigure firewall as needed. This adds a bunch of new iptables/nftables functionality to make it easier to dynamically update the firewall rules without needing to restart the proxy Pod as well as to make it easier to debug/understand the rules: - for iptables, each portmapping is a DNAT rule with a comment pointing at the 'service',i.e: -A PREROUTING ! -i tailscale0 -p tcp -m tcp --dport 4006 -m comment --comment "some-svc:tcp:4006 -> tcp:80" -j DNAT --to-destination 100.64.1.18:80 Additionally there is a SNAT rule for each tailnet target, to mask the source address. - for nftables, a separate prerouting chain is created for each tailnet target and all the portmapping rules are placed in that chain. This makes it easier to look up rules and delete services when no longer needed. (nftables allows hooking a custom chain to a prerouting hook, so no extra work is needed to ensure that the rules in the service chains are evaluated). The next steps will be to get the Kubernetes Operator to generate the configfile and ensure it is mounted to the relevant proxy nodes. Updates tailscale/tailscale#13406 Signed-off-by: Irbe Krumina <irbe@tailscale.com>
1 month ago
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
//go:build linux
package linuxfw
import (
"errors"
"fmt"
"net/netip"
"reflect"
"strings"
"github.com/google/nftables"
"github.com/google/nftables/binaryutil"
"github.com/google/nftables/expr"
"golang.org/x/sys/unix"
)
// This file contains functionality that is currently (09/2024) used to set up
// routing for the Tailscale Kubernetes operator egress proxies. A tailnet
// service (identified by tailnet IP or FQDN) that gets exposed to cluster
// workloads gets a separate prerouting chain created for it for each IP family
// of the chain's target addresses. Each service's prerouting chain contains one
// or more portmapping rules. A portmapping rule DNATs traffic received on a
// particular port to a port of the tailnet service. Creating a chain per
// service makes it easier to delete a service when no longer needed and helps
// with readability.
// EnsurePortMapRuleForSvc:
// - ensures that nat table exists
// - ensures that there is a prerouting chain for the given service and IP family of the target address in the nat table
// - ensures that there is a portmapping rule mathcing the given portmap (only creates the rule if it does not already exist)
func (n *nftablesRunner) EnsurePortMapRuleForSvc(svc, tun string, targetIP netip.Addr, pm PortMap) error {
t, ch, err := n.ensureChainForSvc(svc, targetIP)
if err != nil {
return fmt.Errorf("error ensuring chain for %s: %w", svc, err)
}
meta := svcPortMapRuleMeta(svc, targetIP, pm)
rule, err := n.findRuleByMetadata(t, ch, meta)
if err != nil {
return fmt.Errorf("error looking up rule: %w", err)
}
if rule != nil {
return nil
}
p, err := protoFromString(pm.Protocol)
if err != nil {
return fmt.Errorf("error converting protocol %s: %w", pm.Protocol, err)
}
rule = portMapRule(t, ch, tun, targetIP, pm.MatchPort, pm.TargetPort, p, meta)
n.conn.InsertRule(rule)
return n.conn.Flush()
}
// DeletePortMapRuleForSvc deletes a portmapping rule in the given service/IP family chain.
// It finds the matching rule using metadata attached to the rule.
// The caller is expected to call DeleteSvc if the whole service (the chain)
// needs to be deleted, so we don't deal with the case where this is the only
// rule in the chain here.
func (n *nftablesRunner) DeletePortMapRuleForSvc(svc, tun string, targetIP netip.Addr, pm PortMap) error {
table, err := n.getNFTByAddr(targetIP)
if err != nil {
return fmt.Errorf("error setting up nftables for IP family of %s: %w", targetIP, err)
}
t, err := getTableIfExists(n.conn, table.Proto, "nat")
if err != nil {
return fmt.Errorf("error checking if nat table exists: %w", err)
}
if t == nil {
return nil
}
ch, err := getChainFromTable(n.conn, t, svc)
if err != nil && !errors.Is(err, errorChainNotFound{t.Name, svc}) {
return fmt.Errorf("error checking if chain %s exists: %w", svc, err)
}
if errors.Is(err, errorChainNotFound{t.Name, svc}) {
return nil // service chain does not exist, so neither does the portmapping rule
}
meta := svcPortMapRuleMeta(svc, targetIP, pm)
rule, err := n.findRuleByMetadata(t, ch, meta)
if err != nil {
return fmt.Errorf("error checking if rule exists: %w", err)
}
if rule == nil {
return nil
}
if err := n.conn.DelRule(rule); err != nil {
return fmt.Errorf("error deleting rule: %w", err)
}
return n.conn.Flush()
}
// DeleteSvc deletes the chains for the given service if any exist.
func (n *nftablesRunner) DeleteSvc(svc, tun string, targetIPs []netip.Addr, pm []PortMap) error {
for _, tip := range targetIPs {
table, err := n.getNFTByAddr(tip)
if err != nil {
return fmt.Errorf("error setting up nftables for IP family of %s: %w", tip, err)
}
t, err := getTableIfExists(n.conn, table.Proto, "nat")
if err != nil {
return fmt.Errorf("error checking if nat table exists: %w", err)
}
if t == nil {
return nil
}
ch, err := getChainFromTable(n.conn, t, svc)
if err != nil && !errors.Is(err, errorChainNotFound{t.Name, svc}) {
return fmt.Errorf("error checking if chain %s exists: %w", svc, err)
}
if errors.Is(err, errorChainNotFound{t.Name, svc}) {
return nil
}
n.conn.DelChain(ch)
}
return n.conn.Flush()
}
func portMapRule(t *nftables.Table, ch *nftables.Chain, tun string, targetIP netip.Addr, matchPort, targetPort uint16, proto uint8, meta []byte) *nftables.Rule {
var fam uint32
if targetIP.Is4() {
fam = unix.NFPROTO_IPV4
} else {
fam = unix.NFPROTO_IPV6
}
rule := &nftables.Rule{
Table: t,
Chain: ch,
UserData: meta,
Exprs: []expr.Any{
&expr.Meta{Key: expr.MetaKeyOIFNAME, Register: 1},
&expr.Cmp{
Op: expr.CmpOpNeq,
Register: 1,
Data: []byte(tun),
},
&expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: []byte{proto},
},
&expr.Payload{
DestRegister: 1,
Base: expr.PayloadBaseTransportHeader,
Offset: 2,
Len: 2,
},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: binaryutil.BigEndian.PutUint16(matchPort),
},
&expr.Immediate{
Register: 1,
Data: targetIP.AsSlice(),
},
&expr.Immediate{
Register: 2,
Data: binaryutil.BigEndian.PutUint16(targetPort),
},
&expr.NAT{
Type: expr.NATTypeDestNAT,
Family: fam,
RegAddrMin: 1,
RegAddrMax: 1,
RegProtoMin: 2,
RegProtoMax: 2,
},
},
}
return rule
}
// svcPortMapRuleMeta generates metadata for a rule.
// This metadata can then be used to find the rule.
// https://github.com/google/nftables/issues/48
func svcPortMapRuleMeta(svcName string, targetIP netip.Addr, pm PortMap) []byte {
return []byte(fmt.Sprintf("svc:%s,targetIP:%s:matchPort:%v,targetPort:%v,proto:%v", svcName, targetIP.String(), pm.MatchPort, pm.TargetPort, pm.Protocol))
}
func (n *nftablesRunner) findRuleByMetadata(t *nftables.Table, ch *nftables.Chain, meta []byte) (*nftables.Rule, error) {
if n.conn == nil || t == nil || ch == nil || len(meta) == 0 {
return nil, nil
}
rules, err := n.conn.GetRules(t, ch)
if err != nil {
return nil, fmt.Errorf("error listing rules: %w", err)
}
for _, rule := range rules {
if reflect.DeepEqual(rule.UserData, meta) {
return rule, nil
}
}
return nil, nil
}
func (n *nftablesRunner) ensureChainForSvc(svc string, targetIP netip.Addr) (*nftables.Table, *nftables.Chain, error) {
polAccept := nftables.ChainPolicyAccept
table, err := n.getNFTByAddr(targetIP)
if err != nil {
return nil, nil, fmt.Errorf("error setting up nftables for IP family of %v: %w", targetIP, err)
}
nat, err := createTableIfNotExist(n.conn, table.Proto, "nat")
if err != nil {
return nil, nil, fmt.Errorf("error ensuring nat table: %w", err)
}
svcCh, err := getOrCreateChain(n.conn, chainInfo{
table: nat,
name: svc,
chainType: nftables.ChainTypeNAT,
chainHook: nftables.ChainHookPrerouting,
chainPriority: nftables.ChainPriorityNATDest,
chainPolicy: &polAccept,
})
if err != nil {
return nil, nil, fmt.Errorf("error ensuring prerouting chain: %w", err)
}
return nat, svcCh, nil
}
// // PortMap is the port mapping for a service rule.
type PortMap struct {
// MatchPort is the local port to which the rule should apply.
MatchPort uint16
// TargetPort is the port to which the traffic should be forwarded.
TargetPort uint16
// Protocol is the protocol to match packets on. Only TCP and UDP are
// supported.
Protocol string
}
func protoFromString(s string) (uint8, error) {
switch strings.ToLower(s) {
case "tcp":
return unix.IPPROTO_TCP, nil
case "udp":
return unix.IPPROTO_UDP, nil
default:
return 0, fmt.Errorf("unrecognized protocol: %q", s)
}
}