Initial commit

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
irbekrm/extsvcnftableslb
Irbe Krumina 2 weeks ago
parent 03d5d1f0f9
commit d37f2f5085

@ -18,7 +18,9 @@
// previously advertised routes. To accept routes, use TS_EXTRA_ARGS to pass
// in --accept-routes.
// - TS_DEST_IP: proxy all incoming Tailscale traffic to the given
// destination.
// destination defined by an IP address.
// - TS_DEST_DNS_NAME: proxy all incoming Tailscale traffic to the given
// destination defined by a DNS name. The DNS name will be periodically resolved and firewall rules updated accordingly.
// - TS_TAILNET_TARGET_IP: proxy all incoming non-Tailscale traffic to the given
// destination defined by an IP.
// - TS_TAILNET_TARGET_FQDN: proxy all incoming non-Tailscale traffic to the given
@ -82,12 +84,14 @@ import (
"fmt"
"io/fs"
"log"
"net"
"net/netip"
"os"
"os/exec"
"os/signal"
"path/filepath"
"reflect"
"slices"
"strconv"
"strings"
"sync"
@ -122,7 +126,8 @@ func main() {
Hostname: defaultEnv("TS_HOSTNAME", ""),
Routes: defaultEnvStringPointer("TS_ROUTES"),
ServeConfigPath: defaultEnv("TS_SERVE_CONFIG", ""),
ProxyTo: defaultEnv("TS_DEST_IP", ""),
ProxyTargetIP: defaultEnv("TS_DEST_IP", ""),
ProxyTargetDNSName: defaultEnv("TS_DEST_DNS_NAME", ""),
TailnetTargetIP: defaultEnv("TS_TAILNET_TARGET_IP", ""),
TailnetTargetFQDN: defaultEnv("TS_TAILNET_TARGET_FQDN", ""),
DaemonExtraArgs: defaultEnv("TS_TAILSCALED_EXTRA_ARGS", ""),
@ -150,8 +155,8 @@ func main() {
if err := ensureTunFile(cfg.Root); err != nil {
log.Fatalf("Unable to create tuntap device file: %v", err)
}
if cfg.ProxyTo != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
if err := ensureIPForwarding(cfg.Root, cfg.ProxyTo, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
if cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.Routes != nil || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" {
if err := ensureIPForwarding(cfg.Root, cfg.ProxyTargetIP, cfg.ProxyTargetDNSName, cfg.TailnetTargetIP, cfg.TailnetTargetFQDN, cfg.Routes); err != nil {
log.Printf("Failed to enable IP forwarding: %v", err)
log.Printf("To run tailscale as a proxy or router container, IP forwarding must be enabled.")
if cfg.InKubernetes {
@ -341,7 +346,7 @@ authLoop:
}
var (
wantProxy = cfg.ProxyTo != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress
wantProxy = cfg.ProxyTargetIP != "" || cfg.ProxyTargetDNSName != "" || cfg.TailnetTargetIP != "" || cfg.TailnetTargetFQDN != "" || cfg.AllowProxyingClusterTrafficViaIngress
wantDeviceInfo = cfg.InKubernetes && cfg.KubeSecret != "" && cfg.KubernetesCanPatch
startupTasksDone = false
currentIPs deephash.Sum // tailscale IPs assigned to device
@ -349,6 +354,9 @@ authLoop:
currentEgressIPs deephash.Sum
addrs []netip.Prefix
backendAddrs []net.IP
certDomain = new(atomic.Pointer[string])
certDomainChanged = make(chan bool, 1)
)
@ -362,6 +370,16 @@ authLoop:
log.Fatalf("error creating new netfilter runner: %v", err)
}
}
// If we are proxying to a target specified by a DNS name, periodically
// resolve the DNS name and update firewall rules if the backend IPs
// have changed.
const proxyTargetIPsResolvePeriod = time.Minute * 10
var ts time.Ticker
if cfg.ProxyTargetDNSName != "" {
ts = *time.NewTicker(proxyTargetIPsResolvePeriod)
}
notifyChan := make(chan ipn.Notify)
errChan := make(chan error)
go func() {
@ -399,7 +417,7 @@ runLoop:
log.Fatalf("tailscaled left running state (now in state %q), exiting", *n.State)
}
if n.NetMap != nil {
addrs := n.NetMap.SelfNode.Addresses().AsSlice()
addrs = n.NetMap.SelfNode.Addresses().AsSlice()
newCurrentIPs := deephash.Hash(&addrs)
ipsHaveChanged := newCurrentIPs != currentIPs
@ -441,12 +459,32 @@ runLoop:
}
currentEgressIPs = newCurentEgressIPs
}
if cfg.ProxyTo != "" && len(addrs) > 0 && ipsHaveChanged {
if cfg.ProxyTargetIP != "" && len(addrs) > 0 && ipsHaveChanged {
log.Printf("Installing proxy rules")
if err := installIngressForwardingRule(ctx, cfg.ProxyTo, addrs, nfr); err != nil {
if err := installIngressForwardingRule(ctx, cfg.ProxyTargetIP, addrs, nfr); err != nil {
log.Fatalf("installing ingress proxy rules: %v", err)
}
}
if cfg.ProxyTargetDNSName != "" {
newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
if err != nil {
log.Printf("unable to resolve DNS name %s: %v, retrying in %s", cfg.ProxyTargetDNSName, err, proxyTargetIPsResolvePeriod)
continue
}
backendsHaveChanged := slices.CompareFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) int {
if ip1.Equal(ip2) {
return 0
}
return -1
})
if len(addrs) > 0 && (backendsHaveChanged != 0 || ipsHaveChanged) && len(newBackendAddrs) > 0 {
log.Printf("installing ingresss proxy rules for backends %v", newBackendAddrs)
if err := installIngressForwardingRuleExternalNameService(ctx, newBackendAddrs, addrs, nfr); err != nil {
log.Fatalf("error installing ingress proxy rules: %v", err)
}
}
backendAddrs = newBackendAddrs
}
if cfg.ServeConfigPath != "" && len(n.NetMap.DNS.CertDomains) > 0 {
cd := n.NetMap.DNS.CertDomains[0]
prev := certDomain.Swap(ptr.To(cd))
@ -511,12 +549,31 @@ runLoop:
os.Exit(0)
}
}
}
wg.Add(1)
go reaper()
}
}
case <-ts.C:
newBackendAddrs, err := resolveDNS(ctx, cfg.ProxyTargetDNSName)
if err != nil {
log.Printf("unable to resolve DNS name %s: %v, retrying in %s", cfg.ProxyTargetDNSName, err, proxyTargetIPsResolvePeriod.String())
continue
}
backendsHaveChanged := slices.CompareFunc(backendAddrs, newBackendAddrs, func(ip1 net.IP, ip2 net.IP) int {
if ip1.Equal(ip2) {
return 0
}
return -1
})
if backendsHaveChanged != 0 && len(newBackendAddrs) != 0 && len(addrs) != 0 {
log.Printf("Backend address change detected, installing proxy rules for backends %v", newBackendAddrs)
if err := installIngressForwardingRuleExternalNameService(ctx, newBackendAddrs, addrs, nfr); err != nil {
log.Fatalf("installing ingress proxy rules for DNS target %s: %v", cfg.ProxyTargetDNSName, err)
}
}
backendAddrs = newBackendAddrs
}
}
wg.Wait()
@ -757,12 +814,12 @@ func ensureTunFile(root string) error {
}
// ensureIPForwarding enables IPv4/IPv6 forwarding for the container.
func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
func ensureIPForwarding(root, clusterProxyTargetIP, clusterProxyTargetDNSName, tailnetTargetiP, tailnetTargetFQDN string, routes *string) error {
var (
v4Forwarding, v6Forwarding bool
)
if clusterProxyTarget != "" {
proxyIP, err := netip.ParseAddr(clusterProxyTarget)
if clusterProxyTargetIP != "" {
proxyIP, err := netip.ParseAddr(clusterProxyTargetIP)
if err != nil {
return fmt.Errorf("invalid cluster destination IP: %v", err)
}
@ -772,6 +829,26 @@ func ensureIPForwarding(root, clusterProxyTarget, tailnetTargetiP, tailnetTarget
v6Forwarding = true
}
}
if clusterProxyTargetDNSName != "" {
ips, err := resolveDNS(context.Background(), clusterProxyTargetDNSName)
if err != nil {
return fmt.Errorf("error resolving DNS name %s: %w", clusterProxyTargetDNSName, err)
}
for _, ip := range ips {
if ip.To4() != nil {
v4Forwarding = true
if v6Forwarding {
break
}
}
if ip.To16() != nil {
v6Forwarding = true
if v4Forwarding {
break
}
}
}
}
if tailnetTargetiP != "" {
proxyIP, err := netip.ParseAddr(tailnetTargetiP)
if err != nil {
@ -918,15 +995,77 @@ func installIngressForwardingRule(ctx context.Context, dstStr string, tsIPs []ne
return nil
}
func installIngressForwardingRuleExternalNameService(ctx context.Context, backendAddrs []net.IP, tsIPs []netip.Prefix, nfr linuxfw.NetfilterRunner) error {
var (
tsv4 netip.Addr
tsv6 netip.Addr
v4Backends []netip.Addr
v6Backends []netip.Addr
)
for _, pfx := range tsIPs {
if pfx.IsSingleIP() && pfx.Addr().Is4() {
tsv4 = pfx.Addr()
continue
}
if pfx.IsSingleIP() && pfx.Addr().Is6() {
tsv6 = pfx.Addr()
continue
}
}
for _, ip := range backendAddrs {
if ip.To4() != nil {
v4Backends = append(v4Backends, netip.AddrFrom4([4]byte(ip.To4())))
}
if ip.To16() != nil {
v6Backends = append(v6Backends, netip.AddrFrom16([16]byte(ip.To16())))
}
}
updateFirewall := func(dst netip.Addr, backendTargets []netip.Addr) error {
if err := nfr.DNATWithLoadBalancer(dst, backendTargets); err != nil {
return fmt.Errorf("installing DNAT rules for ingress backends %+#v: %w", backendTargets, err)
}
// The backend might advertize MSS higher than that of the
// tailscale interfaces. Clamp MSS of packets going out via
// tailscale0 interface to its MTU to prevent broken connections
// in environments where path MTU discovery is not working.
if err := nfr.ClampMSSToPMTU("tailscale0", dst); err != nil {
return fmt.Errorf("adding rule to clamp traffic via tailscale0: %v", err)
}
return nil
}
if len(v4Backends) != 0 {
if !tsv4.IsValid() {
log.Printf("backend targets %v contain at least one IPv4 address, but this node's Tailscale IPs do not contain a valid IPv4 address: %v", backendAddrs, tsIPs)
} else if err := updateFirewall(tsv4, v4Backends); err != nil {
return fmt.Errorf("Installing IPv4 firewall rules: %w", err)
}
}
if len(v6Backends) != 0 && !tsv6.IsValid() {
if !tsv6.IsValid() {
log.Printf("backend targets %v contain at least one IPv6 address, but this node's Tailscale IPs do not contain a valid IPv6 address: %v", backendAddrs, tsIPs)
} else if !nfr.HasIPV6NAT() {
log.Printf("backend targets %v contain at least one IPv6 address, but the chosen firewall mode does not support IPv6 NAT", backendAddrs)
} else if err := updateFirewall(tsv6, v6Backends); err != nil {
return fmt.Errorf("Installing IPv6 firewall rules: %w", err)
}
}
return nil
}
// settings is all the configuration for containerboot.
type settings struct {
AuthKey string
Hostname string
Routes *string
// ProxyTo is the destination IP to which all incoming
// ProxyTargetIP is the destination IP to which all incoming
// Tailscale traffic should be proxied. If empty, no proxying
// is done. This is typically a locally reachable IP.
ProxyTo string
ProxyTargetIP string
// ProxyTargetDNSName is a DNS name whose backing IP addresses all
// incoming Tailscale traffic should be proxied to.
ProxyTargetDNSName string
// TailnetTargetIP is the destination IP to which all incoming
// non-Tailscale traffic should be proxied. This is typically a
// Tailscale IP.
@ -966,9 +1105,15 @@ func (s *settings) validate() error {
return fmt.Errorf("error validating tailscaled configfile contents: %w", err)
}
}
if s.ProxyTo != "" && s.UserspaceMode {
if s.ProxyTargetIP != "" && s.UserspaceMode {
return errors.New("TS_DEST_IP is not supported with TS_USERSPACE")
}
if s.ProxyTargetDNSName != "" && s.UserspaceMode {
return errors.New("TS_DEST_DNS_NAME is not supported with TS_USERSPACE")
}
if s.ProxyTargetDNSName != "" && s.ProxyTargetIP != "" {
return errors.New("TS_DEST_DNS_NAME and TS_DEST_IP cannot both be set")
}
if s.TailnetTargetIP != "" && s.UserspaceMode {
return errors.New("TS_TAILNET_TARGET_IP is not supported with TS_USERSPACE")
}
@ -993,6 +1138,16 @@ func (s *settings) validate() error {
return nil
}
func resolveDNS(ctx context.Context, name string) ([]net.IP, error) {
ips, err := net.LookupIP(name)
if err != nil {
return nil, fmt.Errorf("error looking up IPs for DNS name %s: %w", name, err)
}
log.Printf("%s resolved to %v", name, ips)
return ips, nil
}
// defaultEnv returns the value of the given envvar name, or defVal if
// unset.
func defaultEnv(name, defVal string) string {

@ -109,8 +109,9 @@ type tailscaleSTSConfig struct {
ParentResourceUID string
ChildResourceLabels map[string]string
ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
ClusterTargetIP string // ingress target
ServeConfig *ipn.ServeConfig // if serve config is set, this is a proxy for Ingress
ClusterTargetIP string // ingress target IP
ClusterTargetDNSName string // ingress target DNS name
// If set to true, operator should configure containerboot to forward
// cluster traffic via the proxy set up for Kubernetes Ingress.
ForwardClusterTrafficViaL7IngressProxy bool
@ -536,6 +537,12 @@ func (a *tailscaleSTSReconciler) reconcileSTS(ctx context.Context, logger *zap.S
Value: sts.ClusterTargetIP,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
} else if sts.ClusterTargetDNSName != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_DEST_DNS_NAME",
Value: sts.ClusterTargetDNSName,
})
mak.Set(&ss.Spec.Template.Annotations, podAnnotationLastSetClusterIP, sts.ClusterTargetIP)
} else if sts.TailnetTargetIP != "" {
container.Env = append(container.Env, corev1.EnvVar{
Name: "TS_TAILNET_TARGET_IP",

@ -200,10 +200,14 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
}
a.mu.Lock()
if a.shouldExpose(svc) {
if a.shouldExposeClusterIP(svc) {
sts.ClusterTargetIP = svc.Spec.ClusterIP
a.managedIngressProxies.Add(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
} else if a.shouldExposeDNSName(svc) {
sts.ClusterTargetDNSName = svc.Spec.ExternalName
a.managedIngressProxies.Add(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
} else if ip := a.tailnetTargetAnnotation(svc); ip != "" {
sts.TailnetTargetIP = ip
a.managedEgressProxies.Add(svc.UID)
@ -297,15 +301,22 @@ func validateService(svc *corev1.Service) []string {
}
func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool {
return a.shouldExposeClusterIP(svc) || a.shouldExposeDNSName(svc)
}
func (a *ServiceReconciler) shouldExposeClusterIP(svc *corev1.Service) bool {
// Headless services can't be exposed, since there is no ClusterIP to
// forward to.
if svc.Spec.ClusterIP == "" || svc.Spec.ClusterIP == "None" {
return false
}
return a.hasLoadBalancerClass(svc) || a.hasExposeAnnotation(svc)
}
func (a *ServiceReconciler) shouldExposeDNSName(svc *corev1.Service) bool {
return a.hasExposeAnnotation(svc) && svc.Spec.Type == corev1.ServiceTypeExternalName && svc.Spec.ExternalName != ""
}
func (a *ServiceReconciler) hasLoadBalancerClass(svc *corev1.Service) bool {
return svc != nil &&
svc.Spec.Type == corev1.ServiceTypeLoadBalancer &&

@ -373,6 +373,30 @@ func (i *iptablesRunner) DNATNonTailscaleTraffic(tun string, dst netip.Addr) err
return table.Insert("nat", "PREROUTING", 1, "!", "-i", tun, "-j", "DNAT", "--to-destination", dst.String())
}
// DNATWithLoadBalancer adds DNAT rules to load balance all incoming traffic NOT
// destined to tailscale0 interface to provided destinations using round robin.
// NB: this function clears the nat PREROUTING chain on start, so it is only
// safe to use on systems where Tailscale is the only process that uses this
// chain (i.e containers).
func (i *iptablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
table := i.getIPTByAddr(dsts[0])
if err := table.ClearChain("nat", "PREROUTING"); err != nil && !isErrChainNotExist(err) {
// If clearing the PREROUTING chain fails, fail the whole operation. This
// rule is currently only used in Kubernetes containers where a
// failed container gets restarted which should hopefully fix things.
return fmt.Errorf("error clearing nat PREROUTING chain: %w", err)
}
// If dsts contain more than one address, for n := n in range(len(dsts)..2) route packets for every nth connection to dsts[n].
for i := len(dsts); i >= 2; i-- {
dst := dsts[i-1] // the order in which rules for addrs are installed does not matter
if err := table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-m", "statistic", "--mode", "nth", "--every", fmt.Sprint(i), "--packet", "0", "-j", "DNAT", "--to-destination", dst.String()); err != nil {
return fmt.Errorf("error adding DNAT rule for %s: %w", dst.String(), err)
}
}
// If the packet falls through to this rule, we route to the first destination in the list unconditionally.
return table.Append("nat", "PREROUTING", "--destination", origDst.String(), "-j", "DNAT", "--to-destination", dsts[0].String())
}
func (i *iptablesRunner) ClampMSSToPMTU(tun string, addr netip.Addr) error {
table := i.getIPTByAddr(addr)
return table.Append("mangle", "FORWARD", "-o", tun, "-p", "tcp", "--tcp-flags", "SYN,RST", "SYN", "-j", "TCPMSS", "--clamp-mss-to-pmtu")

@ -16,6 +16,7 @@ import (
"strings"
"github.com/google/nftables"
"github.com/google/nftables/binaryutil"
"github.com/google/nftables/expr"
"golang.org/x/sys/unix"
"tailscale.com/net/tsaddr"
@ -114,7 +115,6 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
dadderLen = 16
fam = unix.NFPROTO_IPV6
}
dnatRule := &nftables.Rule{
Table: nat,
Chain: preroutingCh,
@ -145,6 +145,91 @@ func (n *nftablesRunner) AddDNATRule(origDst netip.Addr, dst netip.Addr) error {
return n.conn.Flush()
}
// This function does set up nftables rules to load balance traffic to the
// backend targets as expected. However, if the same client makes frequent
// connections, the connections are frequently dropped. TODO (irbekrm):
// investigate why the connections are dropped.
func (n *nftablesRunner) DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error {
nat, preroutingCh, err := n.ensurePreroutingChain(dsts[0])
if err != nil {
return fmt.Errorf("error ensuring PREROUTING chain in nat table: %w", err)
}
// Figure out if we are dealing with IPv4 or IPv6 addresses and set
// parameters accordingly.
var (
dstsMapValType = nftables.TypeIPAddr
origDstIPHeaderOffset uint32 = 16
origDstIPHeaderLen uint32 = 4
fam = nftables.TableFamilyIPv4
)
if dsts[0].Is6() {
dstsMapValType = nftables.TypeIP6Addr
origDstIPHeaderOffset = 24
origDstIPHeaderLen = 16
fam = nftables.TableFamilyIPv6
}
mapElements := make([]nftables.SetElement, len(dsts))
for i, addr := range dsts {
mapElements[i] = nftables.SetElement{
Key: binaryutil.BigEndian.PutUint32(uint32(i)),
Val: addr.AsSlice(),
}
}
dstsMap := &nftables.Set{
Table: nat,
KeyByteOrder: binaryutil.NativeEndian,
KeyType: nftables.TypeInteger,
DataType: dstsMapValType,
IsMap: true,
Anonymous: true,
Constant: true, // Anonymous sets must be constant (unmodifiable)
}
if err := n.conn.AddSet(dstsMap, mapElements); err != nil {
return fmt.Errorf("error creating a new map: %w", err)
}
dnatRule := &nftables.Rule{
Table: nat,
Chain: preroutingCh,
Exprs: []expr.Any{
&expr.Payload{
DestRegister: 1,
Base: expr.PayloadBaseNetworkHeader,
Offset: origDstIPHeaderOffset,
Len: origDstIPHeaderLen,
},
&expr.Cmp{
Op: expr.CmpOpEq,
Register: 1,
Data: origDst.AsSlice(),
},
&expr.Numgen{
Register: 1,
Type: unix.NFT_NG_INCREMENTAL,
Modulus: uint32(len(dsts)),
Offset: 0,
},
&expr.Lookup{
SourceRegister: 1,
DestRegister: 2,
SetName: dstsMap.Name,
SetID: dstsMap.ID,
IsDestRegSet: true,
},
&expr.NAT{
Type: expr.NATTypeDestNAT,
Family: uint32(fam),
RegAddrMin: 2,
},
},
}
n.conn.InsertRule(dnatRule)
return n.conn.Flush()
}
func (n *nftablesRunner) DNATNonTailscaleTraffic(tunname string, dst netip.Addr) error {
nat, preroutingCh, err := n.ensurePreroutingChain(dst)
if err != nil {
@ -524,6 +609,14 @@ type NetfilterRunner interface {
// to the provided destination, as used in the Kubernetes ingress proxies.
AddDNATRule(origDst, dst netip.Addr) error
// DNATWithLoadBalancer adds a rule to the nat/PREROUTING chain to DNAT
// traffic destined for the given original destination to the given new
// destination(s) using round robin to load balance if more than one
// destination is provided. This is used to forward all traffic destined
// for the Tailscale interface to the provided destination(s), as used
// in the Kubernetes ingress proxies.
DNATWithLoadBalancer(origDst netip.Addr, dsts []netip.Addr) error
// AddSNATRuleForDst adds a rule to the nat/POSTROUTING chain to SNAT
// traffic destined for dst to src.
// This is used to forward traffic destined for the local machine over
@ -533,7 +626,7 @@ type NetfilterRunner interface {
// DNATNonTailscaleTraffic adds a rule to the nat/PREROUTING chain to DNAT
// all traffic inbound from any interface except exemptInterface to dst.
// This is used to forward traffic destined for the local machine over
// the Tailscale interface, as used in the Kubernetes egress proxies.//
// the Tailscale interface, as used in the Kubernetes egress proxies.
DNATNonTailscaleTraffic(exemptInterface string, dst netip.Addr) error
// ClampMSSToPMTU adds a rule to the mangle/FORWARD chain to clamp MSS for

Loading…
Cancel
Save