diff --git a/syncs/syncs.go b/syncs/syncs.go index 46861af63..1f0558865 100644 --- a/syncs/syncs.go +++ b/syncs/syncs.go @@ -79,6 +79,16 @@ func (b *AtomicBool) Set(v bool) { atomic.StoreInt32((*int32)(b), n) } +// Swap sets b to v and reports whether it changed. +func (b *AtomicBool) Swap(v bool) (changed bool) { + var n int32 + if v { + n = 1 + } + old := atomic.SwapInt32((*int32)(b), n) + return old != n +} + func (b *AtomicBool) Get() bool { return atomic.LoadInt32((*int32)(b)) != 0 } diff --git a/wgengine/monitor/monitor.go b/wgengine/monitor/monitor.go index 8ee7087ce..1f13ec5ce 100644 --- a/wgengine/monitor/monitor.go +++ b/wgengine/monitor/monitor.go @@ -62,6 +62,7 @@ type Mon struct { mu sync.Mutex // guards all following fields cbs map[*callbackHandle]ChangeFunc + ruleDelCB map[*callbackHandle]RuleDeleteCallback ifState *interfaces.State gwValid bool // whether gw and gwSelfIP are valid gw netaddr.IP // our gateway's IP @@ -148,6 +149,30 @@ func (m *Mon) RegisterChangeCallback(callback ChangeFunc) (unregister func()) { } } +// RuleDeleteCallback is a callback invoked when a Linux IP policy routing +// rule is deleted. The table is the table number (52, 253, 254) and +// priority is the priority order number (for Tailscale rules +// currently: 5210, 5230, 5250, 5270). +type RuleDeleteCallback func(table uint8, priority uint32) + +// RegisterRuleDeleteCallback adds callback to the set of parties to be +// notified (in their own goroutine) when a Linux ip rule is deleted. +// To remove this callback, call unregister (or close the monitor). +func (m *Mon) RegisterRuleDeleteCallback(callback RuleDeleteCallback) (unregister func()) { + handle := new(callbackHandle) + m.mu.Lock() + defer m.mu.Unlock() + if m.ruleDelCB == nil { + m.ruleDelCB = map[*callbackHandle]RuleDeleteCallback{} + } + m.ruleDelCB[handle] = callback + return func() { + m.mu.Lock() + defer m.mu.Unlock() + delete(m.ruleDelCB, handle) + } +} + // Start starts the monitor. 
// A monitor can only be started & closed once. func (m *Mon) Start() { @@ -242,6 +267,10 @@ func (m *Mon) pump() { time.Sleep(time.Second) continue } + if rdm, ok := msg.(ipRuleDeletedMessage); ok { + m.notifyRuleDeleted(rdm) + continue + } if msg.ignore() { continue } @@ -249,6 +278,14 @@ func (m *Mon) pump() { } } +func (m *Mon) notifyRuleDeleted(rdm ipRuleDeletedMessage) { + m.mu.Lock() + defer m.mu.Unlock() + for _, cb := range m.ruleDelCB { + go cb(rdm.table, rdm.priority) + } +} + // debounce calls the callback function with a delay between events // and exits when a stop is issued. func (m *Mon) debounce() { @@ -338,3 +375,10 @@ func (m *Mon) checkWallTimeAdvanceLocked() { } m.lastWall = now } + +type ipRuleDeletedMessage struct { + table uint8 + priority uint32 +} + +func (ipRuleDeletedMessage) ignore() bool { return true } diff --git a/wgengine/monitor/monitor_linux.go b/wgengine/monitor/monitor_linux.go index fa85cf4d4..c8c8b1ea8 100644 --- a/wgengine/monitor/monitor_linux.go +++ b/wgengine/monitor/monitor_linux.go @@ -134,7 +134,10 @@ func (c *nlConn) Receive() (message, error) { // On `ip -4 rule del pref 5210 table main`, logs: // monitor: ip rule deleted: {Family:2 DstLength:0 SrcLength:0 Tos:0 Table:254 Protocol:0 Scope:0 Type:1 Flags:0 Attributes:{Dst: Src: Gateway: OutIface:0 Priority:5210 Table:254 Mark:4294967295 Expires: Metrics: Multipath:[]}} } - return ipRuleDeletedMessage{}, nil + return ipRuleDeletedMessage{ + table: rmsg.Table, + priority: rmsg.Attributes.Priority, + }, nil default: c.logf("unhandled netlink msg type %+v, %q", msg.Header, msg.Data) return unspecifiedMessage{}, nil @@ -192,7 +195,3 @@ func (m *newAddrMessage) ignore() bool { type ignoreMessage struct{} func (ignoreMessage) ignore() bool { return true } - -type ipRuleDeletedMessage struct{} - -func (ipRuleDeletedMessage) ignore() bool { return false } diff --git a/wgengine/router/router_linux.go b/wgengine/router/router_linux.go index ee9e4a565..1699eb423 100644 --- 
a/wgengine/router/router_linux.go +++ b/wgengine/router/router_linux.go @@ -13,12 +13,15 @@ import ( "os/exec" "strconv" "strings" + "time" "github.com/coreos/go-iptables/iptables" "github.com/go-multierror/multierror" + "golang.org/x/time/rate" "golang.zx2c4.com/wireguard/tun" "inet.af/netaddr" "tailscale.com/net/tsaddr" + "tailscale.com/syncs" "tailscale.com/types/logger" "tailscale.com/types/preftype" "tailscale.com/version/distro" @@ -95,15 +98,22 @@ type netfilterRunner interface { } type linuxRouter struct { + closed syncs.AtomicBool logf func(fmt string, args ...interface{}) tunname string linkMon *monitor.Mon + unregLinkMon func() addrs map[netaddr.IPPrefix]bool routes map[netaddr.IPPrefix]bool localRoutes map[netaddr.IPPrefix]bool snatSubnetRoutes bool netfilterMode preftype.NetfilterMode + // ruleRestorePending is whether a timer has been started to + // restore deleted ip rules. + ruleRestorePending syncs.AtomicBool + ipRuleFixLimiter *rate.Limiter + // Various feature checks for the network stack. ipRuleAvailable bool v6Available bool @@ -151,7 +161,7 @@ func newUserspaceRouter(logf logger.Logf, tunDev tun.Device, linkMon *monitor.Mo func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, linkMon *monitor.Mon, netfilter4, netfilter6 netfilterRunner, cmd commandRunner, supportsV6, supportsV6NAT bool) (Router, error) { ipRuleAvailable := (cmd.run("ip", "rule") == nil) - return &linuxRouter{ + r := &linuxRouter{ logf: logf, tunname: tunname, netfilterMode: netfilterOff, @@ -164,10 +174,52 @@ func newUserspaceRouterAdvanced(logf logger.Logf, tunname string, linkMon *monit ipt4: netfilter4, ipt6: netfilter6, cmd: cmd, - }, nil + + ipRuleFixLimiter: rate.NewLimiter(rate.Every(5*time.Second), 10), + } + + return r, nil +} + +// onIPRuleDeleted is the callback from the link monitor for when an IP policy +// rule is deleted. See Issue 1591. 
+// +// If an ip rule is deleted (with pref number 52xx, as Tailscale sets), then +// set a timer to restore our rules, in case they were deleted. The timer lets +// us do one fixup in response to a batch of rule deletes. It also lets us +// delay arbitrarily to prevent a high-speed fight over the rule between +// competing processes. (Although empirically, systemd doesn't fight us +// like that... yet.) +// +// Note that we don't care about the table number. We don't strictly even care +// about the priority number. We could just do this in response to any netlink +// change. Filtering by known priority ranges cuts back on some logspam. +func (r *linuxRouter) onIPRuleDeleted(table uint8, priority uint32) { + if priority < 5200 || priority >= 5300 { + // Not our rule. + return + } + if !r.ruleRestorePending.Swap(true) { + // Another timer is already pending. + return + } + rr := r.ipRuleFixLimiter.Reserve() + if !rr.OK() { + r.ruleRestorePending.Swap(false) + return + } + time.AfterFunc(rr.Delay()+250*time.Millisecond, func() { + if r.ruleRestorePending.Swap(false) && !r.closed.Get() { + r.logf("somebody (likely systemd-networkd) deleted ip rules; restoring Tailscale's") + r.justAddIPRules() + } + }) } func (r *linuxRouter) Up() error { + if r.unregLinkMon == nil && r.linkMon != nil { + r.unregLinkMon = r.linkMon.RegisterRuleDeleteCallback(r.onIPRuleDeleted) + } if err := r.delLegacyNetfilter(); err != nil { return err } @@ -185,6 +237,10 @@ func (r *linuxRouter) Up() error { } func (r *linuxRouter) Close() error { + r.closed.Set(true) + if r.unregLinkMon != nil { + r.unregLinkMon() + } if err := r.downInterface(); err != nil { return err } @@ -565,6 +621,15 @@ func (r *linuxRouter) addIPRules() error { return err } + return r.justAddIPRules() +} + +// justAddIPRules adds policy routing rules without deleting any first. 
+func (r *linuxRouter) justAddIPRules() error { + if !r.ipRuleAvailable { + return nil + } + rg := newRunGroup(nil, r.cmd) for _, family := range r.iprouteFamilies() {