From 2b7f8b6be79ff5a4649c26e7b9fc7b47b064909a Mon Sep 17 00:00:00 2001 From: Jonathan Nobels Date: Thu, 30 Oct 2025 14:13:50 -0400 Subject: [PATCH] net/netmon: remove ChangeDelta.Major and start the TODOs updates tailscale/corp#33891 We've had some TODO's in netmon for ages related to asking more pointed questions of ChangeDelta. We're also seeing a lot of ChangeDelta's being flagged as "Major" when they are not interesting, triggering rebinds in wgengine that are not needed. This removes the "Major" flag and replaces it with a set of precomputed values that hint at what the ChangeDelta means. The dependencies are cleaned up a bit, notably removing dependency on netmon itself for calculating what is interesting, and what is not. This includes letting individual platforms set a bespoke global "IsInterestingInterface" function. This was only used on Darwin. RebindRequired roughly flows from how "Major" was historically calculated but includes some additional checks for various uninteresting events such as changes in interface addresses that shouldn't trigger a rebind to minimize thrashing. The individual values that we roll into RebindRequired are also exposed so that components consuming netmap.ChangeDelta can ask more targeted questions. Signed-off-by: Jonathan Nobels wgengine/userspace: skip rebind and restun if all of our interfaces are down updates tailscale/corp#33891 Skip rebind and restun if we have called SetNetworkUp with false. Neither cares about this state and it's just flappy to try to reStun and rebind if there are no usable interfaces available. Signed-off-by: Jonathan Nobels Add viability flag and fix bug in state --- cmd/tailscaled/debug.go | 4 +- ipn/ipnlocal/local.go | 7 +- net/netmon/loghelper.go | 2 +- net/netmon/loghelper_test.go | 2 +- net/netmon/netmon.go | 363 ++++++++++++++++++------------- net/netmon/netmon_darwin.go | 21 +- net/netmon/netmon_freebsd.go | 2 - net/netmon/netmon_linux.go | 2 - net/netmon/netmon_test.go | 372 ++++++++++++++++++++++++++++++-- net/netmon/netmon_windows.go | 2 - net/netmon/polling.go | 4 - net/netmon/state.go | 26 ++- net/sockstats/sockstats_tsgo.go | 2 +- wgengine/userspace.go | 40 +++- 14 files changed, 643 insertions(+), 206 deletions(-) diff --git a/cmd/tailscaled/debug.go b/cmd/tailscaled/debug.go index b16cb28e0..8b142ed00 100644 --- a/cmd/tailscaled/debug.go +++ b/cmd/tailscaled/debug.go @@ -138,8 +138,8 @@ func changeDeltaWatcher(ec *eventbus.Client, ctx context.Context, dump func(st * case <-ec.Done(): return case delta := <-changeSub.Events(): - if !delta.Major { - log.Printf("Network monitor fired; not a major change") + if !delta.RebindLikelyRequired { + log.Printf("Network monitor fired; not a significant change") return } log.Printf("Network monitor fired. New state:") diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index e5fafb5bd..557e3cf9d 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -564,7 +564,8 @@ func NewLocalBackend(logf logger.Logf, logID logid.PublicID, sys *tsd.System, lo b.prevIfState = netMon.InterfaceState() // Call our linkChange code once with the current state. // Following changes are triggered via the eventbus. - b.linkChange(&netmon.ChangeDelta{New: netMon.InterfaceState()}) + cd := netmon.NewChangeDelta(nil, b.prevIfState, false, netMon.TailscaleInterfaceName()) + b.linkChange(&cd) if buildfeatures.HasPeerAPIServer { if tunWrap, ok := b.sys.Tun.GetOK(); ok { @@ -964,13 +965,13 @@ func (b *LocalBackend) linkChange(delta *netmon.ChangeDelta) { b.prevIfState = ifst b.pauseOrResumeControlClientLocked() prefs := b.pm.CurrentPrefs() - if delta.Major && prefs.AutoExitNode().IsSet() { + if delta.RebindLikelyRequired && prefs.AutoExitNode().IsSet() { b.refreshAutoExitNode = true } var needReconfig bool // If the network changed and we're using an exit node and allowing LAN access, we may need to reconfigure. - if delta.Major && prefs.ExitNodeID() != "" && prefs.ExitNodeAllowLANAccess() { + if delta.RebindLikelyRequired && prefs.ExitNodeID() != "" && prefs.ExitNodeAllowLANAccess() { b.logf("linkChange: in state %v; updating LAN routes", b.state) needReconfig = true } diff --git a/net/netmon/loghelper.go b/net/netmon/loghelper.go index 675762cd1..ce3b9cb87 100644 --- a/net/netmon/loghelper.go +++ b/net/netmon/loghelper.go @@ -20,7 +20,7 @@ func LinkChangeLogLimiter(ctx context.Context, logf logger.Logf, nm *Monitor) lo var formatSeen sync.Map // map[string]bool sub := eventbus.SubscribeFunc(nm.b, func(cd ChangeDelta) { // If we're in a major change or a time jump, clear the seen map. - if cd.Major || cd.TimeJumped { + if cd.RebindLikelyRequired || cd.TimeJumped { formatSeen.Clear() } }) diff --git a/net/netmon/loghelper_test.go b/net/netmon/loghelper_test.go index ca3b1284c..0f60a040d 100644 --- a/net/netmon/loghelper_test.go +++ b/net/netmon/loghelper_test.go @@ -64,7 +64,7 @@ func syncTestLinkChangeLogLimiter(t *testing.T) { // InjectEvent doesn't work because it's not a major event, so we // instead inject the event ourselves. injector := eventbustest.NewInjector(t, bus) - eventbustest.Inject(injector, ChangeDelta{Major: true}) + eventbustest.Inject(injector, NewChangeDelta(nil, nil, true, "")) synctest.Wait() logf("hello %s", "world") diff --git a/net/netmon/netmon.go b/net/netmon/netmon.go index 657da04d5..f9516bce6 100644 --- a/net/netmon/netmon.go +++ b/net/netmon/netmon.go @@ -7,10 +7,11 @@ package netmon import ( - "encoding/json" "errors" + "fmt" "net/netip" "runtime" + "slices" "sync" "time" @@ -45,12 +46,15 @@ type osMon interface { // until the osMon is closed. After a Close, the returned // error is ignored. Receive() (message, error) - - // IsInterestingInterface reports whether the provided interface should - // be considered for network change events. - IsInterestingInterface(iface string) bool } +// IsInterestingInterface is the function used to determine whether +// a given interface name is interesting enough to pay attention to +// for network change monitoring purposes. +// +// If nil, all interfaces are considered interesting. +var IsInterestingInterface func(Interface, []netip.Prefix) bool + // Monitor represents a monitoring instance. type Monitor struct { logf logger.Logf @@ -62,10 +66,6 @@ type Monitor struct { stop chan struct{} // closed on Stop static bool // static Monitor that doesn't actually monitor - // Things that must be set early, before use, - // and not change at runtime. - tsIfName string // tailscale interface name, if known/set ("tailscale0", "utun3", ...) - mu syncs.Mutex // guards all following fields cbs set.HandleSet[ChangeFunc] ifState *State @@ -77,7 +77,8 @@ type Monitor struct { goroutines sync.WaitGroup wallTimer *time.Timer // nil until Started; re-armed AfterFunc per tick lastWall time.Time - timeJumped bool // whether we need to send a changed=true after a big time jump + timeJumped bool // whether we need to send a changed=true after a big time jump + tsIfName string // tailscale interface name, if known/set ("tailscale0", "utun3", ...) } // ChangeFunc is a callback function registered with Monitor that's called when the @@ -85,6 +86,8 @@ type Monitor struct { type ChangeFunc func(*ChangeDelta) // ChangeDelta describes the difference between two network states. +// +// Use NewChangeDelta to construct one and compute the cached fields. type ChangeDelta struct { // Old is the old interface state, if known. // It's nil if the old state is unknown. @@ -96,21 +99,182 @@ type ChangeDelta struct { // Do not mutate it. New *State - // Major is our legacy boolean of whether the network changed in some major - // way. - // - // Deprecated: do not remove. As of 2023-08-23 we're in a renewed effort to - // remove it and ask specific qustions of ChangeDelta instead. Look at Old - // and New (or add methods to ChangeDelta) instead of using Major. - Major bool - // TimeJumped is whether there was a big jump in wall time since the last - // time we checked. This is a hint that a mobile sleeping device might have + // time we checked. This is a hint that a sleeping device might have // come out of sleep. TimeJumped bool - // TODO(bradfitz): add some lazy cached fields here as needed with methods - // on *ChangeDelta to let callers ask specific questions + // The tailscale interface name, e.g. "tailscale0", "utun3", etc. Not all + // platforms know this or set it. Copied from netmon.Monitor.tsIfName. + TailscaleIfaceName string + + // Computed Fields + + DefaultInterfaceChanged bool // whether default route interface changed + IsLessExpensive bool // whether new state is less expensive than old + HasPACOrProxyConfigChanged bool // whether PAC/HTTP proxy config changed + InterfaceIPsChanged bool // whether any interface IPs changed in a meaningful way + AvailableProtocolsChanged bool // whether we have seen a change in available IPv4/IPv6 + DefaultInterfaceMaybeViable bool // whether the default interface is potentially viable (has usable IPs, is up and is not the tunnel itself) + + // RebindLikelyRequired combines the various fields above to report whether this change likely requires us + // to rebind sockets. This is a very conservative estimate and covers a number of + // cases where a rebind is not strictly necessary. Consumers of the ChangeDelta should + // use this as a hint only. If in doubt, rebind. + RebindLikelyRequired bool +} + +var skipRebindIfNoDefaultRouteChange = true + +// NewChangeDelta builds a ChangeDelta and eagerly computes the cached fields. +func NewChangeDelta(old, new *State, timeJumped bool, tsIfName string) ChangeDelta { + cd := ChangeDelta{ + Old: old, + New: new, + TimeJumped: timeJumped, + TailscaleIfaceName: tsIfName, + } + + if cd.New == nil { + return cd + } else if cd.Old == nil { + cd.DefaultInterfaceChanged = cd.New.DefaultRouteInterface != "" + cd.IsLessExpensive = false + cd.HasPACOrProxyConfigChanged = true + cd.InterfaceIPsChanged = true + } else { + cd.AvailableProtocolsChanged = cd.Old.HaveV4 != cd.New.HaveV4 || cd.Old.HaveV6 != cd.New.HaveV6 + cd.DefaultInterfaceChanged = cd.Old.DefaultRouteInterface != cd.New.DefaultRouteInterface + cd.IsLessExpensive = cd.Old.IsExpensive && !cd.New.IsExpensive + cd.HasPACOrProxyConfigChanged = cd.Old.PAC != cd.New.PAC || cd.Old.HTTPProxy != cd.New.HTTPProxy + cd.InterfaceIPsChanged = cd.isInterestingIntefaceChange() + } + + // If the default route interface is populated, but it's not up this event signifies that we're in + // the process of tearing it down. Rebinds are going to fail so it's flappy to try. + defIfName := new.DefaultRouteInterface + defIf := new.Interface[defIfName] + cd.DefaultInterfaceMaybeViable = true + // The default interface is not viable if is down or is the Tailscale interface itself. + if !defIf.IsUp() || defIfName == tsIfName { + cd.DefaultInterfaceMaybeViable = false + } + + // Compute rebind requirement. A number of these checks are redundant - HaveSomeAddressChanged + // subsumes InterfaceIPsChanged, IsExpensive likely does not change without a new interface + // appearing, but we'll keep them all for clarity and testability. + cd.RebindLikelyRequired = (cd.New == nil || // Do we need to rebind if there is no current state? + cd.Old == nil || + cd.TimeJumped || + cd.DefaultInterfaceChanged || + cd.InterfaceIPsChanged || + cd.IsLessExpensive || + cd.HasPACOrProxyConfigChanged || + cd.AvailableProtocolsChanged) && + cd.DefaultInterfaceMaybeViable + + // (barnstar) TODO: There are likely a number of optimizations we can do here to avoid + // rebinding in cases where it is not necessary but we really need to leave that to the + // upstream component. If it's sockets are happy, then it probably doesn't need to rebind, + // but it may want to if any of these fields are true. + + return cd +} + +// isInterestingIntefaceChange reports whether any interfaces have changed in a meaninful way. +// This excludes interfaces that are not interesting per IsInterestingInterface and +// filters out changes to interface IPs that that are uninteresting (e.g. link-local addresses). +func (cd *ChangeDelta) isInterestingIntefaceChange() bool { + // If either side is nil treat as changed. + if cd.Old == nil || cd.New == nil { + return true + } + + // Compare interfaces in both directions. Old to new and new to old. + + for iname, oldInterface := range cd.Old.Interface { + if iname == cd.TailscaleIfaceName { + // Ignore changes in the Tailscale interface itself. + continue + } + oldIps := filterRoutableIPs(cd.Old.InterfaceIPs[iname]) + if IsInterestingInterface != nil && !IsInterestingInterface(oldInterface, oldIps) { + continue + } + + // Old interfaces with no routable addresses are not interesting + if len(oldIps) == 0 { + continue + } + + // The old interface doesn't exist in the new interface set and it has + // an a global unicast IP. That's considered a change from the perspective + // of anything that may have been bound to it. If it didn't have a global + // unicast IP, it's not interesting. + newInterface, ok := cd.New.Interface[iname] + if !ok { + return true + } + newIps, ok := cd.New.InterfaceIPs[iname] + if !ok { + return true + } + newIps = filterRoutableIPs(newIps) + + if !oldInterface.Equal(newInterface) || !prefixesEqual(oldIps, newIps) { + return true + } + } + + for iname, newInterface := range cd.New.Interface { + if iname == cd.TailscaleIfaceName { + continue + } + newIps := filterRoutableIPs(cd.New.InterfaceIPs[iname]) + if IsInterestingInterface != nil && !IsInterestingInterface(newInterface, newIps) { + continue + } + + // New interfaces with no routable addresses are not interesting + if len(newIps) == 0 { + continue + } + + oldInterface, ok := cd.Old.Interface[iname] + if !ok { + return true + } + + oldIps, ok := cd.Old.InterfaceIPs[iname] + if !ok { + // Redundant but we can't dig up the "old" IPs for this interface. + return true + } + oldIps = filterRoutableIPs(oldIps) + + // The interface's IPs, Name, MTU, etc have changed. This is definitely interesting. + if !newInterface.Equal(oldInterface) || !prefixesEqual(oldIps, newIps) { + return true + } + } + return false +} + +func filterRoutableIPs(addrs []netip.Prefix) []netip.Prefix { + var filtered []netip.Prefix + for _, pfx := range addrs { + a := pfx.Addr() + // Skip link-local multicast addresses. + if a.IsLinkLocalMulticast() { + continue + } + + if isUsableV4(a) || isUsableV6(a) { + filtered = append(filtered, pfx) + } + } + fmt.Printf("Filtered: %v\n", filtered) + return filtered } // New instantiates and starts a monitoring instance. @@ -174,9 +338,17 @@ func (m *Monitor) interfaceStateUncached() (*State, error) { // This must be called only early in tailscaled startup before the monitor is // used. func (m *Monitor) SetTailscaleInterfaceName(ifName string) { + m.mu.Lock() + defer m.mu.Unlock() m.tsIfName = ifName } +func (m *Monitor) TailscaleInterfaceName() string { + m.mu.Lock() + defer m.mu.Unlock() + return m.tsIfName +} + // GatewayAndSelfIP returns the current network's default gateway, and // the machine's default IP for that gateway. // @@ -344,18 +516,7 @@ func (m *Monitor) pump() { } } -// isInterestingInterface reports whether the provided interface should be -// considered when checking for network state changes. -// The ips parameter should be the IPs of the provided interface. -func (m *Monitor) isInterestingInterface(i Interface, ips []netip.Prefix) bool { - if !m.om.IsInterestingInterface(i.Name) { - return false - } - - return true -} - -// debounce calls the callback function with a delay between events +// / debounce calls the callback function with a delay between events // and exits when a stop is issued. func (m *Monitor) debounce() { defer m.goroutines.Done() @@ -376,7 +537,10 @@ func (m *Monitor) debounce() { select { case <-m.stop: return - case <-time.After(250 * time.Millisecond): + // 1s is reasonable debounce time for network changes. Events such as undocking a laptop + // or roaming onto wifi will often generate multiple events in quick succession as interfaces + // flap. We want to avoid spamming consumers of these events. + case <-time.After(1000 * time.Millisecond): } } } @@ -403,34 +567,18 @@ func (m *Monitor) handlePotentialChange(newState *State, forceCallbacks bool) { return } - delta := ChangeDelta{ - Old: oldState, - New: newState, - TimeJumped: timeJumped, - } + delta := NewChangeDelta(oldState, newState, timeJumped, m.tsIfName) - delta.Major = m.IsMajorChangeFrom(oldState, newState) - if delta.Major { + if delta.RebindLikelyRequired { m.gwValid = false - - if s1, s2 := oldState.String(), delta.New.String(); s1 == s2 { - m.logf("[unexpected] network state changed, but stringification didn't: %v", s1) - m.logf("[unexpected] old: %s", jsonSummary(oldState)) - m.logf("[unexpected] new: %s", jsonSummary(newState)) - } } m.ifState = newState // See if we have a queued or new time jump signal. if timeJumped { m.resetTimeJumpedLocked() - if !delta.Major { - // Only log if it wasn't an interesting change. - m.logf("time jumped (probably wake from sleep); synthesizing major change event") - delta.Major = true - } } metricChange.Add(1) - if delta.Major { + if delta.RebindLikelyRequired { metricChangeMajor.Add(1) } if delta.TimeJumped { @@ -442,107 +590,24 @@ func (m *Monitor) handlePotentialChange(newState *State, forceCallbacks bool) { } } -// IsMajorChangeFrom reports whether the transition from s1 to s2 is -// a "major" change, where major roughly means it's worth tearing down -// a bunch of connections and rebinding. -// -// TODO(bradiftz): tigten this definition. -func (m *Monitor) IsMajorChangeFrom(s1, s2 *State) bool { - if s1 == nil && s2 == nil { +// reports whether a and b contain the same set of prefixes regardless of order. +func prefixesEqual(a, b []netip.Prefix) bool { + if len(a) != len(b) { return false } - if s1 == nil || s2 == nil { - return true - } - if s1.HaveV6 != s2.HaveV6 || - s1.HaveV4 != s2.HaveV4 || - s1.IsExpensive != s2.IsExpensive || - s1.DefaultRouteInterface != s2.DefaultRouteInterface || - s1.HTTPProxy != s2.HTTPProxy || - s1.PAC != s2.PAC { - return true - } - for iname, i := range s1.Interface { - if iname == m.tsIfName { - // Ignore changes in the Tailscale interface itself. - continue - } - ips := s1.InterfaceIPs[iname] - if !m.isInterestingInterface(i, ips) { - continue - } - i2, ok := s2.Interface[iname] - if !ok { - return true - } - ips2, ok := s2.InterfaceIPs[iname] - if !ok { - return true - } - if !i.Equal(i2) || !prefixesMajorEqual(ips, ips2) { - return true - } - } - // Iterate over s2 in case there is a field in s2 that doesn't exist in s1 - for iname, i := range s2.Interface { - if iname == m.tsIfName { - // Ignore changes in the Tailscale interface itself. - continue - } - ips := s2.InterfaceIPs[iname] - if !m.isInterestingInterface(i, ips) { - continue - } - i1, ok := s1.Interface[iname] - if !ok { - return true - } - ips1, ok := s1.InterfaceIPs[iname] - if !ok { - return true - } - if !i.Equal(i1) || !prefixesMajorEqual(ips, ips1) { - return true - } - } - return false -} -// prefixesMajorEqual reports whether a and b are equal after ignoring -// boring things like link-local, loopback, and multicast addresses. -func prefixesMajorEqual(a, b []netip.Prefix) bool { - // trim returns a subslice of p with link local unicast, - // loopback, and multicast prefixes removed from the front. - trim := func(p []netip.Prefix) []netip.Prefix { - for len(p) > 0 { - a := p[0].Addr() - if a.IsLinkLocalUnicast() || a.IsLoopback() || a.IsMulticast() { - p = p[1:] - continue - } - break - } - return p - } - for { - a = trim(a) - b = trim(b) - if len(a) == 0 || len(b) == 0 { - return len(a) == 0 && len(b) == 0 - } - if a[0] != b[0] { - return false - } - a, b = a[1:], b[1:] - } -} + aa := make([]netip.Prefix, len(a)) + bb := make([]netip.Prefix, len(b)) + copy(aa, a) + copy(bb, b) -func jsonSummary(x any) any { - j, err := json.Marshal(x) - if err != nil { - return err + less := func(x, y netip.Prefix) int { + return x.Addr().Compare(y.Addr()) } - return j + + slices.SortFunc(aa, less) + slices.SortFunc(bb, less) + return slices.Equal(aa, bb) } func wallTime() time.Time { diff --git a/net/netmon/netmon_darwin.go b/net/netmon/netmon_darwin.go index 9c5e76475..042f9a3b7 100644 --- a/net/netmon/netmon_darwin.go +++ b/net/netmon/netmon_darwin.go @@ -16,6 +16,12 @@ import ( "tailscale.com/util/eventbus" ) +func init() { + IsInterestingInterface = func(iface Interface, prefixes []netip.Prefix) bool { + return isInterestingInterface(iface.Name) + } +} + const debugRouteMessages = false // unspecifiedMessage is a minimal message implementation that should not @@ -125,11 +131,10 @@ func addrType(addrs []route.Addr, rtaxType int) route.Addr { return nil } -func (m *darwinRouteMon) IsInterestingInterface(iface string) bool { +func isInterestingInterface(iface string) bool { baseName := strings.TrimRight(iface, "0123456789") switch baseName { - // TODO(maisem): figure out what this list should actually be. - case "llw", "awdl", "ipsec": + case "llw", "awdl", "ipsec", "gif", "XHC", "anpi", "lo", "utun": return false } return true @@ -137,7 +142,7 @@ func (m *darwinRouteMon) IsInterestingInterface(iface string) bool { func (m *darwinRouteMon) skipInterfaceAddrMessage(msg *route.InterfaceAddrMessage) bool { if la, ok := addrType(msg.Addrs, unix.RTAX_IFP).(*route.LinkAddr); ok { - if !m.IsInterestingInterface(la.Name) { + if !isInterestingInterface(la.Name) { return true } } @@ -150,6 +155,14 @@ func (m *darwinRouteMon) skipRouteMessage(msg *route.RouteMessage) bool { // dst = fe80::b476:66ff:fe30:c8f6%15 return true } + + // We can skip route messages from uninteresting interfaces. We do this upstream + // against the InterfaceMonitor, but skipping them here avoids unnecessary work. + if la, ok := addrType(msg.Addrs, unix.RTAX_IFP).(*route.LinkAddr); ok { + if !isInterestingInterface(la.Name) { + return true + } + } return false } diff --git a/net/netmon/netmon_freebsd.go b/net/netmon/netmon_freebsd.go index 842cbdb0d..3a4fb44d8 100644 --- a/net/netmon/netmon_freebsd.go +++ b/net/netmon/netmon_freebsd.go @@ -34,8 +34,6 @@ func newOSMon(_ *eventbus.Bus, logf logger.Logf, m *Monitor) (osMon, error) { return &devdConn{conn}, nil } -func (c *devdConn) IsInterestingInterface(iface string) bool { return true } - func (c *devdConn) Close() error { return c.conn.Close() } diff --git a/net/netmon/netmon_linux.go b/net/netmon/netmon_linux.go index a1077c257..aa5253f9b 100644 --- a/net/netmon/netmon_linux.go +++ b/net/netmon/netmon_linux.go @@ -81,8 +81,6 @@ func newOSMon(bus *eventbus.Bus, logf logger.Logf, m *Monitor) (osMon, error) { }, nil } -func (c *nlConn) IsInterestingInterface(iface string) bool { return true } - func (c *nlConn) Close() error { c.busClient.Close() return c.conn.Close() diff --git a/net/netmon/netmon_test.go b/net/netmon/netmon_test.go index 6a87cedb8..040f879b4 100644 --- a/net/netmon/netmon_test.go +++ b/net/netmon/netmon_test.go @@ -8,6 +8,7 @@ import ( "net" "net/netip" "reflect" + "strings" "sync/atomic" "testing" "time" @@ -138,7 +139,7 @@ func TestMonitorMode(t *testing.T) { n := 0 mon.RegisterChangeCallback(func(d *ChangeDelta) { n++ - t.Logf("cb: changed=%v, ifSt=%v", d.Major, d.New) + t.Logf("cb: changed=%v, ifSt=%v", d.RebindLikelyRequired, d.New) }) mon.Start() <-done @@ -149,19 +150,20 @@ func TestMonitorMode(t *testing.T) { mon.Start() eventbustest.Expect(tw, func(event *ChangeDelta) (bool, error) { n++ - t.Logf("cb: changed=%v, ifSt=%v", event.Major, event.New) + t.Logf("cb: changed=%v, ifSt=%v", event.RebindLikelyRequired, event.New) return false, nil // Return false, indicating we wanna look for more events }) t.Logf("%v events", n) } } -// tests (*State).IsMajorChangeFrom -func TestIsMajorChangeFrom(t *testing.T) { +// tests (*ChangeDelta).RebindRequired +func TestRebindRequired(t *testing.T) { tests := []struct { - name string - s1, s2 *State - want bool + name string + s1, s2 *State + tsIfName string + want bool }{ { name: "eq_nil", @@ -188,6 +190,110 @@ func TestIsMajorChangeFrom(t *testing.T) { }, want: false, }, + { + name: "new-with-no-addr", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + "bar": {}, + }, + }, + want: false, + }, + { + name: "ignore-tailscale-interface-appearing", + tsIfName: "tailscale0", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + "tailscale0": {netip.MustParsePrefix("100.69.4.20/32")}, + }, + }, + want: false, + }, + { + name: "ignore-tailscale-interface-disappearing", + tsIfName: "tailscale0", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + "tailscale0": {netip.MustParsePrefix("100.69.4.20/32")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + }, + }, + want: false, + }, + { + name: "new-with-multicast-addr", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + "bar": {netip.MustParsePrefix("224.0.0.1/32")}, + }, + }, + want: false, + }, + { + name: "old-with-addr-dropped", + s1: &State{ + DefaultRouteInterface: "bar", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + "bar": {netip.MustParsePrefix("192.168.0.1/32")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "bar", + InterfaceIPs: map[string][]netip.Prefix{ + "bar": {netip.MustParsePrefix("192.168.0.1/32")}, + }, + }, + want: true, + }, + { + name: "old-with-no-addr-dropped", + s1: &State{ + DefaultRouteInterface: "bar", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {}, + "bar": {netip.MustParsePrefix("192.168.0.1/16")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "bar", + InterfaceIPs: map[string][]netip.Prefix{ + "bar": {netip.MustParsePrefix("192.168.0.1/16")}, + }, + }, + want: false, + }, { name: "default-route-changed", s1: &State{ @@ -221,6 +327,8 @@ func TestIsMajorChangeFrom(t *testing.T) { want: true, }, { + // (barnstar) TODO: ULA addresses are only useful in some contexts, + // so maybe this shouldn't trigger rebinds after all? Needs more thought. name: "ipv6-ula-addressed-appeared", s1: &State{ DefaultRouteInterface: "foo", @@ -233,15 +341,147 @@ func TestIsMajorChangeFrom(t *testing.T) { InterfaceIPs: map[string][]netip.Prefix{ "foo": { netip.MustParsePrefix("10.0.1.2/16"), - // Brad saw this address coming & going on his home LAN, possibly - // via an Apple TV Thread routing advertisement? (Issue 9040) netip.MustParsePrefix("fd15:bbfa:c583:4fce:f4fb:4ff:fe1a:4148/64"), }, }, }, - want: true, // TODO(bradfitz): want false (ignore the IPv6 ULA address on foo) + want: true, + }, + { + // (barnstar) TODO: ULA addresses are only useful in some contexts, + // so maybe this shouldn't trigger rebinds after all? Needs more thought. + name: "ipv6-ula-addressed-disappeared", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": { + netip.MustParsePrefix("10.0.1.2/16"), + netip.MustParsePrefix("fd15:bbfa:c583:4fce:f4fb:4ff:fe1a:4148/64"), + }, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + }, + }, + want: true, + }, + { + name: "ipv6-link-local-addressed-appeared", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": {netip.MustParsePrefix("10.0.1.2/16")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": { + netip.MustParsePrefix("10.0.1.2/16"), + netip.MustParsePrefix("fe80::f242:25ff:fe64:b280/64"), + }, + }, + }, + want: false, + }, + { + name: "ipv6-addressed-changed", + s1: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": { + netip.MustParsePrefix("10.0.1.2/16"), + netip.MustParsePrefix("2001::f242:25ff:fe64:b280/64"), + netip.MustParsePrefix("fe80::f242:25ff:fe64:b280/64"), + }, + }, + }, + s2: &State{ + DefaultRouteInterface: "foo", + InterfaceIPs: map[string][]netip.Prefix{ + "foo": { + netip.MustParsePrefix("10.0.1.2/16"), + netip.MustParsePrefix("2001::beef:8bad:f00d:b280/64"), + netip.MustParsePrefix("fe80::f242:25ff:fe64:b280/64"), + }, + }, + }, + want: true, + }, + { + name: "have-addr-changed", + s1: &State{ + HaveV6: false, + HaveV4: false, + }, + + s2: &State{ + HaveV6: true, + HaveV4: true, + }, + want: true, + }, + { + name: "have-addr-unchanged", + s1: &State{ + HaveV6: true, + HaveV4: true, + }, + + s2: &State{ + HaveV6: true, + HaveV4: true, + }, + want: false, + }, + { + name: "new-is-less-expensive", + s1: &State{ + IsExpensive: true, + }, + + s2: &State{ + IsExpensive: false, + }, + want: true, + }, + { + name: "new-is-more-expensive", + s1: &State{ + IsExpensive: false, + }, + + s2: &State{ + IsExpensive: true, + }, + want: false, + }, + { + name: "uninteresting-interface-added", + s1: &State{ + DefaultRouteInterface: "bar", + InterfaceIPs: map[string][]netip.Prefix{ + "bar": {netip.MustParsePrefix("192.168.0.1/16")}, + }, + }, + s2: &State{ + DefaultRouteInterface: "bar", + InterfaceIPs: map[string][]netip.Prefix{ + "bar": {netip.MustParsePrefix("192.168.0.1/16")}, + "boring": {netip.MustParsePrefix("fd7a:115c:a1e0:ab12:4843:cd96:625e:13ce/64")}, + }, + }, + want: false, }, } + + withIsInterestingInterface(t, func(ni Interface, pfxs []netip.Prefix) bool { + return !strings.HasPrefix(ni.Name, "boring") + }) + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { // Populate dummy interfaces where missing. @@ -258,16 +498,111 @@ func TestIsMajorChangeFrom(t *testing.T) { } } - var m Monitor - m.om = &testOSMon{ - Interesting: func(name string) bool { return true }, + cd := NewChangeDelta(tt.s1, tt.s2, false, tt.tsIfName) + _ = cd // in case we need it later + if got := cd.RebindLikelyRequired; got != tt.want { + t.Errorf("RebindRequired = %v; want %v", got, tt.want) } - if got := m.IsMajorChangeFrom(tt.s1, tt.s2); got != tt.want { - t.Errorf("IsMajorChange = %v; want %v", got, tt.want) + }) + } +} + +func withIsInterestingInterface(t *testing.T, fn func(Interface, []netip.Prefix) bool) { + t.Helper() + old := IsInterestingInterface + IsInterestingInterface = fn + t.Cleanup(func() { IsInterestingInterface = old }) +} + +func TestIncludesRoutableIP(t *testing.T) { + addrs := []netip.Prefix{ + netip.MustParsePrefix("1.2.3.4/32"), + netip.MustParsePrefix("10.0.0.1/24"), // RFC1918 IPv4 (private) + netip.MustParsePrefix("172.16.0.1/12"), // RFC1918 IPv4 (private) + netip.MustParsePrefix("192.168.1.1/24"), // RFC1918 IPv4 (private) + netip.MustParsePrefix("fd15:dead:beef::1/64"), // IPv6 ULA (should be filtered) + netip.MustParsePrefix("2001:db8::1/64"), // global IPv6 (documentation block) + netip.MustParsePrefix("fe80::1/64"), // link-local IPv6 + netip.MustParsePrefix("::1/128"), // loopback IPv6 + netip.MustParsePrefix("::/128"), // unspecified IPv6 + netip.MustParsePrefix("224.0.0.1/32"), // multicast IPv4 + netip.MustParsePrefix("127.0.0.1/32"), // loopback IPv4 + } + + got := filterRoutableIPs(addrs) + + want := []netip.Prefix{ + netip.MustParsePrefix("1.2.3.4/32"), + netip.MustParsePrefix("10.0.0.1/24"), // RFC1918 IPv4 (private) + netip.MustParsePrefix("172.16.0.1/12"), // RFC1918 IPv4 (private) + netip.MustParsePrefix("192.168.1.1/24"), // RFC1918 IPv4 (private) + netip.MustParsePrefix("fd15:dead:beef::1/64"), // IPv6 ULA + netip.MustParsePrefix("2001:db8::1/64"), + } + + if !reflect.DeepEqual(got, want) { + t.Fatalf("filterRoutableIPs returned %v; want %v", got, want) + } +} + +func TestPrefixesEqual(t *testing.T) { + tests := []struct { + name string + a, b []netip.Prefix + want bool + }{ + { + name: "empty", + a: []netip.Prefix{}, + b: []netip.Prefix{}, + want: true, + }, + { + name: "single-equal", + a: []netip.Prefix{netip.MustParsePrefix("10.0.0.1/24")}, + b: []netip.Prefix{netip.MustParsePrefix("10.0.0.1/24")}, + want: true, + }, + { + name: "single-different", + a: []netip.Prefix{netip.MustParsePrefix("10.0.0.1/24")}, + b: []netip.Prefix{netip.MustParsePrefix("10.0.0.2/24")}, + want: false, + }, + { + name: "unordered-equal", + a: []netip.Prefix{ + netip.MustParsePrefix("10.0.0.1/24"), + netip.MustParsePrefix("10.0.2.1/24"), + }, + b: []netip.Prefix{ + netip.MustParsePrefix("10.0.2.1/24"), + netip.MustParsePrefix("10.0.0.1/24"), + }, + want: true, + }, + { + name: "subset", + a: []netip.Prefix{ + netip.MustParsePrefix("10.0.2.1/24"), + }, + b: []netip.Prefix{ + netip.MustParsePrefix("10.0.2.1/24"), + netip.MustParsePrefix("10.0.0.1/24"), + }, + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := prefixesEqual(tt.a, tt.b) + if got != tt.want { + t.Errorf("prefixesEqual(%v, %v) = %v; want %v", tt.a, tt.b, got, tt.want) } }) } } + func TestForeachInterface(t *testing.T) { tests := []struct { name string @@ -312,10 +647,3 @@ type testOSMon struct { osMon Interesting func(name string) bool } - -func (m *testOSMon) IsInterestingInterface(name string) bool { - if m.Interesting == nil { - return true - } - return m.Interesting(name) -} diff --git a/net/netmon/netmon_windows.go b/net/netmon/netmon_windows.go index 718724b6d..e8966faf0 100644 --- a/net/netmon/netmon_windows.go +++ b/net/netmon/netmon_windows.go @@ -74,8 +74,6 @@ func newOSMon(_ *eventbus.Bus, logf logger.Logf, pm *Monitor) (osMon, error) { return m, nil } -func (m *winMon) IsInterestingInterface(iface string) bool { return true } - func (m *winMon) Close() (ret error) { m.cancel() m.noDeadlockTicker.Stop() diff --git a/net/netmon/polling.go b/net/netmon/polling.go index ce1618ed6..2a3e44cba 100644 --- a/net/netmon/polling.go +++ b/net/netmon/polling.go @@ -35,10 +35,6 @@ type pollingMon struct { stop chan struct{} } -func (pm *pollingMon) IsInterestingInterface(iface string) bool { - return true -} - func (pm *pollingMon) Close() error { pm.closeOnce.Do(func() { close(pm.stop) diff --git a/net/netmon/state.go b/net/netmon/state.go index 27e3524e8..dd8ed58f1 100644 --- a/net/netmon/state.go +++ b/net/netmon/state.go @@ -149,12 +149,28 @@ type Interface struct { Desc string // extra description (used on Windows) } -func (i Interface) IsLoopback() bool { return isLoopback(i.Interface) } -func (i Interface) IsUp() bool { return isUp(i.Interface) } +func (i Interface) IsLoopback() bool { + if i.Interface == nil { + return false + } + return isLoopback(i.Interface) +} + +func (i Interface) IsUp() bool { + if i.Interface == nil { + return false + } + return isUp(i.Interface) +} + func (i Interface) Addrs() ([]net.Addr, error) { if i.AltAddrs != nil { return i.AltAddrs, nil } + if i.Interface == nil { + return nil, nil + } + return i.Interface.Addrs() } @@ -485,6 +501,12 @@ func getState(optTSInterfaceName string) (*State, error) { ifUp := ni.IsUp() s.Interface[ni.Name] = ni s.InterfaceIPs[ni.Name] = append(s.InterfaceIPs[ni.Name], pfxs...) + + // Skip uninteresting interfaces. + if IsInterestingInterface != nil && !IsInterestingInterface(ni, pfxs) { + return + } + if !ifUp || isTSInterfaceName || isTailscaleInterface(ni.Name, pfxs) { return } diff --git a/net/sockstats/sockstats_tsgo.go b/net/sockstats/sockstats_tsgo.go index aa875df9a..ae96218d4 100644 --- a/net/sockstats/sockstats_tsgo.go +++ b/net/sockstats/sockstats_tsgo.go @@ -271,7 +271,7 @@ func setNetMon(netMon *netmon.Monitor) { } netMon.RegisterChangeCallback(func(delta *netmon.ChangeDelta) { - if !delta.Major { + if !delta.RebindLikelyRequired { return } state := delta.New diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 1b8562d3f..0bf007771 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -1332,20 +1332,19 @@ func (e *userspaceEngine) Done() <-chan struct{} { } func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) { - changed := delta.Major // TODO(bradfitz): ask more specific questions? + cur := delta.New up := cur.AnyInterfaceUp() if !up { e.logf("LinkChange: all links down; pausing: %v", cur) - } else if changed { - e.logf("LinkChange: major, rebinding. New state: %v", cur) + } else if delta.RebindLikelyRequired { + e.logf("LinkChange: major, rebinding. New state: %v OldState: %v", cur, delta.Old) } else { e.logf("[v1] LinkChange: minor") } e.health.SetAnyInterfaceUp(up) - e.magicConn.SetNetworkUp(up) - if !up || changed { + if !up || delta.RebindLikelyRequired { if err := e.dns.FlushCaches(); err != nil { e.logf("wgengine: dns flush failed after major link change: %v", err) } @@ -1355,9 +1354,20 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) { // suspend/resume or whenever NetworkManager is started, it // nukes all systemd-resolved configs. So reapply our DNS // config on major link change. - // TODO: explain why this is ncessary not just on Linux but also android - // and Apple platforms. - if changed { + // + // On Darwin (netext), we reapply the DNS config when the interface flaps + // because the change in interface can potentially change the nameservers + // for the forwarder. On Darwin netext clients, magicDNS is ~always the default + // resolver so having no nameserver to forward queries to (or one on a network we + // are not currently on) breaks DNS resolution system-wide. There are notable + // timing issues here with Darwin's network stack. It is not guaranteed that + // the forward resolver will be available immediately after the interface + // comes up. We leave it to the network extension to also poke magicDNS directly + // via [dns.Manager.RecompileDNSConfig] when it detects any change in the + // nameservers. + // + // TODO: On Android, Darwin-tailscaled, and openbsd, why do we need this? + if delta.RebindLikelyRequired && up { switch runtime.GOOS { case "linux", "android", "ios", "darwin", "openbsd": e.wgLock.Lock() @@ -1375,15 +1385,23 @@ func (e *userspaceEngine) linkChange(delta *netmon.ChangeDelta) { } } + e.magicConn.SetNetworkUp(up) + why := "link-change-minor" - if changed { + if delta.RebindLikelyRequired { why = "link-change-major" metricNumMajorChanges.Add(1) - e.magicConn.Rebind() } else { metricNumMinorChanges.Add(1) } - e.magicConn.ReSTUN(why) + + // If we're up and it's a minor change, just send a STUN ping + if up { + if delta.RebindLikelyRequired { + e.magicConn.Rebind() + } + e.magicConn.ReSTUN(why) + } } func (e *userspaceEngine) SetNetworkMap(nm *netmap.NetworkMap) {