diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 16ef49915..d70d568fe 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1360,13 +1360,15 @@ func wgRecvAddr(e conn.Endpoint, ipp netaddr.IPPort, addr *net.UDPAddr) *net.UDP return ipp.UDPAddr() } -// noteRecvActivity calls the magicsock.Conn.noteRecvActivity hook if -// e is a discovery-capable peer. +// noteRecvActivityFromEndpoint calls the c.noteRecvActivity hook if +// e is a discovery-capable peer and this is the first receive activity +// it's got in awhile (in last 10 seconds). // // This should be called whenever a packet arrives from e. -func noteRecvActivity(e conn.Endpoint) { - if de, ok := e.(*discoEndpoint); ok { - de.onRecvActivity() +func (c *Conn) noteRecvActivityFromEndpoint(e conn.Endpoint) { + de, ok := e.(*discoEndpoint) + if ok && c.noteRecvActivity != nil && de.isFirstRecvActivityInAwhile() { + c.noteRecvActivity(de.discoKey) } } @@ -1377,7 +1379,7 @@ Top: c.bufferedIPv4From = netaddr.IPPort{} addr = from.UDPAddr() ep := c.findEndpoint(from, addr) - noteRecvActivity(ep) + c.noteRecvActivityFromEndpoint(ep) return copy(b, c.bufferedIPv4Packet), ep, wgRecvAddr(ep, from, addr), nil } @@ -1489,7 +1491,7 @@ Top: ep = c.findEndpoint(ipp, addr) } if !didNoteRecvActivity { - noteRecvActivity(ep) + c.noteRecvActivityFromEndpoint(ep) } return n, ep, wgRecvAddr(ep, ipp, addr), nil } @@ -1517,7 +1519,7 @@ func (c *Conn) ReceiveIPv6(b []byte) (int, conn.Endpoint, *net.UDPAddr, error) { } ep := c.findEndpoint(ipp, addr) - noteRecvActivity(ep) + c.noteRecvActivityFromEndpoint(ep) return n, ep, wgRecvAddr(ep, ipp, addr), nil } } @@ -1615,8 +1617,9 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { return false } - de, ok := c.endpointOfDisco[sender] - if !ok { + needsRecvActivityCall := false + de, endpointFound0 := c.endpointOfDisco[sender] + if !endpointFound0 { // We don't have an active endpoint for this sender but we knew about the node, so // it's an idle endpoint that doesn't yet exist in the wireguard config. We now have // to notify the userspace engine (via noteRecvActivity) so wireguard-go can create @@ -1628,6 +1631,11 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { c.logf("magicsock: [unexpected] have node without endpoint, without c.noteRecvActivity hook") return false } + needsRecvActivityCall = true + } else { + needsRecvActivityCall = de.isFirstRecvActivityInAwhile() + } + if needsRecvActivityCall && c.noteRecvActivity != nil { // We can't hold Conn.mu while calling noteRecvActivity. // noteRecvActivity acquires userspaceEngine.wgLock (and per our // lock ordering rules: wgLock must come first), and also calls @@ -1651,7 +1659,9 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { c.logf("magicsock: [unexpected] lazy endpoint not created for %v, %v", peerNode.Key.ShortString(), sender.ShortString()) return false } - c.logf("magicsock: lazy endpoint created via disco message for %v, %v", peerNode.Key.ShortString(), sender.ShortString()) + if !endpointFound0 { + c.logf("magicsock: lazy endpoint created via disco message for %v, %v", peerNode.Key.ShortString(), sender.ShortString()) + } } // First, do we even know (and thus care) about this sender? If not, @@ -2676,17 +2686,6 @@ func (c *Conn) CreateEndpoint(pubKey [32]byte, addrs string) (conn.Endpoint, err sentPing: map[stun.TxID]sentPing{}, endpointState: map[netaddr.IPPort]*endpointState{}, } - lastRecvTime := new(int64) // atomic - de.onRecvActivity = func() { - now := time.Now().Unix() - old := atomic.LoadInt64(lastRecvTime) - if old == 0 || old <= now-10 { - atomic.StoreInt64(lastRecvTime, now) - if c.noteRecvActivity != nil { - c.noteRecvActivity(de.discoKey) - } - } - } de.initFakeUDPAddr() de.updateFromNode(c.nodeOfDisco[de.discoKey]) c.endpointOfDisco[de.discoKey] = de @@ -2958,6 +2957,9 @@ func udpAddrDebugString(ua net.UDPAddr) string { // discoEndpoint is a wireguard/conn.Endpoint for new-style peers that // advertise a DiscoKey and participate in active discovery. type discoEndpoint struct { + // atomically accessed; declared first for alignment reasons + lastRecvUnixAtomic int64 + // These fields are initialized once and never modified. c *Conn publicKey tailcfg.NodeKey // peer public key (for WireGuard + DERP) @@ -2966,7 +2968,6 @@ type discoEndpoint struct { fakeWGAddr netaddr.IPPort // the UDP address we tell wireguard-go we're using fakeWGAddrStd *net.UDPAddr // the *net.UDPAddr form of fakeWGAddr wgEndpointHostPort string // string from CreateEndpoint: ".disco.tailscale:12345" - onRecvActivity func() // Owned by Conn.mu: lastPingFrom netaddr.IPPort @@ -3063,6 +3064,19 @@ func (de *discoEndpoint) initFakeUDPAddr() { de.fakeWGAddrStd = de.fakeWGAddr.UDPAddr() } +// isFirstRecvActivityInAwhile notes that receive activity has occured for this +// endpoint and reports whether it's been at least 10 seconds since the last +// receive activity (including having never received from this peer before). +func (de *discoEndpoint) isFirstRecvActivityInAwhile() bool { + now := time.Now().Unix() + old := atomic.LoadInt64(&de.lastRecvUnixAtomic) + if old <= now-10 { + atomic.StoreInt64(&de.lastRecvUnixAtomic, now) + return true + } + return false +} + // String exists purely so wireguard-go internals can log.Printf("%v") // its internal conn.Endpoints and we don't end up with data races // from fmt (via log) reading mutex fields and such. diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 2187d0d1a..d2ce76459 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -22,6 +22,7 @@ import ( "sync" "testing" "time" + "unsafe" "github.com/google/go-cmp/cmp" "github.com/tailscale/wireguard-go/device" @@ -1322,3 +1323,17 @@ func stringifyConfig(cfg wgcfg.Config) string { } return string(j) } + +func TestDiscoEndpointAlignment(t *testing.T) { + var de discoEndpoint + off := unsafe.Offsetof(de.lastRecvUnixAtomic) + if off%8 != 0 { + t.Fatalf("lastRecvUnixAtomic is not 8-byte aligned") + } + if !de.isFirstRecvActivityInAwhile() { // verify this doesn't panic on 32-bit + t.Error("expected true") + } + if de.isFirstRecvActivityInAwhile() { + t.Error("expected false on second call") + } +} diff --git a/wgengine/userspace.go b/wgengine/userspace.go index a6117eefd..f8cba23d7 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -87,6 +87,7 @@ type userspaceEngine struct { logf logger.Logf reqCh chan struct{} waitCh chan struct{} // chan is closed when first Close call completes; contrast with closing bool + timeNow func() time.Time tundev *tstun.TUN wgdev *device.Device router router.Router @@ -94,6 +95,8 @@ type userspaceEngine struct { magicConn *magicsock.Conn linkMon *monitor.Mon + testMaybeReconfigHook func() // for tests; if non-nil, fires if maybeReconfigWireguardLocked called + // localAddrs is the set of IP addresses assigned to the local // tunnel interface. It's used to reflect local packets // incorrectly sent to us. @@ -199,6 +202,7 @@ func newUserspaceEngineAdvanced(conf EngineConfig) (_ Engine, reterr error) { logf := conf.Logf e := &userspaceEngine{ + timeNow: time.Now, logf: logf, reqCh: make(chan struct{}, 1), waitCh: make(chan struct{}), @@ -622,12 +626,12 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) { e.wgLock.Lock() defer e.wgLock.Unlock() - now := time.Now() was, ok := e.recvActivityAt[dk] if !ok { // Not a trimmable peer we care about tracking. (See isTrimmablePeer) return } + now := e.timeNow() e.recvActivityAt[dk] = now // If the last activity time jumped a bunch (say, at least @@ -636,7 +640,7 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) { // lazyPeerIdleThreshold without the divide by 2, but // maybeReconfigWireguardLocked is cheap enough to call every // couple minutes (just not on every packet). - if was.IsZero() || now.Sub(was) < -lazyPeerIdleThreshold/2 { + if was.IsZero() || now.Sub(was) > lazyPeerIdleThreshold/2 { e.maybeReconfigWireguardLocked() } } @@ -677,6 +681,11 @@ func discoKeyFromPeer(p *wgcfg.Peer) tailcfg.DiscoKey { // e.wgLock must be held. func (e *userspaceEngine) maybeReconfigWireguardLocked() error { + if hook := e.testMaybeReconfigHook; hook != nil { + hook() + return nil + } + full := e.lastCfgFull // Compute a minimal config to pass to wireguard-go @@ -689,7 +698,7 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error { // the past 5 minutes. That's more than WireGuard's key // rotation time anyway so it's no harm if we remove it // later if it's been inactive. - activeCutoff := time.Now().Add(-lazyPeerIdleThreshold) + activeCutoff := e.timeNow().Add(-lazyPeerIdleThreshold) // Not all peers can be trimmed from the network map (see // isTrimmablePeer). For those are are trimmable, keep track @@ -765,7 +774,7 @@ func (e *userspaceEngine) updateActivityMapsLocked(trackDisco []tailcfg.DiscoKey if fn == nil { // This is the func that gets run on every outgoing packet for tracked IPs: fn = func() { - now := time.Now().Unix() + now := e.timeNow().Unix() old := atomic.LoadInt64(timePtr) // How long's it been since we last sent a packet? diff --git a/wgengine/userspace_test.go b/wgengine/userspace_test.go new file mode 100644 index 000000000..545148fce --- /dev/null +++ b/wgengine/userspace_test.go @@ -0,0 +1,88 @@ +// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package wgengine + +import ( + "bytes" + "fmt" + "testing" + "time" + + "tailscale.com/tailcfg" + "tailscale.com/types/key" + "tailscale.com/wgengine/tstun" +) + +func TestNoteReceiveActivity(t *testing.T) { + now := time.Unix(1, 0) + tick := func(d time.Duration) { now = now.Add(d) } + var logBuf bytes.Buffer + + confc := make(chan bool, 1) + gotConf := func() bool { + select { + case <-confc: + return true + default: + return false + } + } + e := &userspaceEngine{ + timeNow: func() time.Time { return now }, + recvActivityAt: map[tailcfg.DiscoKey]time.Time{}, + logf: func(format string, a ...interface{}) { + fmt.Fprintf(&logBuf, format, a...) + }, + tundev: new(tstun.TUN), + testMaybeReconfigHook: func() { confc <- true }, + } + ra := e.recvActivityAt + + dk := tailcfg.DiscoKey(key.NewPrivate().Public()) + + // Activity on an untracked key should do nothing. + e.noteReceiveActivity(dk) + if len(ra) != 0 { + t.Fatalf("unexpected growth in map: now has %d keys; want 0", len(ra)) + } + if logBuf.Len() != 0 { + t.Fatalf("unexpected log write (and thus activity): %s", logBuf.Bytes()) + } + + // Now track it and expect updates. + ra[dk] = time.Time{} + e.noteReceiveActivity(dk) + if len(ra) != 1 { + t.Fatalf("unexpected growth in map: now has %d keys; want 1", len(ra)) + } + if got := ra[dk]; got != now { + t.Fatalf("time in map = %v; want %v", got, now) + } + if !gotConf() { + t.Fatalf("didn't get expected reconfig") + } + + // With updates 1 second apart, don't expect a reconfig. + for i := 0; i < 300; i++ { + tick(time.Second) + e.noteReceiveActivity(dk) + if len(ra) != 1 { + t.Fatalf("map len = %d; want 1", len(ra)) + } + if got := ra[dk]; got != now { + t.Fatalf("time in map = %v; want %v", got, now) + } + if gotConf() { + t.Fatalf("unexpected reconfig") + } + } + + // But if there's a big jump it should get an update. + tick(3 * time.Minute) + e.noteReceiveActivity(dk) + if !gotConf() { + t.Fatalf("expected config") + } +}