From 756d6a72bdc97a0110f920c0ce04aff6c497389d Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 26 Aug 2020 12:20:09 -0700 Subject: [PATCH] wgengine: lazily create peer wireguard configs more explicitly Rather than consider bigs jumps in last-received-from activity as a signal to possibly reconfigure the set of wireguard peers to have configured, instead just track the set of peers that are currently excluded from the configuration. Easier to reason about. Also adds a bit more logging. This might fix an error we saw on a machine running a recent unstable build: 2020-08-26 17:54:11.528033751 +0000 UTC: 8.6M/92.6M magicsock: [unexpected] lazy endpoint not created for [UcppE], d:42a770f678357249 2020-08-26 17:54:13.691305296 +0000 UTC: 8.7M/92.6M magicsock: DERP packet received from idle peer [UcppE]; created=false 2020-08-26 17:54:13.691383687 +0000 UTC: 8.7M/92.6M magicsock: DERP packet from unknown key: [UcppE] If it does happen again, though, we'll have more logs. --- wgengine/magicsock/magicsock.go | 1 + wgengine/userspace.go | 18 +++++++++++++---- wgengine/userspace_test.go | 35 ++++++++++++--------------------- 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 8e27b3ed3..b1f919270 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1765,6 +1765,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { needsRecvActivityCall = de.isFirstRecvActivityInAwhile() } if needsRecvActivityCall && c.noteRecvActivity != nil { + c.logf("magicsock: got disco message from idle peer, starting lazy conf for %v, %v", peerNode.Key.ShortString(), sender.ShortString()) // We can't hold Conn.mu while calling noteRecvActivity. // noteRecvActivity acquires userspaceEngine.wgLock (and per our // lock ordering rules: wgLock must come first), and also calls diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 7f7070b35..69eaff7d0 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -105,7 +105,8 @@ type userspaceEngine struct { lastEngineSigFull string // of full wireguard config lastEngineSigTrim string // of trimmed wireguard config recvActivityAt map[tailcfg.DiscoKey]time.Time - sentActivityAt map[packet.IP]*int64 // value is atomic int64 of unixtime + trimmedDisco map[tailcfg.DiscoKey]bool // set of disco keys of peers currently excluded from wireguard config + sentActivityAt map[packet.IP]*int64 // value is atomic int64 of unixtime destIPActivityFuncs map[packet.IP]func() mu sync.Mutex // guards following; see lock order comment below @@ -636,9 +637,11 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) { e.wgLock.Lock() defer e.wgLock.Unlock() - was, ok := e.recvActivityAt[dk] - if !ok { + if _, ok := e.recvActivityAt[dk]; !ok { // Not a trimmable peer we care about tracking. (See isTrimmablePeer) + if e.trimmedDisco[dk] { + e.logf("wgengine: [unexpected] noteReceiveActivity called on idle discokey %v that's not in recvActivityAt", dk.ShortString()) + } return } now := e.timeNow() @@ -650,7 +653,8 @@ func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) { // lazyPeerIdleThreshold without the divide by 2, but // maybeReconfigWireguardLocked is cheap enough to call every // couple minutes (just not on every packet). - if was.IsZero() || now.Sub(was) > lazyPeerIdleThreshold/2 { + if e.trimmedDisco[dk] { + e.logf("wgengine: idle peer %v now active, reconfiguring wireguard", dk.ShortString()) e.maybeReconfigWireguardLocked() } } @@ -718,6 +722,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error { trackDisco := make([]tailcfg.DiscoKey, 0, len(full.Peers)) trackIPs := make([]wgcfg.IP, 0, len(full.Peers)) + trimmedDisco := map[tailcfg.DiscoKey]bool{} // TODO: don't re-alloc this map each time + for i := range full.Peers { p := &full.Peers[i] if !isTrimmablePeer(p, len(full.Peers)) { @@ -730,6 +736,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error { trackIPs = append(trackIPs, tsIP) if e.isActiveSince(dk, tsIP, activeCutoff) { min.Peers = append(min.Peers, *p) + } else { + trimmedDisco[dk] = true } } @@ -738,6 +746,8 @@ func (e *userspaceEngine) maybeReconfigWireguardLocked() error { return nil } + e.trimmedDisco = trimmedDisco + e.updateActivityMapsLocked(trackDisco, trackIPs) e.logf("wgengine: Reconfig: configuring userspace wireguard config (with %d/%d peers)", len(min.Peers), len(full.Peers)) diff --git a/wgengine/userspace_test.go b/wgengine/userspace_test.go index 545148fce..3c0c7cb3d 100644 --- a/wgengine/userspace_test.go +++ b/wgengine/userspace_test.go @@ -17,7 +17,6 @@ import ( func TestNoteReceiveActivity(t *testing.T) { now := time.Unix(1, 0) - tick := func(d time.Duration) { now = now.Add(d) } var logBuf bytes.Buffer confc := make(chan bool, 1) @@ -37,6 +36,7 @@ func TestNoteReceiveActivity(t *testing.T) { }, tundev: new(tstun.TUN), testMaybeReconfigHook: func() { confc <- true }, + trimmedDisco: map[tailcfg.DiscoKey]bool{}, } ra := e.recvActivityAt @@ -51,7 +51,7 @@ func TestNoteReceiveActivity(t *testing.T) { t.Fatalf("unexpected log write (and thus activity): %s", logBuf.Bytes()) } - // Now track it and expect updates. + // Now track it, but don't mark it trimmed, so shouldn't update. ra[dk] = time.Time{} e.noteReceiveActivity(dk) if len(ra) != 1 { @@ -60,29 +60,20 @@ func TestNoteReceiveActivity(t *testing.T) { if got := ra[dk]; got != now { t.Fatalf("time in map = %v; want %v", got, now) } - if !gotConf() { - t.Fatalf("didn't get expected reconfig") - } - - // With updates 1 second apart, don't expect a reconfig. - for i := 0; i < 300; i++ { - tick(time.Second) - e.noteReceiveActivity(dk) - if len(ra) != 1 { - t.Fatalf("map len = %d; want 1", len(ra)) - } - if got := ra[dk]; got != now { - t.Fatalf("time in map = %v; want %v", got, now) - } - if gotConf() { - t.Fatalf("unexpected reconfig") - } + if gotConf() { + t.Fatalf("unexpected reconfig") } - // But if there's a big jump it should get an update. - tick(3 * time.Minute) + // Now mark it trimmed and expect an update. + e.trimmedDisco[dk] = true e.noteReceiveActivity(dk) + if len(ra) != 1 { + t.Fatalf("unexpected growth in map: now has %d keys; want 1", len(ra)) + } + if got := ra[dk]; got != now { + t.Fatalf("time in map = %v; want %v", got, now) + } if !gotConf() { - t.Fatalf("expected config") + t.Fatalf("didn't get expected reconfig") } }