From 830f641c6be3025c445284236babc6a67a3944b4 Mon Sep 17 00:00:00 2001 From: David Anderson Date: Wed, 6 Oct 2021 10:18:12 -0700 Subject: [PATCH] wgengine/magicsock: update discokeys on netmap change. Fixes #3008. Signed-off-by: David Anderson --- wgengine/magicsock/magicsock.go | 35 ++++++-- wgengine/magicsock/magicsock_test.go | 115 ++++++++++++++++++++++----- 2 files changed, 121 insertions(+), 29 deletions(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 165811181..d901c8e09 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -2072,6 +2072,7 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) { } if ep, ok := c.peerMap.endpointForDiscoKey(n.DiscoKey); ok && ep.publicKey == n.Key { ep.updateFromNode(n) + c.peerMap.upsertDiscoEndpoint(ep) // maybe update discokey mappings in peerMap } else if ep != nil { // Endpoint no longer belongs to the same node. We'll // create the new endpoint below. @@ -2095,6 +2096,7 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) { for _, n := range nm.Peers { if ep, ok := c.peerMap.endpointForNodeKey(n.Key); ok { ep.updateFromNode(n) + c.peerMap.upsertDiscoEndpoint(ep) // maybe update discokey mappings in peerMap continue } @@ -2156,6 +2158,13 @@ func (c *Conn) SetNetworkMap(nm *netmap.NetworkMap) { } }) } + + // discokeys might have changed in the above. Discard unused cached keys. + for discoKey := range c.sharedDiscoKey { + if _, ok := c.peerMap.endpointForDiscoKey(discoKey); !ok { + delete(c.sharedDiscoKey, discoKey) + } + } } func (c *Conn) wantDerpLocked() bool { return c.derpMap != nil } @@ -3415,6 +3424,12 @@ func (de *endpoint) updateFromNode(n *tailcfg.Node) { de.mu.Lock() defer de.mu.Unlock() + if de.discoKey != n.DiscoKey { + de.c.logf("[v1] magicsock: disco: node %s changed from discokey %s to %s", de.publicKey.ShortString(), de.discoKey, n.DiscoKey) + de.discoKey = n.DiscoKey + de.discoShort = de.discoKey.ShortString() + de.resetLocked() + } if n.DERP == "" { de.derpAddr = netaddr.IPPort{} } else { @@ -3702,8 +3717,18 @@ func (de *endpoint) stopAndReset() { de.c.logf("[v1] magicsock: doing cleanup for discovery key %x", de.discoKey[:]) - // Zero these fields so if the user re-starts the network, the discovery - // state isn't a mix of before & after two sessions. + de.resetLocked() + if de.heartBeatTimer != nil { + de.heartBeatTimer.Stop() + de.heartBeatTimer = nil + } + de.pendingCLIPings = nil +} + +// resetLocked clears all the endpoint's p2p state, reverting it to a +// DERP-only endpoint. It does not stop the endpoint's heartbeat +// timer, if one is running. +func (de *endpoint) resetLocked() { de.lastSend = 0 de.lastFullPing = 0 de.bestAddr = addrLatency{} @@ -3712,15 +3737,9 @@ func (de *endpoint) stopAndReset() { for _, es := range de.endpointState { es.lastPing = 0 } - for txid, sp := range de.sentPing { de.removeSentPingLocked(txid, sp) } - if de.heartBeatTimer != nil { - de.heartBeatTimer.Stop() - de.heartBeatTimer = nil - } - de.pendingCLIPings = nil } func (de *endpoint) numStopAndReset() int64 { diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index ba4828ee1..2a54f9dce 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -136,13 +136,17 @@ type magicStack struct { // friends. You need to call conn.SetNetworkMap and dev.Reconfig // before anything interesting happens. func newMagicStack(t testing.TB, logf logger.Logf, l nettype.PacketListener, derpMap *tailcfg.DERPMap) *magicStack { - t.Helper() - privateKey, err := wgkey.NewPrivate() if err != nil { t.Fatalf("generating private key: %v", err) } + return newMagicStackWithKey(t, logf, l, derpMap, privateKey) +} + +func newMagicStackWithKey(t testing.TB, logf logger.Logf, l nettype.PacketListener, derpMap *tailcfg.DERPMap, privateKey wgkey.Private) *magicStack { + t.Helper() + epCh := make(chan []tailcfg.Endpoint, 100) // arbitrary conn, err := NewConn(Options{ Logf: logf, @@ -647,6 +651,76 @@ func TestNoDiscoKey(t *testing.T) { } } +func TestDiscokeyChange(t *testing.T) { + tstest.PanicOnLog() + tstest.ResourceCheck(t) + + derpMap, cleanup := runDERPAndStun(t, t.Logf, localhostListener{}, netaddr.IPv4(127, 0, 0, 1)) + defer cleanup() + + m1Key, err := wgkey.NewPrivate() + if err != nil { + t.Fatalf("generating nodekey: %v", err) + } + m1 := newMagicStackWithKey(t, t.Logf, localhostListener{}, derpMap, m1Key) + defer m1.Close() + m2 := newMagicStack(t, t.Logf, localhostListener{}, derpMap) + defer m2.Close() + + var ( + mu sync.Mutex + // Start with some random discoKey that isn't actually m1's key, + // to simulate m2 coming up with knowledge of an old, expired + // discokey. We'll switch to the correct one later in the test. + m1DiscoKey = tailcfg.DiscoKey(key.NewPrivate().Public()) + ) + setm1Key := func(idx int, nm *netmap.NetworkMap) { + if idx != 1 { + // only mutate m2's netmap + return + } + if len(nm.Peers) != 1 { + // m1 not in netmap yet. + return + } + mu.Lock() + defer mu.Unlock() + nm.Peers[0].DiscoKey = m1DiscoKey + } + + cleanupMesh := meshStacks(t.Logf, setm1Key, m1, m2) + defer cleanupMesh() + + // Wait for both peers to know about each other. + for { + if s1 := m1.Status(); len(s1.Peer) != 1 { + time.Sleep(10 * time.Millisecond) + continue + } + if s2 := m2.Status(); len(s2.Peer) != 1 { + time.Sleep(10 * time.Millisecond) + continue + } + break + } + + mu.Lock() + m1DiscoKey = m1.conn.DiscoPublicKey() + mu.Unlock() + + // Manually trigger an endpoint update to meshStacks, so it hands + // m2 a new netmap. + m1.conn.mu.Lock() + m1.epCh <- m1.conn.lastEndpoints + m1.conn.mu.Unlock() + + cleanup = newPinger(t, t.Logf, m1, m2) + defer cleanup() + + mustDirect(t, t.Logf, m1, m2) + mustDirect(t, t.Logf, m2, m1) +} + func TestActiveDiscovery(t *testing.T) { t.Run("simple_internet", func(t *testing.T) { t.Parallel() @@ -878,28 +952,27 @@ func testActiveDiscovery(t *testing.T, d *devices) { // Everything is now up and running, active discovery should find // a direct path between our peers. Wait for it to switch away // from DERP. + mustDirect(t, logf, m1, m2) + mustDirect(t, logf, m2, m1) - mustDirect := func(m1, m2 *magicStack) { - lastLog := time.Now().Add(-time.Minute) - // See https://github.com/tailscale/tailscale/issues/654 for a discussion of this deadline. - for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) { - pst := m1.Status().Peer[m2.Public()] - if pst.CurAddr != "" { - logf("direct link %s->%s found with addr %s", m1, m2, pst.CurAddr) - return - } - if now := time.Now(); now.Sub(lastLog) > time.Second { - logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs) - lastLog = now - } + logf("starting cleanup") +} + +func mustDirect(t *testing.T, logf logger.Logf, m1, m2 *magicStack) { + lastLog := time.Now().Add(-time.Minute) + // See https://github.com/tailscale/tailscale/issues/654 for a discussion of this deadline. + for deadline := time.Now().Add(10 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) { + pst := m1.Status().Peer[m2.Public()] + if pst.CurAddr != "" { + logf("direct link %s->%s found with addr %s", m1, m2, pst.CurAddr) + return + } + if now := time.Now(); now.Sub(lastLog) > time.Second { + logf("no direct path %s->%s yet, addrs %v", m1, m2, pst.Addrs) + lastLog = now } - t.Errorf("magicsock did not find a direct path from %s to %s", m1, m2) } - - mustDirect(m1, m2) - mustDirect(m2, m1) - - logf("starting cleanup") + t.Errorf("magicsock did not find a direct path from %s to %s", m1, m2) } func testTwoDevicePing(t *testing.T, d *devices) {