From b3d6704aa3e9903166331570b58b5550a6c5a812 Mon Sep 17 00:00:00 2001 From: Josh Bleecher Snyder Date: Wed, 17 Nov 2021 17:30:27 -0800 Subject: [PATCH] wgengine/magicsock: fix data race on endpoint.discoKey endpoint.discoKey is protected by endpoint.mu. endpoint.sendDiscoMessage was reading it without holding the lock. This showed up in a CI failure and is readily reproducible locally. The fix is in two parts. First, for Conn.enqueueCallMeMaybe, eliminate the one-line helper method endpoint.sendDiscoMessage; call Conn.sendDiscoMessage directly. This makes it more natural to read endpoint.discoKey in a context in which endpoint.mu is already held. Second, for endpoint.sendDiscoPing, explicitly pass the disco key as an argument. Again, this makes it easier to read endpoint.discoKey in a context in which endpoint.mu is already held. Signed-off-by: Josh Bleecher Snyder --- wgengine/magicsock/magicsock.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index ded52577e..82e756568 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -2102,7 +2102,7 @@ func (c *Conn) enqueueCallMeMaybe(derpAddr netaddr.IPPort, de *endpoint) { for _, ep := range c.lastEndpoints { eps = append(eps, ep.Addr) } - go de.sendDiscoMessage(derpAddr, &disco.CallMeMaybe{MyNumber: eps}, discoLog) + go de.c.sendDiscoMessage(derpAddr, de.publicKey, de.discoKey, &disco.CallMeMaybe{MyNumber: eps}, discoLog) } // discoInfoLocked returns the previous or new discoInfo for k. @@ -3544,13 +3544,16 @@ func (de *endpoint) removeSentPingLocked(txid stun.TxID, sp sentPing) { delete(de.sentPing, txid) } -// sendDiscoPing sends a ping with the provided txid to ep. +// sendDiscoPing sends a ping with the provided txid to ep using de's discoKey. // -// The caller (startPingLocked) should've already been recorded the ping in +// The caller (startPingLocked) should've already recorded the ping in // sentPing and set up the timer. -func (de *endpoint) sendDiscoPing(ep netaddr.IPPort, txid stun.TxID, logLevel discoLogLevel) { +// +// The caller should use de.discoKey as the discoKey argument. +// It is passed in so that sendDiscoPing doesn't need to lock de.mu. +func (de *endpoint) sendDiscoPing(ep netaddr.IPPort, discoKey key.DiscoPublic, txid stun.TxID, logLevel discoLogLevel) { selfPubKey, _ := de.c.publicKeyAtomic.Load().(key.NodePublic) - sent, _ := de.sendDiscoMessage(ep, &disco.Ping{ + sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{ TxID: [12]byte(txid), NodeKey: selfPubKey, }, logLevel) @@ -3606,7 +3609,7 @@ func (de *endpoint) startPingLocked(ep netaddr.IPPort, now mono.Time, purpose di if purpose == pingHeartbeat { logLevel = discoVerboseLog } - go de.sendDiscoPing(ep, txid, logLevel) + go de.sendDiscoPing(ep, de.discoKey, txid, logLevel) } func (de *endpoint) sendPingsLocked(now mono.Time, sendCallMeMaybe bool) { @@ -3644,10 +3647,6 @@ func (de *endpoint) sendPingsLocked(now mono.Time, sendCallMeMaybe bool) { } } -func (de *endpoint) sendDiscoMessage(dst netaddr.IPPort, dm disco.Message, logLevel discoLogLevel) (sent bool, err error) { - return de.c.sendDiscoMessage(dst, de.publicKey, de.discoKey, dm, logLevel) -} - func (de *endpoint) updateFromNode(n *tailcfg.Node) { if n == nil { panic("nil node when updating disco ep")