From 37903a9056d664ddbc52cce3324dc0610d81862b Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Mon, 27 Jul 2020 12:23:14 -0700 Subject: [PATCH] wgengine/magicsock: fix occasional deadlock on Conn.Close on c.derpStarted The deadlock was: * Conn.Close was called, which acquired c.mu * Then this goroutine scheduled: if firstDerp { startGate = c.derpStarted go func() { dc.Connect(ctx) close(c.derpStarted) }() } * The getRegion hook for that derphttp.Client then ran, which also tries to acquire c.mu. This change makes that hook first see if we're already in a closing state and then it can pretend that region doesn't exist. --- wgengine/magicsock/magicsock.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 929ac0069..08cb38219 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -139,7 +139,7 @@ type Conn struct { netMap *controlclient.NetworkMap privateKey key.Private myDerp int // nearest DERP region ID; 0 means none/unknown - derpStarted chan struct{} // closed on first connection to DERP; for tests + derpStarted chan struct{} // closed on first connection to DERP; for tests & cleaner Close activeDerp map[int]activeDerp // DERP regionID -> connection to a node in that region prevDerp map[int]*syncs.WaitGroupChan @@ -1006,6 +1006,11 @@ func (c *Conn) derpWriteChanOfAddr(addr netaddr.IPPort, peer key.Public) chan<- // Note that derphttp.NewClient does not dial the server // so it is safe to do under the mu lock. dc := derphttp.NewRegionClient(c.privateKey, c.logf, func() *tailcfg.DERPRegion { + if c.connCtx.Err() != nil { + // If we're closing, don't try to acquire the lock. + // We might already be in Conn.Close and the Lock would deadlock. + return nil + } c.mu.Lock() defer c.mu.Unlock() if c.derpMap == nil {