From 0aa55bffce3ae1dd4eb695bf0aada34438a7e143 Mon Sep 17 00:00:00 2001 From: Denton Gentry Date: Sun, 10 Jan 2021 06:50:35 -0800 Subject: [PATCH] magicsock: test error case in derpWriteChanOfAddr In derpWriteChanOfAddr when we call derphttp.NewRegionClient(), there is a check of whether the connection is already errored and if so it returns before grabbing the lock. The lock might already be held and would be a deadlock. This corner case is not being reliably exercised by other tests. This shows up in code coverage reports, the lines of code in derpWriteChanOfAddr are alternately added and subtracted from code coverage. Add a test to specifically exercise this code path, and verify that it doesn't deadlock. This is the best tradeoff I could come up with: + the moment code calls Err() to check if there is an error, we grab the lock to make sure it would deadlock if it tries to grab the lock itself. + if a new call to Err() is added in this code path, only the first one will be covered and the rest will not be tested. + this test doesn't verify whether code is checking for Err() in the right place, which ideally I guess it would. Signed-off-by: Denton Gentry --- wgengine/magicsock/magicsock.go | 2 +- wgengine/magicsock/magicsock_test.go | 83 ++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index d80307e5f..7b451a0b7 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -1170,7 +1170,7 @@ func (c *Conn) derpWriteChanOfAddr(addr netaddr.IPPort, peer key.Public) chan<- return nil } - // Note that derphttp.NewClient does not dial the server + // Note that derphttp.NewRegionClient does not dial the server // so it is safe to do under the mu lock. dc := derphttp.NewRegionClient(c.privateKey, c.logf, func() *tailcfg.DERPRegion { if c.connCtx.Err() != nil { diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 7aac0572a..1609de008 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -11,6 +11,7 @@ import ( "crypto/tls" "encoding/binary" "encoding/json" + "errors" "fmt" "io/ioutil" "net" @@ -542,6 +543,88 @@ func TestDeviceStartStop(t *testing.T) { dev.Close() } +type testConnClosingContext struct { + parent context.Context + mu *sync.Mutex +} + +func (c *testConnClosingContext) Deadline() (deadline time.Time, ok bool) { + d, o := c.parent.Deadline() + return d, o +} +func (c *testConnClosingContext) Done() <-chan struct{} { + return c.parent.Done() +} +func (c *testConnClosingContext) Err() error { + // Deliberately deadlock if anything grabs the lock after checking Err() + c.mu.Lock() + return errors.New("testConnClosingContext error") +} +func (c *testConnClosingContext) Value(key interface{}) interface{} { + return c.parent.Value(key) +} +func (*testConnClosingContext) String() string { + return "testConnClosingContext" +} + +func TestConnClosing(t *testing.T) { + privateKey, err := wgkey.NewPrivate() + if err != nil { + t.Fatalf("generating private key: %v", err) + } + + epCh := make(chan []string, 100) + conn, err := NewConn(Options{ + Logf: t.Logf, + PacketListener: nettype.Std{}, + EndpointsFunc: func(eps []string) { + epCh <- eps + }, + SimulatedNetwork: false, + }) + if err != nil { + t.Fatalf("constructing magicsock: %v", err) + } + + derpMap, cleanup := runDERPAndStun(t, t.Logf, nettype.Std{}, netaddr.IPv4(127, 0, 3, 1)) + defer cleanup() + + // The point of this test case is to exercise handling in derpWriteChanOfAddr() which + // returns early if connCtx.Err() returns non-nil, to avoid a deadlock on conn.mu. + // We swap in a context which always returns an error, and deliberately grabs the lock + // to cause a deadlock if magicsock.go tries to acquire the lock after calling Err(). + closingCtx := testConnClosingContext{parent: conn.connCtx, mu: &conn.mu} + conn.connCtx = &closingCtx + conn.Start() + + conn.SetDERPMap(derpMap) + if err := conn.SetPrivateKey(privateKey); err != nil { + t.Fatalf("setting private key in magicsock: %v", err) + } + + tun := tuntest.NewChannelTUN() + tsTun := tstun.WrapTUN(t.Logf, tun.TUN()) + tsTun.SetFilter(filter.NewAllowAllForTest(t.Logf)) + + dev := device.NewDevice(tsTun, &device.DeviceOptions{ + Logger: &device.Logger{ + Debug: logger.StdLogger(t.Logf), + Info: logger.StdLogger(t.Logf), + Error: logger.StdLogger(t.Logf), + }, + CreateEndpoint: conn.CreateEndpoint, + CreateBind: conn.CreateBind, + SkipBindUpdate: true, + }) + + dev.Up() + conn.WaitReady(t) + + // We don't assert any failures within the test itself. If derpWriteChanOfAddr tries to + // grab the lock it will deadlock, and conn.WaitReady(t) will call t.Fatal() after timeout. + // (verified by deliberately breaking derpWriteChanOfAddr) +} + func makeNestable(t *testing.T) (logf logger.Logf, setT func(t *testing.T)) { var mu sync.RWMutex cur := t