From b8594dc9379ebbc63d861514a275aa8601fd13e1 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Tue, 21 Apr 2020 15:04:05 -0700 Subject: [PATCH] control/controlclient: fix deadlock in timeout+keepalive race Signed-off-by: Brad Fitzpatrick --- control/controlclient/direct.go | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index aa406c950..2162028f0 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -499,21 +499,25 @@ func (c *Direct) PollNetMap(ctx context.Context, maxPolls int, cb func(*NetworkM const pollTimeout = 120 * time.Second timeout := time.NewTimer(pollTimeout) timeoutReset := make(chan struct{}) - defer close(timeoutReset) + pollDone := make(chan struct{}) + defer close(pollDone) go func() { for { select { + case <-pollDone: + vlogf("netmap: ending timeout goroutine") + return case <-timeout.C: c.logf("map response long-poll timed out!") cancel() return - case _, ok := <-timeoutReset: - if !ok { - vlogf("netmap: ending timeout goroutine") - return // channel closed, shut down goroutine - } + case <-timeoutReset: if !timeout.Stop() { - <-timeout.C + select { + case <-timeout.C: + case <-pollDone: + return + } } vlogf("netmap: reset timeout timer") timeout.Reset(pollTimeout) @@ -551,7 +555,13 @@ func (c *Direct) PollNetMap(ctx context.Context, maxPolls int, cb func(*NetworkM } if resp.KeepAlive { vlogf("netmap: got keep-alive") - timeoutReset <- struct{}{} + select { + case timeoutReset <- struct{}{}: + vlogf("netmap: sent keep-alive timer reset") + case <-ctx.Done(): + c.logf("netmap: not resetting timer for keep-alive due to: %v", ctx.Err()) + return ctx.Err() + } continue } vlogf("netmap: got new map")