From e975cb6b053e24499309dac2de5b6c9d6f930dc5 Mon Sep 17 00:00:00 2001
From: Andrew Dunham <andrew@tailscale.com>
Date: Wed, 2 Nov 2022 15:45:20 -0400
Subject: [PATCH] ipn/ipnlocal: fix test flake when we log after a test
 completes

This switches from using an atomic.Bool to a mutex for reasons that are
described in the commit, and should address the flakes that we're still
seeing.

Fixes #3020

Change-Id: I4e39471c0eb95886db03020ea1ccf688c7564a11
Signed-off-by: Andrew Dunham <andrew@tailscale.com>
---
 ipn/ipnlocal/state_test.go | 15 ++++--------
 tstest/log.go              | 47 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/ipn/ipnlocal/state_test.go b/ipn/ipnlocal/state_test.go
index 01db3243b..de2964500 100644
--- a/ipn/ipnlocal/state_test.go
+++ b/ipn/ipnlocal/state_test.go
@@ -17,6 +17,7 @@ import (
 	"tailscale.com/ipn"
 	"tailscale.com/ipn/store/mem"
 	"tailscale.com/tailcfg"
+	"tailscale.com/tstest"
 	"tailscale.com/types/empty"
 	"tailscale.com/types/key"
 	"tailscale.com/types/logger"
@@ -92,7 +93,6 @@ type mockControl struct {
 	opts       controlclient.Options
 	logfActual logger.Logf
 	statusFunc func(controlclient.Status)
-	preventLog atomic.Bool
 
 	mu          sync.Mutex
 	calls       []string
@@ -109,7 +109,7 @@ func newMockControl(tb testing.TB) *mockControl {
 }
 
 func (cc *mockControl) logf(format string, args ...any) {
-	if cc.preventLog.Load() || cc.logfActual == nil {
+	if cc.logfActual == nil {
 		return
 	}
 	cc.logfActual(format, args...)
@@ -282,7 +282,7 @@ func (cc *mockControl) UpdateEndpoints(endpoints []tailcfg.Endpoint) {
 func TestStateMachine(t *testing.T) {
 	c := qt.New(t)
 
-	logf := t.Logf
+	logf := tstest.WhileTestRunningLogger(t)
 	store := new(testStateStorage)
 	e, err := wgengine.NewFakeUserspaceEngine(logf, 0)
 	if err != nil {
@@ -297,7 +297,6 @@ func TestStateMachine(t *testing.T) {
 
 	cc := newMockControl(t)
 	cc.statusFunc = b.setClientStatus
-	t.Cleanup(func() { cc.preventLog.Store(true) }) // hacky way to pacify issue 3020
 
 	b.SetControlClientGetterForTesting(func(opts controlclient.Options) (controlclient.Client, error) {
 		cc.mu.Lock()
@@ -316,9 +315,6 @@ func TestStateMachine(t *testing.T) {
 	notifies.expect(0)
 
 	b.SetNotifyCallback(func(n ipn.Notify) {
-		if cc.preventLog.Load() {
-			return
-		}
 		if n.State != nil ||
 			n.Prefs.Valid() ||
 			n.BrowseToURL != nil ||
@@ -329,7 +325,6 @@ func TestStateMachine(t *testing.T) {
 			logf("\n(ignored) %v\n\n", n)
 		}
 	})
-	t.Cleanup(func() { b.SetNotifyCallback(nil) }) // hacky way to pacify issue 3020
 
 	// Check that it hasn't called us right away.
 	// The state machine should be idle until we call Start().
@@ -924,7 +919,7 @@ func TestStateMachine(t *testing.T) {
 }
 
 func TestEditPrefsHasNoKeys(t *testing.T) {
-	logf := t.Logf
+	logf := tstest.WhileTestRunningLogger(t)
 	store := new(testStateStorage)
 	e, err := wgengine.NewFakeUserspaceEngine(logf, 0)
 	if err != nil {
@@ -1006,7 +1001,7 @@ func (s *testStateStorage) sawWrite() bool {
 func TestWGEngineStatusRace(t *testing.T) {
 	t.Skip("test fails")
 	c := qt.New(t)
-	logf := t.Logf
+	logf := tstest.WhileTestRunningLogger(t)
 	eng, err := wgengine.NewFakeUserspaceEngine(logf, 0)
 	c.Assert(err, qt.IsNil)
 	t.Cleanup(eng.Close)
diff --git a/tstest/log.go b/tstest/log.go
index 3f30dbdf9..6f80da6be 100644
--- a/tstest/log.go
+++ b/tstest/log.go
@@ -145,3 +145,50 @@ func (ml *MemLogger) String() string {
 	defer ml.Unlock()
 	return ml.Buffer.String()
 }
+
+// WhileTestRunningLogger returns a logger.Logf that logs to t.Logf until the
+// test finishes, at which point it no longer logs anything.
+func WhileTestRunningLogger(t testing.TB) logger.Logf {
+	var (
+		mu   sync.RWMutex
+		done bool
+	)
+
+	logger := func(format string, args ...any) {
+		t.Helper()
+
+		mu.RLock()
+		defer mu.RUnlock()
+
+		if done {
+			return
+		}
+		t.Logf(format, args...)
+	}
+
+	// t.Cleanup is run before the test is marked as done, so by acquiring
+	// the mutex and then disabling logs, we know that all existing log
+	// functions have completed, and that no future calls to the logger
+	// will log something.
+	//
+	// We can't do this with an atomic bool, since it's possible to
+	// observe the following race:
+	//
+	//    test goroutine                goroutine 1
+	//    --------------                -----------
+	//                                  check atomic, testFinished = no
+	//    test finishes
+	//    run t.Cleanups
+	//    set testFinished = true
+	//                                  call t.Logf
+	//                                  panic
+	//
+	// Using a mutex ensures that all actions in goroutine 1 in the
+	// sequence above occur atomically, and thus should not panic.
+	t.Cleanup(func() {
+		mu.Lock()
+		defer mu.Unlock()
+		done = true
+	})
+	return logger
+}