tstest/integration: add some debugging for TestAddPingRequest flakes

This fails pretty reliably with a lot of output now showing what's
happening:

TS_DEBUG_MAP=1 go test --failfast -v -run=Ping -race -count=20 ./tstest/integration --verbose-tailscaled

I haven't dug into the details yet, though.

Updates #2079
pull/2108/head
Brad Fitzpatrick 3 years ago
parent ee3df2f720
commit 0affcd4e12

@ -6,6 +6,7 @@ package integration
import ( import (
"bytes" "bytes"
"context"
crand "crypto/rand" crand "crypto/rand"
"crypto/tls" "crypto/tls"
"encoding/json" "encoding/json"
@ -46,11 +47,13 @@ import (
var ( var (
verboseLogCatcher = flag.Bool("verbose-log-catcher", false, "verbose log catcher logging") verboseLogCatcher = flag.Bool("verbose-log-catcher", false, "verbose log catcher logging")
verboseTailscaled = flag.Bool("verbose-tailscaled", false, "verbose tailscaled logging")
) )
var mainError atomic.Value // of error var mainError atomic.Value // of error
func TestMain(m *testing.M) { func TestMain(m *testing.M) {
flag.Parse()
v := m.Run() v := m.Run()
if v != 0 { if v != 0 {
os.Exit(v) os.Exit(v)
@ -253,19 +256,27 @@ func TestAddPingRequest(t *testing.T) {
} }
nodeKey := nodes[0].Key nodeKey := nodes[0].Key
pr := &tailcfg.PingRequest{URL: waitPing.URL, Log: true} for i := 0; i < 10; i++ {
ok := env.Control.AddPingRequest(nodeKey, pr) t.Logf("ping %v ...", i)
if !ok { ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
t.Fatalf("no node found with NodeKey %v in AddPingRequest", nodeKey) if err := env.Control.AwaitNodeInMapRequest(ctx, nodeKey); err != nil {
} t.Fatal(err)
}
cancel()
pr := &tailcfg.PingRequest{URL: fmt.Sprintf("%s/ping-%d", waitPing.URL, i), Log: true}
ok := env.Control.AddPingRequest(nodeKey, pr)
if !ok {
t.Fatalf("no node found with NodeKey %v in AddPingRequest", nodeKey)
}
// Wait for PingRequest to come back // Wait for PingRequest to come back
pingTimeout := time.NewTimer(10 * time.Second) pingTimeout := time.NewTimer(2 * time.Second)
select { select {
case <-gotPing: case <-gotPing:
pingTimeout.Stop() pingTimeout.Stop()
case <-pingTimeout.C: case <-pingTimeout.C:
t.Error("didn't get PingRequest from tailscaled") t.Fatal("didn't get PingRequest from tailscaled")
}
} }
} }
@ -384,6 +395,10 @@ func (n *testNode) StartDaemon(t testing.TB) *Daemon {
"HTTP_PROXY="+n.env.TrafficTrapServer.URL, "HTTP_PROXY="+n.env.TrafficTrapServer.URL,
"HTTPS_PROXY="+n.env.TrafficTrapServer.URL, "HTTPS_PROXY="+n.env.TrafficTrapServer.URL,
) )
if *verboseTailscaled {
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stdout
}
if err := cmd.Start(); err != nil { if err := cmd.Start(); err != nil {
t.Fatalf("starting tailscaled: %v", err) t.Fatalf("starting tailscaled: %v", err)
} }

@ -7,6 +7,7 @@ package testcontrol
import ( import (
"bytes" "bytes"
"context"
crand "crypto/rand" crand "crypto/rand"
"encoding/binary" "encoding/binary"
"encoding/json" "encoding/json"
@ -46,6 +47,7 @@ type Server struct {
mux *http.ServeMux mux *http.ServeMux
mu sync.Mutex mu sync.Mutex
cond *sync.Cond // lazily initialized by condLocked
pubKey wgkey.Key pubKey wgkey.Key
privKey wgkey.Private privKey wgkey.Private
nodes map[tailcfg.NodeKey]*tailcfg.Node nodes map[tailcfg.NodeKey]*tailcfg.Node
@ -68,6 +70,47 @@ func (s *Server) NumNodes() int {
return len(s.nodes) return len(s.nodes)
} }
// condLocked lazily initializes and returns s.cond.
// s.mu must be held.
func (s *Server) condLocked() *sync.Cond {
if s.cond == nil {
s.cond = sync.NewCond(&s.mu)
}
return s.cond
}
// AwaitNodeInMapRequest waits for node k to be stuck in a map poll.
// It returns an error if and only if the context is done first.
func (s *Server) AwaitNodeInMapRequest(ctx context.Context, k tailcfg.NodeKey) error {
s.mu.Lock()
defer s.mu.Unlock()
cond := s.condLocked()
done := make(chan struct{})
defer close(done)
go func() {
select {
case <-done:
case <-ctx.Done():
cond.Broadcast()
}
}()
for {
node := s.nodeLocked(k)
if node == nil {
return errors.New("unknown node key")
}
if _, ok := s.updates[node.ID]; ok {
return nil
}
cond.Wait()
if err := ctx.Err(); err != nil {
return err
}
}
}
// AddPingRequest sends the ping pr to nodeKeyDst. It reports whether it did so. That is, // AddPingRequest sends the ping pr to nodeKeyDst. It reports whether it did so. That is,
// it reports whether nodeKeyDst was connected. // it reports whether nodeKeyDst was connected.
func (s *Server) AddPingRequest(nodeKeyDst tailcfg.NodeKey, pr *tailcfg.PingRequest) bool { func (s *Server) AddPingRequest(nodeKeyDst tailcfg.NodeKey, pr *tailcfg.PingRequest) bool {
@ -85,8 +128,7 @@ func (s *Server) AddPingRequest(nodeKeyDst tailcfg.NodeKey, pr *tailcfg.PingRequ
s.pingReqsToAdd[nodeKeyDst] = pr s.pingReqsToAdd[nodeKeyDst] = pr
nodeID := node.ID nodeID := node.ID
oldUpdatesCh := s.updates[nodeID] oldUpdatesCh := s.updates[nodeID]
sendUpdate(oldUpdatesCh, updateDebugInjection) return sendUpdate(oldUpdatesCh, updateDebugInjection)
return true
} }
type AuthPath struct { type AuthPath struct {
@ -414,17 +456,19 @@ func (s *Server) updateLocked(source string, peers []tailcfg.NodeID) {
} }
// sendUpdate sends updateType to dst if dst is non-nil and // sendUpdate sends updateType to dst if dst is non-nil and
// has capacity. // has capacity. It reports whether a value was sent.
func sendUpdate(dst chan<- updateType, updateType updateType) { func sendUpdate(dst chan<- updateType, updateType updateType) bool {
if dst == nil { if dst == nil {
return return false
} }
// The dst channel has a buffer size of 1. // The dst channel has a buffer size of 1.
// If we fail to insert an update into the buffer that // If we fail to insert an update into the buffer that
// means there is already an update pending. // means there is already an update pending.
select { select {
case dst <- updateType: case dst <- updateType:
return true
default: default:
return false
} }
} }
@ -489,6 +533,7 @@ func (s *Server) serveMap(w http.ResponseWriter, r *http.Request, mkey tailcfg.M
sendUpdate(oldUpdatesCh, updateSelfChanged) sendUpdate(oldUpdatesCh, updateSelfChanged)
} }
s.updateLocked("serveMap", peersToUpdate) s.updateLocked("serveMap", peersToUpdate)
s.condLocked().Broadcast()
s.mu.Unlock() s.mu.Unlock()
// ReadOnly implies no streaming, as it doesn't // ReadOnly implies no streaming, as it doesn't

Loading…
Cancel
Save