From e8f09d24c77ab4239783ea5d886402b38aeb6a3c Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Sat, 27 Aug 2022 12:36:07 -0400 Subject: [PATCH] wgengine: use a singleflight.Group to reduce status contention (#5450) Updates tailscale/coral#72 Signed-off-by: Andrew Dunham Signed-off-by: Andrew Dunham --- wgengine/pendopen.go | 11 ++++++++++- wgengine/userspace.go | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/wgengine/pendopen.go b/wgengine/pendopen.go index c614faf80..df624162f 100644 --- a/wgengine/pendopen.go +++ b/wgengine/pendopen.go @@ -157,12 +157,21 @@ func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) { return } + // We don't care if this information is perfectly up-to-date, since + // we're just using it to print debug information. + // + // In tailscale/coral#72, we see a goroutine profile with thousands of + // goroutines blocked on the mutex in getStatus here, so we wrap it in + // a singleflight and accept stale information to reduce contention. + st, err, _ := e.getStatusSf.Do(struct{}{}, e.getStatus) + var ps *ipnstate.PeerStatusLite - if st, err := e.getStatus(); err == nil { + if err == nil { for _, v := range st.Peers { if v.NodeKey == n.Key { v := v // copy ps = &v + break } } } else { diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 8b590f8bc..bfc0df59e 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -46,6 +46,7 @@ import ( "tailscale.com/util/clientmetric" "tailscale.com/util/deephash" "tailscale.com/util/mak" + "tailscale.com/util/singleflight" "tailscale.com/version" "tailscale.com/wgengine/filter" "tailscale.com/wgengine/magicsock" @@ -146,6 +147,10 @@ type userspaceEngine struct { // value of the ICMP identifer and sequence number concatenated. icmpEchoResponseCallback map[uint32]func() + // this singleflight is used to deduplicate calls to getStatus when we + // don't care if the data is perfectly fresh + getStatusSf singleflight.Group[struct{}, *Status] + // Lock ordering: magicsock.Conn.mu, wgLock, then mu. }