From d6a8fb20e71132091e8c95f2818f1da5756cc060 Mon Sep 17 00:00:00 2001 From: Andrea Gottardo Date: Tue, 18 Jun 2024 16:03:17 -0700 Subject: [PATCH] health: include DERP region name in bad derp notifications (#12530) Fixes tailscale/corp#20971 We added some Warnables for DERP failure situations, but their Text currently spits out the DERP region ID ("10") in the UI, which is super ugly. It would be better to provide the RegionName of the DERP region that is failing. We can do so by storing a reference to the last-known DERP map in the health package whenever we fetch one, and using it when generating the notification text. This way, the following message... > Tailscale could not connect to the relay server '10'. The server might be temporarily unavailable, or your Internet connection might be down. becomes: > Tailscale could not connect to the 'Seattle' relay server. The server might be temporarily unavailable, or your Internet connection might be down. which is a lot more user-friendly. Signed-off-by: Andrea Gottardo --- health/args.go | 9 +++++++-- health/health.go | 27 +++++++++++++++++++++------ health/warnings.go | 16 ++++++++++++---- ipn/ipnlocal/local.go | 3 +++ 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/health/args.go b/health/args.go index 18fe4365a..ced42bba8 100644 --- a/health/args.go +++ b/health/args.go @@ -22,8 +22,13 @@ const ( // ArgMagicsockFunctionName provides a Warnable with the name of the Magicsock function that caused the unhealthy state. ArgMagicsockFunctionName Arg = "magicsock-function-name" - // ArgRegionID provides a Warnable with the ID of a DERP server involved in the unhealthy state. - ArgRegionID Arg = "region-id" + // ArgDERPRegionID provides a Warnable with the ID of a DERP server involved in the unhealthy state. + ArgDERPRegionID Arg = "derp-region-id" + + // ArgDERPRegionName provides a Warnable with the name of a DERP server involved in the unhealthy state. + // It is used to show a more friendly message like "the Seattle relay server failed to connect" versus + // "relay server 10 failed to connect". + ArgDERPRegionName Arg = "derp-region-name" // ArgServerName provides a Warnable with the hostname of a server involved in the unhealthy state. ArgServerName Arg = "server-name" diff --git a/health/health.go b/health/health.go index 957b43fa9..5aa489843 100644 --- a/health/health.go +++ b/health/health.go @@ -88,7 +88,8 @@ type Tracker struct { derpRegionConnected map[int]bool derpRegionHealthProblem map[int]string derpRegionLastFrame map[int]time.Time - lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest + derpMap *tailcfg.DERPMap // last DERP map from control, could be nil if never received one + lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest ipnState string ipnWantRunning bool anyInterfaceUp opt.Bool // empty means unknown (assume true) @@ -672,6 +673,18 @@ func (t *Tracker) GetDERPRegionReceivedTime(region int) time.Time { return t.derpRegionLastFrame[region] } +// SetDERPMap sets the last fetched DERP map in the Tracker. The DERP map is used +// to provide a region name in user-facing DERP-related warnings. +func (t *Tracker) SetDERPMap(dm *tailcfg.DERPMap) { + if t.nil() { + return + } + t.mu.Lock() + defer t.mu.Unlock() + t.derpMap = dm + t.selfCheckLocked() +} + // state is an ipn.State.String() value: "Running", "Stopped", "NeedsLogin", etc. func (t *Tracker) SetIPNState(state string, wantRunning bool) { if t.nil() { @@ -914,13 +927,15 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { return } else if !t.derpRegionConnected[rid] { t.setUnhealthyLocked(noDERPConnectionWarnable, Args{ - ArgRegionID: fmt.Sprint(rid), + ArgDERPRegionID: fmt.Sprint(rid), + ArgDERPRegionName: t.derpMap.Regions[rid].RegionName, }) return } else if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle { t.setUnhealthyLocked(derpTimeoutWarnable, Args{ - ArgRegionID: fmt.Sprint(rid), - ArgDuration: d.String(), + ArgDERPRegionID: fmt.Sprint(rid), + ArgDERPRegionName: t.derpMap.Regions[rid].RegionName, + ArgDuration: d.String(), }) return } @@ -964,8 +979,8 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { if len(t.derpRegionHealthProblem) > 0 { for regionID, problem := range t.derpRegionHealthProblem { t.setUnhealthyLocked(derpRegionErrorWarnable, Args{ - ArgRegionID: fmt.Sprint(regionID), - ArgError: problem, + ArgDERPRegionID: fmt.Sprint(regionID), + ArgError: problem, }) } } else { diff --git a/health/warnings.go b/health/warnings.go index fc769e686..2db9135a8 100644 --- a/health/warnings.go +++ b/health/warnings.go @@ -103,11 +103,15 @@ var noDERPHomeWarnable = Register(&Warnable{ // noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server. var noDERPConnectionWarnable = Register(&Warnable{ Code: "no-derp-connection", - Title: "No relay server connection", + Title: "Relay server unavailable", Severity: SeverityHigh, DependsOn: []*Warnable{NetworkStatusWarnable}, Text: func(args Args) string { - return fmt.Sprintf("Tailscale could not connect to the relay server '%s'. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID]) + if n := args[ArgDERPRegionName]; n != "" { + return fmt.Sprintf("Tailscale could not connect to the '%s' relay server. Your Internet connection might be down, or the server might be temporarily unavailable.", n) + } else { + return fmt.Sprintf("Tailscale could not connect to the relay server with ID '%s'. Your Internet connection might be down, or the server might be temporarily unavailable.", args[ArgDERPRegionID]) + } }, }) @@ -118,7 +122,11 @@ var derpTimeoutWarnable = Register(&Warnable{ Severity: SeverityMedium, DependsOn: []*Warnable{NetworkStatusWarnable}, Text: func(args Args) string { - return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region %v) in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgRegionID], args[ArgDuration]) + if n := args[ArgDERPRegionName]; n != "" { + return fmt.Sprintf("Tailscale hasn't heard from the '%s' relay server in %v. The server might be temporarily unavailable, or your Internet connection might be down.", n, args[ArgDuration]) + } else { + return fmt.Sprintf("Tailscale hasn't heard from the home relay server (region ID '%v') in %v. The server might be temporarily unavailable, or your Internet connection might be down.", args[ArgDERPRegionID], args[ArgDuration]) + } }, }) @@ -129,7 +137,7 @@ var derpRegionErrorWarnable = Register(&Warnable{ Severity: SeverityMedium, DependsOn: []*Warnable{NetworkStatusWarnable}, Text: func(args Args) string { - return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgRegionID], args[ArgError]) + return fmt.Sprintf("The relay server #%v is reporting an issue: %v", args[ArgDERPRegionID], args[ArgError]) }, }) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 980b6c421..1e50f6264 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -1316,6 +1316,9 @@ func (b *LocalBackend) SetControlClientStatus(c controlclient.Client, st control // Update our cached DERP map dnsfallback.UpdateCache(st.NetMap.DERPMap, b.logf) + // Update the DERP map in the health package, which uses it for health notifications + b.health.SetDERPMap(st.NetMap.DERPMap) + b.send(ipn.Notify{NetMap: st.NetMap}) } if st.URL != "" {