From c2f0c705e7b5c771aa0464577ada19ca32d87d74 Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 25 Sep 2024 08:40:26 -0700 Subject: [PATCH] health: clean up updateBuiltinWarnablesLocked a bit, fix DERP warnings Updates #13265 Change-Id: Iabe4a062204a7859d869f6acfb9274437b4ea1ea Signed-off-by: Brad Fitzpatrick --- health/health.go | 84 +++++++++++++++++++++++++--------------------- health/warnings.go | 44 ++++++++++++++++-------- 2 files changed, 76 insertions(+), 52 deletions(-) diff --git a/health/health.go b/health/health.go index c4c5fa719..10a4e565f 100644 --- a/health/health.go +++ b/health/health.go @@ -107,7 +107,6 @@ type Tracker struct { ipnWantRunning bool ipnWantRunningLastTrue time.Time // when ipnWantRunning last changed false -> true anyInterfaceUp opt.Bool // empty means unknown (assume true) - udp4Unbound bool controlHealth []string lastLoginErr error localLogConfigErr error @@ -843,8 +842,12 @@ func (t *Tracker) SetUDP4Unbound(unbound bool) { } t.mu.Lock() defer t.mu.Unlock() - t.udp4Unbound = unbound - t.selfCheckLocked() + + if unbound { + t.setUnhealthyLocked(noUDP4BindWarnable, nil) + } else { + t.setHealthyLocked(noUDP4BindWarnable) + } } // SetAuthRoutineInError records the latest error encountered as a result of a @@ -1002,7 +1005,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { if v, ok := t.anyInterfaceUp.Get(); ok && !v { t.setUnhealthyLocked(NetworkStatusWarnable, nil) - return } else { t.setHealthyLocked(NetworkStatusWarnable) } @@ -1011,11 +1013,50 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { t.setUnhealthyLocked(localLogWarnable, Args{ ArgError: t.localLogConfigErr.Error(), }) - return } else { t.setHealthyLocked(localLogWarnable) } + now := time.Now() + + // How long we assume we'll have heard a DERP frame or a MapResponse + // KeepAlive by. + const tooIdle = 2*time.Minute + 5*time.Second + + // Whether user recently turned on Tailscale. + recentlyOn := now.Sub(t.ipnWantRunningLastTrue) < 5*time.Second + + homeDERP := t.derpHomeRegion + if recentlyOn { + // If user just turned Tailscale on, don't warn for a bit. + t.setHealthyLocked(noDERPHomeWarnable) + t.setHealthyLocked(noDERPConnectionWarnable) + t.setHealthyLocked(derpTimeoutWarnable) + } else if !t.ipnWantRunning || t.derpHomeless || homeDERP != 0 { + t.setHealthyLocked(noDERPHomeWarnable) + } else { + t.setUnhealthyLocked(noDERPHomeWarnable, nil) + } + + if homeDERP != 0 && t.derpRegionConnected[homeDERP] { + t.setHealthyLocked(noDERPConnectionWarnable) + + if d := now.Sub(t.derpRegionLastFrame[homeDERP]); d < tooIdle { + t.setHealthyLocked(derpTimeoutWarnable) + } else { + t.setUnhealthyLocked(derpTimeoutWarnable, Args{ + ArgDERPRegionID: fmt.Sprint(homeDERP), + ArgDERPRegionName: t.derpRegionNameLocked(homeDERP), + ArgDuration: d.Round(time.Second).String(), + }) + } + } else { + t.setUnhealthyLocked(noDERPConnectionWarnable, Args{ + ArgDERPRegionID: fmt.Sprint(homeDERP), + ArgDERPRegionName: t.derpRegionNameLocked(homeDERP), + }) + } + if !t.ipnWantRunning { t.setUnhealthyLocked(IPNStateWarnable, Args{ "State": t.ipnState, @@ -1038,7 +1079,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { t.setHealthyLocked(LoginStateWarnable) } - now := time.Now() if !t.inMapPoll && (t.lastMapPollEndedAt.IsZero() || now.Sub(t.lastMapPollEndedAt) > 10*time.Second) { t.setUnhealthyLocked(notInMapPollWarnable, nil) return @@ -1046,7 +1086,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { t.setHealthyLocked(notInMapPollWarnable) } - const tooIdle = 2*time.Minute + 5*time.Second if d := now.Sub(t.lastStreamedMapResponse).Round(time.Second); d > tooIdle { t.setUnhealthyLocked(mapResponseTimeoutWarnable, Args{ ArgDuration: d.String(), @@ -1056,37 +1095,6 @@ func (t *Tracker) updateBuiltinWarnablesLocked() { t.setHealthyLocked(mapResponseTimeoutWarnable) } - if !t.derpHomeless { - rid := t.derpHomeRegion - if rid == 0 { - t.setUnhealthyLocked(noDERPHomeWarnable, nil) - return - } else if !t.derpRegionConnected[rid] { - t.setUnhealthyLocked(noDERPConnectionWarnable, Args{ - ArgDERPRegionID: fmt.Sprint(rid), - ArgDERPRegionName: t.derpRegionNameLocked(rid), - }) - return - } else if d := now.Sub(t.derpRegionLastFrame[rid]).Round(time.Second); d > tooIdle { - t.setUnhealthyLocked(derpTimeoutWarnable, Args{ - ArgDERPRegionID: fmt.Sprint(rid), - ArgDERPRegionName: t.derpRegionNameLocked(rid), - ArgDuration: d.String(), - }) - return - } - } - t.setHealthyLocked(noDERPHomeWarnable) - t.setHealthyLocked(noDERPConnectionWarnable) - t.setHealthyLocked(derpTimeoutWarnable) - - if t.udp4Unbound { - t.setUnhealthyLocked(noUDP4BindWarnable, nil) - return - } else { - t.setHealthyLocked(noUDP4BindWarnable) - } - // TODO: use _ = t.inMapPollSince _ = t.lastMapPollEndedAt diff --git a/health/warnings.go b/health/warnings.go index e84043341..7a21f9695 100644 --- a/health/warnings.go +++ b/health/warnings.go @@ -65,7 +65,7 @@ var NetworkStatusWarnable = Register(&Warnable{ // IPNStateWarnable is a Warnable that warns the user that Tailscale is stopped. var IPNStateWarnable = Register(&Warnable{ Code: "wantrunning-false", - Title: "Not connected to Tailscale", + Title: "Tailscale off", Severity: SeverityLow, Text: StaticMessage("Tailscale is stopped."), }) @@ -93,6 +93,7 @@ var LoginStateWarnable = Register(&Warnable{ return "You are logged out." } }, + DependsOn: []*Warnable{IPNStateWarnable}, }) // notInMapPollWarnable is a Warnable that warns the user that we are using a stale network map. @@ -100,7 +101,7 @@ var notInMapPollWarnable = Register(&Warnable{ Code: "not-in-map-poll", Title: "Out of sync", Severity: SeverityMedium, - DependsOn: []*Warnable{NetworkStatusWarnable}, + DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable}, Text: StaticMessage("Unable to connect to the Tailscale coordination server to synchronize the state of your tailnet. Peer reachability might degrade over time."), // 8 minutes reflects a maximum maintenance window for the coordination server. TimeToVisible: 8 * time.Minute, @@ -119,10 +120,20 @@ var noDERPHomeWarnable = Register(&Warnable{ // noDERPConnectionWarnable is a Warnable that warns the user that Tailscale couldn't connect to a specific DERP server. var noDERPConnectionWarnable = Register(&Warnable{ - Code: "no-derp-connection", - Title: "Relay server unavailable", - Severity: SeverityMedium, - DependsOn: []*Warnable{NetworkStatusWarnable}, + Code: "no-derp-connection", + Title: "Relay server unavailable", + Severity: SeverityMedium, + DependsOn: []*Warnable{ + NetworkStatusWarnable, + + // Technically noDERPConnectionWarnable could be used to warn about + // failure to connect to a specific DERP server (e.g. your home is derp1 + // but you're trying to connect to a peer's derp4 and are unable) but as + // of 2024-09-25 we only use this for connecting to your home DERP, so + // we depend on noDERPHomeWarnable which is the ability to figure out + // what your DERP home even is. + noDERPHomeWarnable, + }, Text: func(args Args) string { if n := args[ArgDERPRegionName]; n != "" { return fmt.Sprintf("Tailscale could not connect to the '%s' relay server. Your Internet connection might be down, or the server might be temporarily unavailable.", n) @@ -134,12 +145,17 @@ var noDERPConnectionWarnable = Register(&Warnable{ TimeToVisible: 10 * time.Second, }) -// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't heard from the home DERP region for a while. +// derpTimeoutWarnable is a Warnable that warns the user that Tailscale hasn't +// heard from the home DERP region for a while. var derpTimeoutWarnable = Register(&Warnable{ - Code: "derp-timed-out", - Title: "Relay server timed out", - Severity: SeverityMedium, - DependsOn: []*Warnable{NetworkStatusWarnable}, + Code: "derp-timed-out", + Title: "Relay server timed out", + Severity: SeverityMedium, + DependsOn: []*Warnable{ + NetworkStatusWarnable, + noDERPConnectionWarnable, // don't warn about it being stalled if we're not connected + noDERPHomeWarnable, // same reason as noDERPConnectionWarnable's dependency + }, Text: func(args Args) string { if n := args[ArgDERPRegionName]; n != "" { return fmt.Sprintf("Tailscale hasn't heard from the '%s' relay server in %v. The server might be temporarily unavailable, or your Internet connection might be down.", n, args[ArgDuration]) @@ -163,9 +179,9 @@ var derpRegionErrorWarnable = Register(&Warnable{ // noUDP4BindWarnable is a Warnable that warns the user that Tailscale couldn't listen for incoming UDP connections. var noUDP4BindWarnable = Register(&Warnable{ Code: "no-udp4-bind", - Title: "Incoming connections may fail", + Title: "NAT traversal setup failure", Severity: SeverityMedium, - DependsOn: []*Warnable{NetworkStatusWarnable}, + DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable}, Text: StaticMessage("Tailscale couldn't listen for incoming UDP connections."), ImpactsConnectivity: true, }) @@ -175,7 +191,7 @@ var mapResponseTimeoutWarnable = Register(&Warnable{ Code: "mapresponse-timeout", Title: "Network map response timeout", Severity: SeverityMedium, - DependsOn: []*Warnable{NetworkStatusWarnable}, + DependsOn: []*Warnable{NetworkStatusWarnable, IPNStateWarnable}, Text: func(args Args) string { return fmt.Sprintf("Tailscale hasn't received a network map from the coordination server in %s.", args[ArgDuration]) },