diff --git a/health/health.go b/health/health.go
index a5524487f..0f8de11c5 100644
--- a/health/health.go
+++ b/health/health.go
@@ -32,6 +32,7 @@ var (
 	lastStreamedMapResponse time.Time
 	derpHomeRegion          int
 	derpRegionConnected     = map[int]bool{}
+	derpRegionHealthProblem = map[int]string{}
 	derpRegionLastFrame     = map[int]time.Time{}
 	lastMapRequestHeard     time.Time // time we got a 200 from control for a MapRequest
 	ipnState                string
@@ -191,6 +192,19 @@ func SetDERPRegionConnectedState(region int, connected bool) {
 	selfCheckLocked()
 }
 
+// SetDERPRegionHealth sets or clears any problem associated with the
+// provided DERP region.
+func SetDERPRegionHealth(region int, problem string) {
+	mu.Lock()
+	defer mu.Unlock()
+	if problem == "" {
+		delete(derpRegionHealthProblem, region)
+	} else {
+		derpRegionHealthProblem[region] = problem
+	}
+	selfCheckLocked()
+}
+
 func NoteDERPRegionReceivedFrame(region int) {
 	mu.Lock()
 	defer mu.Unlock()
@@ -241,6 +255,16 @@ func selfCheckLocked() {
 	setLocked(SysOverall, overallErrorLocked())
 }
 
+// OverallError returns a summary of the health state.
+//
+// If there are multiple problems, the error will be of type
+// multierror.MultipleErrors.
+func OverallError() error {
+	mu.Lock()
+	defer mu.Unlock()
+	return overallErrorLocked()
+}
+
 func overallErrorLocked() error {
 	if !anyInterfaceUp {
 		return errors.New("network down")
@@ -288,6 +312,9 @@ func overallErrorLocked() error {
 		}
 		errs = append(errs, fmt.Errorf("%v: %w", sys, err))
 	}
+	for regionID, problem := range derpRegionHealthProblem {
+		errs = append(errs, fmt.Errorf("derp%d: %v", regionID, problem))
+	}
 	sort.Slice(errs, func(i, j int) bool {
 		// Not super efficient (stringifying these in a sort), but probably max 2 or 3 items.
 		return errs[i].Error() < errs[j].Error()
diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go
index 6362b8a7e..35d280667 100644
--- a/ipn/ipnlocal/local.go
+++ b/ipn/ipnlocal/local.go
@@ -25,6 +25,7 @@ import (
 	"syscall"
 	"time"
 
+	"github.com/go-multierror/multierror"
 	"inet.af/netaddr"
 	"tailscale.com/client/tailscale/apitype"
 	"tailscale.com/control/controlclient"
@@ -331,6 +332,17 @@ func (b *LocalBackend) updateStatus(sb *ipnstate.StatusBuilder, extraLocked func
 		s.Version = version.Long
 		s.BackendState = b.state.String()
 		s.AuthURL = b.authURLSticky
+
+		if err := health.OverallError(); err != nil {
+			switch e := err.(type) {
+			case multierror.MultipleErrors:
+				for _, err := range e {
+					s.Health = append(s.Health, err.Error())
+				}
+			default:
+				s.Health = append(s.Health, err.Error())
+			}
+		}
 		if b.netMap != nil {
 			s.MagicDNSSuffix = b.netMap.MagicDNSSuffix()
 			s.CertDomains = append([]string(nil), b.netMap.DNS.CertDomains...)
diff --git a/ipn/ipnstate/ipnstate.go b/ipn/ipnstate/ipnstate.go
index 62d43b332..e77a60585 100644
--- a/ipn/ipnstate/ipnstate.go
+++ b/ipn/ipnstate/ipnstate.go
@@ -38,6 +38,11 @@ type Status struct {
 	TailscaleIPs []netaddr.IP // Tailscale IP(s) assigned to this node
 	Self         *PeerStatus
 
+	// Health contains health check problems.
+	// Empty means everything is good (or at least that no known
+	// problems are detected).
+	Health []string
+
 	// MagicDNSSuffix is the network's MagicDNS suffix for nodes
 	// in the network such as "userfoo.tailscale.net".
 	// There are no surrounding dots.
diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go
index f8f6d39af..4609a7c04 100644
--- a/wgengine/magicsock/magicsock.go
+++ b/wgengine/magicsock/magicsock.go
@@ -1403,6 +1403,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, d
 	}
 
 	defer health.SetDERPRegionConnectedState(regionID, false)
+	defer health.SetDERPRegionHealth(regionID, "")
 
 	// peerPresent is the set of senders we know are present on this
 	// connection, based on messages we've received from the server.
@@ -1458,6 +1459,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, d
 		switch m := msg.(type) {
 		case derp.ServerInfoMessage:
 			health.SetDERPRegionConnectedState(regionID, true)
+			health.SetDERPRegionHealth(regionID, "") // until declared otherwise
 			c.logf("magicsock: derp-%d connected; connGen=%v", regionID, connGen)
 			continue
 		case derp.ReceivedPacket:
@@ -1482,6 +1484,11 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, d
 				}
 			}()
 			continue
+		case derp.HealthMessage:
+			health.SetDERPRegionHealth(regionID, m.Problem)
+			// Like ServerInfoMessage above, this frame only updates
+			// health state, so move on to the next message.
+			continue
 		default:
 			// Ignore.
 			continue