diff --git a/control/controlclient/map.go b/control/controlclient/map.go index 40d3109e3..cdeca1248 100644 --- a/control/controlclient/map.go +++ b/control/controlclient/map.go @@ -44,6 +44,7 @@ type mapSession struct { collectServices bool previousPeers []*tailcfg.Node // for delta-purposes lastDomain string + lastHealth []string // netMapBuilding is non-nil during a netmapForResponse call, // containing the value to be returned, once fully populated. @@ -105,6 +106,9 @@ func (ms *mapSession) netmapForResponse(resp *tailcfg.MapResponse) *netmap.Netwo if resp.Domain != "" { ms.lastDomain = resp.Domain } + if resp.Health != nil { + ms.lastHealth = resp.Health + } nm := &netmap.NetworkMap{ NodeKey: tailcfg.NodeKey(ms.privateNodeKey.Public()), @@ -118,6 +122,7 @@ func (ms *mapSession) netmapForResponse(resp *tailcfg.MapResponse) *netmap.Netwo CollectServices: ms.collectServices, DERPMap: ms.lastDERPMap, Debug: resp.Debug, + ControlHealth: ms.lastHealth, } ms.netMapBuilding = nm diff --git a/health/health.go b/health/health.go index 0dade963f..314107cfa 100644 --- a/health/health.go +++ b/health/health.go @@ -40,6 +40,7 @@ var ( ipnWantRunning bool anyInterfaceUp = true // until told otherwise udp4Unbound bool + controlHealth []string ) // Subsystem is the name of a subsystem whose health can be monitored. @@ -141,6 +142,13 @@ func setLocked(key Subsystem, err error) { } } +func SetControlHealth(problems []string) { + mu.Lock() + defer mu.Unlock() + controlHealth = problems + selfCheckLocked() +} + // GotStreamedMapResponse notes that we got a tailcfg.MapResponse // message in streaming mode, even if it's just a keep-alive message. 
func GotStreamedMapResponse() { @@ -318,6 +326,9 @@ func overallErrorLocked() error { for regionID, problem := range derpRegionHealthProblem { errs = append(errs, fmt.Errorf("derp%d: %v", regionID, problem)) } + for _, s := range controlHealth { + errs = append(errs, errors.New(s)) + } if e := fakeErrForTesting; len(errs) == 0 && e != "" { return errors.New(e) } diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 93667d283..3e7dd33c3 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -2548,6 +2548,12 @@ func (b *LocalBackend) setNetMapLocked(nm *netmap.NetworkMap) { } b.maybePauseControlClientLocked() + if nm != nil { + health.SetControlHealth(nm.ControlHealth) + } else { + health.SetControlHealth(nil) + } + // Determine if file sharing is enabled fs := hasCapability(nm, tailcfg.CapabilityFileSharing) if fs != b.capFileSharing { diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index bef39bb2b..53ba9b7a2 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -47,7 +47,8 @@ import ( // 21: 2021-06-15: added MapResponse.DNSConfig.CertDomains // 22: 2021-06-16: added MapResponse.DNSConfig.ExtraRecords // 23: 2021-08-25: DNSConfig.Routes values may be empty (for ExtraRecords support in 1.14.1+) -const CurrentMapRequestVersion = 23 +// 24: 2021-09-18: MapResponse.Health from control to node; node shows in "tailscale status" +const CurrentMapRequestVersion = 24 type StableID string @@ -1028,6 +1029,14 @@ type MapResponse struct { // user profiles only. UserProfiles []UserProfile `json:",omitempty"` + // Health, if non-nil, sets the health state + // of the node from the control plane's perspective. + // A nil value means no change from the previous MapResponse. + // A non-nil 0-length slice restores the health to good (no known problems). + // A non-zero length slice is the list of problems that the control plane + // sees. 
+ Health []string `json:",omitempty"` + // Debug is normally nil, except for when the control server // is setting debug settings on a node. Debug *Debug `json:",omitempty"` diff --git a/types/netmap/netmap.go b/types/netmap/netmap.go index 556ed5d06..5eec06c6f 100644 --- a/types/netmap/netmap.go +++ b/types/netmap/netmap.go @@ -54,6 +54,13 @@ type NetworkMap struct { // Debug knobs from control server for debug or feature gating. Debug *tailcfg.Debug + // ControlHealth is the list of health check problems for this + // node from the perspective of the control plane. + // If empty, there are no known problems from the control plane's + // point of view, but the node might know about its own health + // check problems. + ControlHealth []string + // ACLs User tailcfg.UserID