wgengine/magicsock, health, ipn/ipnstate: track DERP-advertised health

And add health check errors to ipnstate.Status (tailscale status --json).

Updates #2746
Updates #2775

Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
pull/2586/head
Brad Fitzpatrick 3 years ago committed by Brad Fitzpatrick
parent 722942dd46
commit 5bacbf3744

@ -32,6 +32,7 @@ var (
lastStreamedMapResponse time.Time
derpHomeRegion int
derpRegionConnected = map[int]bool{}
derpRegionHealthProblem = map[int]string{}
derpRegionLastFrame = map[int]time.Time{}
lastMapRequestHeard time.Time // time we got a 200 from control for a MapRequest
ipnState string
@ -191,6 +192,19 @@ func SetDERPRegionConnectedState(region int, connected bool) {
selfCheckLocked()
}
// SetDERPRegionHealth records problem as the health problem reported for
// the given DERP region. An empty problem removes any entry previously
// stored for that region.
//
// It holds mu for the duration of the call and finishes by calling
// selfCheckLocked.
func SetDERPRegionHealth(region int, problem string) {
	mu.Lock()
	defer mu.Unlock()

	switch problem {
	case "":
		// No problem reported: forget whatever was stored for this region.
		delete(derpRegionHealthProblem, region)
	default:
		derpRegionHealthProblem[region] = problem
	}

	selfCheckLocked()
}
func NoteDERPRegionReceivedFrame(region int) {
mu.Lock()
defer mu.Unlock()
@ -241,6 +255,16 @@ func selfCheckLocked() {
setLocked(SysOverall, overallErrorLocked())
}
// OverallError reports a summary of the current health state.
//
// It acquires mu and returns whatever overallErrorLocked returns. When
// more than one problem is present, the returned error is of type
// multierror.MultipleErrors.
func OverallError() error {
	mu.Lock()
	defer mu.Unlock()

	summary := overallErrorLocked()
	return summary
}
func overallErrorLocked() error {
if !anyInterfaceUp {
return errors.New("network down")
@ -288,6 +312,9 @@ func overallErrorLocked() error {
}
errs = append(errs, fmt.Errorf("%v: %w", sys, err))
}
for regionID, problem := range derpRegionHealthProblem {
errs = append(errs, fmt.Errorf("derp%d: %v", regionID, problem))
}
sort.Slice(errs, func(i, j int) bool {
// Not super efficient (stringifying these in a sort), but probably max 2 or 3 items.
return errs[i].Error() < errs[j].Error()

@ -25,6 +25,7 @@ import (
"syscall"
"time"
"github.com/go-multierror/multierror"
"inet.af/netaddr"
"tailscale.com/client/tailscale/apitype"
"tailscale.com/control/controlclient"
@ -331,6 +332,17 @@ func (b *LocalBackend) updateStatus(sb *ipnstate.StatusBuilder, extraLocked func
s.Version = version.Long
s.BackendState = b.state.String()
s.AuthURL = b.authURLSticky
if err := health.OverallError(); err != nil {
switch e := err.(type) {
case multierror.MultipleErrors:
for _, err := range e {
s.Health = append(s.Health, err.Error())
}
default:
s.Health = append(s.Health, err.Error())
}
}
if b.netMap != nil {
s.MagicDNSSuffix = b.netMap.MagicDNSSuffix()
s.CertDomains = append([]string(nil), b.netMap.DNS.CertDomains...)

@ -38,6 +38,11 @@ type Status struct {
TailscaleIPs []netaddr.IP // Tailscale IP(s) assigned to this node
Self *PeerStatus
// Health contains health check problems.
// Empty means everything is good (or at least that no known
// problems are detected).
Health []string
// MagicDNSSuffix is the network's MagicDNS suffix for nodes
// in the network such as "userfoo.tailscale.net".
// There are no surrounding dots.

@ -1403,6 +1403,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, d
}
defer health.SetDERPRegionConnectedState(regionID, false)
defer health.SetDERPRegionHealth(regionID, "")
// peerPresent is the set of senders we know are present on this
// connection, based on messages we've received from the server.
@ -1458,6 +1459,7 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, d
switch m := msg.(type) {
case derp.ServerInfoMessage:
health.SetDERPRegionConnectedState(regionID, true)
health.SetDERPRegionHealth(regionID, "") // until declared otherwise
c.logf("magicsock: derp-%d connected; connGen=%v", regionID, connGen)
continue
case derp.ReceivedPacket:
@ -1482,6 +1484,8 @@ func (c *Conn) runDerpReader(ctx context.Context, derpFakeAddr netaddr.IPPort, d
}
}()
continue
case derp.HealthMessage:
health.SetDERPRegionHealth(regionID, m.Problem)
default:
// Ignore.
continue

Loading…
Cancel
Save