netcheck, controlclient, magicsock: add more metrics

Updates #3307

Change-Id: Ibb33425764a75bde49230632f1b472f923551126
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
pull/3326/head
Brad Fitzpatrick 3 years ago committed by Brad Fitzpatrick
parent 3b541c833e
commit 24ea365d48

@ -72,6 +72,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/types/persist from tailscale.com/ipn tailscale.com/types/persist from tailscale.com/ipn
tailscale.com/types/preftype from tailscale.com/cmd/tailscale/cli+ tailscale.com/types/preftype from tailscale.com/cmd/tailscale/cli+
tailscale.com/types/structs from tailscale.com/ipn+ tailscale.com/types/structs from tailscale.com/ipn+
tailscale.com/util/clientmetric from tailscale.com/net/netcheck
tailscale.com/util/dnsname from tailscale.com/cmd/tailscale/cli+ tailscale.com/util/dnsname from tailscale.com/cmd/tailscale/cli+
W tailscale.com/util/endian from tailscale.com/net/netns W tailscale.com/util/endian from tailscale.com/net/netns
tailscale.com/util/groupmember from tailscale.com/cmd/tailscale/cli tailscale.com/util/groupmember from tailscale.com/cmd/tailscale/cli

@ -46,6 +46,7 @@ import (
"tailscale.com/types/netmap" "tailscale.com/types/netmap"
"tailscale.com/types/opt" "tailscale.com/types/opt"
"tailscale.com/types/persist" "tailscale.com/types/persist"
"tailscale.com/util/clientmetric"
"tailscale.com/util/systemd" "tailscale.com/util/systemd"
"tailscale.com/wgengine/monitor" "tailscale.com/wgengine/monitor"
) )
@ -558,6 +559,15 @@ const pollTimeout = 120 * time.Second
// cb nil means to omit peers. // cb nil means to omit peers.
func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netmap.NetworkMap)) error { func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netmap.NetworkMap)) error {
metricMapRequests.Add(1)
metricMapRequestsActive.Add(1)
defer metricMapRequestsActive.Add(-1)
if maxPolls == -1 {
metricMapRequestsPoll.Add(1)
} else {
metricMapRequestsLite.Add(1)
}
c.mu.Lock() c.mu.Lock()
persist := c.persist persist := c.persist
serverURL := c.serverURL serverURL := c.serverURL
@ -747,11 +757,14 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
return err return err
} }
metricMapResponseMessages.Add(1)
if allowStream { if allowStream {
health.GotStreamedMapResponse() health.GotStreamedMapResponse()
} }
if pr := resp.PingRequest; pr != nil && c.isUniquePingRequest(pr) { if pr := resp.PingRequest; pr != nil && c.isUniquePingRequest(pr) {
metricMapResponsePings.Add(1)
go answerPing(c.logf, c.httpc, pr) go answerPing(c.logf, c.httpc, pr)
} }
@ -768,9 +781,15 @@ func (c *Direct) sendMapRequest(ctx context.Context, maxPolls int, cb func(*netm
return ctx.Err() return ctx.Err()
} }
if resp.KeepAlive { if resp.KeepAlive {
metricMapResponseKeepAlives.Add(1)
continue continue
} }
metricMapResponseMap.Add(1)
if i > 0 {
metricMapResponseMapDelta.Add(1)
}
hasDebug := resp.Debug != nil hasDebug := resp.Debug != nil
// being conservative here, if Debug not present set to False // being conservative here, if Debug not present set to False
controlknobs.SetDisableUPnP(hasDebug && resp.Debug.DisableUPnP.EqualBool(true)) controlknobs.SetDisableUPnP(hasDebug && resp.Debug.DisableUPnP.EqualBool(true))
@ -1181,7 +1200,13 @@ func sleepAsRequested(ctx context.Context, logf logger.Logf, timeoutReset chan<-
// SetDNS sends the SetDNSRequest request to the control plane server, // SetDNS sends the SetDNSRequest request to the control plane server,
// requesting a DNS record be created or updated. // requesting a DNS record be created or updated.
func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) error { func (c *Direct) SetDNS(ctx context.Context, req *tailcfg.SetDNSRequest) (err error) {
metricSetDNS.Add(1)
defer func() {
if err != nil {
metricSetDNSError.Add(1)
}
}()
c.mu.Lock() c.mu.Lock()
serverKey := c.serverKey serverKey := c.serverKey
c.mu.Unlock() c.mu.Unlock()
@ -1281,3 +1306,20 @@ func postPingResult(now time.Time, logf logger.Logf, c *http.Client, pr *tailcfg
} }
return nil return nil
} }
// Client metrics for the control client. Counters only ever increase;
// the one gauge tracks a current value.
var (
// metricMapRequestsActive is the number of sendMapRequest calls
// currently in flight: incremented on entry and decremented (via
// defer) when the call returns.
metricMapRequestsActive = clientmetric.NewGauge("controlclient_map_requests_active")
// metricMapRequests counts every call to sendMapRequest, regardless
// of whether it is a long poll or a lite request.
metricMapRequests = clientmetric.NewCounter("controlclient_map_requests")
// metricMapRequestsLite counts sendMapRequest calls made with
// maxPolls != -1.
metricMapRequestsLite = clientmetric.NewCounter("controlclient_map_requests_lite")
// metricMapRequestsPoll counts sendMapRequest calls made with
// maxPolls == -1.
metricMapRequestsPoll = clientmetric.NewCounter("controlclient_map_requests_poll")
// metricMapResponseMessages counts each response message handled
// within sendMapRequest's response loop, whatever its kind.
metricMapResponseMessages = clientmetric.NewCounter("controlclient_map_response_message") // any message type
// metricMapResponsePings counts responses carrying a PingRequest that
// isUniquePingRequest accepted (i.e. ones we go on to answer).
metricMapResponsePings = clientmetric.NewCounter("controlclient_map_response_ping")
// metricMapResponseKeepAlives counts responses with KeepAlive set.
metricMapResponseKeepAlives = clientmetric.NewCounter("controlclient_map_response_keepalive")
// metricMapResponseMap counts responses that are not keep-alives.
metricMapResponseMap = clientmetric.NewCounter("controlclient_map_response_map") // any non-keepalive map response
// metricMapResponseMapDelta counts non-keep-alive responses received
// at loop index i > 0 within a single sendMapRequest call.
metricMapResponseMapDelta = clientmetric.NewCounter("controlclient_map_response_map_delta") // 2nd+ non-keepalive map response
// metricSetDNS counts every call to SetDNS.
metricSetDNS = clientmetric.NewCounter("controlclient_setdns")
// metricSetDNSError counts SetDNS calls that returned a non-nil error.
metricSetDNSError = clientmetric.NewCounter("controlclient_setdns_error")
)

@ -34,6 +34,7 @@ import (
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/types/logger" "tailscale.com/types/logger"
"tailscale.com/types/opt" "tailscale.com/types/opt"
"tailscale.com/util/clientmetric"
) )
// Debugging and experimentation tweakables. // Debugging and experimentation tweakables.
@ -232,6 +233,12 @@ func (c *Client) MakeNextReportFull() {
func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) { func (c *Client) ReceiveSTUNPacket(pkt []byte, src netaddr.IPPort) {
c.vlogf("received STUN packet from %s", src) c.vlogf("received STUN packet from %s", src)
if src.IP().Is4() {
metricSTUNRecv4.Add(1)
} else if src.IP().Is6() {
metricSTUNRecv6.Add(1)
}
c.mu.Lock() c.mu.Lock()
if c.handleHairSTUNLocked(pkt, src) { if c.handleHairSTUNLocked(pkt, src) {
c.mu.Unlock() c.mu.Unlock()
@ -737,7 +744,13 @@ func (c *Client) udpBindAddr() string {
// GetReport gets a report. // GetReport gets a report.
// //
// It may not be called concurrently with itself. // It may not be called concurrently with itself.
func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, error) { func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (_ *Report, reterr error) {
defer func() {
if reterr != nil {
metricNumGetReportError.Add(1)
}
}()
metricNumGetReport.Add(1)
// Mask user context with ours that we guarantee to cancel so // Mask user context with ours that we guarantee to cancel so
// we can depend on it being closed in goroutines later. // we can depend on it being closed in goroutines later.
// (User ctx might be context.Background, etc) // (User ctx might be context.Background, etc)
@ -769,6 +782,7 @@ func (c *Client) GetReport(ctx context.Context, dm *tailcfg.DERPMap) (*Report, e
last = nil // causes makeProbePlan below to do a full (initial) plan last = nil // causes makeProbePlan below to do a full (initial) plan
c.nextFull = false c.nextFull = false
c.lastFull = now c.lastFull = now
metricNumGetReportFull.Add(1)
} }
rs.incremental = last != nil rs.incremental = last != nil
c.mu.Unlock() c.mu.Unlock()
@ -983,6 +997,7 @@ func (c *Client) runHTTPOnlyChecks(ctx context.Context, last *Report, rs *report
} }
func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) { func (c *Client) measureHTTPSLatency(ctx context.Context, reg *tailcfg.DERPRegion) (time.Duration, netaddr.IP, error) {
metricHTTPSend.Add(1)
var result httpstat.Result var result httpstat.Result
ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout) ctx, cancel := context.WithTimeout(httpstat.WithHTTPStat(ctx, &result), overallProbeTimeout)
defer cancel() defer cancel()
@ -1217,6 +1232,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
switch probe.proto { switch probe.proto {
case probeIPv4: case probeIPv4:
metricSTUNSend4.Add(1)
n, err := rs.pc4.WriteTo(req, addr) n, err := rs.pc4.WriteTo(req, addr)
if n == len(req) && err == nil { if n == len(req) && err == nil {
rs.mu.Lock() rs.mu.Lock()
@ -1224,6 +1240,7 @@ func (rs *reportState) runProbe(ctx context.Context, dm *tailcfg.DERPMap, probe
rs.mu.Unlock() rs.mu.Unlock()
} }
case probeIPv6: case probeIPv6:
metricSTUNSend6.Add(1)
n, err := rs.pc6.WriteTo(req, addr) n, err := rs.pc6.WriteTo(req, addr)
if n == len(req) && err == nil { if n == len(req) && err == nil {
rs.mu.Lock() rs.mu.Lock()
@ -1322,3 +1339,15 @@ func conciseOptBool(b opt.Bool, trueVal string) string {
} }
return "" return ""
} }
// Client metrics for netcheck. All of these are monotonically
// increasing counters.
var (
// metricNumGetReport counts every call to GetReport.
metricNumGetReport = clientmetric.NewCounter("netcheck_report")
// metricNumGetReportFull counts GetReport calls that reset to a full
// (initial) probe plan rather than an incremental one.
metricNumGetReportFull = clientmetric.NewCounter("netcheck_report_full")
// metricNumGetReportError counts GetReport calls that returned a
// non-nil error.
metricNumGetReportError = clientmetric.NewCounter("netcheck_report_error")
// metricSTUNSend4 counts IPv4 STUN probe send attempts in runProbe.
// It is incremented before the write, so failed writes are included.
metricSTUNSend4 = clientmetric.NewCounter("netcheck_stun_send_ipv4")
// metricSTUNSend6 counts IPv6 STUN probe send attempts in runProbe.
// It is incremented before the write, so failed writes are included.
metricSTUNSend6 = clientmetric.NewCounter("netcheck_stun_send_ipv6")
// metricSTUNRecv4 counts packets passed to ReceiveSTUNPacket whose
// source address is IPv4.
metricSTUNRecv4 = clientmetric.NewCounter("netcheck_stun_recv_ipv4")
// metricSTUNRecv6 counts packets passed to ReceiveSTUNPacket whose
// source address is IPv6.
metricSTUNRecv6 = clientmetric.NewCounter("netcheck_stun_recv_ipv6")
// metricHTTPSend counts every call to measureHTTPSLatency.
metricHTTPSend = clientmetric.NewCounter("netcheck_https_measure")
)

@ -967,6 +967,9 @@ func (c *Conn) setNearestDERP(derpNum int) (wantDERP bool) {
// No change. // No change.
return true return true
} }
if c.myDerp != 0 && derpNum != 0 {
metricDERPHomeChange.Add(1)
}
c.myDerp = derpNum c.myDerp = derpNum
health.SetMagicSockDERPHome(derpNum) health.SetMagicSockDERPHome(derpNum)
@ -1616,6 +1619,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
err := dc.Send(wr.pubKey, wr.b) err := dc.Send(wr.pubKey, wr.b)
if err != nil { if err != nil {
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err) c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
metricSendDERPError.Add(1)
} else {
metricSendDERP.Add(1)
} }
} }
} }
@ -4054,6 +4060,8 @@ var (
metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue") metricSendDERPErrorQueue = clientmetric.NewCounter("magicsock_send_derp_error_queue")
metricSendUDP = clientmetric.NewCounter("magicsock_send_udp") metricSendUDP = clientmetric.NewCounter("magicsock_send_udp")
metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error") metricSendUDPError = clientmetric.NewCounter("magicsock_send_udp_error")
metricSendDERP = clientmetric.NewCounter("magicsock_send_derp")
metricSendDERPError = clientmetric.NewCounter("magicsock_send_derp_error")
// Data packets (non-disco) // Data packets (non-disco)
metricSendData = clientmetric.NewCounter("magicsock_send_data") metricSendData = clientmetric.NewCounter("magicsock_send_data")
@ -4079,4 +4087,8 @@ var (
metricRecvDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe") metricRecvDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe")
metricRecvDiscoCallMeMaybeBadNode = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_node") metricRecvDiscoCallMeMaybeBadNode = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_node")
metricRecvDiscoCallMeMaybeBadDisco = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_disco") metricRecvDiscoCallMeMaybeBadDisco = clientmetric.NewCounter("magicsock_disco_recv_callmemaybe_bad_disco")
// metricDERPHomeChange is how many times our DERP home region ID has
// changed from non-zero to a different non-zero.
metricDERPHomeChange = clientmetric.NewCounter("derp_home_change")
) )

Loading…
Cancel
Save