From d0906cda97b116c33f191af4fcb81ac88ab7c7b6 Mon Sep 17 00:00:00 2001 From: Mihai Parparita Date: Wed, 19 Apr 2023 13:45:02 -0700 Subject: [PATCH] net/sockstats: expose debug info Exposes some internal state of the sockstats package via the C2N and PeerAPI endpoints, so that it can be used for debugging. For now this includes the estimated radio on percentage and a second-by-second view of the times the radio was active. Also fixes another off-by-one error in the radio on percentage that was leading to >100% values (if n seconds have passed since we started to monitor, there may be n + 1 possible seconds where the radio could have been on). Updates tailscale/corp#9230 Signed-off-by: Mihai Parparita --- ipn/ipnlocal/c2n.go | 4 +- ipn/ipnlocal/peerapi.go | 6 +++ net/sockstats/sockstats.go | 6 +++ net/sockstats/sockstats_noop.go | 4 ++ net/sockstats/sockstats_tsgo.go | 76 +++++++++++++++++++++------- net/sockstats/sockstats_tsgo_test.go | 13 ++++- 6 files changed, 88 insertions(+), 21 deletions(-) diff --git a/ipn/ipnlocal/c2n.go b/ipn/ipnlocal/c2n.go index 68c1e7e72..54428d38e 100644 --- a/ipn/ipnlocal/c2n.go +++ b/ipn/ipnlocal/c2n.go @@ -17,6 +17,7 @@ import ( "time" "tailscale.com/envknob" + "tailscale.com/net/sockstats" "tailscale.com/tailcfg" "tailscale.com/util/clientmetric" "tailscale.com/util/goroutines" @@ -94,7 +95,8 @@ func (b *LocalBackend) handleC2N(w http.ResponseWriter, r *http.Request) { return } b.sockstatLogger.Flush() - fmt.Fprintln(w, b.sockstatLogger.LogID()) + fmt.Fprintf(w, "logid: %s\n", b.sockstatLogger.LogID()) + fmt.Fprintf(w, "debug info: %v\n", sockstats.DebugInfo()) default: http.Error(w, "unknown c2n path", http.StatusBadRequest) } diff --git a/ipn/ipnlocal/peerapi.go b/ipn/ipnlocal/peerapi.go index fcacad0c9..d28206418 100644 --- a/ipn/ipnlocal/peerapi.go +++ b/ipn/ipnlocal/peerapi.go @@ -947,6 +947,12 @@ func (h *peerAPIHandler) handleServeSockStats(w http.ResponseWriter, r *http.Req fmt.Fprintln(w, "") fmt.Fprintln(w, "") + + fmt.Fprintln(w, "

Debug Info

") + + fmt.Fprintln(w, "
")
+	fmt.Fprintln(w, html.EscapeString(sockstats.DebugInfo()))
+	fmt.Fprintln(w, "
") } type incomingFile struct { diff --git a/net/sockstats/sockstats.go b/net/sockstats/sockstats.go index 97aa0236c..468fa7005 100644 --- a/net/sockstats/sockstats.go +++ b/net/sockstats/sockstats.go @@ -119,3 +119,9 @@ type LinkMonitor interface { func SetLinkMonitor(lm LinkMonitor) { setLinkMonitor(lm) } + +// DebugInfo returns a string containing debug information about the tracked +// statistics. +func DebugInfo() string { + return debugInfo() +} diff --git a/net/sockstats/sockstats_noop.go b/net/sockstats/sockstats_noop.go index c988249b8..96fd0da9b 100644 --- a/net/sockstats/sockstats_noop.go +++ b/net/sockstats/sockstats_noop.go @@ -31,3 +31,7 @@ func getValidation() *ValidationSockStats { func setLinkMonitor(lm LinkMonitor) { } + +func debugInfo() string { + return "" +} diff --git a/net/sockstats/sockstats_tsgo.go b/net/sockstats/sockstats_tsgo.go index 80ff7084a..de162e7e7 100644 --- a/net/sockstats/sockstats_tsgo.go +++ b/net/sockstats/sockstats_tsgo.go @@ -288,6 +288,19 @@ func setLinkMonitor(lm LinkMonitor) { }) } +func debugInfo() string { + var b strings.Builder + fmt.Fprintf(&b, "radio high percent: %d\n", radio.radioHighPercent()) + fmt.Fprintf(&b, "radio activity for the last hour (one minute per line):\n") + for i, a := range radio.radioActive() { + fmt.Fprintf(&b, "%d", a) + if i%60 == 59 { + fmt.Fprintf(&b, "\n") + } + } + return b.String() +} + func isLikelyCellularInterface(ifName string) bool { return strings.HasPrefix(ifName, "rmnet") || // Android strings.HasPrefix(ifName, "ww") || // systemd naming scheme for WWAN @@ -331,18 +344,56 @@ const ( radioLowIdle = 12 // seconds radio idles in low power state before transitioning to off ) +// radioActive returns a slice of 1s samples (one per second) for the past hour +// indicating whether the radio was active (1) or idle (0). +func (rm *radioMonitor) radioActive() (active [radioSampleSize]int64) { + rm.forEachSample(func(c int, isActive bool) { + if isActive { + active[c] = 1 + } + }) + return +} + // radioHighPercent returns the percentage of time (as an int from 0 to 100) // that the cellular radio was in high power mode during the past hour. // If the radio has been monitored for less than an hour, // the percentage is calculated based on the time monitored. func (rm *radioMonitor) radioHighPercent() int64 { + var highPowerSec int64 // total seconds radio was in high power (active or idle) + lastActive := -1 // counter when radio was last active + + periodLength := rm.forEachSample(func(c int, isActive bool) { + if isActive { + // radio on and active + highPowerSec++ + lastActive = c + } else if lastActive != -1 && c-lastActive < radioHighIdle { + // radio on but idle + highPowerSec++ + } + }) + + if periodLength == 0 { + return 0 + } + + if highPowerSec == 0 { + return 0 + } + return highPowerSec * 100 / periodLength +} + +// forEachSample calls f for each sample in the past hour (or less if less time +// has passed -- the evaluated period is returned) +func (rm *radioMonitor) forEachSample(f func(c int, isActive bool)) (periodLength int64) { now := rm.now().Unix() - var periodLength int64 = radioSampleSize + periodLength = radioSampleSize if t := now - rm.startTime; t < periodLength { if t <= 0 { return 0 } - periodLength = t + periodLength = t + 1 // we want an inclusive range (with the current second) } periodStart := now - periodLength // start of current reporting period @@ -353,25 +404,14 @@ func (rm *radioMonitor) radioHighPercent() int64 { rm.usage[split:], rm.usage[:split], } - var highPowerSec int64 // total seconds radio was in high power (active or idle) - var c int // counter - var lastActive int // counter when radio was last active + + var c int // counter for _, slice := range slices { for _, v := range slice { - c++ // increment first so we don't have zero values - if v >= periodStart { - // radio on and active - highPowerSec++ - lastActive = c - } else if lastActive > 0 && c-lastActive < radioHighIdle { - // radio on but idle - highPowerSec++ - } + f(c, v >= periodStart) + c++ } } - if highPowerSec == 0 { - return 0 - } - return highPowerSec * 100 / periodLength + return periodLength } diff --git a/net/sockstats/sockstats_tsgo_test.go b/net/sockstats/sockstats_tsgo_test.go index f85ab9fca..9e3a5a86a 100644 --- a/net/sockstats/sockstats_tsgo_test.go +++ b/net/sockstats/sockstats_tsgo_test.go @@ -37,10 +37,19 @@ func TestRadioMonitor(t *testing.T) { "active, 10 sec idle", func(tt *testTime, rm *radioMonitor) { rm.active() - tt.Add(10 * time.Second) + tt.Add(9 * time.Second) }, 50, // radio on 5 seconds of 10 seconds }, + { + "active, spanning two seconds", + func(tt *testTime, rm *radioMonitor) { + rm.active() + tt.Add(1100 * time.Millisecond) + rm.active() + }, + 100, // radio on for 2 seconds + }, { "400 iterations: 2 sec active, 1 min idle", func(tt *testTime, rm *radioMonitor) { @@ -57,7 +66,7 @@ func TestRadioMonitor(t *testing.T) { { "activity at end of time window", func(tt *testTime, rm *radioMonitor) { - tt.Add(2 * time.Second) + tt.Add(1 * time.Second) rm.active() }, 50,