From a2c42d3cd4e914b8ac879ac0a21c284ecaf143fc Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Thu, 1 Aug 2024 13:00:36 +0200 Subject: [PATCH] usermetric: add initial user-facing metrics This commit adds a new usermetric package and wires up metrics across the tailscale client. Updates tailscale/corp#22075 Co-authored-by: Anton Tolchanov Signed-off-by: Kristoffer Dalby --- client/tailscale/localclient.go | 6 +++ client/web/web.go | 6 +++ cmd/derper/depaware.txt | 1 + cmd/k8s-operator/depaware.txt | 2 + cmd/tailscale/depaware.txt | 4 +- cmd/tailscaled/depaware.txt | 3 +- health/health.go | 25 +++++++++ ipn/ipnlocal/local.go | 13 +++++ ipn/localapi/localapi.go | 15 ++++++ metrics/multilabelmap.go | 32 ++++++++++-- metrics/multilabelmap_test.go | 14 ++++- net/tstun/wrap.go | 40 +++++++++++++- net/tstun/wrap_test.go | 22 ++++++++ tsnet/tsnet_test.go | 65 +++++++++++++++++++++++ tsweb/varz/varz.go | 33 +++++++----- util/usermetric/usermetric.go | 84 ++++++++++++++++++++++++++++++ util/usermetric/usermetric_test.go | 25 +++++++++ 17 files changed, 368 insertions(+), 22 deletions(-) create mode 100644 util/usermetric/usermetric.go create mode 100644 util/usermetric/usermetric_test.go diff --git a/client/tailscale/localclient.go b/client/tailscale/localclient.go index 6f27e56b8..98371393d 100644 --- a/client/tailscale/localclient.go +++ b/client/tailscale/localclient.go @@ -353,6 +353,12 @@ func (lc *LocalClient) DaemonMetrics(ctx context.Context) ([]byte, error) { return lc.get200(ctx, "/localapi/v0/metrics") } +// UserMetrics returns the user metrics in +// the Prometheus text exposition format. +func (lc *LocalClient) UserMetrics(ctx context.Context) ([]byte, error) { + return lc.get200(ctx, "/localapi/v0/usermetrics") +} + // IncrementCounter increments the value of a Tailscale daemon's counter // metric by the given delta. If the metric has yet to exist, a new counter // metric is created and initialized to delta. 
diff --git a/client/web/web.go b/client/web/web.go index 9f9465db9..b914070b2 100644 --- a/client/web/web.go +++ b/client/web/web.go @@ -283,6 +283,12 @@ func (s *Server) serve(w http.ResponseWriter, r *http.Request) { } } + if r.URL.Path == "/metrics" { + r.URL.Path = "/api/local/v0/usermetrics" + s.proxyRequestToLocalAPI(w, r) + return + } + if strings.HasPrefix(r.URL.Path, "/api/") { switch { case r.URL.Path == "/api/auth" && r.Method == httpm.GET: diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index 4f5029ca9..53b263d03 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -163,6 +163,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa tailscale.com/util/syspolicy from tailscale.com/ipn tailscale.com/util/syspolicy/internal from tailscale.com/util/syspolicy/setting tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy + tailscale.com/util/usermetric from tailscale.com/health tailscale.com/util/vizerror from tailscale.com/tailcfg+ W 💣 tailscale.com/util/winutil from tailscale.com/hostinfo+ W 💣 tailscale.com/util/winutil/winenv from tailscale.com/hostinfo+ diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index a41622888..738d0cff2 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -754,6 +754,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ tailscale.com/tstime from tailscale.com/cmd/k8s-operator+ tailscale.com/tstime/mono from tailscale.com/net/tstun+ tailscale.com/tstime/rate from tailscale.com/derp+ + tailscale.com/tsweb/varz from tailscale.com/util/usermetric tailscale.com/types/appctype from tailscale.com/ipn/ipnlocal tailscale.com/types/dnstype from tailscale.com/ipn/ipnlocal+ tailscale.com/types/empty from tailscale.com/ipn+ @@ -812,6 +813,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ tailscale.com/util/testenv from 
tailscale.com/control/controlclient+ tailscale.com/util/truncate from tailscale.com/logtail tailscale.com/util/uniq from tailscale.com/ipn/ipnlocal+ + tailscale.com/util/usermetric from tailscale.com/health+ tailscale.com/util/vizerror from tailscale.com/tailcfg+ 💣 tailscale.com/util/winutil from tailscale.com/clientupdate+ W 💣 tailscale.com/util/winutil/authenticode from tailscale.com/clientupdate+ diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index d92b6612b..b121ee019 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -99,7 +99,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/ipn from tailscale.com/client/tailscale+ tailscale.com/ipn/ipnstate from tailscale.com/client/tailscale+ tailscale.com/licenses from tailscale.com/client/web+ - tailscale.com/metrics from tailscale.com/derp + tailscale.com/metrics from tailscale.com/derp+ tailscale.com/net/captivedetection from tailscale.com/net/netcheck tailscale.com/net/dns/recursive from tailscale.com/net/dnsfallback tailscale.com/net/dnscache from tailscale.com/control/controlhttp+ @@ -132,6 +132,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/tstime from tailscale.com/control/controlhttp+ tailscale.com/tstime/mono from tailscale.com/tstime/rate tailscale.com/tstime/rate from tailscale.com/cmd/tailscale/cli+ + tailscale.com/tsweb/varz from tailscale.com/util/usermetric tailscale.com/types/dnstype from tailscale.com/tailcfg tailscale.com/types/empty from tailscale.com/ipn tailscale.com/types/ipproto from tailscale.com/net/flowtrack+ @@ -173,6 +174,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/util/syspolicy/setting from tailscale.com/util/syspolicy tailscale.com/util/testenv from tailscale.com/cmd/tailscale/cli tailscale.com/util/truncate from tailscale.com/cmd/tailscale/cli + tailscale.com/util/usermetric from 
tailscale.com/health tailscale.com/util/vizerror from tailscale.com/tailcfg+ 💣 tailscale.com/util/winutil from tailscale.com/clientupdate+ W 💣 tailscale.com/util/winutil/authenticode from tailscale.com/clientupdate diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 0e977ea44..16c09cf95 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -343,7 +343,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/tstime from tailscale.com/control/controlclient+ tailscale.com/tstime/mono from tailscale.com/net/tstun+ tailscale.com/tstime/rate from tailscale.com/derp+ - tailscale.com/tsweb/varz from tailscale.com/cmd/tailscaled + tailscale.com/tsweb/varz from tailscale.com/cmd/tailscaled+ tailscale.com/types/appctype from tailscale.com/ipn/ipnlocal tailscale.com/types/dnstype from tailscale.com/ipn/ipnlocal+ tailscale.com/types/empty from tailscale.com/ipn+ @@ -403,6 +403,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/util/testenv from tailscale.com/ipn/ipnlocal+ tailscale.com/util/truncate from tailscale.com/logtail tailscale.com/util/uniq from tailscale.com/ipn/ipnlocal+ + tailscale.com/util/usermetric from tailscale.com/health+ tailscale.com/util/vizerror from tailscale.com/tailcfg+ 💣 tailscale.com/util/winutil from tailscale.com/clientupdate+ W 💣 tailscale.com/util/winutil/authenticode from tailscale.com/clientupdate+ diff --git a/health/health.go b/health/health.go index 10549b523..1faab53f9 100644 --- a/health/health.go +++ b/health/health.go @@ -8,6 +8,7 @@ package health import ( "context" "errors" + "expvar" "fmt" "maps" "net/http" @@ -25,6 +26,7 @@ import ( "tailscale.com/util/mak" "tailscale.com/util/multierr" "tailscale.com/util/set" + "tailscale.com/util/usermetric" "tailscale.com/version" ) @@ -1202,6 +1204,18 @@ func (t *Tracker) ReceiveFuncStats(which ReceiveFunc) *ReceiveFuncStats { } func (t *Tracker) 
doOnceInit() { + metricHealthMessage.Set(metricHealthMessageLabel{ + Type: "warning", + }, expvar.Func(func() any { + if t.nil() { + return 0 + } + t.mu.Lock() + defer t.mu.Unlock() + t.updateBuiltinWarnablesLocked() + return int64(len(t.stringsLocked())) + })) + for i := range t.MagicSockReceiveFuncs { f := &t.MagicSockReceiveFuncs[i] f.name = (ReceiveFunc(i)).String() @@ -1232,3 +1246,14 @@ func (t *Tracker) checkReceiveFuncsLocked() { f.missing = true } } + +type metricHealthMessageLabel struct { + // TODO: break down by warnable.severity as well? + Type string +} + +var metricHealthMessage = usermetric.NewMultiLabelMap[metricHealthMessageLabel]( + "tailscaled_health_messages", + "gauge", + "Number of health messages broken down by type.", +) diff --git a/ipn/ipnlocal/local.go b/ipn/ipnlocal/local.go index 73c41f326..da8b68744 100644 --- a/ipn/ipnlocal/local.go +++ b/ipn/ipnlocal/local.go @@ -106,6 +106,7 @@ import ( "tailscale.com/util/systemd" "tailscale.com/util/testenv" "tailscale.com/util/uniq" + "tailscale.com/util/usermetric" "tailscale.com/version" "tailscale.com/version/distro" "tailscale.com/wgengine" @@ -117,6 +118,9 @@ import ( "tailscale.com/wgengine/wgcfg/nmcfg" ) +var metricAdvertisedRoutes = usermetric.NewGauge( + "tailscaled_advertised_routes", "Number of advertised network routes (e.g. by a subnet router)") + var controlDebugFlags = getControlDebugFlags() func getControlDebugFlags() []string { @@ -4646,6 +4650,15 @@ func (b *LocalBackend) applyPrefsToHostinfoLocked(hi *tailcfg.Hostinfo, prefs ip hi.ShieldsUp = prefs.ShieldsUp() hi.AllowsUpdate = envknob.AllowsRemoteUpdate() || prefs.AutoUpdate().Apply.EqualBool(true) + // count routes without exit node routes + var routes int64 + for _, route := range hi.RoutableIPs { + if route.Bits() != 0 { + routes++ + } + } + metricAdvertisedRoutes.Set(float64(routes)) + var sshHostKeys []string if prefs.RunSSH() && envknob.CanSSHD() { // TODO(bradfitz): this is called with b.mu held. Not ideal. 
diff --git a/ipn/localapi/localapi.go b/ipn/localapi/localapi.go index 3be469193..af4c262e0 100644 --- a/ipn/localapi/localapi.go +++ b/ipn/localapi/localapi.go @@ -63,6 +63,8 @@ import ( "tailscale.com/util/osuser" "tailscale.com/util/progresstracking" "tailscale.com/util/rands" + "tailscale.com/util/testenv" + "tailscale.com/util/usermetric" "tailscale.com/version" "tailscale.com/wgengine/magicsock" ) @@ -141,6 +143,7 @@ var handler = map[string]localAPIHandler{ "update/install": (*Handler).serveUpdateInstall, "update/progress": (*Handler).serveUpdateProgress, "upload-client-metrics": (*Handler).serveUploadClientMetrics, + "usermetrics": (*Handler).serveUserMetrics, "watch-ipn-bus": (*Handler).serveWatchIPNBus, "whois": (*Handler).serveWhoIs, } @@ -571,6 +574,18 @@ func (h *Handler) serveMetrics(w http.ResponseWriter, r *http.Request) { clientmetric.WritePrometheusExpositionFormat(w) } +// TODO(kradalby): Remove this once we have landed on a final set of +// metrics to export to clients and consider the metrics stable. 
+var debugUsermetricsEndpoint = envknob.RegisterBool("TS_DEBUG_USER_METRICS") + +func (h *Handler) serveUserMetrics(w http.ResponseWriter, r *http.Request) { + if !testenv.InTest() && !debugUsermetricsEndpoint() { + http.Error(w, "usermetrics debug flag not enabled", http.StatusForbidden) + return + } + usermetric.Handler(w, r) +} + func (h *Handler) serveDebug(w http.ResponseWriter, r *http.Request) { if !h.PermitWrite { http.Error(w, "debug access denied", http.StatusForbidden) diff --git a/metrics/multilabelmap.go b/metrics/multilabelmap.go index c0f312e7d..df2ae5073 100644 --- a/metrics/multilabelmap.go +++ b/metrics/multilabelmap.go @@ -39,7 +39,7 @@ func NewMultiLabelMap[T comparable](name string, promType, helpText string) *Mul Help: helpText, } var zero T - _ = labelString(zero) // panic early if T is invalid + _ = LabelString(zero) // panic early if T is invalid expvar.Publish(name, m) return m } @@ -50,8 +50,10 @@ type labelsAndValue[T comparable] struct { val expvar.Var } -// labelString returns a Prometheus-formatted label string for the given key. -func labelString(k any) string { +// LabelString returns a Prometheus-formatted label string for the given key. +// k must be a struct type with scalar fields, as required by MultiLabelMap, +// if k is not a struct, it will panic. +func LabelString(k any) string { rv := reflect.ValueOf(k) t := rv.Type() if t.Kind() != reflect.Struct { @@ -150,7 +152,7 @@ func (v *MultiLabelMap[T]) Init() *MultiLabelMap[T] { // // v.mu must be held. func (v *MultiLabelMap[T]) addKeyLocked(key T, val expvar.Var) { - ls := labelString(key) + ls := LabelString(key) ent := labelsAndValue[T]{key, ls, val} // Using insertion sort to place key into the already-sorted v.keys. @@ -209,6 +211,26 @@ func (v *MultiLabelMap[T]) Set(key T, val expvar.Var) { v.m.Store(key, val) } +// SetInt sets val to the *[expvar.Int] value stored under the given map key, +// creating it if it doesn't exist yet. 
+// It does nothing if key exists but is of the wrong type. +func (v *MultiLabelMap[T]) SetInt(key T, val int64) { + // Set to Int; ignore otherwise. + if iv, ok := v.getOrFill(key, newInt).(*expvar.Int); ok { + iv.Set(val) + } +} + +// SetFloat sets val to the *[expvar.Float] value stored under the given map key, +// creating it if it doesn't exist yet. +// It does nothing if key exists but is of the wrong type. +func (v *MultiLabelMap[T]) SetFloat(key T, val float64) { + // Set to Float; ignore otherwise. + if iv, ok := v.getOrFill(key, newFloat).(*expvar.Float); ok { + iv.Set(val) + } +} + // Add adds delta to the *[expvar.Int] value stored under the given map key, // creating it if it doesn't exist yet. // It does nothing if key exists but is of the wrong type. @@ -234,7 +256,7 @@ func (v *MultiLabelMap[T]) AddFloat(key T, delta float64) { // This is not optimized for highly concurrent usage; it's presumed to only be // used rarely, at startup. func (v *MultiLabelMap[T]) Delete(key T) { - ls := labelString(key) + ls := LabelString(key) v.mu.Lock() defer v.mu.Unlock() diff --git a/metrics/multilabelmap_test.go b/metrics/multilabelmap_test.go index 9a1340a3c..b53e15ec8 100644 --- a/metrics/multilabelmap_test.go +++ b/metrics/multilabelmap_test.go @@ -5,6 +5,7 @@ package metrics import ( "bytes" + "expvar" "fmt" "io" "testing" @@ -22,6 +23,12 @@ func TestMultilabelMap(t *testing.T) { m.Add(L2{"b", "b"}, 3) m.Add(L2{"a", "a"}, 1) + m.SetFloat(L2{"sf", "sf"}, 3.5) + m.SetFloat(L2{"sf", "sf"}, 5.5) + m.Set(L2{"sfunc", "sfunc"}, expvar.Func(func() any { return 3 })) + m.SetInt(L2{"si", "si"}, 3) + m.SetInt(L2{"si", "si"}, 5) + cur := func() string { var buf bytes.Buffer m.Do(func(kv KeyValue[L2]) { @@ -33,7 +40,7 @@ func TestMultilabelMap(t *testing.T) { return buf.String() } - if g, w := cur(), "a/a=1,a/b=2,b/b=3,b/c=4"; g != w { + if g, w := cur(), "a/a=1,a/b=2,b/b=3,b/c=4,sf/sf=5.5,sfunc/sfunc=3,si/si=5"; g != w { t.Errorf("got %q; want %q", g, w) } @@ -43,6 +50,9 
@@ func TestMultilabelMap(t *testing.T) { metricname{foo="a",bar="b"} 2 metricname{foo="b",bar="b"} 3 metricname{foo="b",bar="c"} 4 +metricname{foo="sf",bar="sf"} 5.5 +metricname{foo="sfunc",bar="sfunc"} 3 +metricname{foo="si",bar="si"} 5 ` if got := buf.String(); got != want { t.Errorf("promtheus output = %q; want %q", got, want) @@ -50,7 +60,7 @@ metricname{foo="b",bar="c"} 4 m.Delete(L2{"b", "b"}) - if g, w := cur(), "a/a=1,a/b=2,b/c=4"; g != w { + if g, w := cur(), "a/a=1,a/b=2,b/c=4,sf/sf=5.5,sfunc/sfunc=3,si/si=5"; g != w { t.Errorf("got %q; want %q", g, w) } diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go index 84eb1aec2..dc1aeabc6 100644 --- a/net/tstun/wrap.go +++ b/net/tstun/wrap.go @@ -34,6 +34,7 @@ import ( "tailscale.com/types/key" "tailscale.com/types/logger" "tailscale.com/util/clientmetric" + "tailscale.com/util/usermetric" "tailscale.com/wgengine/capture" "tailscale.com/wgengine/filter" "tailscale.com/wgengine/netstack/gro" @@ -868,6 +869,9 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf if filt.RunOut(p, t.filterFlags) != filter.Accept { metricPacketOutDropFilter.Add(1) + metricOutboundDroppedPacketsTotal.Add(dropPacketLabel{ + Reason: DropReasonACL, + }, 1) return filter.Drop } @@ -876,7 +880,6 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf return res } } - return filter.Accept } @@ -1133,6 +1136,9 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca if outcome != filter.Accept { metricPacketInDropFilter.Add(1) + metricInboundDroppedPacketsTotal.Add(dropPacketLabel{ + Reason: DropReasonACL, + }, 1) // Tell them, via TSMP, we're dropping them due to the ACL. 
// Their host networking stack can translate this into ICMP @@ -1210,6 +1216,11 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) { if len(buffs) > 0 { t.noteActivity() _, err := t.tdevWrite(buffs, offset) + if err != nil { + metricInboundDroppedPacketsTotal.Add(dropPacketLabel{ + Reason: DropReasonError, + }, int64(len(buffs))) + } return len(buffs), err } return 0, nil @@ -1449,6 +1460,33 @@ var ( metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco") ) +type DropReason string + +const ( + DropReasonACL DropReason = "acl" + DropReasonError DropReason = "error" +) + +type dropPacketLabel struct { + // Reason indicates what we have done with the packet, and has the following values: + // - acl (rejected packets because of ACL) + // - error (rejected packets because of an error) + Reason DropReason +} + +var ( + metricInboundDroppedPacketsTotal = usermetric.NewMultiLabelMap[dropPacketLabel]( + "tailscaled_inbound_dropped_packets_total", + "counter", + "Counts the number of dropped packets received by the node from other peers", + ) + metricOutboundDroppedPacketsTotal = usermetric.NewMultiLabelMap[dropPacketLabel]( + "tailscaled_outbound_dropped_packets_total", + "counter", + "Counts the number of packets dropped while being sent to other peers", + ) +) + func (t *Wrapper) InstallCaptureHook(cb capture.Callback) { t.captureHook.Store(cb) } diff --git a/net/tstun/wrap_test.go b/net/tstun/wrap_test.go index 0280cfbc3..9cb2ad550 100644 --- a/net/tstun/wrap_test.go +++ b/net/tstun/wrap_test.go @@ -315,6 +315,12 @@ func mustHexDecode(s string) []byte { } func TestFilter(t *testing.T) { + // Reset the metrics before test. These are global + // so the different tests might have affected them. 
+ metricInboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonACL}, 0) + metricInboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonError}, 0) + metricOutboundDroppedPacketsTotal.SetInt(dropPacketLabel{Reason: DropReasonACL}, 0) + chtun, tun := newChannelTUN(t.Logf, true) defer tun.Close() @@ -429,6 +435,22 @@ func TestFilter(t *testing.T) { } }) } + + inACL := metricInboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}) + inError := metricInboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError}) + outACL := metricOutboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}) + + assertMetricPackets(t, "inACL", "3", inACL.String()) + assertMetricPackets(t, "inError", "0", inError.String()) + assertMetricPackets(t, "outACL", "1", outACL.String()) + +} + +func assertMetricPackets(t *testing.T, metricName, want, got string) { + t.Helper() + if want != got { + t.Errorf("%s got unexpected value, got %s, want %s", metricName, got, want) + } } func TestAllocs(t *testing.T) { diff --git a/tsnet/tsnet_test.go b/tsnet/tsnet_test.go index 9589b4796..7f6fb00c0 100644 --- a/tsnet/tsnet_test.go +++ b/tsnet/tsnet_test.go @@ -31,8 +31,10 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" "golang.org/x/net/proxy" "tailscale.com/cmd/testwrapper/flakytest" + "tailscale.com/health" "tailscale.com/ipn" "tailscale.com/ipn/store/mem" "tailscale.com/net/netns" @@ -815,3 +817,66 @@ func TestUDPConn(t *testing.T) { t.Errorf("got %q, want world", got) } } + +func TestUserMetrics(t *testing.T) { + tstest.ResourceCheck(t) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // testWarnable is a Warnable that is used within this package for testing purposes only. 
+ var testWarnable = health.Register(&health.Warnable{ + Code: "test-warnable-tsnet", + Title: "Test warnable", + Severity: health.SeverityLow, + Text: func(args health.Args) string { + return args[health.ArgError] + }, + }) + + controlURL, c := startControl(t) + s1, _, s1PubKey := startServer(t, ctx, controlURL, "s1") + + s1.lb.EditPrefs(&ipn.MaskedPrefs{ + Prefs: ipn.Prefs{ + AdvertiseRoutes: []netip.Prefix{ + netip.MustParsePrefix("192.0.2.0/24"), + netip.MustParsePrefix("192.0.3.0/24"), + }, + }, + AdvertiseRoutesSet: true, + }) + c.SetSubnetRoutes(s1PubKey, []netip.Prefix{netip.MustParsePrefix("192.0.2.0/24")}) + + lc1, err := s1.LocalClient() + if err != nil { + t.Fatal(err) + } + + ht := s1.lb.HealthTracker() + ht.SetUnhealthy(testWarnable, health.Args{"Text": "Hello world 1"}) + + metrics1, err := lc1.UserMetrics(ctx) + if err != nil { + t.Fatal(err) + } + + // Note that this test will check for two warnings because the health + // tracker will have two warnings: one from the testWarnable, added in + // this test, and one because we are running the dev/unstable version + // of tailscale. + want := `# TYPE tailscaled_advertised_routes gauge +# HELP tailscaled_advertised_routes Number of advertised network routes (e.g. by a subnet router) +tailscaled_advertised_routes 2 +# TYPE tailscaled_health_messages gauge +# HELP tailscaled_health_messages Number of health messages broken down by type. 
+tailscaled_health_messages{type="warning"} 2 +# TYPE tailscaled_inbound_dropped_packets_total counter +# HELP tailscaled_inbound_dropped_packets_total Counts the number of dropped packets received by the node from other peers +# TYPE tailscaled_outbound_dropped_packets_total counter +# HELP tailscaled_outbound_dropped_packets_total Counts the number of packets dropped while being sent to other peers +` + + if diff := cmp.Diff(want, string(metrics1)); diff != "" { + t.Fatalf("unexpected metrics (-want +got):\n%s", diff) + } +} diff --git a/tsweb/varz/varz.go b/tsweb/varz/varz.go index cdf9cfe7c..561b24877 100644 --- a/tsweb/varz/varz.go +++ b/tsweb/varz/varz.go @@ -273,19 +273,28 @@ type sortedKVs struct { // // This will evolve over time, or perhaps be replaced. func Handler(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/plain;version=0.0.4;charset=utf-8") + ExpvarDoHandler(expvarDo)(w, r) +} - s := sortedKVsPool.Get().(*sortedKVs) - defer sortedKVsPool.Put(s) - s.kvs = s.kvs[:0] - expvarDo(func(kv expvar.KeyValue) { - s.kvs = append(s.kvs, sortedKV{kv, removeTypePrefixes(kv.Key)}) - }) - sort.Slice(s.kvs, func(i, j int) bool { - return s.kvs[i].sortKey < s.kvs[j].sortKey - }) - for _, e := range s.kvs { - writePromExpVar(w, "", e.KeyValue) +// ExpvarDoHandler returns a handler like Handler above, but takes an +// expvar.Do-style func to allow the use of alternative containers of metrics, +// other than the global expvar.Map. 
+func ExpvarDoHandler(expvarDoFunc func(f func(expvar.KeyValue))) func(http.ResponseWriter, *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain;version=0.0.4;charset=utf-8") + + s := sortedKVsPool.Get().(*sortedKVs) + defer sortedKVsPool.Put(s) + s.kvs = s.kvs[:0] + expvarDoFunc(func(kv expvar.KeyValue) { + s.kvs = append(s.kvs, sortedKV{kv, removeTypePrefixes(kv.Key)}) + }) + sort.Slice(s.kvs, func(i, j int) bool { + return s.kvs[i].sortKey < s.kvs[j].sortKey + }) + for _, e := range s.kvs { + writePromExpVar(w, "", e.KeyValue) + } } } diff --git a/util/usermetric/usermetric.go b/util/usermetric/usermetric.go new file mode 100644 index 000000000..cb3f66ea9 --- /dev/null +++ b/util/usermetric/usermetric.go @@ -0,0 +1,84 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +// Package usermetric provides a container and handler +// for user-facing metrics. +package usermetric + +import ( + "expvar" + "fmt" + "io" + "net/http" + + "tailscale.com/metrics" + "tailscale.com/tsweb/varz" +) + +var vars expvar.Map + +// NewMultiLabelMap creates and registers a new +// MultiLabelMap[T] variable with the given name and returns it. +// The variable is registered with the user-facing metrics package. +// +// Note that usermetric does not protect against duplicate +// metric names. It is the caller's responsibility to ensure that +// the name is unique. +func NewMultiLabelMap[T comparable](name string, promType, helpText string) *metrics.MultiLabelMap[T] { + m := &metrics.MultiLabelMap[T]{ + Type: promType, + Help: helpText, + } + var zero T + _ = metrics.LabelString(zero) // panic early if T is invalid + vars.Set(name, m) + return m +} + +// Gauge is a gauge metric with no labels. +type Gauge struct { + m *expvar.Float + help string +} + +// NewGauge creates and registers a new gauge metric with the given name and help text. 
+func NewGauge(name, help string) *Gauge { + g := &Gauge{&expvar.Float{}, help} + vars.Set(name, g) + return g +} + +// Set sets the gauge to the given value. +func (g *Gauge) Set(v float64) { + g.m.Set(v) +} + +// String returns the string of the underlying expvar.Float. +// This satisfies the expvar.Var interface. +func (g *Gauge) String() string { + return g.m.String() +} + +// WritePrometheus writes the gauge metric in Prometheus format to the given writer. +// This satisfies the varz.PrometheusWriter interface. +func (g *Gauge) WritePrometheus(w io.Writer, name string) { + io.WriteString(w, "# TYPE ") + io.WriteString(w, name) + io.WriteString(w, " gauge\n") + if g.help != "" { + io.WriteString(w, "# HELP ") + io.WriteString(w, name) + io.WriteString(w, " ") + io.WriteString(w, g.help) + io.WriteString(w, "\n") + } + + io.WriteString(w, name) + fmt.Fprintf(w, " %v\n", g.m.Value()) +} + +// Handler is an HTTP handler that serves the user-facing expvars contained +// in this package. +func Handler(w http.ResponseWriter, r *http.Request) { + varz.ExpvarDoHandler(vars.Do)(w, r) +} diff --git a/util/usermetric/usermetric_test.go b/util/usermetric/usermetric_test.go new file mode 100644 index 000000000..aa0e82ea6 --- /dev/null +++ b/util/usermetric/usermetric_test.go @@ -0,0 +1,25 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package usermetric + +import ( + "bytes" + "testing" +) + +func TestGauge(t *testing.T) { + g := NewGauge("test_gauge", "This is a test gauge") + g.Set(15) + + var buf bytes.Buffer + g.WritePrometheus(&buf, "test_gauge") + const want = `# TYPE test_gauge gauge +# HELP test_gauge This is a test gauge +test_gauge 15 +` + if got := buf.String(); got != want { + t.Errorf("got %q; want %q", got, want) + } + +}