From b4f46c31bbf8f079a0e617997e8b86f3c94247bd Mon Sep 17 00:00:00 2001 From: Anton Tolchanov Date: Tue, 29 Oct 2024 09:19:40 +0000 Subject: [PATCH] wgengine/magicsock: export packet drop metric for outbound errors This required sharing the dropped packet metric between two packages (tstun and magicsock), so I've moved its definition to util/usermetric. Updates tailscale/corp#22075 Signed-off-by: Anton Tolchanov --- net/tstun/wrap.go | 44 ++++-------------- net/tstun/wrap_test.go | 6 +-- util/usermetric/metrics.go | 69 ++++++++++++++++++++++++++++ util/usermetric/usermetric.go | 3 ++ wgengine/magicsock/derp.go | 3 ++ wgengine/magicsock/magicsock.go | 15 +++++- wgengine/magicsock/magicsock_test.go | 25 ++++++++++ 7 files changed, 127 insertions(+), 38 deletions(-) create mode 100644 util/usermetric/metrics.go diff --git a/net/tstun/wrap.go b/net/tstun/wrap.go index 0b858fc1c..c384abf9d 100644 --- a/net/tstun/wrap.go +++ b/net/tstun/wrap.go @@ -213,24 +213,14 @@ type Wrapper struct { } type metrics struct { - inboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel] - outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel] + inboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels] + outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels] } func registerMetrics(reg *usermetric.Registry) *metrics { return &metrics{ - inboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel]( - reg, - "tailscaled_inbound_dropped_packets_total", - "counter", - "Counts the number of dropped packets received by the node from other peers", - ), - outboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel]( - reg, - "tailscaled_outbound_dropped_packets_total", - "counter", - "Counts the number of packets dropped while being sent to other peers", - ), + inboundDroppedPacketsTotal: reg.DroppedPacketsInbound(), + outboundDroppedPacketsTotal: reg.DroppedPacketsOutbound(), } } @@ -886,8 +876,8 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf if filt.RunOut(p, t.filterFlags) != filter.Accept { metricPacketOutDropFilter.Add(1) - t.metrics.outboundDroppedPacketsTotal.Add(dropPacketLabel{ - Reason: DropReasonACL, + t.metrics.outboundDroppedPacketsTotal.Add(usermetric.DropLabels{ + Reason: usermetric.ReasonACL, }, 1) return filter.Drop, gro } @@ -1158,8 +1148,8 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca if outcome != filter.Accept { metricPacketInDropFilter.Add(1) - t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{ - Reason: DropReasonACL, + t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{ + Reason: usermetric.ReasonACL, }, 1) // Tell them, via TSMP, we're dropping them due to the ACL. @@ -1239,8 +1229,8 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) { t.noteActivity() _, err := t.tdevWrite(buffs, offset) if err != nil { - t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{ - Reason: DropReasonError, + t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{ + Reason: usermetric.ReasonError, }, int64(len(buffs))) } return len(buffs), err @@ -1482,20 +1472,6 @@ var ( metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco") ) -type DropReason string - -const ( - DropReasonACL DropReason = "acl" - DropReasonError DropReason = "error" -) - -type dropPacketLabel struct { - // Reason indicates what we have done with the packet, and has the following values: - // - acl (rejected packets because of ACL) - // - error (rejected packets because of an error) - Reason DropReason -} - func (t *Wrapper) InstallCaptureHook(cb capture.Callback) { t.captureHook.Store(cb) } diff --git a/net/tstun/wrap_test.go b/net/tstun/wrap_test.go index 0ed0075b6..9ebedda83 100644 --- a/net/tstun/wrap_test.go +++ b/net/tstun/wrap_test.go @@ -441,13 +441,13 @@ func TestFilter(t *testing.T) { } var metricInboundDroppedPacketsACL, metricInboundDroppedPacketsErr, metricOutboundDroppedPacketsACL int64 - if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok { + if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok { metricInboundDroppedPacketsACL = m.Value() } - if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError}).(*expvar.Int); ok { + if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonError}).(*expvar.Int); ok { metricInboundDroppedPacketsErr = m.Value() } - if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok { + if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok { metricOutboundDroppedPacketsACL = m.Value() } diff --git a/util/usermetric/metrics.go b/util/usermetric/metrics.go new file mode 100644 index 000000000..7f85989ff --- /dev/null +++ b/util/usermetric/metrics.go @@ -0,0 +1,69 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +// This file contains user-facing metrics that are used by multiple packages. +// Use it to define more common metrics. Any changes to the registry and +// metric types should be in usermetric.go. + +package usermetric + +import ( + "sync" + + "tailscale.com/metrics" +) + +// Metrics contains user-facing metrics that are used by multiple packages. +type Metrics struct { + initOnce sync.Once + + droppedPacketsInbound *metrics.MultiLabelMap[DropLabels] + droppedPacketsOutbound *metrics.MultiLabelMap[DropLabels] +} + +// DropReason is the reason why a packet was dropped. +type DropReason string + +const ( + // ReasonACL means that the packet was not permitted by ACL. + ReasonACL DropReason = "acl" + + // ReasonError means that the packet was dropped because of an error. + ReasonError DropReason = "error" +) + +// DropLabels contains common label(s) for dropped packet counters. +type DropLabels struct { + Reason DropReason +} + +// initOnce initializes the common metrics. +func (r *Registry) initOnce() { + r.m.initOnce.Do(func() { + r.m.droppedPacketsInbound = NewMultiLabelMapWithRegistry[DropLabels]( + r, + "tailscaled_inbound_dropped_packets_total", + "counter", + "Counts the number of dropped packets received by the node from other peers", + ) + r.m.droppedPacketsOutbound = NewMultiLabelMapWithRegistry[DropLabels]( + r, + "tailscaled_outbound_dropped_packets_total", + "counter", + "Counts the number of packets dropped while being sent to other peers", + ) + }) +} + +// DroppedPacketsOutbound returns the outbound dropped packet metric, creating it +// if necessary. +func (r *Registry) DroppedPacketsOutbound() *metrics.MultiLabelMap[DropLabels] { + r.initOnce() + return r.m.droppedPacketsOutbound +} + +// DroppedPacketsInbound returns the inbound dropped packet metric. +func (r *Registry) DroppedPacketsInbound() *metrics.MultiLabelMap[DropLabels] { + r.initOnce() + return r.m.droppedPacketsInbound +} diff --git a/util/usermetric/usermetric.go b/util/usermetric/usermetric.go index c964e08a7..7913a4ef0 100644 --- a/util/usermetric/usermetric.go +++ b/util/usermetric/usermetric.go @@ -19,6 +19,9 @@ import ( // Registry tracks user-facing metrics of various Tailscale subsystems. type Registry struct { vars expvar.Map + + // m contains common metrics owned by the registry. + m Metrics } // NewMultiLabelMapWithRegistry creates and register a new diff --git a/wgengine/magicsock/derp.go b/wgengine/magicsock/derp.go index 0204fa0f5..e9f070862 100644 --- a/wgengine/magicsock/derp.go +++ b/wgengine/magicsock/derp.go @@ -674,6 +674,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan if err != nil { c.logf("magicsock: derp.Send(%v): %v", wr.addr, err) metricSendDERPError.Add(1) + if !wr.isDisco { + c.metrics.outboundPacketsDroppedErrors.Add(1) + } } else if !wr.isDisco { c.metrics.outboundPacketsDERPTotal.Add(1) c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b))) diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 705e42d9e..a9c6fa070 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -127,6 +127,10 @@ type metrics struct { outboundBytesIPv4Total expvar.Int outboundBytesIPv6Total expvar.Int outboundBytesDERPTotal expvar.Int + + // outboundPacketsDroppedErrors is the total number of outbound packets + // dropped due to errors. + outboundPacketsDroppedErrors expvar.Int } // A Conn routes UDP packets and actively manages a list of its endpoints. @@ -605,6 +609,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics { "counter", "Counts the number of bytes sent to other peers", ) + outboundPacketsDroppedErrors := reg.DroppedPacketsOutbound() + m := new(metrics) // Map clientmetrics to the usermetric counters. @@ -631,6 +637,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics { outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total) outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal) + outboundPacketsDroppedErrors.Set(usermetric.DropLabels{Reason: usermetric.ReasonError}, &m.outboundPacketsDroppedErrors) + return m } @@ -1202,8 +1210,13 @@ func (c *Conn) networkDown() bool { return !c.networkUp.Load() } // Send implements conn.Bind. // // See https://pkg.go.dev/golang.zx2c4.com/wireguard/conn#Bind.Send -func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) error { +func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) (err error) { n := int64(len(buffs)) + defer func() { + if err != nil { + c.metrics.outboundPacketsDroppedErrors.Add(n) + } + }() metricSendData.Add(n) if c.networkDown() { metricSendDataNetworkDown.Add(n) diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index 7e48e1daa..1b3f8ec73 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -63,6 +63,7 @@ import ( "tailscale.com/types/nettype" "tailscale.com/types/ptr" "tailscale.com/util/cibuild" + "tailscale.com/util/must" "tailscale.com/util/racebuild" "tailscale.com/util/set" "tailscale.com/util/usermetric" @@ -3083,3 +3084,27 @@ func TestMaybeRebindOnError(t *testing.T) { } }) } + +func TestNetworkDownSendErrors(t *testing.T) { + netMon := must.Get(netmon.New(t.Logf)) + defer netMon.Close() + + reg := new(usermetric.Registry) + conn := must.Get(NewConn(Options{ + DisablePortMapper: true, + Logf: t.Logf, + NetMon: netMon, + Metrics: reg, + })) + defer conn.Close() + + conn.SetNetworkUp(false) + if err := conn.Send([][]byte{{00}}, &lazyEndpoint{}); err == nil { + t.Error("expected error, got nil") + } + resp := httptest.NewRecorder() + reg.Handler(resp, new(http.Request)) + if !strings.Contains(resp.Body.String(), `tailscaled_outbound_dropped_packets_total{reason="error"} 1`) { + t.Errorf("expected NetworkDown to increment packet dropped metric; got %q", resp.Body.String()) + } +}