wgengine/magicsock: export packet drop metric for outbound errors

This required sharing the dropped packet metric between two packages
(tstun and magicsock), so I've moved its definition to util/usermetric.

Updates tailscale/corp#22075

Signed-off-by: Anton Tolchanov <anton@tailscale.com>
bradfitz/mesh_vpc
Anton Tolchanov 4 weeks ago committed by Anton Tolchanov
parent 532b26145a
commit b4f46c31bb

@ -213,24 +213,14 @@ type Wrapper struct {
}
type metrics struct {
inboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel]
outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[dropPacketLabel]
inboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels]
outboundDroppedPacketsTotal *tsmetrics.MultiLabelMap[usermetric.DropLabels]
}
func registerMetrics(reg *usermetric.Registry) *metrics {
return &metrics{
inboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
reg,
"tailscaled_inbound_dropped_packets_total",
"counter",
"Counts the number of dropped packets received by the node from other peers",
),
outboundDroppedPacketsTotal: usermetric.NewMultiLabelMapWithRegistry[dropPacketLabel](
reg,
"tailscaled_outbound_dropped_packets_total",
"counter",
"Counts the number of packets dropped while being sent to other peers",
),
inboundDroppedPacketsTotal: reg.DroppedPacketsInbound(),
outboundDroppedPacketsTotal: reg.DroppedPacketsOutbound(),
}
}
@ -886,8 +876,8 @@ func (t *Wrapper) filterPacketOutboundToWireGuard(p *packet.Parsed, pc *peerConf
if filt.RunOut(p, t.filterFlags) != filter.Accept {
metricPacketOutDropFilter.Add(1)
t.metrics.outboundDroppedPacketsTotal.Add(dropPacketLabel{
Reason: DropReasonACL,
t.metrics.outboundDroppedPacketsTotal.Add(usermetric.DropLabels{
Reason: usermetric.ReasonACL,
}, 1)
return filter.Drop, gro
}
@ -1158,8 +1148,8 @@ func (t *Wrapper) filterPacketInboundFromWireGuard(p *packet.Parsed, captHook ca
if outcome != filter.Accept {
metricPacketInDropFilter.Add(1)
t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
Reason: DropReasonACL,
t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{
Reason: usermetric.ReasonACL,
}, 1)
// Tell them, via TSMP, we're dropping them due to the ACL.
@ -1239,8 +1229,8 @@ func (t *Wrapper) Write(buffs [][]byte, offset int) (int, error) {
t.noteActivity()
_, err := t.tdevWrite(buffs, offset)
if err != nil {
t.metrics.inboundDroppedPacketsTotal.Add(dropPacketLabel{
Reason: DropReasonError,
t.metrics.inboundDroppedPacketsTotal.Add(usermetric.DropLabels{
Reason: usermetric.ReasonError,
}, int64(len(buffs)))
}
return len(buffs), err
@ -1482,20 +1472,6 @@ var (
metricPacketOutDropSelfDisco = clientmetric.NewCounter("tstun_out_to_wg_drop_self_disco")
)
type DropReason string
const (
DropReasonACL DropReason = "acl"
DropReasonError DropReason = "error"
)
type dropPacketLabel struct {
// Reason indicates what we have done with the packet, and has the following values:
// - acl (rejected packets because of ACL)
// - error (rejected packets because of an error)
Reason DropReason
}
func (t *Wrapper) InstallCaptureHook(cb capture.Callback) {
t.captureHook.Store(cb)
}

@ -441,13 +441,13 @@ func TestFilter(t *testing.T) {
}
var metricInboundDroppedPacketsACL, metricInboundDroppedPacketsErr, metricOutboundDroppedPacketsACL int64
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok {
metricInboundDroppedPacketsACL = m.Value()
}
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonError}).(*expvar.Int); ok {
if m, ok := tun.metrics.inboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonError}).(*expvar.Int); ok {
metricInboundDroppedPacketsErr = m.Value()
}
if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(dropPacketLabel{Reason: DropReasonACL}).(*expvar.Int); ok {
if m, ok := tun.metrics.outboundDroppedPacketsTotal.Get(usermetric.DropLabels{Reason: usermetric.ReasonACL}).(*expvar.Int); ok {
metricOutboundDroppedPacketsACL = m.Value()
}

@ -0,0 +1,69 @@
// Copyright (c) Tailscale Inc & AUTHORS
// SPDX-License-Identifier: BSD-3-Clause
// This file contains user-facing metrics that are used by multiple packages.
// Use it to define more common metrics. Any changes to the registry and
// metric types should be in usermetric.go.
package usermetric
import (
"sync"
"tailscale.com/metrics"
)
// Metrics contains user-facing metrics that are used by multiple packages.
type Metrics struct {
initOnce sync.Once
droppedPacketsInbound *metrics.MultiLabelMap[DropLabels]
droppedPacketsOutbound *metrics.MultiLabelMap[DropLabels]
}
// DropReason is the reason why a packet was dropped.
type DropReason string
const (
// ReasonACL means that the packet was not permitted by ACL.
ReasonACL DropReason = "acl"
// ReasonError means that the packet was dropped because of an error.
ReasonError DropReason = "error"
)
// DropLabels contains common label(s) for dropped packet counters.
type DropLabels struct {
Reason DropReason
}
// initOnce initializes the common metrics.
func (r *Registry) initOnce() {
r.m.initOnce.Do(func() {
r.m.droppedPacketsInbound = NewMultiLabelMapWithRegistry[DropLabels](
r,
"tailscaled_inbound_dropped_packets_total",
"counter",
"Counts the number of dropped packets received by the node from other peers",
)
r.m.droppedPacketsOutbound = NewMultiLabelMapWithRegistry[DropLabels](
r,
"tailscaled_outbound_dropped_packets_total",
"counter",
"Counts the number of packets dropped while being sent to other peers",
)
})
}
// DroppedPacketsOutbound returns the outbound dropped packet metric, creating it
// if necessary.
func (r *Registry) DroppedPacketsOutbound() *metrics.MultiLabelMap[DropLabels] {
r.initOnce()
return r.m.droppedPacketsOutbound
}
// DroppedPacketsInbound returns the inbound dropped packet metric.
func (r *Registry) DroppedPacketsInbound() *metrics.MultiLabelMap[DropLabels] {
r.initOnce()
return r.m.droppedPacketsInbound
}

@ -19,6 +19,9 @@ import (
// Registry tracks user-facing metrics of various Tailscale subsystems.
type Registry struct {
vars expvar.Map
// m contains common metrics owned by the registry.
m Metrics
}
// NewMultiLabelMapWithRegistry creates and register a new

@ -674,6 +674,9 @@ func (c *Conn) runDerpWriter(ctx context.Context, dc *derphttp.Client, ch <-chan
if err != nil {
c.logf("magicsock: derp.Send(%v): %v", wr.addr, err)
metricSendDERPError.Add(1)
if !wr.isDisco {
c.metrics.outboundPacketsDroppedErrors.Add(1)
}
} else if !wr.isDisco {
c.metrics.outboundPacketsDERPTotal.Add(1)
c.metrics.outboundBytesDERPTotal.Add(int64(len(wr.b)))

@ -127,6 +127,10 @@ type metrics struct {
outboundBytesIPv4Total expvar.Int
outboundBytesIPv6Total expvar.Int
outboundBytesDERPTotal expvar.Int
// outboundPacketsDroppedErrors is the total number of outbound packets
// dropped due to errors.
outboundPacketsDroppedErrors expvar.Int
}
// A Conn routes UDP packets and actively manages a list of its endpoints.
@ -605,6 +609,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
"counter",
"Counts the number of bytes sent to other peers",
)
outboundPacketsDroppedErrors := reg.DroppedPacketsOutbound()
m := new(metrics)
// Map clientmetrics to the usermetric counters.
@ -631,6 +637,8 @@ func registerMetrics(reg *usermetric.Registry) *metrics {
outboundBytesTotal.Set(pathDirectV6, &m.outboundBytesIPv6Total)
outboundBytesTotal.Set(pathDERP, &m.outboundBytesDERPTotal)
outboundPacketsDroppedErrors.Set(usermetric.DropLabels{Reason: usermetric.ReasonError}, &m.outboundPacketsDroppedErrors)
return m
}
@ -1202,8 +1210,13 @@ func (c *Conn) networkDown() bool { return !c.networkUp.Load() }
// Send implements conn.Bind.
//
// See https://pkg.go.dev/golang.zx2c4.com/wireguard/conn#Bind.Send
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) error {
func (c *Conn) Send(buffs [][]byte, ep conn.Endpoint) (err error) {
n := int64(len(buffs))
defer func() {
if err != nil {
c.metrics.outboundPacketsDroppedErrors.Add(n)
}
}()
metricSendData.Add(n)
if c.networkDown() {
metricSendDataNetworkDown.Add(n)

@ -63,6 +63,7 @@ import (
"tailscale.com/types/nettype"
"tailscale.com/types/ptr"
"tailscale.com/util/cibuild"
"tailscale.com/util/must"
"tailscale.com/util/racebuild"
"tailscale.com/util/set"
"tailscale.com/util/usermetric"
@ -3083,3 +3084,27 @@ func TestMaybeRebindOnError(t *testing.T) {
}
})
}
func TestNetworkDownSendErrors(t *testing.T) {
netMon := must.Get(netmon.New(t.Logf))
defer netMon.Close()
reg := new(usermetric.Registry)
conn := must.Get(NewConn(Options{
DisablePortMapper: true,
Logf: t.Logf,
NetMon: netMon,
Metrics: reg,
}))
defer conn.Close()
conn.SetNetworkUp(false)
if err := conn.Send([][]byte{{00}}, &lazyEndpoint{}); err == nil {
t.Error("expected error, got nil")
}
resp := httptest.NewRecorder()
reg.Handler(resp, new(http.Request))
if !strings.Contains(resp.Body.String(), `tailscaled_outbound_dropped_packets_total{reason="error"} 1`) {
t.Errorf("expected NetworkDown to increment packet dropped metric; got %q", resp.Body.String())
}
}

Loading…
Cancel
Save