From 249edaa34918f87d75b41eb5bc0b0119c1f12eb9 Mon Sep 17 00:00:00 2001 From: Val Date: Sat, 7 Oct 2023 08:10:37 +0200 Subject: [PATCH] wgengine/magicsock: add probed MTU metrics Record the number of MTU probes sent, the total bytes sent, the number of times we got a successful return from an MTU probe of a particular size, and the max MTU recorded. Updates #311 Signed-off-by: Val --- wgengine/magicsock/endpoint.go | 17 ++++++++++++++- wgengine/magicsock/magicsock.go | 37 ++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index 03f1e0ccc..1e344d156 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -633,6 +633,12 @@ func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, t }, logLevel) if !sent { de.forgetDiscoPing(txid) + return + } + + if size != 0 { + metricSentDiscoPeerMTUProbes.Add(1) + metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6()))) } } @@ -1062,6 +1068,15 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip knownTxID = true // for naked returns below de.removeSentDiscoPingLocked(m.TxID, sp) + pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6())) + if sp.size != 0 { + m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen)) + m.Add(1) + if metricMaxPeerMTUProbed.Value() < int64(pktLen) { + metricMaxPeerMTUProbed.Set(int64(pktLen)) + } + } + now := mono.Now() latency := now.Sub(sp.at) @@ -1083,7 +1098,7 @@ func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip } if sp.purpose != pingHeartbeat { - de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pingSizeToPktLen(sp.size, sp.to.Addr().Is6()), m.Src, logger.ArgWriter(func(bw *bufio.Writer) { + de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) { if sp.to != src { fmt.Fprintf(bw, " ping.to=%v", sp.to) } diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 667032c17..335ce5656 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -42,6 +42,7 @@ import ( "tailscale.com/net/portmapper" "tailscale.com/net/sockstats" "tailscale.com/net/stun" + "tailscale.com/net/tstun" "tailscale.com/syncs" "tailscale.com/tailcfg" "tailscale.com/tstime" @@ -2825,16 +2826,18 @@ var ( metricRecvDataIPv6 = clientmetric.NewCounter("magicsock_recv_data_ipv6") // Disco packets - metricSendDiscoUDP = clientmetric.NewCounter("magicsock_disco_send_udp") - metricSendDiscoDERP = clientmetric.NewCounter("magicsock_disco_send_derp") - metricSentDiscoUDP = clientmetric.NewCounter("magicsock_disco_sent_udp") - metricSentDiscoDERP = clientmetric.NewCounter("magicsock_disco_sent_derp") - metricSentDiscoPing = clientmetric.NewCounter("magicsock_disco_sent_ping") - metricSentDiscoPong = clientmetric.NewCounter("magicsock_disco_sent_pong") - metricSentDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_sent_callmemaybe") - metricRecvDiscoBadPeer = clientmetric.NewCounter("magicsock_disco_recv_bad_peer") - metricRecvDiscoBadKey = clientmetric.NewCounter("magicsock_disco_recv_bad_key") - metricRecvDiscoBadParse = clientmetric.NewCounter("magicsock_disco_recv_bad_parse") + metricSendDiscoUDP = clientmetric.NewCounter("magicsock_disco_send_udp") + metricSendDiscoDERP = clientmetric.NewCounter("magicsock_disco_send_derp") + metricSentDiscoUDP = clientmetric.NewCounter("magicsock_disco_sent_udp") + metricSentDiscoDERP = clientmetric.NewCounter("magicsock_disco_sent_derp") + metricSentDiscoPing = clientmetric.NewCounter("magicsock_disco_sent_ping") + metricSentDiscoPong = clientmetric.NewCounter("magicsock_disco_sent_pong") + metricSentDiscoPeerMTUProbes = clientmetric.NewCounter("magicsock_disco_sent_peer_mtu_probes") + metricSentDiscoPeerMTUProbeBytes = clientmetric.NewCounter("magicsock_disco_sent_peer_mtu_probe_bytes") + metricSentDiscoCallMeMaybe = clientmetric.NewCounter("magicsock_disco_sent_callmemaybe") + metricRecvDiscoBadPeer = clientmetric.NewCounter("magicsock_disco_recv_bad_peer") + metricRecvDiscoBadKey = clientmetric.NewCounter("magicsock_disco_recv_bad_key") + metricRecvDiscoBadParse = clientmetric.NewCounter("magicsock_disco_recv_bad_parse") metricRecvDiscoUDP = clientmetric.NewCounter("magicsock_disco_recv_udp") metricRecvDiscoDERP = clientmetric.NewCounter("magicsock_disco_recv_derp") @@ -2852,4 +2855,18 @@ var ( // Disco packets received bpf read path metricRecvDiscoPacketIPv4 = clientmetric.NewCounter("magicsock_disco_recv_bpf_ipv4") metricRecvDiscoPacketIPv6 = clientmetric.NewCounter("magicsock_disco_recv_bpf_ipv6") + + // metricMaxPeerMTUProbed is the largest peer path MTU we successfully probed. + metricMaxPeerMTUProbed = clientmetric.NewGauge("magicsock_max_peer_mtu_probed") + + // metricRecvDiscoPeerMTUProbesByMTU collects the number of times we + // received an peer MTU probe response for a given MTU size. + // TODO: add proper support for label maps in clientmetrics + metricRecvDiscoPeerMTUProbesByMTU syncs.Map[string, *clientmetric.Metric] ) + +func getPeerMTUsProbedMetric(mtu tstun.WireMTU) *clientmetric.Metric { + key := fmt.Sprintf("magicsock_recv_disco_peer_mtu_probes_by_mtu_%d", mtu) + mm, _ := metricRecvDiscoPeerMTUProbesByMTU.LoadOrInit(key, func() *clientmetric.Metric { return clientmetric.NewCounter(key) }) + return mm +}