From 151644f647d9388bb4cb1ae1c4c155b8d8de4cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Claus=20Lensb=C3=B8l?= Date: Thu, 22 Jan 2026 14:50:24 -0500 Subject: [PATCH] wgengine: send disco key via TSMP on first contact (#18215) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we have not yet communicated with a peer, send a TSMPDiscoAdvertisement to let the peer know of our disco key. This is in most cases redundant, but will allow us to set up direct connections when the client cannot access control. Some parts taken from: #18073 Updates #12639 Signed-off-by: Claus Lensbøl --- wgengine/magicsock/endpoint.go | 1 + wgengine/magicsock/magicsock.go | 61 ++++++++++++++++++++++++++++++--- wgengine/userspace.go | 9 +++++ 3 files changed, 67 insertions(+), 4 deletions(-) diff --git a/wgengine/magicsock/endpoint.go b/wgengine/magicsock/endpoint.go index eda589e14..586a2dc75 100644 --- a/wgengine/magicsock/endpoint.go +++ b/wgengine/magicsock/endpoint.go @@ -80,6 +80,7 @@ type endpoint struct { lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints lastUDPRelayPathDiscovery mono.Time // last time we ran UDP relay path discovery + sentDiscoKeyAdvertisement bool // wether we sent a TSMPDiscoAdvertisement or not to this endpoint derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients) bestAddr addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked() diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index 8fbd07013..1c1309347 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -179,9 +179,10 @@ type Conn struct { // A publisher for synchronization points to ensure correct ordering of // config changes between magicsock and wireguard. - syncPub *eventbus.Publisher[syncPoint] - allocRelayEndpointPub *eventbus.Publisher[UDPRelayAllocReq] - portUpdatePub *eventbus.Publisher[router.PortUpdate] + syncPub *eventbus.Publisher[syncPoint] + allocRelayEndpointPub *eventbus.Publisher[UDPRelayAllocReq] + portUpdatePub *eventbus.Publisher[router.PortUpdate] + tsmpDiscoKeyAvailablePub *eventbus.Publisher[NewDiscoKeyAvailable] // pconn4 and pconn6 are the underlying UDP sockets used to // send/receive packets for wireguard and other magicsock @@ -696,6 +697,7 @@ func NewConn(opts Options) (*Conn, error) { c.syncPub = eventbus.Publish[syncPoint](ec) c.allocRelayEndpointPub = eventbus.Publish[UDPRelayAllocReq](ec) c.portUpdatePub = eventbus.Publish[router.PortUpdate](ec) + c.tsmpDiscoKeyAvailablePub = eventbus.Publish[NewDiscoKeyAvailable](ec) eventbus.SubscribeFunc(ec, c.onPortMapChanged) eventbus.SubscribeFunc(ec, c.onFilterUpdate) eventbus.SubscribeFunc(ec, c.onNodeViewsUpdate) @@ -1249,7 +1251,8 @@ func (c *Conn) DiscoPublicKey() key.DiscoPublic { // RotateDiscoKey generates a new discovery key pair and updates the connection // to use it. This invalidates all existing disco sessions and will cause peers -// to re-establish discovery sessions with the new key. +// to re-establish discovery sessions with the new key. Addtionally, the +// lastTSMPDiscoAdvertisement on all endpoints is reset to 0. // // This is primarily for debugging and testing purposes, a future enhancement // should provide a mechanism for seamless rotation by supporting short term use @@ -1263,6 +1266,11 @@ func (c *Conn) RotateDiscoKey() { newShort := c.discoAtomic.Short() c.discoInfo = make(map[key.DiscoPublic]*discoInfo) connCtx := c.connCtx + for _, endpoint := range c.peerMap.byEpAddr { + endpoint.ep.mu.Lock() + endpoint.ep.sentDiscoKeyAdvertisement = false + endpoint.ep.mu.Unlock() + } c.mu.Unlock() c.logf("magicsock: rotated disco key from %v to %v", oldShort, newShort) @@ -2247,6 +2255,7 @@ func (c *Conn) handleDiscoMessage(msg []byte, src epAddr, shouldBeRelayHandshake if debugDisco() { c.logf("magicsock: disco: failed to open naclbox from %v (wrong rcpt?) via %s", sender, via) } + metricRecvDiscoBadKey.Add(1) return } @@ -2654,6 +2663,8 @@ func (c *Conn) enqueueCallMeMaybe(derpAddr netip.AddrPort, de *endpoint) { return } + c.maybeSendTSMPDiscoAdvert(de) + eps := make([]netip.AddrPort, 0, len(c.lastEndpoints)) for _, ep := range c.lastEndpoints { eps = append(eps, ep.Addr) @@ -4314,3 +4325,45 @@ func (c *Conn) HandleDiscoKeyAdvertisement(node tailcfg.NodeView, update packet. c.logf("magicsock: updated disco key for peer %v to %v", nodeKey.ShortString(), discoKey.ShortString()) metricTSMPDiscoKeyAdvertisementApplied.Add(1) } + +// NewDiscoKeyAvailable is an eventbus topic that is emitted when we're sending +// a packet to a node and observe we haven't told it our current DiscoKey before. +// +// The publisher is magicsock, when we're sending a packet. +// The subscriber is userspaceEngine, which sends a TSMP packet, also via +// magicsock. This doesn't recurse infinitely because we only publish it once per +// DiscoKey. +// In the common case, a DiscoKey is not rotated within a process generation +// (as of 2026-01-21), except with debug commands to simulate process restarts. +// +// The address is the first node address (tailscale address) of the node. It +// does not matter if the address is v4/v6, the receiver should handle either. +// +// Since we have not yet communicated with the node at the time we are +// sending this event, the resulting TSMPDiscoKeyAdvertisement will with all +// likelihood be transmitted via DERP. +type NewDiscoKeyAvailable struct { + NodeFirstAddr netip.Addr + NodeID tailcfg.NodeID +} + +// maybeSendTSMPDiscoAdvert conditionally emits an event indicating that we +// should send our DiscoKey to the first node address of the magicksock endpoint. +// The event is only emitted if we have not yet contacted that endpoint since +// the DiscoKey changed. +// +// This condition is most likely met only once per endpoint, after the start of +// tailscaled, but not until we contact the endpoint for the first time. +// +// We do not need the Conn to be locked, but the endpoint should be. +func (c *Conn) maybeSendTSMPDiscoAdvert(de *endpoint) { + de.mu.Lock() + defer de.mu.Unlock() + if !de.sentDiscoKeyAdvertisement { + de.sentDiscoKeyAdvertisement = true + c.tsmpDiscoKeyAvailablePub.Publish(NewDiscoKeyAvailable{ + NodeFirstAddr: de.nodeAddr, + NodeID: de.nodeID, + }) + } +} diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 875011a9c..dbc8e8b57 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -54,6 +54,7 @@ import ( "tailscale.com/util/execqueue" "tailscale.com/util/mak" "tailscale.com/util/set" + "tailscale.com/util/singleflight" "tailscale.com/util/testenv" "tailscale.com/util/usermetric" "tailscale.com/version" @@ -568,6 +569,14 @@ func NewUserspaceEngine(logf logger.Logf, conf Config) (_ Engine, reterr error) } e.magicConn.HandleDiscoKeyAdvertisement(peer.Node, pkt) }) + var tsmpRequestGroup singleflight.Group[netip.Addr, struct{}] + eventbus.SubscribeFunc(ec, func(req magicsock.NewDiscoKeyAvailable) { + go tsmpRequestGroup.Do(req.NodeFirstAddr, func() (struct{}, error) { + e.sendTSMPDiscoAdvertisement(req.NodeFirstAddr) + e.logf("wgengine: sending TSMP disco key advertisement to %v", req.NodeFirstAddr) + return struct{}{}, nil + }) + }) e.eventClient = ec e.logf("Engine created.") return e, nil