mirror of https://github.com/tailscale/tailscale/
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1869 lines
60 KiB
Go
1869 lines
60 KiB
Go
// Copyright (c) Tailscale Inc & AUTHORS
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
package magicsock
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"iter"
|
|
"math"
|
|
"math/rand/v2"
|
|
"net"
|
|
"net/netip"
|
|
"reflect"
|
|
"runtime"
|
|
"slices"
|
|
"sync"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
xmaps "golang.org/x/exp/maps"
|
|
"golang.org/x/net/ipv4"
|
|
"golang.org/x/net/ipv6"
|
|
"tailscale.com/disco"
|
|
"tailscale.com/ipn/ipnstate"
|
|
"tailscale.com/net/stun"
|
|
"tailscale.com/net/tstun"
|
|
"tailscale.com/tailcfg"
|
|
"tailscale.com/tstime/mono"
|
|
"tailscale.com/types/key"
|
|
"tailscale.com/types/logger"
|
|
"tailscale.com/util/mak"
|
|
"tailscale.com/util/ringbuffer"
|
|
)
|
|
|
|
var mtuProbePingSizesV4 []int
|
|
var mtuProbePingSizesV6 []int
|
|
|
|
func init() {
|
|
for _, m := range tstun.WireMTUsToProbe {
|
|
mtuProbePingSizesV4 = append(mtuProbePingSizesV4, pktLenToPingSize(m, false))
|
|
mtuProbePingSizesV6 = append(mtuProbePingSizesV6, pktLenToPingSize(m, true))
|
|
}
|
|
}
|
|
|
|
// endpoint is a wireguard/conn.Endpoint. In wireguard-go and kernel WireGuard
|
|
// there is only one endpoint for a peer, but in Tailscale we distribute a
|
|
// number of possible endpoints for a peer which would include the all the
|
|
// likely addresses at which a peer may be reachable. This endpoint type holds
|
|
// the information required that when wireguard-go wants to send to a
|
|
// particular peer (essentially represented by this endpoint type), the send
|
|
// function can use the currently best known Tailscale endpoint to send packets
|
|
// to the peer.
|
|
type endpoint struct {
|
|
// atomically accessed; declared first for alignment reasons
|
|
lastRecvWG mono.Time // last time there were incoming packets from this peer destined for wireguard-go (e.g. not disco)
|
|
lastRecvUDPAny mono.Time // last time there were incoming UDP packets from this peer of any kind
|
|
numStopAndResetAtomic int64
|
|
debugUpdates *ringbuffer.RingBuffer[EndpointChange]
|
|
|
|
// These fields are initialized once and never modified.
|
|
c *Conn
|
|
nodeID tailcfg.NodeID
|
|
publicKey key.NodePublic // peer public key (for WireGuard + DERP)
|
|
publicKeyHex string // cached output of publicKey.UntypedHexString
|
|
fakeWGAddr netip.AddrPort // the UDP address we tell wireguard-go we're using
|
|
nodeAddr netip.Addr // the node's first tailscale address; used for logging & wireguard rate-limiting (Issue 6686)
|
|
|
|
disco atomic.Pointer[endpointDisco] // if the peer supports disco, the key and short string
|
|
|
|
// mu protects all following fields.
|
|
mu sync.Mutex // Lock ordering: Conn.mu, then endpoint.mu
|
|
|
|
heartBeatTimer *time.Timer // nil when idle
|
|
lastSendExt mono.Time // last time there were outgoing packets sent to this peer from an external trigger (e.g. wireguard-go or disco pingCLI)
|
|
lastSendAny mono.Time // last time there were outgoing packets sent this peer from any trigger, internal or external to magicsock
|
|
lastFullPing mono.Time // last time we pinged all disco or wireguard only endpoints
|
|
derpAddr netip.AddrPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients)
|
|
|
|
bestAddr addrQuality // best non-DERP path; zero if none; mutate via setBestAddrLocked()
|
|
bestAddrAt mono.Time // time best address re-confirmed
|
|
trustBestAddrUntil mono.Time // time when bestAddr expires
|
|
sentPing map[stun.TxID]sentPing
|
|
endpointState map[netip.AddrPort]*endpointState
|
|
isCallMeMaybeEP map[netip.AddrPort]bool
|
|
|
|
// The following fields are related to the new "silent disco"
|
|
// implementation that's a WIP as of 2022-10-20.
|
|
// See #540 for background.
|
|
heartbeatDisabled bool
|
|
probeUDPLifetime *probeUDPLifetime // UDP path lifetime probing; nil if disabled
|
|
|
|
expired bool // whether the node has expired
|
|
isWireguardOnly bool // whether the endpoint is WireGuard only
|
|
}
|
|
|
|
func (de *endpoint) setBestAddrLocked(v addrQuality) {
|
|
if v.AddrPort != de.bestAddr.AddrPort {
|
|
de.probeUDPLifetime.resetCycleEndpointLocked()
|
|
}
|
|
de.bestAddr = v
|
|
}
|
|
|
|
const (
|
|
// udpLifetimeProbeCliffSlack is how much slack to use relative to a
|
|
// ProbeUDPLifetimeConfig.Cliffs duration in order to account for RTT,
|
|
// scheduling jitter, buffers, etc. If the cliff is 10s, we attempt to probe
|
|
// after 10s - 2s (8s) amount of inactivity.
|
|
udpLifetimeProbeCliffSlack = time.Second * 2
|
|
// udpLifetimeProbeSchedulingTolerance is how much of a difference can be
|
|
// tolerated between a UDP lifetime probe scheduling target and when it
|
|
// actually fired. This must be some fraction of udpLifetimeProbeCliffSlack.
|
|
udpLifetimeProbeSchedulingTolerance = udpLifetimeProbeCliffSlack / 8
|
|
)
|
|
|
|
// probeUDPLifetime represents the configuration and state tied to probing UDP
|
|
// path lifetime. A probe "cycle" involves pinging the UDP path at various
|
|
// timeout cliffs, which are pre-defined durations of interest commonly used by
|
|
// NATs/firewalls as default stateful session timeout values. Cliffs are probed
|
|
// in ascending order. A "cycle" completes when all cliffs have received a pong,
|
|
// or when a ping times out. Cycles may extend across endpoint session lifetimes
|
|
// if they are disrupted by user traffic.
|
|
type probeUDPLifetime struct {
|
|
// All fields are guarded by endpoint.mu. probeUDPLifetime methods are for
|
|
// convenience.
|
|
|
|
// config holds the probing configuration.
|
|
config ProbeUDPLifetimeConfig
|
|
|
|
// timer is nil when idle. A non-nil timer indicates we intend to probe a
|
|
// timeout cliff in the future.
|
|
timer *time.Timer
|
|
|
|
// bestAddr contains the endpoint.bestAddr.AddrPort at the time a cycle was
|
|
// scheduled to start. A probing cycle is 1:1 with the current
|
|
// endpoint.bestAddr.AddrPort in the interest of simplicity. When
|
|
// endpoint.bestAddr.AddrPort changes, any active probing cycle will reset.
|
|
bestAddr netip.AddrPort
|
|
// cycleStartedAt contains the time at which the first cliff
|
|
// (ProbeUDPLifetimeConfig.Cliffs[0]) was pinged for the current/last cycle.
|
|
cycleStartedAt time.Time
|
|
// cycleActive is true if a probing cycle is active, otherwise false.
|
|
cycleActive bool
|
|
// currentCliff represents the index into ProbeUDPLifetimeConfig.Cliffs for
|
|
// the cliff that we are waiting to ping, or waiting on a pong/timeout.
|
|
currentCliff int
|
|
// lastTxID is the ID for the last ping that was sent.
|
|
lastTxID stun.TxID
|
|
}
|
|
|
|
func (p *probeUDPLifetime) currentCliffDurationEndpointLocked() time.Duration {
|
|
if p == nil {
|
|
return 0
|
|
}
|
|
return p.config.Cliffs[p.currentCliff]
|
|
}
|
|
|
|
// cycleCompleteMaxCliffEndpointLocked records the max cliff (as an index of
|
|
// ProbeUDPLifetimeConfig.Cliffs) a probing cycle reached, i.e. received a pong
|
|
// for. A value < 0 indicates no cliff was reached. It is a no-op if the active
|
|
// configuration does not equal defaultProbeUDPLifetimeConfig.
|
|
func (p *probeUDPLifetime) cycleCompleteMaxCliffEndpointLocked(cliff int) {
|
|
if !p.config.Equals(defaultProbeUDPLifetimeConfig) {
|
|
return
|
|
}
|
|
switch {
|
|
case cliff < 0:
|
|
metricUDPLifetimeCycleCompleteNoCliffReached.Add(1)
|
|
case cliff == 0:
|
|
metricUDPLifetimeCycleCompleteAt10sCliff.Add(1)
|
|
case cliff == 1:
|
|
metricUDPLifetimeCycleCompleteAt30sCliff.Add(1)
|
|
case cliff == 2:
|
|
metricUDPLifetimeCycleCompleteAt60sCliff.Add(1)
|
|
}
|
|
}
|
|
|
|
// resetCycleEndpointLocked resets the state contained in p to reflect an
|
|
// inactive cycle.
|
|
func (p *probeUDPLifetime) resetCycleEndpointLocked() {
|
|
if p == nil {
|
|
return
|
|
}
|
|
if p.timer != nil {
|
|
p.timer.Stop()
|
|
p.timer = nil
|
|
}
|
|
p.cycleActive = false
|
|
p.currentCliff = 0
|
|
p.bestAddr = netip.AddrPort{}
|
|
}
|
|
|
|
// ProbeUDPLifetimeConfig represents the configuration for probing UDP path
|
|
// lifetime.
|
|
type ProbeUDPLifetimeConfig struct {
|
|
// The timeout cliffs to probe. Values are in ascending order. Ascending
|
|
// order is chosen over descending because we have limited opportunities to
|
|
// probe. With a descending order we are stuck waiting for a new UDP
|
|
// path/session if the first value times out. When that new path is
|
|
// established is anyone's guess.
|
|
Cliffs []time.Duration
|
|
// CycleCanStartEvery represents the min duration between cycles starting
|
|
// up.
|
|
CycleCanStartEvery time.Duration
|
|
}
|
|
|
|
var (
|
|
// defaultProbeUDPLifetimeConfig is the configuration that must be used
|
|
// for UDP path lifetime probing until it can be wholly disseminated (not
|
|
// just on/off) from upstream control components, and associated metrics
|
|
// (metricUDPLifetime*) have lifetime management.
|
|
//
|
|
// TODO(#10928): support dynamic config via tailcfg.PeerCapMap.
|
|
defaultProbeUDPLifetimeConfig = &ProbeUDPLifetimeConfig{
|
|
Cliffs: []time.Duration{
|
|
time.Second * 10,
|
|
time.Second * 30,
|
|
time.Second * 60,
|
|
},
|
|
CycleCanStartEvery: time.Hour * 24,
|
|
}
|
|
)
|
|
|
|
// Equals returns true if b equals p, otherwise false. If both sides are nil,
|
|
// Equals returns true. If only one side is nil, Equals returns false.
|
|
func (p *ProbeUDPLifetimeConfig) Equals(b *ProbeUDPLifetimeConfig) bool {
|
|
if p == b {
|
|
return true
|
|
}
|
|
if (p == nil && b != nil) || (b == nil && p != nil) {
|
|
return false
|
|
}
|
|
if !slices.Equal(p.Cliffs, b.Cliffs) {
|
|
return false
|
|
}
|
|
if p.CycleCanStartEvery != b.CycleCanStartEvery {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Valid returns true if p is valid, otherwise false. p must be non-nil.
|
|
func (p *ProbeUDPLifetimeConfig) Valid() bool {
|
|
if len(p.Cliffs) < 1 {
|
|
// We need at least one cliff, otherwise there is nothing to probe.
|
|
return false
|
|
}
|
|
if p.CycleCanStartEvery < 1 {
|
|
// Probing must be constrained by a positive CycleCanStartEvery.
|
|
return false
|
|
}
|
|
for i, c := range p.Cliffs {
|
|
if c <= max(udpLifetimeProbeCliffSlack*2, heartbeatInterval) {
|
|
// A timeout cliff less than or equal to twice
|
|
// udpLifetimeProbeCliffSlack is invalid due to being effectively
|
|
// zero when the cliff slack is subtracted from the cliff value at
|
|
// scheduling time.
|
|
//
|
|
// A timeout cliff less or equal to the heartbeatInterval is also
|
|
// invalid, as we may attempt to schedule on the tail end of the
|
|
// last heartbeat tied to an active session.
|
|
//
|
|
// These values are constants, but max()'d in case they change in
|
|
// the future.
|
|
return false
|
|
}
|
|
if i == 0 {
|
|
continue
|
|
}
|
|
if c <= p.Cliffs[i-1] {
|
|
// Cliffs must be in ascending order.
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// setProbeUDPLifetimeOn enables or disables probing of UDP path lifetime based
|
|
// on v. In the case of enablement defaultProbeUDPLifetimeConfig is used as the
|
|
// desired configuration.
|
|
func (de *endpoint) setProbeUDPLifetimeOn(v bool) {
|
|
de.mu.Lock()
|
|
if v {
|
|
de.setProbeUDPLifetimeConfigLocked(defaultProbeUDPLifetimeConfig)
|
|
} else {
|
|
de.setProbeUDPLifetimeConfigLocked(nil)
|
|
}
|
|
de.mu.Unlock()
|
|
}
|
|
|
|
// setProbeUDPLifetimeConfigLocked sets the desired configuration for probing
|
|
// UDP path lifetime. Ownership of desired is passed to endpoint, it must not be
|
|
// mutated once this call is made. A nil value disables the feature. If desired
|
|
// is non-nil but desired.Valid() returns false this is a no-op.
|
|
func (de *endpoint) setProbeUDPLifetimeConfigLocked(desired *ProbeUDPLifetimeConfig) {
|
|
if de.isWireguardOnly {
|
|
return
|
|
}
|
|
if desired == nil {
|
|
if de.probeUDPLifetime == nil {
|
|
// noop, not currently configured or desired
|
|
return
|
|
}
|
|
de.probeUDPLifetime.resetCycleEndpointLocked()
|
|
de.probeUDPLifetime = nil
|
|
return
|
|
}
|
|
if !desired.Valid() {
|
|
return
|
|
}
|
|
if de.probeUDPLifetime != nil {
|
|
if de.probeUDPLifetime.config.Equals(desired) {
|
|
// noop, current config equals desired
|
|
return
|
|
}
|
|
de.probeUDPLifetime.resetCycleEndpointLocked()
|
|
} else {
|
|
de.probeUDPLifetime = &probeUDPLifetime{}
|
|
}
|
|
p := de.probeUDPLifetime
|
|
p.config = *desired
|
|
p.resetCycleEndpointLocked()
|
|
}
|
|
|
|
// endpointDisco is the current disco key and short string for an endpoint. This
|
|
// structure is immutable.
|
|
type endpointDisco struct {
|
|
key key.DiscoPublic // for discovery messages.
|
|
short string // ShortString of discoKey.
|
|
}
|
|
|
|
type sentPing struct {
|
|
to netip.AddrPort
|
|
at mono.Time
|
|
timer *time.Timer // timeout timer
|
|
purpose discoPingPurpose
|
|
size int // size of the disco message
|
|
resCB *pingResultAndCallback // or nil for internal use
|
|
}
|
|
|
|
// endpointState is some state and history for a specific endpoint of
|
|
// a endpoint. (The subject is the endpoint.endpointState
|
|
// map key)
|
|
type endpointState struct {
|
|
// all fields guarded by endpoint.mu
|
|
|
|
// lastPing is the last (outgoing) ping time.
|
|
lastPing mono.Time
|
|
|
|
// lastGotPing, if non-zero, means that this was an endpoint
|
|
// that we learned about at runtime (from an incoming ping)
|
|
// and that is not in the network map. If so, we keep the time
|
|
// updated and use it to discard old candidates.
|
|
lastGotPing time.Time
|
|
|
|
// lastGotPingTxID contains the TxID for the last incoming ping. This is
|
|
// used to de-dup incoming pings that we may see on both the raw disco
|
|
// socket on Linux, and UDP socket. We cannot rely solely on the raw socket
|
|
// disco handling due to https://github.com/tailscale/tailscale/issues/7078.
|
|
lastGotPingTxID stun.TxID
|
|
|
|
// callMeMaybeTime, if non-zero, is the time this endpoint
|
|
// was advertised last via a call-me-maybe disco message.
|
|
callMeMaybeTime time.Time
|
|
|
|
recentPongs []pongReply // ring buffer up to pongHistoryCount entries
|
|
recentPong uint16 // index into recentPongs of most recent; older before, wrapped
|
|
|
|
index int16 // index in nodecfg.Node.Endpoints; meaningless if lastGotPing non-zero
|
|
}
|
|
|
|
// clear removes all derived / probed state from an endpointState.
|
|
func (s *endpointState) clear() {
|
|
*s = endpointState{
|
|
index: s.index,
|
|
lastGotPing: s.lastGotPing,
|
|
}
|
|
}
|
|
|
|
// pongHistoryCount is how many pongReply values we keep per endpointState
|
|
const pongHistoryCount = 64
|
|
|
|
type pongReply struct {
|
|
latency time.Duration
|
|
pongAt mono.Time // when we received the pong
|
|
from netip.AddrPort // the pong's src (usually same as endpoint map key)
|
|
pongSrc netip.AddrPort // what they reported they heard
|
|
}
|
|
|
|
// EndpointChange is a structure containing information about changes made to a
|
|
// particular endpoint. This is not a stable interface and could change at any
|
|
// time.
|
|
type EndpointChange struct {
|
|
When time.Time // when the change occurred
|
|
What string // what this change is
|
|
From any `json:",omitempty"` // information about the previous state
|
|
To any `json:",omitempty"` // information about the new state
|
|
}
|
|
|
|
// shouldDeleteLocked reports whether we should delete this endpoint.
|
|
func (st *endpointState) shouldDeleteLocked() bool {
|
|
switch {
|
|
case !st.callMeMaybeTime.IsZero():
|
|
return false
|
|
case st.lastGotPing.IsZero():
|
|
// This was an endpoint from the network map. Is it still in the network map?
|
|
return st.index == indexSentinelDeleted
|
|
default:
|
|
// This was an endpoint discovered at runtime.
|
|
return time.Since(st.lastGotPing) > sessionActiveTimeout
|
|
}
|
|
}
|
|
|
|
// latencyLocked returns the most recent latency measurement, if any.
|
|
// endpoint.mu must be held.
|
|
func (st *endpointState) latencyLocked() (lat time.Duration, ok bool) {
|
|
if len(st.recentPongs) == 0 {
|
|
return 0, false
|
|
}
|
|
return st.recentPongs[st.recentPong].latency, true
|
|
}
|
|
|
|
// endpoint.mu must be held.
|
|
func (st *endpointState) addPongReplyLocked(r pongReply) {
|
|
if n := len(st.recentPongs); n < pongHistoryCount {
|
|
st.recentPong = uint16(n)
|
|
st.recentPongs = append(st.recentPongs, r)
|
|
return
|
|
}
|
|
i := st.recentPong + 1
|
|
if i == pongHistoryCount {
|
|
i = 0
|
|
}
|
|
st.recentPongs[i] = r
|
|
st.recentPong = i
|
|
}
|
|
|
|
func (de *endpoint) deleteEndpointLocked(why string, ep netip.AddrPort) {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "deleteEndpointLocked-" + why,
|
|
From: ep,
|
|
})
|
|
delete(de.endpointState, ep)
|
|
if de.bestAddr.AddrPort == ep {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "deleteEndpointLocked-bestAddr-" + why,
|
|
From: de.bestAddr,
|
|
})
|
|
de.setBestAddrLocked(addrQuality{})
|
|
}
|
|
}
|
|
|
|
// initFakeUDPAddr populates fakeWGAddr with a globally unique fake UDPAddr.
|
|
// The current implementation just uses the pointer value of de jammed into an IPv6
|
|
// address, but it could also be, say, a counter.
|
|
func (de *endpoint) initFakeUDPAddr() {
|
|
var addr [16]byte
|
|
addr[0] = 0xfd
|
|
addr[1] = 0x00
|
|
binary.BigEndian.PutUint64(addr[2:], uint64(reflect.ValueOf(de).Pointer()))
|
|
de.fakeWGAddr = netip.AddrPortFrom(netip.AddrFrom16(addr).Unmap(), 12345)
|
|
}
|
|
|
|
// noteRecvActivity records receive activity on de, and invokes
|
|
// Conn.noteRecvActivity no more than once every 10s.
|
|
func (de *endpoint) noteRecvActivity(ipp netip.AddrPort, now mono.Time) {
|
|
if de.isWireguardOnly {
|
|
de.mu.Lock()
|
|
de.bestAddr.AddrPort = ipp
|
|
de.bestAddrAt = now
|
|
de.trustBestAddrUntil = now.Add(5 * time.Second)
|
|
de.mu.Unlock()
|
|
} else {
|
|
// TODO(jwhited): subject to change as part of silent disco effort.
|
|
// Necessary when heartbeat is disabled for the endpoint, otherwise we
|
|
// kick off discovery disco pings every trustUDPAddrDuration and mirror
|
|
// to DERP.
|
|
de.mu.Lock()
|
|
if de.heartbeatDisabled && de.bestAddr.AddrPort == ipp {
|
|
de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
|
|
}
|
|
de.mu.Unlock()
|
|
}
|
|
|
|
elapsed := now.Sub(de.lastRecvWG.LoadAtomic())
|
|
if elapsed > 10*time.Second {
|
|
de.lastRecvWG.StoreAtomic(now)
|
|
|
|
if de.c.noteRecvActivity == nil {
|
|
return
|
|
}
|
|
de.c.noteRecvActivity(de.publicKey)
|
|
}
|
|
}
|
|
|
|
func (de *endpoint) discoShort() string {
|
|
var short string
|
|
if d := de.disco.Load(); d != nil {
|
|
short = d.short
|
|
}
|
|
return short
|
|
}
|
|
|
|
// String exists purely so wireguard-go internals can log.Printf("%v")
|
|
// its internal conn.Endpoints and we don't end up with data races
|
|
// from fmt (via log) reading mutex fields and such.
|
|
func (de *endpoint) String() string {
|
|
return fmt.Sprintf("magicsock.endpoint{%v, %v}", de.publicKey.ShortString(), de.discoShort())
|
|
}
|
|
|
|
func (de *endpoint) ClearSrc() {}
|
|
func (de *endpoint) SrcToString() string { panic("unused") } // unused by wireguard-go
|
|
func (de *endpoint) SrcIP() netip.Addr { panic("unused") } // unused by wireguard-go
|
|
func (de *endpoint) DstToString() string { return de.publicKeyHex }
|
|
func (de *endpoint) DstIP() netip.Addr { return de.nodeAddr } // see tailscale/tailscale#6686
|
|
func (de *endpoint) DstToBytes() []byte { return packIPPort(de.fakeWGAddr) }
|
|
|
|
// addrForSendLocked returns the address(es) that should be used for
|
|
// sending the next packet. Zero, one, or both of UDP address and DERP
|
|
// addr may be non-zero. If the endpoint is WireGuard only and does not have
|
|
// latency information, a bool is returned to indicate that the
|
|
// WireGuard latency discovery pings should be sent.
|
|
//
|
|
// de.mu must be held.
|
|
//
|
|
// TODO(val): Rewrite the addrFor*Locked() variations to share code.
|
|
func (de *endpoint) addrForSendLocked(now mono.Time) (udpAddr, derpAddr netip.AddrPort, sendWGPing bool) {
|
|
udpAddr = de.bestAddr.AddrPort
|
|
|
|
if udpAddr.IsValid() && !now.After(de.trustBestAddrUntil) {
|
|
return udpAddr, netip.AddrPort{}, false
|
|
}
|
|
|
|
if de.isWireguardOnly {
|
|
// If the endpoint is wireguard-only, we don't have a DERP
|
|
// address to send to, so we have to send to the UDP address.
|
|
udpAddr, shouldPing := de.addrForWireGuardSendLocked(now)
|
|
return udpAddr, netip.AddrPort{}, shouldPing
|
|
}
|
|
|
|
// We had a bestAddr but it expired so send both to it
|
|
// and DERP.
|
|
return udpAddr, de.derpAddr, false
|
|
}
|
|
|
|
// addrForWireGuardSendLocked returns the address that should be used for
|
|
// sending the next packet. If a packet has never or not recently been sent to
|
|
// the endpoint, then a randomly selected address for the endpoint is returned,
|
|
// as well as a bool indiciating that WireGuard discovery pings should be started.
|
|
// If the addresses have latency information available, then the address with the
|
|
// best latency is used.
|
|
//
|
|
// de.mu must be held.
|
|
func (de *endpoint) addrForWireGuardSendLocked(now mono.Time) (udpAddr netip.AddrPort, shouldPing bool) {
|
|
if len(de.endpointState) == 0 {
|
|
de.c.logf("magicsock: addrForSendWireguardLocked: [unexpected] no candidates available for endpoint")
|
|
return udpAddr, false
|
|
}
|
|
|
|
// lowestLatency is a high duration initially, so we
|
|
// can be sure we're going to have a duration lower than this
|
|
// for the first latency retrieved.
|
|
lowestLatency := time.Hour
|
|
var oldestPing mono.Time
|
|
for ipp, state := range de.endpointState {
|
|
if oldestPing.IsZero() {
|
|
oldestPing = state.lastPing
|
|
} else if state.lastPing.Before(oldestPing) {
|
|
oldestPing = state.lastPing
|
|
}
|
|
|
|
if latency, ok := state.latencyLocked(); ok {
|
|
if latency < lowestLatency || latency == lowestLatency && ipp.Addr().Is6() {
|
|
// If we have the same latency,IPv6 is prioritized.
|
|
// TODO(catzkorn): Consider a small increase in latency to use
|
|
// IPv6 in comparison to IPv4, when possible.
|
|
lowestLatency = latency
|
|
udpAddr = ipp
|
|
}
|
|
}
|
|
}
|
|
needPing := len(de.endpointState) > 1 && now.Sub(oldestPing) > wireguardPingInterval
|
|
|
|
if !udpAddr.IsValid() {
|
|
candidates := xmaps.Keys(de.endpointState)
|
|
|
|
// Randomly select an address to use until we retrieve latency information
|
|
// and give it a short trustBestAddrUntil time so we avoid flapping between
|
|
// addresses while waiting on latency information to be populated.
|
|
udpAddr = candidates[rand.IntN(len(candidates))]
|
|
}
|
|
|
|
de.bestAddr.AddrPort = udpAddr
|
|
// Only extend trustBestAddrUntil by one second to avoid packet
|
|
// reordering and/or CPU usage from random selection during the first
|
|
// second. We should receive a response due to a WireGuard handshake in
|
|
// less than one second in good cases, in which case this will be then
|
|
// extended to 15 seconds.
|
|
de.trustBestAddrUntil = now.Add(time.Second)
|
|
return udpAddr, needPing
|
|
}
|
|
|
|
// addrForPingSizeLocked returns the address(es) that should be used for sending
|
|
// the next ping. It will only return addrs with a large enough path MTU to
|
|
// permit a ping payload of size bytes to be delivered (DERP is always one such
|
|
// addr as it is a TCP connection). If it returns a zero-value udpAddr, then we
|
|
// should continue probing the MTU of all paths to this endpoint. Zero, one, or
|
|
// both of the returned UDP address and DERP address may be non-zero.
|
|
//
|
|
// de.mu must be held.
|
|
func (de *endpoint) addrForPingSizeLocked(now mono.Time, size int) (udpAddr, derpAddr netip.AddrPort) {
|
|
if size == 0 {
|
|
udpAddr, derpAddr, _ = de.addrForSendLocked(now)
|
|
return
|
|
}
|
|
|
|
udpAddr = de.bestAddr.AddrPort
|
|
pathMTU := de.bestAddr.wireMTU
|
|
requestedMTU := pingSizeToPktLen(size, udpAddr.Addr().Is6())
|
|
mtuOk := requestedMTU <= pathMTU
|
|
|
|
if udpAddr.IsValid() && mtuOk {
|
|
if !now.After(de.trustBestAddrUntil) {
|
|
return udpAddr, netip.AddrPort{}
|
|
}
|
|
// We had a bestAddr with large enough MTU but it expired, so
|
|
// send both to it and DERP.
|
|
return udpAddr, de.derpAddr
|
|
}
|
|
|
|
// The UDP address isn't valid or it doesn't have a path MTU big enough
|
|
// for the packet. Return a zero-value udpAddr to signal that we should
|
|
// keep probing the path MTU to all addresses for this endpoint, and a
|
|
// valid DERP addr to signal that we should also send via DERP.
|
|
return netip.AddrPort{}, de.derpAddr
|
|
}
|
|
|
|
// maybeProbeUDPLifetimeLocked returns an afterInactivityFor duration and true
|
|
// if de is a candidate for UDP path lifetime probing in the future, otherwise
|
|
// false.
|
|
func (de *endpoint) maybeProbeUDPLifetimeLocked() (afterInactivityFor time.Duration, maybe bool) {
|
|
p := de.probeUDPLifetime
|
|
if p == nil {
|
|
return afterInactivityFor, false
|
|
}
|
|
if !de.bestAddr.IsValid() {
|
|
return afterInactivityFor, false
|
|
}
|
|
epDisco := de.disco.Load()
|
|
if epDisco == nil {
|
|
// peer does not support disco
|
|
return afterInactivityFor, false
|
|
}
|
|
// We compare disco keys, which may have a shorter lifetime than node keys
|
|
// since disco keys reset on startup. This has the desired side effect of
|
|
// shuffling probing probability where the local node ends up with a large
|
|
// key value lexicographically relative to the other nodes it tends to
|
|
// communicate with. If de's disco key changes, the cycle will reset.
|
|
if de.c.discoPublic.Compare(epDisco.key) >= 0 {
|
|
// lower disco pub key node probes higher
|
|
return afterInactivityFor, false
|
|
}
|
|
if !p.cycleActive && time.Since(p.cycleStartedAt) < p.config.CycleCanStartEvery {
|
|
// This is conservative as it doesn't account for afterInactivityFor use
|
|
// by the caller, potentially delaying the start of the next cycle. We
|
|
// assume the cycle could start immediately following
|
|
// maybeProbeUDPLifetimeLocked(), regardless of the value of
|
|
// afterInactivityFor relative to latest packets in/out time.
|
|
return afterInactivityFor, false
|
|
}
|
|
afterInactivityFor = p.currentCliffDurationEndpointLocked() - udpLifetimeProbeCliffSlack
|
|
if afterInactivityFor < 0 {
|
|
// shouldn't happen
|
|
return afterInactivityFor, false
|
|
}
|
|
return afterInactivityFor, true
|
|
}
|
|
|
|
// heartbeatForLifetimeVia represents the scheduling source of
|
|
// endpoint.heartbeatForLifetime().
|
|
type heartbeatForLifetimeVia string
|
|
|
|
const (
|
|
heartbeatForLifetimeViaSessionInactive heartbeatForLifetimeVia = "session-inactive"
|
|
heartbeatForLifetimeViaPongRx heartbeatForLifetimeVia = "pong-rx"
|
|
heartbeatForLifetimeViaSelf heartbeatForLifetimeVia = "self"
|
|
)
|
|
|
|
// scheduleHeartbeatForLifetimeLocked schedules de.heartbeatForLifetime to fire
|
|
// in the future (after). The caller must describe themselves in the via arg.
|
|
func (de *endpoint) scheduleHeartbeatForLifetimeLocked(after time.Duration, via heartbeatForLifetimeVia) {
|
|
p := de.probeUDPLifetime
|
|
if p == nil {
|
|
return
|
|
}
|
|
de.c.dlogf("[v1] magicsock: disco: scheduling UDP lifetime probe for cliff=%v via=%v to %v (%v)",
|
|
p.currentCliffDurationEndpointLocked(), via, de.publicKey.ShortString(), de.discoShort())
|
|
p.bestAddr = de.bestAddr.AddrPort
|
|
p.timer = time.AfterFunc(after, de.heartbeatForLifetime)
|
|
if via == heartbeatForLifetimeViaSelf {
|
|
metricUDPLifetimeCliffsRescheduled.Add(1)
|
|
} else {
|
|
metricUDPLifetimeCliffsScheduled.Add(1)
|
|
}
|
|
}
|
|
|
|
// heartbeatForLifetime sends a disco ping recorded locally with a purpose of
|
|
// pingHeartbeatForUDPLifetime to de if de.bestAddr has remained stable, and it
|
|
// has been inactive for a duration that is within the error bounds for current
|
|
// lifetime probing cliff. Alternatively it may reschedule itself into the
|
|
// future, which is one of three scheduling sources. The other scheduling
|
|
// sources are de.heartbeat() and de.probeUDPLifetimeCliffDoneLocked().
|
|
func (de *endpoint) heartbeatForLifetime() {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
p := de.probeUDPLifetime
|
|
if p == nil || p.timer == nil {
|
|
// We raced with a code path trying to p.timer.Stop() us. Give up early
|
|
// in the interest of simplicity. If p.timer.Stop() happened in
|
|
// de.heartbeat() presumably because of recent packets in/out we *could*
|
|
// still probe here, and it would be meaningful, but the time logic
|
|
// below would reschedule as-is.
|
|
return
|
|
}
|
|
p.timer = nil
|
|
if !p.bestAddr.IsValid() || de.bestAddr.AddrPort != p.bestAddr {
|
|
// best path changed
|
|
p.resetCycleEndpointLocked()
|
|
return
|
|
}
|
|
afterInactivityFor, ok := de.maybeProbeUDPLifetimeLocked()
|
|
if !ok {
|
|
p.resetCycleEndpointLocked()
|
|
return
|
|
}
|
|
inactiveFor := mono.Now().Sub(max(de.lastRecvUDPAny.LoadAtomic(), de.lastSendAny))
|
|
delta := afterInactivityFor - inactiveFor
|
|
if delta.Abs() > udpLifetimeProbeSchedulingTolerance {
|
|
if delta < 0 {
|
|
// We missed our opportunity. We can resume this cliff at the tail
|
|
// end of another session.
|
|
metricUDPLifetimeCliffsMissed.Add(1)
|
|
return
|
|
} else {
|
|
// We need to wait longer before sending a ping. This can happen for
|
|
// a number of reasons, which are described in more detail in
|
|
// de.heartbeat().
|
|
de.scheduleHeartbeatForLifetimeLocked(delta, heartbeatForLifetimeViaSelf)
|
|
return
|
|
}
|
|
}
|
|
if p.currentCliff == 0 {
|
|
p.cycleStartedAt = time.Now()
|
|
p.cycleActive = true
|
|
}
|
|
de.c.dlogf("[v1] magicsock: disco: sending disco ping for UDP lifetime probe cliff=%v to %v (%v)",
|
|
p.currentCliffDurationEndpointLocked(), de.publicKey.ShortString(), de.discoShort())
|
|
de.startDiscoPingLocked(de.bestAddr.AddrPort, mono.Now(), pingHeartbeatForUDPLifetime, 0, nil)
|
|
}
|
|
|
|
// heartbeat is called every heartbeatInterval to keep the best UDP path alive,
|
|
// kick off discovery of other paths, or schedule the probing of UDP path
|
|
// lifetime on the tail end of an active session.
|
|
func (de *endpoint) heartbeat() {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
if de.probeUDPLifetime != nil && de.probeUDPLifetime.timer != nil {
|
|
de.probeUDPLifetime.timer.Stop()
|
|
de.probeUDPLifetime.timer = nil
|
|
}
|
|
de.heartBeatTimer = nil
|
|
|
|
if de.heartbeatDisabled {
|
|
// If control override to disable heartBeatTimer set, return early.
|
|
return
|
|
}
|
|
|
|
if de.lastSendExt.IsZero() {
|
|
// Shouldn't happen.
|
|
return
|
|
}
|
|
|
|
now := mono.Now()
|
|
if now.Sub(de.lastSendExt) > sessionActiveTimeout {
|
|
// Session's idle. Stop heartbeating.
|
|
de.c.dlogf("[v1] magicsock: disco: ending heartbeats for idle session to %v (%v)", de.publicKey.ShortString(), de.discoShort())
|
|
if afterInactivityFor, ok := de.maybeProbeUDPLifetimeLocked(); ok {
|
|
// This is the best place to best effort schedule a probe of UDP
|
|
// path lifetime in the future as it loosely translates to "UDP path
|
|
// is inactive".
|
|
//
|
|
// Note: wireguard-go schedules a WireGuard keepalive packet (by
|
|
// default, not tied to persistent keepalive feature) 10 seconds in
|
|
// the future after receiving an authenticated data packet. It's
|
|
// typically only sent by one side based on how the WireGuard state
|
|
// machine controls the timer. So, if we are on the receiving end of
|
|
// that keepalive, de.lastSendExt won't move, assuming there is no
|
|
// other user-generated traffic. This is one reason why we perform
|
|
// a more granular check of the last packets in/out time, below, as
|
|
// a WireGuard keepalive may have fallen somewhere within the
|
|
// sessionActiveTimeout window. heartbeatForLifetime will also
|
|
// perform a similar check, and reschedule as necessary.
|
|
inactiveFor := now.Sub(max(de.lastSendAny, de.lastRecvUDPAny.LoadAtomic()))
|
|
after := afterInactivityFor - inactiveFor
|
|
if after < 0 {
|
|
// shouldn't happen
|
|
return
|
|
}
|
|
de.scheduleHeartbeatForLifetimeLocked(after, heartbeatForLifetimeViaSessionInactive)
|
|
}
|
|
return
|
|
}
|
|
|
|
udpAddr, _, _ := de.addrForSendLocked(now)
|
|
if udpAddr.IsValid() {
|
|
// We have a preferred path. Ping that every 2 seconds.
|
|
de.startDiscoPingLocked(udpAddr, now, pingHeartbeat, 0, nil)
|
|
}
|
|
|
|
if de.wantFullPingLocked(now) {
|
|
de.sendDiscoPingsLocked(now, true)
|
|
}
|
|
|
|
de.heartBeatTimer = time.AfterFunc(heartbeatInterval, de.heartbeat)
|
|
}
|
|
|
|
// setHeartbeatDisabled sets heartbeatDisabled to the provided value.
|
|
func (de *endpoint) setHeartbeatDisabled(v bool) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
de.heartbeatDisabled = v
|
|
}
|
|
|
|
// wantFullPingLocked reports whether we should ping to all our peers looking for
|
|
// a better path.
|
|
//
|
|
// de.mu must be held.
|
|
func (de *endpoint) wantFullPingLocked(now mono.Time) bool {
|
|
if runtime.GOOS == "js" {
|
|
return false
|
|
}
|
|
if !de.bestAddr.IsValid() || de.lastFullPing.IsZero() {
|
|
return true
|
|
}
|
|
if now.After(de.trustBestAddrUntil) {
|
|
return true
|
|
}
|
|
if de.bestAddr.latency <= goodEnoughLatency {
|
|
return false
|
|
}
|
|
if now.Sub(de.lastFullPing) >= upgradeInterval {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (de *endpoint) noteTxActivityExtTriggerLocked(now mono.Time) {
|
|
de.lastSendExt = now
|
|
if de.heartBeatTimer == nil && !de.heartbeatDisabled {
|
|
de.heartBeatTimer = time.AfterFunc(heartbeatInterval, de.heartbeat)
|
|
}
|
|
}
|
|
|
|
// MaxDiscoPingSize is the largest useful ping message size that we
|
|
// can send - the maximum packet size minus the IPv4 and UDP headers.
|
|
var MaxDiscoPingSize = tstun.MaxPacketSize - 20 - 8
|
|
|
|
type pingResultAndCallback struct {
|
|
taken atomic.Bool // first CompareAndSwamp from false to true takes ownership of res
|
|
res *ipnstate.PingResult
|
|
cb func(*ipnstate.PingResult)
|
|
}
|
|
|
|
func (p *pingResultAndCallback) reply() bool {
|
|
return p != nil && p.taken.CompareAndSwap(false, true)
|
|
}
|
|
|
|
// discoPing starts a disco-level ping for the "tailscale ping" command (or other
|
|
// callers, such as c2n). res is value to call cb with, already partially
|
|
// filled. cb must be called at most once. Once called, ownership of res passes to cb.
|
|
func (de *endpoint) discoPing(res *ipnstate.PingResult, size int, cb func(*ipnstate.PingResult)) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
if de.expired {
|
|
res.Err = errExpired.Error()
|
|
cb(res)
|
|
return
|
|
}
|
|
if size > MaxDiscoPingSize {
|
|
res.Err = errPingTooBig.Error()
|
|
cb(res)
|
|
return
|
|
}
|
|
|
|
resCB := &pingResultAndCallback{res: res, cb: cb}
|
|
|
|
now := mono.Now()
|
|
udpAddr, derpAddr := de.addrForPingSizeLocked(now, size)
|
|
|
|
if derpAddr.IsValid() {
|
|
de.startDiscoPingLocked(derpAddr, now, pingCLI, size, resCB)
|
|
}
|
|
if udpAddr.IsValid() && now.Before(de.trustBestAddrUntil) {
|
|
// Already have an active session, so just ping the address we're using.
|
|
// Otherwise "tailscale ping" results to a node on the local network
|
|
// can look like they're bouncing between, say 10.0.0.0/9 and the peer's
|
|
// IPv6 address, both 1ms away, and it's random who replies first.
|
|
de.startDiscoPingLocked(udpAddr, now, pingCLI, size, resCB)
|
|
} else {
|
|
for ep := range de.endpointState {
|
|
de.startDiscoPingLocked(ep, now, pingCLI, size, resCB)
|
|
}
|
|
}
|
|
}
|
|
|
|
var (
|
|
errExpired = errors.New("peer's node key has expired")
|
|
errNoUDPOrDERP = errors.New("no UDP or DERP addr")
|
|
errPingTooBig = errors.New("ping size too big")
|
|
)
|
|
|
|
func (de *endpoint) send(buffs [][]byte) error {
|
|
de.mu.Lock()
|
|
if de.expired {
|
|
de.mu.Unlock()
|
|
return errExpired
|
|
}
|
|
|
|
now := mono.Now()
|
|
udpAddr, derpAddr, startWGPing := de.addrForSendLocked(now)
|
|
|
|
if de.isWireguardOnly {
|
|
if startWGPing {
|
|
de.sendWireGuardOnlyPingsLocked(now)
|
|
}
|
|
} else if !udpAddr.IsValid() || now.After(de.trustBestAddrUntil) {
|
|
de.sendDiscoPingsLocked(now, true)
|
|
}
|
|
de.noteTxActivityExtTriggerLocked(now)
|
|
de.lastSendAny = now
|
|
de.mu.Unlock()
|
|
|
|
if !udpAddr.IsValid() && !derpAddr.IsValid() {
|
|
return errNoUDPOrDERP
|
|
}
|
|
var err error
|
|
if udpAddr.IsValid() {
|
|
_, err = de.c.sendUDPBatch(udpAddr, buffs)
|
|
|
|
// If the error is known to indicate that the endpoint is no longer
|
|
// usable, clear the endpoint statistics so that the next send will
|
|
// re-evaluate the best endpoint.
|
|
if err != nil && isBadEndpointErr(err) {
|
|
de.noteBadEndpoint(udpAddr)
|
|
}
|
|
|
|
var txBytes int
|
|
for _, b := range buffs {
|
|
txBytes += len(b)
|
|
}
|
|
|
|
switch {
|
|
case udpAddr.Addr().Is4():
|
|
de.c.metrics.outboundPacketsIPv4Total.Add(int64(len(buffs)))
|
|
de.c.metrics.outboundBytesIPv4Total.Add(int64(txBytes))
|
|
case udpAddr.Addr().Is6():
|
|
de.c.metrics.outboundPacketsIPv6Total.Add(int64(len(buffs)))
|
|
de.c.metrics.outboundBytesIPv6Total.Add(int64(txBytes))
|
|
}
|
|
|
|
// TODO(raggi): needs updating for accuracy, as in error conditions we may have partial sends.
|
|
if stats := de.c.stats.Load(); err == nil && stats != nil {
|
|
stats.UpdateTxPhysical(de.nodeAddr, udpAddr, len(buffs), txBytes)
|
|
}
|
|
}
|
|
if derpAddr.IsValid() {
|
|
allOk := true
|
|
var txBytes int
|
|
for _, buff := range buffs {
|
|
const isDisco = false
|
|
ok, _ := de.c.sendAddr(derpAddr, de.publicKey, buff, isDisco)
|
|
txBytes += len(buff)
|
|
if !ok {
|
|
allOk = false
|
|
}
|
|
}
|
|
|
|
if stats := de.c.stats.Load(); stats != nil {
|
|
stats.UpdateTxPhysical(de.nodeAddr, derpAddr, len(buffs), txBytes)
|
|
}
|
|
if allOk {
|
|
return nil
|
|
}
|
|
}
|
|
return err
|
|
}
|
|
|
|
// probeUDPLifetimeCliffDoneLocked is called when a disco
|
|
// pingHeartbeatForUDPLifetime is being cleaned up. result contains the reason
|
|
// for the cleanup, txid contains the ping's txid.
|
|
// probeUDPLifetimeCliffDoneLocked may schedule another
|
|
// pingHeartbeatForUDPLifetime in the future if there is another cliff remaining
|
|
// for the current probing cycle.
|
|
func (de *endpoint) probeUDPLifetimeCliffDoneLocked(result discoPingResult, txid stun.TxID) {
|
|
p := de.probeUDPLifetime
|
|
if p == nil || !p.cycleActive || de.probeUDPLifetime.timer != nil || txid != p.lastTxID {
|
|
// Probing may have been disabled while heartbeats were in flight. This
|
|
// can also be a duplicate or late arriving result.
|
|
return
|
|
}
|
|
metricUDPLifetimeCliffsCompleted.Add(1)
|
|
if result != discoPongReceived || p.currentCliff >= len(p.config.Cliffs)-1 {
|
|
maxCliffIndex := p.currentCliff
|
|
if result != discoPongReceived {
|
|
maxCliffIndex = p.currentCliff - 1
|
|
}
|
|
var maxCliffDuration time.Duration
|
|
if maxCliffIndex >= 0 {
|
|
maxCliffDuration = p.config.Cliffs[maxCliffIndex]
|
|
}
|
|
p.cycleCompleteMaxCliffEndpointLocked(maxCliffIndex)
|
|
de.c.dlogf("[v1] magicsock: disco: UDP lifetime probe cycle completed max cliff=%v for %v (%v)",
|
|
maxCliffDuration, de.publicKey.ShortString(), de.discoShort())
|
|
metricUDPLifetimeCyclesCompleted.Add(1)
|
|
p.resetCycleEndpointLocked()
|
|
} else {
|
|
p.currentCliff++
|
|
if after, ok := de.maybeProbeUDPLifetimeLocked(); ok {
|
|
de.scheduleHeartbeatForLifetimeLocked(after, heartbeatForLifetimeViaPongRx)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (de *endpoint) discoPingTimeout(txid stun.TxID) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
sp, ok := de.sentPing[txid]
|
|
if !ok {
|
|
return
|
|
}
|
|
if debugDisco() || !de.bestAddr.IsValid() || mono.Now().After(de.trustBestAddrUntil) {
|
|
de.c.dlogf("[v1] magicsock: disco: timeout waiting for pong %x from %v (%v, %v)", txid[:6], sp.to, de.publicKey.ShortString(), de.discoShort())
|
|
}
|
|
de.removeSentDiscoPingLocked(txid, sp, discoPingTimedOut)
|
|
}
|
|
|
|
// forgetDiscoPing is called when a ping fails to send.
|
|
func (de *endpoint) forgetDiscoPing(txid stun.TxID) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
if sp, ok := de.sentPing[txid]; ok {
|
|
de.removeSentDiscoPingLocked(txid, sp, discoPingFailed)
|
|
}
|
|
}
|
|
|
|
// discoPingResult represents the result of an attempted disco ping send
|
|
// operation.
|
|
type discoPingResult int
|
|
|
|
const (
|
|
discoPingResultUnknown discoPingResult = iota
|
|
discoPingFailed
|
|
discoPingTimedOut
|
|
discoPongReceived
|
|
)
|
|
|
|
func (de *endpoint) removeSentDiscoPingLocked(txid stun.TxID, sp sentPing, result discoPingResult) {
|
|
// Stop the timer for the case where sendPing failed to write to UDP.
|
|
// In the case of a timer already having fired, this is a no-op:
|
|
sp.timer.Stop()
|
|
if sp.purpose == pingHeartbeatForUDPLifetime {
|
|
de.probeUDPLifetimeCliffDoneLocked(result, txid)
|
|
}
|
|
delete(de.sentPing, txid)
|
|
}
|
|
|
|
// poly1305AuthenticatorSize is the size, in bytes, of a poly1305 authenticator.
|
|
// It's the same as golang.org/x/crypto/poly1305.TagSize, but that
|
|
// page is deprecated and we only need this one constant, so we copy it.
|
|
const poly1305AuthenticatorSize = 16
|
|
|
|
// discoPingSize is the size of a complete disco ping packet, without any padding.
|
|
const discoPingSize = len(disco.Magic) + key.DiscoPublicRawLen + disco.NonceLen +
|
|
poly1305AuthenticatorSize + disco.MessageHeaderLen + disco.PingLen
|
|
|
|
// sendDiscoPing sends a ping with the provided txid to ep using de's discoKey. size
|
|
// is the desired disco message size, including all disco headers but excluding IP/UDP
|
|
// headers.
|
|
//
|
|
// The caller (startDiscoPingLocked) should've already recorded the ping in
|
|
// sentPing and set up the timer.
|
|
//
|
|
// The caller should use de.discoKey as the discoKey argument.
|
|
// It is passed in so that sendDiscoPing doesn't need to lock de.mu.
|
|
func (de *endpoint) sendDiscoPing(ep netip.AddrPort, discoKey key.DiscoPublic, txid stun.TxID, size int, logLevel discoLogLevel) {
|
|
size = min(size, MaxDiscoPingSize)
|
|
padding := max(size-discoPingSize, 0)
|
|
|
|
sent, _ := de.c.sendDiscoMessage(ep, de.publicKey, discoKey, &disco.Ping{
|
|
TxID: [12]byte(txid),
|
|
NodeKey: de.c.publicKeyAtomic.Load(),
|
|
Padding: padding,
|
|
}, logLevel)
|
|
if !sent {
|
|
de.forgetDiscoPing(txid)
|
|
return
|
|
}
|
|
|
|
if size != 0 {
|
|
metricSentDiscoPeerMTUProbes.Add(1)
|
|
metricSentDiscoPeerMTUProbeBytes.Add(int64(pingSizeToPktLen(size, ep.Addr().Is6())))
|
|
}
|
|
}
|
|
|
|
// discoPingPurpose is the reason why a discovery ping message was sent.
|
|
type discoPingPurpose int
|
|
|
|
//go:generate go run tailscale.com/cmd/addlicense -file discopingpurpose_string.go go run golang.org/x/tools/cmd/stringer -type=discoPingPurpose -trimprefix=ping
|
|
const (
|
|
// pingDiscovery means that purpose of a ping was to see if a
|
|
// path was valid.
|
|
pingDiscovery discoPingPurpose = iota
|
|
|
|
// pingHeartbeat means that purpose of a ping was whether a
|
|
// peer was still there.
|
|
pingHeartbeat
|
|
|
|
// pingCLI means that the user is running "tailscale ping"
|
|
// from the CLI. These types of pings can go over DERP.
|
|
pingCLI
|
|
|
|
// pingHeartbeatForUDPLifetime means that the purpose of a ping was to
|
|
// discover whether the UDP path was still active through any and all
|
|
// stateful middleboxes involved.
|
|
pingHeartbeatForUDPLifetime
|
|
)
|
|
|
|
// startDiscoPingLocked sends a disco ping to ep in a separate goroutine. resCB,
|
|
// if non-nil, means that a caller external to the magicsock package internals
|
|
// is interested in the result (such as a CLI "tailscale ping" or a c2n ping
|
|
// request, etc)
|
|
func (de *endpoint) startDiscoPingLocked(ep netip.AddrPort, now mono.Time, purpose discoPingPurpose, size int, resCB *pingResultAndCallback) {
|
|
if runtime.GOOS == "js" {
|
|
return
|
|
}
|
|
epDisco := de.disco.Load()
|
|
if epDisco == nil {
|
|
return
|
|
}
|
|
if purpose != pingCLI {
|
|
st, ok := de.endpointState[ep]
|
|
if !ok {
|
|
// Shouldn't happen. But don't ping an endpoint that's
|
|
// not active for us.
|
|
de.c.logf("magicsock: disco: [unexpected] attempt to ping no longer live endpoint %v", ep)
|
|
return
|
|
}
|
|
st.lastPing = now
|
|
}
|
|
|
|
// If we are doing a discovery ping or a CLI ping with no specified size
|
|
// to a non DERP address, then probe the MTU. Otherwise just send the
|
|
// one specified ping.
|
|
|
|
// Default to sending a single ping of the specified size
|
|
sizes := []int{size}
|
|
if de.c.PeerMTUEnabled() {
|
|
isDerp := ep.Addr() == tailcfg.DerpMagicIPAddr
|
|
if !isDerp && ((purpose == pingDiscovery) || (purpose == pingCLI && size == 0)) {
|
|
de.c.dlogf("[v1] magicsock: starting MTU probe")
|
|
sizes = mtuProbePingSizesV4
|
|
if ep.Addr().Is6() {
|
|
sizes = mtuProbePingSizesV6
|
|
}
|
|
}
|
|
}
|
|
|
|
logLevel := discoLog
|
|
if purpose == pingHeartbeat {
|
|
logLevel = discoVerboseLog
|
|
}
|
|
if purpose == pingCLI {
|
|
de.noteTxActivityExtTriggerLocked(now)
|
|
}
|
|
de.lastSendAny = now
|
|
for _, s := range sizes {
|
|
txid := stun.NewTxID()
|
|
de.sentPing[txid] = sentPing{
|
|
to: ep,
|
|
at: now,
|
|
timer: time.AfterFunc(pingTimeoutDuration, func() { de.discoPingTimeout(txid) }),
|
|
purpose: purpose,
|
|
resCB: resCB,
|
|
size: s,
|
|
}
|
|
if purpose == pingHeartbeatForUDPLifetime && de.probeUDPLifetime != nil {
|
|
de.probeUDPLifetime.lastTxID = txid
|
|
}
|
|
go de.sendDiscoPing(ep, epDisco.key, txid, s, logLevel)
|
|
}
|
|
|
|
}
|
|
|
|
// sendDiscoPingsLocked starts pinging all of ep's endpoints.
|
|
func (de *endpoint) sendDiscoPingsLocked(now mono.Time, sendCallMeMaybe bool) {
|
|
de.lastFullPing = now
|
|
var sentAny bool
|
|
for ep, st := range de.endpointState {
|
|
if st.shouldDeleteLocked() {
|
|
de.deleteEndpointLocked("sendPingsLocked", ep)
|
|
continue
|
|
}
|
|
if runtime.GOOS == "js" {
|
|
continue
|
|
}
|
|
if !st.lastPing.IsZero() && now.Sub(st.lastPing) < discoPingInterval {
|
|
continue
|
|
}
|
|
|
|
firstPing := !sentAny
|
|
sentAny = true
|
|
|
|
if firstPing && sendCallMeMaybe {
|
|
de.c.dlogf("[v1] magicsock: disco: send, starting discovery for %v (%v)", de.publicKey.ShortString(), de.discoShort())
|
|
}
|
|
|
|
de.startDiscoPingLocked(ep, now, pingDiscovery, 0, nil)
|
|
}
|
|
derpAddr := de.derpAddr
|
|
if sentAny && sendCallMeMaybe && derpAddr.IsValid() {
|
|
// Have our magicsock.Conn figure out its STUN endpoint (if
|
|
// it doesn't know already) and then send a CallMeMaybe
|
|
// message to our peer via DERP informing them that we've
|
|
// sent so our firewall ports are probably open and now
|
|
// would be a good time for them to connect.
|
|
go de.c.enqueueCallMeMaybe(derpAddr, de)
|
|
}
|
|
}
|
|
|
|
// sendWireGuardOnlyPingsLocked evaluates all available addresses for
|
|
// a WireGuard only endpoint and initates an ICMP ping for useable
|
|
// addresses.
|
|
func (de *endpoint) sendWireGuardOnlyPingsLocked(now mono.Time) {
|
|
if runtime.GOOS == "js" {
|
|
return
|
|
}
|
|
|
|
// Normally we only send pings at a low rate as the decision to start
|
|
// sending a ping sets bestAddrAtUntil with a reasonable time to keep trying
|
|
// that address, however, if that code changed we may want to be sure that
|
|
// we don't ever send excessive pings to avoid impact to the client/user.
|
|
if !now.After(de.lastFullPing.Add(10 * time.Second)) {
|
|
return
|
|
}
|
|
de.lastFullPing = now
|
|
|
|
for ipp := range de.endpointState {
|
|
if ipp.Addr().Is4() && de.c.noV4.Load() {
|
|
continue
|
|
}
|
|
if ipp.Addr().Is6() && de.c.noV6.Load() {
|
|
continue
|
|
}
|
|
|
|
go de.sendWireGuardOnlyPing(ipp, now)
|
|
}
|
|
}
|
|
|
|
// sendWireGuardOnlyPing sends a ICMP ping to a WireGuard only address to
|
|
// discover the latency.
|
|
func (de *endpoint) sendWireGuardOnlyPing(ipp netip.AddrPort, now mono.Time) {
|
|
ctx, cancel := context.WithTimeout(de.c.connCtx, 5*time.Second)
|
|
defer cancel()
|
|
|
|
de.setLastPing(ipp, now)
|
|
|
|
addr := &net.IPAddr{
|
|
IP: net.IP(ipp.Addr().AsSlice()),
|
|
Zone: ipp.Addr().Zone(),
|
|
}
|
|
|
|
p := de.c.getPinger()
|
|
if p == nil {
|
|
de.c.logf("[v2] magicsock: sendWireGuardOnlyPingLocked: pinger is nil")
|
|
return
|
|
}
|
|
|
|
latency, err := p.Send(ctx, addr, nil)
|
|
if err != nil {
|
|
de.c.logf("[v2] magicsock: sendWireGuardOnlyPingLocked: %s", err)
|
|
return
|
|
}
|
|
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
state, ok := de.endpointState[ipp]
|
|
if !ok {
|
|
return
|
|
}
|
|
state.addPongReplyLocked(pongReply{
|
|
latency: latency,
|
|
pongAt: now,
|
|
from: ipp,
|
|
pongSrc: netip.AddrPort{}, // We don't know this.
|
|
})
|
|
}
|
|
|
|
// setLastPing sets lastPing on the endpointState to now.
|
|
func (de *endpoint) setLastPing(ipp netip.AddrPort, now mono.Time) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
state, ok := de.endpointState[ipp]
|
|
if !ok {
|
|
return
|
|
}
|
|
state.lastPing = now
|
|
}
|
|
|
|
// updateFromNode updates the endpoint based on a tailcfg.Node from a NetMap
|
|
// update.
|
|
func (de *endpoint) updateFromNode(n tailcfg.NodeView, heartbeatDisabled bool, probeUDPLifetimeEnabled bool) {
|
|
if !n.Valid() {
|
|
panic("nil node when updating endpoint")
|
|
}
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
de.heartbeatDisabled = heartbeatDisabled
|
|
if probeUDPLifetimeEnabled {
|
|
de.setProbeUDPLifetimeConfigLocked(defaultProbeUDPLifetimeConfig)
|
|
} else {
|
|
de.setProbeUDPLifetimeConfigLocked(nil)
|
|
}
|
|
de.expired = n.Expired()
|
|
|
|
epDisco := de.disco.Load()
|
|
var discoKey key.DiscoPublic
|
|
if epDisco != nil {
|
|
discoKey = epDisco.key
|
|
}
|
|
|
|
if discoKey != n.DiscoKey() {
|
|
de.c.logf("[v1] magicsock: disco: node %s changed from %s to %s", de.publicKey.ShortString(), discoKey, n.DiscoKey())
|
|
de.disco.Store(&endpointDisco{
|
|
key: n.DiscoKey(),
|
|
short: n.DiscoKey().ShortString(),
|
|
})
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "updateFromNode-resetLocked",
|
|
})
|
|
de.resetLocked()
|
|
}
|
|
if n.DERP() == "" {
|
|
if de.derpAddr.IsValid() {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "updateFromNode-remove-DERP",
|
|
From: de.derpAddr,
|
|
})
|
|
}
|
|
de.derpAddr = netip.AddrPort{}
|
|
} else {
|
|
newDerp, _ := netip.ParseAddrPort(n.DERP())
|
|
if de.derpAddr != newDerp {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "updateFromNode-DERP",
|
|
From: de.derpAddr,
|
|
To: newDerp,
|
|
})
|
|
}
|
|
de.derpAddr = newDerp
|
|
}
|
|
|
|
de.setEndpointsLocked(n.Endpoints())
|
|
}
|
|
|
|
func (de *endpoint) setEndpointsLocked(eps interface {
|
|
All() iter.Seq2[int, netip.AddrPort]
|
|
}) {
|
|
for _, st := range de.endpointState {
|
|
st.index = indexSentinelDeleted // assume deleted until updated in next loop
|
|
}
|
|
|
|
var newIpps []netip.AddrPort
|
|
for i, ipp := range eps.All() {
|
|
if i > math.MaxInt16 {
|
|
// Seems unlikely.
|
|
break
|
|
}
|
|
if !ipp.IsValid() {
|
|
de.c.logf("magicsock: bogus netmap endpoint from %v", eps)
|
|
continue
|
|
}
|
|
if st, ok := de.endpointState[ipp]; ok {
|
|
st.index = int16(i)
|
|
} else {
|
|
de.endpointState[ipp] = &endpointState{index: int16(i)}
|
|
newIpps = append(newIpps, ipp)
|
|
}
|
|
}
|
|
if len(newIpps) > 0 {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "updateFromNode-new-Endpoints",
|
|
To: newIpps,
|
|
})
|
|
}
|
|
|
|
// Now delete anything unless it's still in the network map or
|
|
// was a recently discovered endpoint.
|
|
for ep, st := range de.endpointState {
|
|
if st.shouldDeleteLocked() {
|
|
de.deleteEndpointLocked("updateFromNode", ep)
|
|
}
|
|
}
|
|
}
|
|
|
|
// addCandidateEndpoint adds ep as an endpoint to which we should send
|
|
// future pings. If there is an existing endpointState for ep, and forRxPingTxID
|
|
// matches the last received ping TxID, this function reports true, otherwise
|
|
// false.
|
|
//
|
|
// This is called once we've already verified that we got a valid
|
|
// discovery message from de via ep.
|
|
func (de *endpoint) addCandidateEndpoint(ep netip.AddrPort, forRxPingTxID stun.TxID) (duplicatePing bool) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
if st, ok := de.endpointState[ep]; ok {
|
|
duplicatePing = forRxPingTxID == st.lastGotPingTxID
|
|
if !duplicatePing {
|
|
st.lastGotPingTxID = forRxPingTxID
|
|
}
|
|
if st.lastGotPing.IsZero() {
|
|
// Already-known endpoint from the network map.
|
|
return duplicatePing
|
|
}
|
|
st.lastGotPing = time.Now()
|
|
return duplicatePing
|
|
}
|
|
|
|
// Newly discovered endpoint. Exciting!
|
|
de.c.dlogf("[v1] magicsock: disco: adding %v as candidate endpoint for %v (%s)", ep, de.discoShort(), de.publicKey.ShortString())
|
|
de.endpointState[ep] = &endpointState{
|
|
lastGotPing: time.Now(),
|
|
lastGotPingTxID: forRxPingTxID,
|
|
}
|
|
|
|
// If for some reason this gets very large, do some cleanup.
|
|
if size := len(de.endpointState); size > 100 {
|
|
for ep, st := range de.endpointState {
|
|
if st.shouldDeleteLocked() {
|
|
de.deleteEndpointLocked("addCandidateEndpoint", ep)
|
|
}
|
|
}
|
|
size2 := len(de.endpointState)
|
|
de.c.dlogf("[v1] magicsock: disco: addCandidateEndpoint pruned %v candidate set from %v to %v entries", size, size2)
|
|
}
|
|
return false
|
|
}
|
|
|
|
// clearBestAddrLocked clears the bestAddr and related fields such that future
|
|
// packets will re-evaluate the best address to send to next.
|
|
//
|
|
// de.mu must be held.
|
|
func (de *endpoint) clearBestAddrLocked() {
|
|
de.setBestAddrLocked(addrQuality{})
|
|
de.bestAddrAt = 0
|
|
de.trustBestAddrUntil = 0
|
|
}
|
|
|
|
// noteBadEndpoint marks ipp as a bad endpoint that would need to be
|
|
// re-evaluated before future use, this should be called for example if a send
|
|
// to ipp fails due to a host unreachable error or similar.
|
|
func (de *endpoint) noteBadEndpoint(ipp netip.AddrPort) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
de.clearBestAddrLocked()
|
|
|
|
if st, ok := de.endpointState[ipp]; ok {
|
|
st.clear()
|
|
}
|
|
}
|
|
|
|
// noteConnectivityChange is called when connectivity changes enough
|
|
// that we should question our earlier assumptions about which paths
|
|
// work.
|
|
func (de *endpoint) noteConnectivityChange() {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
de.clearBestAddrLocked()
|
|
|
|
for k := range de.endpointState {
|
|
de.endpointState[k].clear()
|
|
}
|
|
}
|
|
|
|
// pingSizeToPktLen calculates the minimum path MTU that would permit
|
|
// a disco ping message of length size to reach its target at
|
|
// addr. size is the length of the entire disco message including
|
|
// disco headers. If size is zero, assume it is the safe wire MTU.
|
|
func pingSizeToPktLen(size int, is6 bool) tstun.WireMTU {
|
|
if size == 0 {
|
|
return tstun.SafeWireMTU()
|
|
}
|
|
headerLen := ipv4.HeaderLen
|
|
if is6 {
|
|
headerLen = ipv6.HeaderLen
|
|
}
|
|
headerLen += 8 // UDP header length
|
|
return tstun.WireMTU(size + headerLen)
|
|
}
|
|
|
|
// pktLenToPingSize calculates the ping payload size that would
|
|
// create a disco ping message whose on-the-wire length is exactly mtu
|
|
// bytes long. If mtu is zero or less than the minimum ping size, then
|
|
// no MTU probe is desired and return zero for an unpadded ping.
|
|
func pktLenToPingSize(mtu tstun.WireMTU, is6 bool) int {
|
|
if mtu == 0 {
|
|
return 0
|
|
}
|
|
headerLen := ipv4.HeaderLen
|
|
if is6 {
|
|
headerLen = ipv6.HeaderLen
|
|
}
|
|
headerLen += 8 // UDP header length
|
|
if mtu < tstun.WireMTU(headerLen) {
|
|
return 0
|
|
}
|
|
return int(mtu) - headerLen
|
|
}
|
|
|
|
// handlePongConnLocked handles a Pong message (a reply to an earlier ping).
|
|
// It should be called with the Conn.mu held.
|
|
//
|
|
// It reports whether m.TxID corresponds to a ping that this endpoint sent.
|
|
func (de *endpoint) handlePongConnLocked(m *disco.Pong, di *discoInfo, src netip.AddrPort) (knownTxID bool) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
isDerp := src.Addr() == tailcfg.DerpMagicIPAddr
|
|
|
|
sp, ok := de.sentPing[m.TxID]
|
|
if !ok {
|
|
// This is not a pong for a ping we sent.
|
|
return false
|
|
}
|
|
knownTxID = true // for naked returns below
|
|
de.removeSentDiscoPingLocked(m.TxID, sp, discoPongReceived)
|
|
|
|
pktLen := int(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))
|
|
if sp.size != 0 {
|
|
m := getPeerMTUsProbedMetric(tstun.WireMTU(pktLen))
|
|
m.Add(1)
|
|
if metricMaxPeerMTUProbed.Value() < int64(pktLen) {
|
|
metricMaxPeerMTUProbed.Set(int64(pktLen))
|
|
}
|
|
}
|
|
|
|
now := mono.Now()
|
|
latency := now.Sub(sp.at)
|
|
|
|
if !isDerp {
|
|
st, ok := de.endpointState[sp.to]
|
|
if !ok {
|
|
// This is no longer an endpoint we care about.
|
|
return
|
|
}
|
|
|
|
de.c.peerMap.setNodeKeyForIPPort(src, de.publicKey)
|
|
|
|
st.addPongReplyLocked(pongReply{
|
|
latency: latency,
|
|
pongAt: now,
|
|
from: src,
|
|
pongSrc: m.Src,
|
|
})
|
|
}
|
|
|
|
if sp.purpose != pingHeartbeat && sp.purpose != pingHeartbeatForUDPLifetime {
|
|
de.c.dlogf("[v1] magicsock: disco: %v<-%v (%v, %v) got pong tx=%x latency=%v pktlen=%v pong.src=%v%v", de.c.discoShort, de.discoShort(), de.publicKey.ShortString(), src, m.TxID[:6], latency.Round(time.Millisecond), pktLen, m.Src, logger.ArgWriter(func(bw *bufio.Writer) {
|
|
if sp.to != src {
|
|
fmt.Fprintf(bw, " ping.to=%v", sp.to)
|
|
}
|
|
}))
|
|
}
|
|
|
|
// Currently only CLI ping uses this callback.
|
|
if sp.resCB.reply() {
|
|
if sp.purpose == pingCLI {
|
|
de.c.populateCLIPingResponseLocked(sp.resCB.res, latency, sp.to)
|
|
}
|
|
go sp.resCB.cb(sp.resCB.res)
|
|
}
|
|
|
|
// Promote this pong response to our current best address if it's lower latency.
|
|
// TODO(bradfitz): decide how latency vs. preference order affects decision
|
|
if !isDerp {
|
|
thisPong := addrQuality{sp.to, latency, tstun.WireMTU(pingSizeToPktLen(sp.size, sp.to.Addr().Is6()))}
|
|
if betterAddr(thisPong, de.bestAddr) {
|
|
de.c.logf("magicsock: disco: node %v %v now using %v mtu=%v tx=%x", de.publicKey.ShortString(), de.discoShort(), sp.to, thisPong.wireMTU, m.TxID[:6])
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "handlePingLocked-bestAddr-update",
|
|
From: de.bestAddr,
|
|
To: thisPong,
|
|
})
|
|
de.setBestAddrLocked(thisPong)
|
|
}
|
|
if de.bestAddr.AddrPort == thisPong.AddrPort {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "handlePingLocked-bestAddr-latency",
|
|
From: de.bestAddr,
|
|
To: thisPong,
|
|
})
|
|
de.bestAddr.latency = latency
|
|
de.bestAddrAt = now
|
|
de.trustBestAddrUntil = now.Add(trustUDPAddrDuration)
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// addrQuality is an IPPort with an associated latency and path mtu.
|
|
type addrQuality struct {
|
|
netip.AddrPort
|
|
latency time.Duration
|
|
wireMTU tstun.WireMTU
|
|
}
|
|
|
|
func (a addrQuality) String() string {
|
|
return fmt.Sprintf("%v@%v+%v", a.AddrPort, a.latency, a.wireMTU)
|
|
}
|
|
|
|
// betterAddr reports whether a is a better addr to use than b.
|
|
func betterAddr(a, b addrQuality) bool {
|
|
if a.AddrPort == b.AddrPort {
|
|
if a.wireMTU > b.wireMTU {
|
|
// TODO(val): Think harder about the case of lower
|
|
// latency and smaller or unknown MTU, and higher
|
|
// latency but larger MTU. Probably in most cases the
|
|
// largest MTU will also be the lowest latency but we
|
|
// can't depend on that.
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
if !b.IsValid() {
|
|
return true
|
|
}
|
|
if !a.IsValid() {
|
|
return false
|
|
}
|
|
|
|
// Each address starts with a set of points (from 0 to 100) that
|
|
// represents how much faster they are than the highest-latency
|
|
// endpoint. For example, if a has latency 200ms and b has latency
|
|
// 190ms, then a starts with 0 points and b starts with 5 points since
|
|
// it's 5% faster.
|
|
var aPoints, bPoints int
|
|
if a.latency > b.latency && a.latency > 0 {
|
|
bPoints = int(100 - ((b.latency * 100) / a.latency))
|
|
} else if b.latency > 0 {
|
|
aPoints = int(100 - ((a.latency * 100) / b.latency))
|
|
}
|
|
|
|
// Prefer private IPs over public IPs as long as the latencies are
|
|
// roughly equivalent, since it's less likely that a user will have to
|
|
// pay for the bandwidth in a cloud environment.
|
|
//
|
|
// Additionally, prefer any loopback address strongly over non-loopback
|
|
// addresses, and prefer link-local unicast addresses over other types
|
|
// of private IP addresses since it's definitionally more likely that
|
|
// they'll be on the same network segment than a general private IP.
|
|
if a.Addr().IsLoopback() {
|
|
aPoints += 50
|
|
} else if a.Addr().IsLinkLocalUnicast() {
|
|
aPoints += 30
|
|
} else if a.Addr().IsPrivate() {
|
|
aPoints += 20
|
|
}
|
|
if b.Addr().IsLoopback() {
|
|
bPoints += 50
|
|
} else if b.Addr().IsLinkLocalUnicast() {
|
|
bPoints += 30
|
|
} else if b.Addr().IsPrivate() {
|
|
bPoints += 20
|
|
}
|
|
|
|
// Prefer IPv6 for being a bit more robust, as long as
|
|
// the latencies are roughly equivalent.
|
|
if a.Addr().Is6() {
|
|
aPoints += 10
|
|
}
|
|
if b.Addr().Is6() {
|
|
bPoints += 10
|
|
}
|
|
|
|
// Don't change anything if the latency improvement is less than 1%; we
|
|
// want a bit of "stickiness" (a.k.a. hysteresis) to avoid flapping if
|
|
// there's two roughly-equivalent endpoints.
|
|
//
|
|
// Points are essentially the percentage improvement of latency vs. the
|
|
// slower endpoint; absent any boosts from private IPs, IPv6, etc., a
|
|
// will be a better address than b by a fraction of 1% or less if
|
|
// aPoints <= 1 and bPoints == 0.
|
|
if aPoints <= 1 && bPoints == 0 {
|
|
return false
|
|
}
|
|
|
|
return aPoints > bPoints
|
|
}
|
|
|
|
// handleCallMeMaybe handles a CallMeMaybe discovery message via
|
|
// DERP. The contract for use of this message is that the peer has
|
|
// already sent to us via UDP, so their stateful firewall should be
|
|
// open. Now we can Ping back and make it through.
|
|
func (de *endpoint) handleCallMeMaybe(m *disco.CallMeMaybe) {
|
|
if runtime.GOOS == "js" {
|
|
// Nothing to do on js/wasm if we can't send UDP packets anyway.
|
|
return
|
|
}
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
now := time.Now()
|
|
for ep := range de.isCallMeMaybeEP {
|
|
de.isCallMeMaybeEP[ep] = false // mark for deletion
|
|
}
|
|
var newEPs []netip.AddrPort
|
|
for _, ep := range m.MyNumber {
|
|
if ep.Addr().Is6() && ep.Addr().IsLinkLocalUnicast() {
|
|
// We send these out, but ignore them for now.
|
|
// TODO: teach the ping code to ping on all interfaces
|
|
// for these.
|
|
continue
|
|
}
|
|
mak.Set(&de.isCallMeMaybeEP, ep, true)
|
|
if es, ok := de.endpointState[ep]; ok {
|
|
es.callMeMaybeTime = now
|
|
} else {
|
|
de.endpointState[ep] = &endpointState{callMeMaybeTime: now}
|
|
newEPs = append(newEPs, ep)
|
|
}
|
|
}
|
|
if len(newEPs) > 0 {
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "handleCallMeMaybe-new-endpoints",
|
|
To: newEPs,
|
|
})
|
|
|
|
de.c.dlogf("[v1] magicsock: disco: call-me-maybe from %v %v added new endpoints: %v",
|
|
de.publicKey.ShortString(), de.discoShort(),
|
|
logger.ArgWriter(func(w *bufio.Writer) {
|
|
for i, ep := range newEPs {
|
|
if i > 0 {
|
|
w.WriteString(", ")
|
|
}
|
|
w.WriteString(ep.String())
|
|
}
|
|
}))
|
|
}
|
|
|
|
// Delete any prior CallMeMaybe endpoints that weren't included
|
|
// in this message.
|
|
for ep, want := range de.isCallMeMaybeEP {
|
|
if !want {
|
|
delete(de.isCallMeMaybeEP, ep)
|
|
de.deleteEndpointLocked("handleCallMeMaybe", ep)
|
|
}
|
|
}
|
|
|
|
// Zero out all the lastPing times to force sendPingsLocked to send new ones,
|
|
// even if it's been less than 5 seconds ago.
|
|
for _, st := range de.endpointState {
|
|
st.lastPing = 0
|
|
}
|
|
de.sendDiscoPingsLocked(mono.Now(), false)
|
|
}
|
|
|
|
func (de *endpoint) populatePeerStatus(ps *ipnstate.PeerStatus) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
ps.Relay = de.c.derpRegionCodeOfIDLocked(int(de.derpAddr.Port()))
|
|
|
|
if de.lastSendExt.IsZero() {
|
|
return
|
|
}
|
|
|
|
now := mono.Now()
|
|
ps.LastWrite = de.lastSendExt.WallTime()
|
|
ps.Active = now.Sub(de.lastSendExt) < sessionActiveTimeout
|
|
|
|
if udpAddr, derpAddr, _ := de.addrForSendLocked(now); udpAddr.IsValid() && !derpAddr.IsValid() {
|
|
ps.CurAddr = udpAddr.String()
|
|
}
|
|
}
|
|
|
|
// stopAndReset stops timers associated with de and resets its state back to zero.
|
|
// It's called when a discovery endpoint is no longer present in the
|
|
// NetworkMap, or when magicsock is transitioning from running to
|
|
// stopped state (via SetPrivateKey(zero))
|
|
func (de *endpoint) stopAndReset() {
|
|
atomic.AddInt64(&de.numStopAndResetAtomic, 1)
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
|
|
if closing := de.c.closing.Load(); !closing {
|
|
if de.isWireguardOnly {
|
|
de.c.logf("[v1] magicsock: doing cleanup for wireguard key %s", de.publicKey.ShortString())
|
|
} else {
|
|
de.c.logf("[v1] magicsock: doing cleanup for discovery key %s", de.discoShort())
|
|
}
|
|
}
|
|
|
|
de.debugUpdates.Add(EndpointChange{
|
|
When: time.Now(),
|
|
What: "stopAndReset-resetLocked",
|
|
})
|
|
de.resetLocked()
|
|
if de.heartBeatTimer != nil {
|
|
de.heartBeatTimer.Stop()
|
|
de.heartBeatTimer = nil
|
|
}
|
|
}
|
|
|
|
// resetLocked clears all the endpoint's p2p state, reverting it to a
|
|
// DERP-only endpoint. It does not stop the endpoint's heartbeat
|
|
// timer, if one is running.
|
|
func (de *endpoint) resetLocked() {
|
|
de.lastSendExt = 0
|
|
de.lastFullPing = 0
|
|
de.clearBestAddrLocked()
|
|
for _, es := range de.endpointState {
|
|
es.lastPing = 0
|
|
}
|
|
if !de.isWireguardOnly {
|
|
for txid, sp := range de.sentPing {
|
|
de.removeSentDiscoPingLocked(txid, sp, discoPingResultUnknown)
|
|
}
|
|
}
|
|
de.probeUDPLifetime.resetCycleEndpointLocked()
|
|
}
|
|
|
|
func (de *endpoint) numStopAndReset() int64 {
|
|
return atomic.LoadInt64(&de.numStopAndResetAtomic)
|
|
}
|
|
|
|
func (de *endpoint) setDERPHome(regionID uint16) {
|
|
de.mu.Lock()
|
|
defer de.mu.Unlock()
|
|
de.derpAddr = netip.AddrPortFrom(tailcfg.DerpMagicIPAddr, uint16(regionID))
|
|
}
|