// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package wgengine
import (
"bufio"
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"log"
"net"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
"github.com/tailscale/wireguard-go/wgcfg"
"go4.org/mem"
"inet.af/netaddr"
"tailscale.com/control/controlclient"
"tailscale.com/internal/deepprint"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/interfaces"
"tailscale.com/net/packet"
"tailscale.com/net/tsaddr"
"tailscale.com/net/tshttpproxy"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/version"
"tailscale.com/version/distro"
"tailscale.com/wgengine/filter"
"tailscale.com/wgengine/magicsock"
"tailscale.com/wgengine/monitor"
"tailscale.com/wgengine/router"
"tailscale.com/wgengine/tsdns"
"tailscale.com/wgengine/tstun"
)
// minimalMTU is the MTU we set on tailscale's TUN
// interface. wireguard-go defaults to 1420 bytes, which only works if
// the "outer" MTU is 1500 bytes. This breaks on DSL connections
// (typically 1492 MTU) and on GCE (1460 MTU?!).
//
// 1280 is the smallest MTU allowed for IPv6, which is a sensible
// "probably works everywhere" setting until we develop proper PMTU
// discovery.
//
// NewUserspaceEngine passes this value to tun.CreateTUN.
const minimalMTU = 1280
// Destination that handleDNS intercepts: outbound UDP packets addressed
// to magicDNSIP:magicDNSPort are handed to the engine's resolver instead
// of continuing through the filter. pollResolver uses the same pair as
// the source of the responses it injects.
const (
magicDNSIP = 0x64646464 // 100.100.100.100
magicDNSPort = 53
)
// Lazy wireguard-go configuration parameters.
const (
// lazyPeerIdleThreshold is the idle duration after
// which we remove a peer from the wireguard configuration.
// (This includes peers that have never been active, which
// effectively have infinite idleness.)
lazyPeerIdleThreshold = 5 * time . Minute
// packetSendTimeUpdateFrequency controls how often we record
// the time that we wrote a packet to an IP address.
packetSendTimeUpdateFrequency = 10 * time . Second
// packetSendRecheckWireguardThreshold controls how long we can go
// between packet sends to an IP before checking to see
// whether this IP address needs to be added back to the
// Wireguard peer config.
packetSendRecheckWireguardThreshold = 1 * time . Minute
)
// userspaceEngine is the Engine implementation built by
// newUserspaceEngineAdvanced. It ties together the wrapped TUN device,
// the wireguard-go device, the router, the DNS resolver, the magicsock
// connection and the link monitor.
//
// Two mutexes protect its state; see the lock ordering note at the end
// of the struct.
type userspaceEngine struct {
logf logger . Logf
reqCh chan struct { }
waitCh chan struct { } // chan is closed when first Close call completes; contrast with closing bool
timeNow func ( ) time . Time // clock used for activity tracking; time.Now unless replaced
tundev * tstun . TUN // conf.TUN wrapped by tstun.WrapTUN
wgdev * device . Device
router router . Router
resolver * tsdns . Resolver
magicConn * magicsock . Conn
linkMon * monitor . Mon
testMaybeReconfigHook func ( ) // for tests; if non-nil, fires if maybeReconfigWireguardLocked called
// localAddrs is the set of IP addresses assigned to the local
// tunnel interface. It's used to reflect local packets
// incorrectly sent to us.
localAddrs atomic . Value // of map[packet.IP4]bool
wgLock sync . Mutex // serializes all wgdev operations; see lock order comment below
lastCfgFull wgcfg . Config // copy of the config most recently passed to Reconfig
lastRouterSig string // of router.Config
lastEngineSigFull string // of full wireguard config
lastEngineSigTrim string // of trimmed wireguard config
recvActivityAt map [ tailcfg . DiscoKey ] time . Time // last receive time per tracked disco key; only keys present are tracked
trimmedDisco map [ tailcfg . DiscoKey ] bool // set of disco keys of peers currently excluded from wireguard config
sentActivityAt4 map [ packet . IP4 ] * int64 // value is atomic int64 of unixtime
destIPActivityFuncs4 map [ packet . IP4 ] func ( ) // per-destination send hooks handed to tundev
sentActivityAt6 map [ packet . IP6 ] * int64 // value is atomic int64 of unixtime
destIPActivityFuncs6 map [ packet . IP6 ] func ( ) // per-destination send hooks handed to tundev
mu sync . Mutex // guards following; see lock order comment below
closing bool // Close was called (even if we're still closing)
statusCallback StatusCallback
linkChangeCallback func ( major bool , newState * interfaces . State )
peerSequence [ ] wgcfg . Key // public keys of cfg.Peers, in the order last given to Reconfig
endpoints [ ] string // most recent endpoint list reported by magicsock
pingers map [ wgcfg . Key ] * pinger // legacy pingers for pre-discovery peers
linkState * interfaces . State
// Lock ordering: magicsock.Conn.mu, wgLock, then mu.
}
// RouterGen is the signature for a function that creates a
// router.Router.
//
// newUserspaceEngineAdvanced calls it with the engine's logger, the
// wireguard-go device and the unwrapped TUN device.
type RouterGen func ( logf logger . Logf , wgdev * device . Device , tundev tun . Device ) ( router . Router , error )
// EngineConfig is the set of parameters accepted by
// NewUserspaceEngineAdvanced.
type EngineConfig struct {
// Logf is the logging function used by the engine.
Logf logger . Logf
// TUN is the tun device used by the engine.
TUN tun . Device
// RouterGen is the function used to instantiate the router.
RouterGen RouterGen
// ListenPort is the port on which the engine will listen.
ListenPort uint16
// Fake determines whether this engine is running in fake mode,
// which disables such features as DNS configuration and unrestricted ICMP Echo responses.
Fake bool
}
// NewFakeUserspaceEngine returns an Engine that runs on a fake TUN
// device with a fake router, listening on listenPort. The engine is
// created in fake mode (EngineConfig.Fake is set).
func NewFakeUserspaceEngine(logf logger.Logf, listenPort uint16) (Engine, error) {
	logf("Starting userspace wireguard engine (with fake TUN device)")
	return NewUserspaceEngineAdvanced(EngineConfig{
		Logf:       logf,
		TUN:        tstun.NewFakeTUN(),
		RouterGen:  router.NewFake,
		ListenPort: listenPort,
		Fake:       true,
	})
}
// NewUserspaceEngine creates the TUN device named tunname (with
// minimalMTU) and returns a Tailscale Engine running on it, listening
// on listenPort.
//
// It returns an error if tunname is blank, if the TUN device cannot be
// created, or if the engine itself fails to initialize.
func NewUserspaceEngine(logf logger.Logf, tunname string, listenPort uint16) (Engine, error) {
	if tunname == "" {
		return nil, fmt.Errorf("--tun name must not be blank")
	}
	logf("Starting userspace wireguard engine with tun device %q", tunname)

	// Named tunDev rather than tun so the imported tun package stays
	// visible for the rest of the function.
	tunDev, err := tun.CreateTUN(tunname, minimalMTU)
	if err != nil {
		diagnoseTUNFailure(logf)
		logf("CreateTUN: %v", err)
		return nil, err
	}
	logf("CreateTUN ok.")

	eng, err := NewUserspaceEngineAdvanced(EngineConfig{
		Logf:       logf,
		TUN:        tunDev,
		RouterGen:  router.New,
		ListenPort: listenPort,
	})
	if err != nil {
		return nil, err
	}
	return eng, nil
}
// NewUserspaceEngineAdvanced is like NewUserspaceEngine
// but provides control over all config fields.
//
// The caller supplies the TUN device and the router constructor in
// conf; all of the work is done by newUserspaceEngineAdvanced.
func NewUserspaceEngineAdvanced ( conf EngineConfig ) ( Engine , error ) {
return newUserspaceEngineAdvanced ( conf )
}
// newUserspaceEngineAdvanced builds a userspaceEngine from conf. In
// order, it wraps the TUN device, creates the link monitor, the
// magicsock connection, the wireguard-go device and the router, brings
// the device and router up, and finally starts the link monitor,
// magicsock and the DNS resolver.
//
// The result parameter reterr is named so that the deferred cleanup
// installed once the wireguard device exists can close that device
// whenever a non-nil error is returned.
func newUserspaceEngineAdvanced ( conf EngineConfig ) ( _ Engine , reterr error ) {
logf := conf . Logf
rconf := tsdns . ResolverConfig {
Logf : conf . Logf ,
Forward : true ,
}
e := & userspaceEngine {
timeNow : time . Now ,
logf : logf ,
reqCh : make ( chan struct { } , 1 ) ,
waitCh : make ( chan struct { } ) ,
tundev : tstun . WrapTUN ( logf , conf . TUN ) ,
resolver : tsdns . NewResolver ( rconf ) ,
pingers : make ( map [ wgcfg . Key ] * pinger ) ,
}
// Start with an empty (non-nil) address set so isLocalAddr's type
// assertion succeeds before the first Reconfig.
e . localAddrs . Store ( map [ packet . IP4 ] bool { } )
// NOTE(review): the error from getLinkState is discarded, so
// e.linkState may be nil below — confirm that AnyInterfaceUp
// tolerates a nil receiver.
e . linkState , _ = getLinkState ( )
logf ( "link state: %+v" , e . linkState )
// Respond to all pings only in fake mode.
if conf . Fake {
e . tundev . PostFilterIn = echoRespondToAll
}
e . tundev . PreFilterOut = e . handleLocalPackets
mon , err := monitor . New ( logf , func ( ) {
e . LinkChange ( false )
tshttpproxy . InvalidateCache ( )
} )
if err != nil {
e . tundev . Close ( )
return nil , err
}
e . linkMon = mon
// endpointsFn records the endpoints reported by magicsock (reusing
// the existing slice's storage) and then requests a status update.
endpointsFn := func ( endpoints [ ] string ) {
e . mu . Lock ( )
e . endpoints = append ( e . endpoints [ : 0 ] , endpoints ... )
e . mu . Unlock ( )
e . RequestStatus ( )
}
magicsockOpts := magicsock . Options {
Logf : logf ,
Port : conf . ListenPort ,
EndpointsFunc : endpointsFn ,
DERPActiveFunc : e . RequestStatus ,
IdleFunc : e . tundev . IdleDuration ,
NoteRecvActivity : e . noteReceiveActivity ,
}
e . magicConn , err = magicsock . NewConn ( magicsockOpts )
if err != nil {
// NOTE(review): the link monitor created above is not closed on
// this path (nor on the later error paths) — confirm whether it
// holds resources that need releasing.
e . tundev . Close ( )
return nil , fmt . Errorf ( "wgengine: %v" , err )
}
e . magicConn . SetNetworkUp ( e . linkState . AnyInterfaceUp ( ) )
// flags==0 because logf is already nested in another logger.
// The outer one can display the preferred log prefixes, etc.
// NOTE(review): no flags argument appears in the call below; the
// comment above may be stale.
dlog := logger . StdLogger ( logf )
// From here on the local variable logger shadows the imported logger
// package within this function.
logger := device . Logger {
Debug : dlog ,
Info : dlog ,
Error : dlog ,
}
opts := & device . DeviceOptions {
Logger : & logger ,
HandshakeDone : func ( peerKey wgcfg . Key , peer * device . Peer , deviceAllowedIPs * device . AllowedIPs ) {
// Send an unsolicited status event every time a
// handshake completes. This makes sure our UI can
// update quickly as soon as it connects to a peer.
//
// We use a goroutine here to avoid deadlocking
// wireguard, since RequestStatus() will call back
// into it, and wireguard is what called us to get
// here.
go e . RequestStatus ( )
if e . magicConn . PeerHasDiscoKey ( tailcfg . NodeKey ( peerKey ) ) {
e . logf ( "wireguard handshake complete for %v" , peerKey . ShortString ( ) )
// This is a modern peer with discovery support. No need to send pings.
return
}
e . logf ( "wireguard handshake complete for %v; sending legacy pings" , peerKey . ShortString ( ) )
// Ping every single-IP that peer routes.
// These synthetic packets are used to traverse NATs.
var ips [ ] wgcfg . IP
allowedIPs := deviceAllowedIPs . EntriesForPeer ( peer )
for _ , ipNet := range allowedIPs {
// ones == bits selects full-length masks (single addresses);
// ones != 0 rejects a mask whose Size is (0, 0).
if ones , bits := ipNet . Mask . Size ( ) ; ones == bits && ones != 0 {
var ip wgcfg . IP
copy ( ip . Addr [ : ] , ipNet . IP . To16 ( ) )
ips = append ( ips , ip )
}
}
if len ( ips ) > 0 {
go e . pinger ( peerKey , ips )
} else {
logf ( "[unexpected] peer %s has no single-IP routes: %v" , peerKey . ShortString ( ) , allowedIPs )
}
} ,
CreateBind : e . magicConn . CreateBind ,
CreateEndpoint : e . magicConn . CreateEndpoint ,
SkipBindUpdate : true ,
}
// wgdev takes ownership of tundev, will close it when closed.
e . logf ( "Creating wireguard device..." )
e . wgdev = device . NewDevice ( e . tundev , opts )
// From this point on, any error return closes the wireguard device.
defer func ( ) {
if reterr != nil {
e . wgdev . Close ( )
}
} ( )
// Pass the underlying tun.(*NativeDevice) to the router:
// routers do not Read or Write, but do access native interfaces.
e . logf ( "Creating router..." )
e . router , err = conf . RouterGen ( logf , e . wgdev , e . tundev . Unwrap ( ) )
if err != nil {
e . magicConn . Close ( )
return nil , err
}
// Log TUN events and request a status update on each up/down
// transition. The goroutine runs until the Events channel is closed.
go func ( ) {
up := false
for event := range e . tundev . Events ( ) {
if event & tun . EventMTUUpdate != 0 {
mtu , err := e . tundev . MTU ( )
e . logf ( "external route MTU: %d (%v)" , mtu , err )
}
if event & tun . EventUp != 0 && ! up {
e . logf ( "external route: up" )
e . RequestStatus ( )
up = true
}
if event & tun . EventDown != 0 && up {
e . logf ( "external route: down" )
e . RequestStatus ( )
up = false
}
}
} ( )
e . logf ( "Bringing wireguard device up..." )
e . wgdev . Up ( )
e . logf ( "Bringing router up..." )
if err := e . router . Up ( ) ; err != nil {
e . magicConn . Close ( )
// NOTE(review): wgdev.Close is called here and again by the
// deferred cleanup above (reterr is non-nil) — confirm that a
// second Close is harmless.
e . wgdev . Close ( )
return nil , err
}
// TODO(danderson): we should delete this. It's pointless to apply
// a no-op settings here.
// TODO(bradfitz): counter-point: it tests the router implementation early
// to see if any part of it might fail.
e . logf ( "Clearing router settings..." )
if err := e . router . Set ( nil ) ; err != nil {
e . magicConn . Close ( )
e . wgdev . Close ( )
return nil , err
}
e . logf ( "Starting link monitor..." )
e . linkMon . Start ( )
e . logf ( "Starting magicsock..." )
e . magicConn . Start ( )
e . logf ( "Starting resolver..." )
e . resolver . Start ( )
go e . pollResolver ( )
e . logf ( "Engine created." )
return e , nil
}
// echoRespondToAll is an inbound post-filter that answers every ICMP
// echo request by injecting the matching echo response outbound.
// It always returns filter.Accept.
func echoRespondToAll(p *packet.Parsed, t *tstun.TUN) filter.Response {
	if !p.IsEchoRequest() {
		return filter.Accept
	}

	reply := p.ICMP4Header()
	reply.ToResponse()
	t.InjectOutbound(packet.Generate(&reply, p.Payload()))

	// We already responded to it, but it's not an error.
	// Proceed with regular delivery. (Since this code is only
	// used in fake mode, regular delivery just means throwing
	// it away. If this ever gets run in non-fake mode, you'll
	// get double responses to pings, which is an indicator you
	// shouldn't be doing that I guess.)
	return filter.Accept
}
// handleLocalPackets is the outbound pre-filter for packets arriving
// from the local network stack. Packets that tailscaled handles itself
// are consumed here (filter.Drop); everything else is accepted and
// continues into the main ACL filter.
func (e *userspaceEngine) handleLocalPackets(p *packet.Parsed, t *tstun.TUN) filter.Response {
	if e.handleDNS(p, t) == filter.Drop {
		// local DNS handled the packet.
		return filter.Drop
	}

	switch runtime.GOOS {
	case "darwin", "ios":
		// macOS NetworkExtension directs packets destined to the
		// tunnel's local IP address into the tunnel, instead of
		// looping back within the kernel network stack. We have to
		// notice that an outbound packet is actually destined for
		// ourselves, and loop it back into macOS.
		if e.isLocalAddr(p.DstIP4) {
			t.InjectInboundCopy(p.Buffer())
			return filter.Drop
		}
	}
	return filter.Accept
}
// isLocalAddr reports whether ip is in the engine's current set of
// local tunnel addresses. If the stored value is not the expected map
// type it logs and reports false.
func (e *userspaceEngine) isLocalAddr(ip packet.IP4) bool {
	if addrs, ok := e.localAddrs.Load().(map[packet.IP4]bool); ok {
		return addrs[ip]
	}
	e.logf("[unexpected] e.localAddrs was nil, can't check for loopback packet")
	return false
}
// handleDNS is an outbound pre-filter resolving Tailscale domains.
//
// UDP packets addressed to magicDNSIP:magicDNSPort are copied into a
// resolver request and dropped from the normal path; all other packets
// are accepted untouched. A failure to enqueue is only logged.
func (e *userspaceEngine) handleDNS(p *packet.Parsed, t *tstun.TUN) filter.Response {
	forMagicDNS := p.DstIP4 == magicDNSIP && p.DstPort == magicDNSPort && p.IPProto == packet.UDP
	if !forMagicDNS {
		return filter.Accept
	}

	// Copy the payload so the request owns its bytes independently of
	// the packet buffer.
	payload := append([]byte(nil), p.Payload()...)
	from := netaddr.IPPort{IP: p.SrcIP4.Netaddr(), Port: p.SrcPort}
	if err := e.resolver.EnqueueRequest(tsdns.Packet{Payload: payload, Addr: from}); err != nil {
		e.logf("tsdns: enqueue: %v", err)
	}
	return filter.Drop
}
// pollResolver reads responses from the DNS resolver and injects them
// inbound as UDP packets sourced from magicDNSIP:magicDNSPort.
// It returns once the resolver reports tsdns.ErrClosed; any other error
// is logged and the loop continues.
func (e *userspaceEngine) pollResolver() {
	// TODO(dmytro): avoid this allocation without importing tstun quirks into tsdns.
	const offset = tstun.PacketStartOffset

	for {
		resp, err := e.resolver.NextResponse()
		switch {
		case err == tsdns.ErrClosed:
			return
		case err != nil:
			e.logf("tsdns: error: %v", err)
			continue
		}

		hdr := packet.UDP4Header{
			IP4Header: packet.IP4Header{
				SrcIP: packet.IP4(magicDNSIP),
				DstIP: packet.IP4FromNetaddr(resp.Addr.IP),
			},
			SrcPort: magicDNSPort,
			DstPort: resp.Addr.Port,
		}
		hdrLen := hdr.Len()

		// Layout: [offset bytes][header][payload]. The payload is
		// written before the header is marshaled, as in the original.
		pkt := make([]byte, offset+hdrLen+len(resp.Payload))
		copy(pkt[offset+hdrLen:], resp.Payload)
		hdr.Marshal(pkt[offset:])
		e.tundev.InjectInboundDirect(pkt, offset)
	}
}
// pinger sends ping packets for a few seconds.
//
// These generated packets are used to ensure we trigger the spray logic in
// the magicsock package for NAT traversal.
//
// These are only used with legacy peers (before 0.100.0) that don't
// have advertised discovery keys.
//
// A pinger holds the state of one such run: the owning engine, the
// function that cancels the run's context, and a channel that run
// closes when it returns.
type pinger struct {
e * userspaceEngine
done chan struct { } // closed after shutdown (not the ctx.Done() chan)
cancel context . CancelFunc
}
// close cancels the pinger and blocks until its run method has returned.
//
// Removal from the userspaceEngine.pingers map is done by run's deferred
// cleanup, which acquires p.e.mu; close therefore cannot be called while
// p.e.mu is held.
func ( p * pinger ) close ( ) {
p . cancel ( )
<- p . done
}
// run sends an ICMP echo request from srcIP to every IPv4 address in ips
// once per tick until ctx is cancelled or the time budget is spent.
//
// On return it removes p from the engine's pingers map (if p is still
// the registered pinger for peerKey) and closes p.done.
func (p *pinger) run(ctx context.Context, peerKey wgcfg.Key, ips []wgcfg.IP, srcIP packet.IP4) {
	defer func() {
		eng := p.e
		eng.mu.Lock()
		if eng.pingers[peerKey] == p {
			delete(eng.pingers, peerKey)
		}
		eng.mu.Unlock()
		close(p.done)
	}()

	const (
		// sendFreq is slightly longer than sprayFreq in magicsock to ensure
		// that if these ping packets are the only source of early packets
		// sent to the peer, that each one will be sprayed.
		sendFreq  = 300 * time.Millisecond
		stopAfter = 3 * time.Second
	)

	header := packet.ICMP4Header{
		IP4Header: packet.IP4Header{
			SrcIP: srcIP,
		},
		Type: packet.ICMP4EchoRequest,
		Code: packet.ICMP4NoCode,
	}

	begun := time.Now()

	// This code is only used for legacy (pre-discovery) peers. They're
	// not going to work right with IPv6 on the overlay anyway, so only
	// IPv4 destinations are kept.
	var targets []packet.IP4
	for _, ip := range ips {
		if !ip.Is6() {
			targets = append(targets, packet.IP4FromNetaddr(netaddr.IPFrom16(ip.Addr)))
		}
	}

	payload := []byte("magicsock_spray") // no meaning
	header.IPID = 1

	ticker := time.NewTicker(sendFreq)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}
		if time.Since(begun) > stopAfter {
			return
		}
		for _, dst := range targets {
			header.DstIP = dst
			// InjectOutbound take ownership of the packet, so we allocate.
			p.e.tundev.InjectOutbound(packet.Generate(&header, payload))
		}
		header.IPID++
	}
}
// pinger generates a few seconds of ping traffic towards the given peer
// so that the spray logic in the magicsock package is triggered for NAT
// traversal. It blocks until the pings are finished or cancelled.
//
// The source address is the first address of the last full wireguard
// config; with no such address nothing is sent. Any pinger already
// registered for peerKey is replaced and shut down first.
//
// This is only used with legacy peers (before 0.100.0) that don't
// have advertised discovery keys.
func (e *userspaceEngine) pinger(peerKey wgcfg.Key, ips []wgcfg.IP) {
	e.logf("generating initial ping traffic to %s (%v)", peerKey.ShortString(), ips)

	srcIP := func() packet.IP4 {
		e.wgLock.Lock()
		defer e.wgLock.Unlock()
		if addrs := e.lastCfgFull.Addresses; len(addrs) > 0 {
			return packet.IP4FromNetaddr(netaddr.IPFrom16(addrs[0].IP.Addr))
		}
		return 0
	}()
	if srcIP == 0 {
		e.logf("generating initial ping traffic: no source IP")
		return
	}

	ctx, cancel := context.WithCancel(context.Background())
	next := &pinger{
		e:      e,
		done:   make(chan struct{}),
		cancel: cancel,
	}

	e.mu.Lock()
	if e.closing {
		e.mu.Unlock()
		return
	}
	prev := e.pingers[peerKey]
	e.pingers[peerKey] = next
	e.mu.Unlock()

	// prev.close waits on e.mu indirectly, so it runs only after the
	// lock has been released.
	if prev != nil {
		prev.close()
	}
	next.run(ctx, peerKey, ips, srcIP)
}
// debugTrimWireguardEnv is the raw value of the TS_DEBUG_TRIM_WIREGUARD
// environment variable, read once at package initialization.
// debugTrimWireguard is that value parsed with strconv.ParseBool; the
// parse error is discarded, so an unparsable value leaves it false.
var (
debugTrimWireguardEnv = os . Getenv ( "TS_DEBUG_TRIM_WIREGUARD" )
debugTrimWireguard , _ = strconv . ParseBool ( debugTrimWireguardEnv )
)
// forceFullWireguardConfig reports whether we should give wireguard
// our full network map, even for inactive peers.
//
// Precedence, as implemented below:
//  1. If TS_DEBUG_TRIM_WIREGUARD is set, the full config is forced
//     exactly when the variable parses as false.
//  2. Otherwise, if controlclient.TrimWGConfig is non-empty, the full
//     config is forced unless that option is true.
//  3. Otherwise false is returned, i.e. trimming is allowed.
//
// TODO(bradfitz): remove this after our 1.0 launch; we don't want to
// enable wireguard config trimming quite yet because it just landed
// and we haven't got enough time testing it.
// NOTE(review): the TODO above says trimming should not be enabled yet,
// but the default path below returns false (trimming on) — confirm which
// is intended.
func forceFullWireguardConfig ( numPeers int ) bool {
// Did the user explicitly enable trimming via the environment variable knob?
if debugTrimWireguardEnv != "" {
return ! debugTrimWireguard
}
if opt := controlclient . TrimWGConfig ( ) ; opt != "" {
return ! opt . EqualBool ( true )
}
// On iOS with large networks, it's critical, so turn on trimming.
// Otherwise we run out of memory from wireguard-go goroutine stacks+buffers.
// This will be the default later for all platforms and network sizes.
// NOTE(review): this branch and the fall-through below both return
// false, so the iOS/numPeers check currently has no effect on the result.
if numPeers > 50 && version . OS ( ) == "iOS" {
return false
}
return false
}
// isTrimmablePeer reports whether p is a peer that we can trim out of the
// wireguard config while it is idle.
//
// A peer is trimmable only when all of the following hold:
//
//   - forceFullWireguardConfig(numPeers) is false;
//   - it supports discovery, i.e. it has exactly one endpoint whose host
//     ends in ".disco.tailscale" (so we know who it is when we receive
//     its data and don't need wireguard-go to figure it out);
//   - it has at least one AllowedIP, and every AllowedIP is a single
//     address (an IPv4 /32 or an IPv6 /128) rather than a subnet.
//
// Subnet router nodes therefore always stay in the wireguard-go config.
//
// numPeers is the total number of peers in the full config; it is only
// passed through to forceFullWireguardConfig.
func isTrimmablePeer(p *wgcfg.Peer, numPeers int) bool {
	if forceFullWireguardConfig(numPeers) {
		return false
	}
	if len(p.Endpoints) != 1 {
		return false
	}
	if !strings.HasSuffix(p.Endpoints[0].Host, ".disco.tailscale") {
		return false
	}
	// maybeReconfigWireguardLocked tracks a trimmable peer's activity by
	// p.AllowedIPs[0]. A peer without any AllowedIPs cannot be tracked
	// and would make that index panic, so keep it in the config instead.
	if len(p.AllowedIPs) == 0 {
		return false
	}
	// AllowedIPs must all be single IPs, not subnets.
	for _, aip := range p.AllowedIPs {
		switch {
		case aip.IP.Is4() && aip.Mask != 32:
			return false
		case aip.IP.Is6() && aip.Mask != 128:
			return false
		}
	}
	return true
}
// noteReceiveActivity is called by magicsock when a packet has been
// received from the peer using discovery key dk. Magicsock calls this no
// more than every 10 seconds for a given peer.
//
// For a tracked peer it records the receive time and, if the peer is
// currently trimmed out of the wireguard config, triggers a reconfig so
// the peer is added back. It takes e.wgLock.
func (e *userspaceEngine) noteReceiveActivity(dk tailcfg.DiscoKey) {
	e.wgLock.Lock()
	defer e.wgLock.Unlock()

	_, tracked := e.recvActivityAt[dk]
	if !tracked {
		// Not a trimmable peer we care about tracking. (See isTrimmablePeer)
		if e.trimmedDisco[dk] {
			e.logf("wgengine: [unexpected] noteReceiveActivity called on idle discokey %v that's not in recvActivityAt", dk.ShortString())
		}
		return
	}

	e.recvActivityAt[dk] = e.timeNow()

	// Only a peer that is currently trimmed needs wireguard to be
	// reprogrammed; an already-configured peer just gets its timestamp
	// refreshed above.
	if !e.trimmedDisco[dk] {
		return
	}
	e.logf("wgengine: idle peer %v now active, reconfiguring wireguard", dk.ShortString())
	e.maybeReconfigWireguardLocked(nil)
}
// isActiveSince reports whether the peer identified by (dk, ip) has
// had a packet sent to or received from it since t.
//
// Receive activity is looked up by disco key; send activity by ip, in
// the IPv4 or IPv6 send-time map as appropriate. Send times have
// one-second resolution and are compared with t.Unix().
//
// e.wgLock must be held.
func (e *userspaceEngine) isActiveSince(dk tailcfg.DiscoKey, ip wgcfg.IP, t time.Time) bool {
	if e.recvActivityAt[dk].After(t) {
		return true
	}

	var (
		sentAt *int64
		found  bool
	)
	if ip.Is4() {
		// The IPv4 address occupies the last four bytes of the 16-byte form.
		sentAt, found = e.sentActivityAt4[packet.IP4(binary.BigEndian.Uint32(ip.Addr[12:]))]
	} else {
		sentAt, found = e.sentActivityAt6[packet.IP6FromRaw16(ip.Addr)]
	}
	return found && atomic.LoadInt64(sentAt) >= t.Unix()
}
// discoKeyFromPeer returns the DiscoKey for a wireguard config's Peer.
//
// Invariant: isTrimmablePeer(p) == true, so it should have 1 endpoint with
// Host of form "<64-hex-digits>.disco.tailscale". If the host is shorter
// than 64 characters or its prefix does not parse as a key, the zero
// DiscoKey is returned.
func discoKeyFromPeer(p *wgcfg.Peer) tailcfg.DiscoKey {
	const hexKeyLen = 64

	host := p.Endpoints[0].Host
	if len(host) >= hexKeyLen {
		if pub, err := key.NewPublicFromHexMem(mem.S(host[:hexKeyLen])); err == nil {
			return tailcfg.DiscoKey(pub)
		}
	}
	return tailcfg.DiscoKey{}
}
// maybeReconfigWireguardLocked recomputes the trimmed wireguard config
// from e.lastCfgFull and, if anything relevant changed since the last
// call (per the e.lastEngineSigTrim hash), pushes it to e.wgdev.
//
// discoChanged is the set of peers whose disco keys have changed, implying they've restarted.
// If a peer is in this set and was previously in the live wireguard config,
// it needs to be first removed and then re-added to flush out its wireguard session key.
// If discoChanged is nil or empty, this extra removal step isn't done.
//
// If e.testMaybeReconfigHook is set, only the hook is called and nil is
// returned. Otherwise the error, if any, comes from e.wgdev.Reconfig.
//
// e.wgLock must be held.
func ( e * userspaceEngine ) maybeReconfigWireguardLocked ( discoChanged map [ key . Public ] bool ) error {
if hook := e . testMaybeReconfigHook ; hook != nil {
hook ( )
return nil
}
full := e . lastCfgFull
// Compute a minimal config to pass to wireguard-go
// based on the full config. Prune off all the peers
// and only add the active ones back.
// (The local name min shadows the builtin of the same name in newer
// Go versions; it is only a variable here.)
min := full
min . Peers = nil
// We'll only keep a peer around if it's been active in
// the past 5 minutes. That's more than WireGuard's key
// rotation time anyway so it's no harm if we remove it
// later if it's been inactive.
activeCutoff := e . timeNow ( ) . Add ( - lazyPeerIdleThreshold )
// Not all peers can be trimmed from the network map (see
// isTrimmablePeer). For those that are trimmable, keep track
// of their DiscoKey and Tailscale IPs. These are the ones
// we'll need to install tracking hooks for to watch their
// send/receive activity.
trackDisco := make ( [ ] tailcfg . DiscoKey , 0 , len ( full . Peers ) )
trackIPs := make ( [ ] wgcfg . IP , 0 , len ( full . Peers ) )
trimmedDisco := map [ tailcfg . DiscoKey ] bool { } // TODO: don't re-alloc this map each time
needRemoveStep := false
for i := range full . Peers {
p := & full . Peers [ i ]
if ! isTrimmablePeer ( p , len ( full . Peers ) ) {
// Untrimmable peers are always part of the live config.
min . Peers = append ( min . Peers , * p )
if discoChanged [ key . Public ( p . PublicKey ) ] {
needRemoveStep = true
}
continue
}
// Only the first AllowedIP is used to track send activity.
// NOTE(review): this index assumes a trimmable peer has at least
// one AllowedIP — confirm isTrimmablePeer guarantees that.
tsIP := p . AllowedIPs [ 0 ] . IP
dk := discoKeyFromPeer ( p )
trackDisco = append ( trackDisco , dk )
trackIPs = append ( trackIPs , tsIP )
if e . isActiveSince ( dk , tsIP , activeCutoff ) {
min . Peers = append ( min . Peers , * p )
if discoChanged [ key . Public ( p . PublicKey ) ] {
needRemoveStep = true
}
} else {
trimmedDisco [ dk ] = true
}
}
if ! deepprint . UpdateHash ( & e . lastEngineSigTrim , min , trimmedDisco , trackDisco , trackIPs ) {
// No changes
return nil
}
e . trimmedDisco = trimmedDisco
e . updateActivityMapsLocked ( trackDisco , trackIPs )
if needRemoveStep {
// First pass: apply the config without the peers whose disco key
// changed, so that the second Reconfig below re-adds them.
minner := min
minner . Peers = nil
numRemove := 0
for _ , p := range min . Peers {
if discoChanged [ key . Public ( p . PublicKey ) ] {
numRemove ++
continue
}
minner . Peers = append ( minner . Peers , p )
}
if numRemove > 0 {
e . logf ( "wgengine: Reconfig: removing session keys for %d peers" , numRemove )
if err := e . wgdev . Reconfig ( & minner ) ; err != nil {
e . logf ( "wgdev.Reconfig: %v" , err )
return err
}
}
}
e . logf ( "wgengine: Reconfig: configuring userspace wireguard config (with %d/%d peers)" , len ( min . Peers ) , len ( full . Peers ) )
if err := e . wgdev . Reconfig ( & min ) ; err != nil {
e . logf ( "wgdev.Reconfig: %v" , err )
return err
}
return nil
}
// updateActivityMapsLocked updates the data structures used for tracking the activity
// of wireguard peers that we might add/remove dynamically from the real config
// as given to wireguard-go.
//
// trackDisco lists the disco keys whose receive times should be tracked;
// trackIPs lists the destination IPs whose send times should be tracked.
// All tracking maps are rebuilt to contain exactly those entries, with
// existing timestamps and hook functions carried over, and the new hook
// maps are handed to e.tundev.
//
// e.wgLock must be held.
func ( e * userspaceEngine ) updateActivityMapsLocked ( trackDisco [ ] tailcfg . DiscoKey , trackIPs [ ] wgcfg . IP ) {
// Generate the new map of which discokeys we want to track
// receive times for.
mr := map [ tailcfg . DiscoKey ] time . Time { } // TODO: only recreate this if set of keys changed
for _ , dk := range trackDisco {
// Preserve old times in the new map, but also
// populate map entries for new trackDisco values with
// time.Time{} zero values. (Only entries in this map
// are tracked, so the Time zero values allow it to be
// tracked later)
mr [ dk ] = e . recvActivityAt [ dk ]
}
e . recvActivityAt = mr
// Keep the old send-time and hook maps around so entries for IPs that
// are still tracked can be reused, then start fresh maps.
oldTime4 := e . sentActivityAt4
e . sentActivityAt4 = make ( map [ packet . IP4 ] * int64 , len ( oldTime4 ) )
oldFunc4 := e . destIPActivityFuncs4
e . destIPActivityFuncs4 = make ( map [ packet . IP4 ] func ( ) , len ( oldFunc4 ) )
oldTime6 := e . sentActivityAt6
e . sentActivityAt6 = make ( map [ packet . IP6 ] * int64 , len ( oldTime6 ) )
oldFunc6 := e . destIPActivityFuncs6
e . destIPActivityFuncs6 = make ( map [ packet . IP6 ] func ( ) , len ( oldFunc6 ) )
// updateFn builds the hook for one destination IP; timePtr is that
// IP's atomically-accessed last-send unix time.
// NOTE(review): the hook acquires e.wgLock itself, so it must not be
// invoked while wgLock is held — presumably tundev calls it from the
// packet send path; confirm.
updateFn := func ( timePtr * int64 ) func ( ) {
return func ( ) {
now := e . timeNow ( ) . Unix ( )
old := atomic . LoadInt64 ( timePtr )
// How long's it been since we last sent a packet?
// For our first packet, old is Unix epoch time 0 (1970).
elapsedSec := now - old
if elapsedSec >= int64 ( packetSendTimeUpdateFrequency / time . Second ) {
atomic . StoreInt64 ( timePtr , now )
}
// On a big jump, assume we might no longer be in the wireguard
// config and go check.
if elapsedSec >= int64 ( packetSendRecheckWireguardThreshold / time . Second ) {
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
e . maybeReconfigWireguardLocked ( nil )
}
}
}
for _ , wip := range trackIPs {
if wip . Is4 ( ) {
// The IPv4 address is the last four bytes of the 16-byte form.
pip := packet . IP4 ( binary . BigEndian . Uint32 ( wip . Addr [ 12 : ] ) )
timePtr := oldTime4 [ pip ]
if timePtr == nil {
timePtr = new ( int64 )
}
e . sentActivityAt4 [ pip ] = timePtr
fn := oldFunc4 [ pip ]
if fn == nil {
fn = updateFn ( timePtr )
}
e . destIPActivityFuncs4 [ pip ] = fn
} else {
pip := packet . IP6FromRaw16 ( wip . Addr )
timePtr := oldTime6 [ pip ]
if timePtr == nil {
timePtr = new ( int64 )
}
e . sentActivityAt6 [ pip ] = timePtr
fn := oldFunc6 [ pip ]
if fn == nil {
fn = updateFn ( timePtr )
}
e . destIPActivityFuncs6 [ pip ] = fn
}
}
e . tundev . SetDestIPActivityFuncs ( e . destIPActivityFuncs4 , e . destIPActivityFuncs6 )
}
// Reconfig applies a new wireguard config and router config to the
// engine.
//
// It refreshes the local address set and the peer sequence, then
// compares hashes of cfg and routerCfg with the previously applied
// ones. If neither changed it returns ErrNoChanges. Otherwise it
// records cfg, informs magicsock of the private key and peer set,
// reprograms wireguard (see maybeReconfigWireguardLocked) and, if the
// router config changed, reprograms the router.
//
// It panics if routerCfg is nil. When routerCfg.DNS.Proxied is set, the
// caller's routerCfg.DNS.Nameservers is overwritten in place.
// It holds e.wgLock for most of its duration.
func ( e * userspaceEngine ) Reconfig ( cfg * wgcfg . Config , routerCfg * router . Config ) error {
if routerCfg == nil {
panic ( "routerCfg must not be nil" )
}
localAddrs := map [ packet . IP4 ] bool { }
for _ , addr := range routerCfg . LocalAddrs {
// TODO: ipv6
if ! addr . IP . Is4 ( ) {
continue
}
localAddrs [ packet . IP4FromNetaddr ( addr . IP ) ] = true
}
e . localAddrs . Store ( localAddrs )
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
peerSet := make ( map [ key . Public ] struct { } , len ( cfg . Peers ) )
// peerSequence is refreshed here, before the no-change check, so it is
// updated even when ErrNoChanges is returned.
e . mu . Lock ( )
e . peerSequence = e . peerSequence [ : 0 ]
for _ , p := range cfg . Peers {
e . peerSequence = append ( e . peerSequence , p . PublicKey )
peerSet [ key . Public ( p . PublicKey ) ] = struct { } { }
}
e . mu . Unlock ( )
engineChanged := deepprint . UpdateHash ( & e . lastEngineSigFull , cfg )
routerChanged := deepprint . UpdateHash ( & e . lastRouterSig , routerCfg )
if ! engineChanged && ! routerChanged {
return ErrNoChanges
}
// See if any peers have changed disco keys, which means they've restarted.
// If so, we need to update the wireguard-go/device.Device in two phases:
// once without the node which has restarted, to clear its wireguard session key,
// and a second time with it.
discoChanged := make ( map [ key . Public ] bool )
{
// prevEP maps each previously configured single-endpoint peer to
// that endpoint, for comparison with the new config.
prevEP := make ( map [ key . Public ] wgcfg . Endpoint )
for i := range e . lastCfgFull . Peers {
if p := & e . lastCfgFull . Peers [ i ] ; len ( p . Endpoints ) == 1 {
prevEP [ key . Public ( p . PublicKey ) ] = p . Endpoints [ 0 ]
}
}
for i := range cfg . Peers {
p := & cfg . Peers [ i ]
if len ( p . Endpoints ) != 1 {
continue
}
pub := key . Public ( p . PublicKey )
if old , ok := prevEP [ pub ] ; ok && old != p . Endpoints [ 0 ] {
discoChanged [ pub ] = true
e . logf ( "wgengine: Reconfig: %s changed from %s to %s" , pub . ShortString ( ) , & old , & p . Endpoints [ 0 ] )
}
}
}
e . lastCfgFull = cfg . Copy ( )
// Tell magicsock about the new (or initial) private key
// (which is needed by DERP) before wgdev gets it, as wgdev
// will start trying to handshake, which we want to be able to
// go over DERP.
if err := e . magicConn . SetPrivateKey ( cfg . PrivateKey ) ; err != nil {
// Logged only; the reconfiguration continues.
e . logf ( "wgengine: Reconfig: SetPrivateKey: %v" , err )
}
e . magicConn . UpdatePeers ( peerSet )
if err := e . maybeReconfigWireguardLocked ( discoChanged ) ; err != nil {
return err
}
if routerChanged {
if routerCfg . DNS . Proxied {
// Forward to the configured nameservers on port 53 from our own
// resolver, and point the router at the Tailscale service IP
// instead.
ips := routerCfg . DNS . Nameservers
upstreams := make ( [ ] net . Addr , len ( ips ) )
for i , ip := range ips {
stdIP := ip . IPAddr ( )
upstreams [ i ] = & net . UDPAddr {
IP : stdIP . IP ,
Port : 53 ,
Zone : stdIP . Zone ,
}
}
e . resolver . SetUpstreams ( upstreams )
routerCfg . DNS . Nameservers = [ ] netaddr . IP { tsaddr . TailscaleServiceIP ( ) }
}
e . logf ( "wgengine: Reconfig: configuring router" )
if err := e . router . Set ( routerCfg ) ; err != nil {
return err
}
}
e . logf ( "wgengine: Reconfig done" )
return nil
}
// GetFilter returns the packet filter reported by the engine's wrapped
// TUN device.
func ( e * userspaceEngine ) GetFilter ( ) * filter . Filter {
return e . tundev . GetFilter ( )
}
// SetFilter hands filt to the engine's wrapped TUN device as its packet
// filter.
func ( e * userspaceEngine ) SetFilter ( filt * filter . Filter ) {
e . tundev . SetFilter ( filt )
}
// SetDNSMap passes dm to the engine's DNS resolver as its map.
func ( e * userspaceEngine ) SetDNSMap ( dm * tsdns . Map ) {
e . resolver . SetMap ( dm )
}
// SetStatusCallback installs cb as the engine's status callback,
// replacing any previous one. It takes e.mu.
func (e *userspaceEngine) SetStatusCallback(cb StatusCallback) {
	e.mu.Lock()
	e.statusCallback = cb
	e.mu.Unlock()
}
// getStatusCallback returns the currently installed status callback,
// which may be nil. It takes e.mu.
func (e *userspaceEngine) getStatusCallback() StatusCallback {
	e.mu.Lock()
	cb := e.statusCallback
	e.mu.Unlock()
	return cb
}
// getStatus builds a Status snapshot from the engine's endpoints, the
// value of magicConn.DERPs, and the per-peer counters and handshake
// times found in wireguard-go's UAPI "get" output.
//
// It returns:
//   - (nil, error) if the engine is closing, or if a line of the UAPI
//     output holds a value that cannot be parsed;
//   - (nil, nil) if the wireguard device has not been created yet;
//   - (status, nil) otherwise.
//
// A failure of the dump as a whole (the scanner or the UAPI call
// itself erroring) is still treated as fatal to the process.
func (e *userspaceEngine) getStatus() (*Status, error) {
	// Grab derpConns before acquiring wgLock to not violate lock ordering;
	// the DERPs method acquires magicsock.Conn.mu.
	// (See comment in userspaceEngine's declaration.)
	derpConns := e.magicConn.DERPs()

	e.wgLock.Lock()
	defer e.wgLock.Unlock()

	e.mu.Lock()
	closing := e.closing
	e.mu.Unlock()
	if closing {
		return nil, errors.New("engine closing; no status")
	}

	if e.wgdev == nil {
		// RequestStatus was invoked before the wgengine has
		// finished initializing. This can happen when wgengine
		// provides a callback to magicsock for endpoint
		// updates that calls RequestStatus.
		return nil, nil
	}

	// lineLen is the max UAPI line we expect. The longest I see is
	// len("preshared_key=")+64 hex+"\n" == 79. Add some slop.
	const lineLen = 100

	pr, pw := io.Pipe()
	// Closing the read side makes pending and future writes on pw
	// fail, so the writer goroutine below cannot stay blocked when we
	// return early on a parse error.
	defer pr.Close()

	errc := make(chan error, 1)
	go func() {
		defer pw.Close()
		bw := bufio.NewWriterSize(pw, lineLen)
		// TODO(apenwarr): get rid of silly uapi stuff for in-process comms
		// FIXME: get notified of status changes instead of polling.
		opts := device.IPCGetFilter{
			// The allowed_ips are somewhat expensive to compute and they're
			// unused below; request that they not be sent instead.
			FilterAllowedIPs: true,
		}
		if err := e.wgdev.IpcGetOperationFiltered(bw, opts); err != nil {
			errc <- fmt.Errorf("IpcGetOperation: %w", err)
			return
		}
		errc <- bw.Flush()
	}()

	pp := make(map[wgcfg.Key]*PeerStatus)
	// p is the peer that subsequent lines apply to. It starts as a
	// throwaway value so lines seen before the first public_key have
	// somewhere to go.
	p := &PeerStatus{}
	var hst1, hst2 int64

	bs := bufio.NewScanner(pr)
	bs.Buffer(make([]byte, lineLen), lineLen)
	for bs.Scan() {
		line := bs.Bytes()
		k := line
		var v mem.RO
		if i := bytes.IndexByte(line, '='); i != -1 {
			k = line[:i]
			v = mem.B(line[i+1:])
		}
		switch string(k) {
		case "public_key":
			pk, err := key.NewPublicFromHexMem(v)
			if err != nil {
				return nil, fmt.Errorf("IpcGetOperation: invalid key in line %q: %w", line, err)
			}
			p = &PeerStatus{NodeKey: tailcfg.NodeKey(pk)}
			pp[wgcfg.Key(pk)] = p
		case "rx_bytes":
			n, err := mem.ParseInt(v, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("IpcGetOperation: rx_bytes invalid in line %q: %w", line, err)
			}
			p.RxBytes = ByteCount(n)
		case "tx_bytes":
			n, err := mem.ParseInt(v, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("IpcGetOperation: tx_bytes invalid in line %q: %w", line, err)
			}
			p.TxBytes = ByteCount(n)
		case "last_handshake_time_sec":
			n, err := mem.ParseInt(v, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("IpcGetOperation: hst1 invalid in line %q: %w", line, err)
			}
			hst1 = n
		case "last_handshake_time_nsec":
			n, err := mem.ParseInt(v, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("IpcGetOperation: hst2 invalid in line %q: %w", line, err)
			}
			hst2 = n
			// The seconds value recorded by the preceding
			// last_handshake_time_sec line is combined with this one.
			if hst1 != 0 || hst2 != 0 {
				p.LastHandshake = time.Unix(hst1, hst2)
			} // else leave at time.IsZero()
		}
	}
	if err := bs.Err(); err != nil {
		log.Fatalf("reading IpcGetOperation output: %v", err)
	}
	if err := <-errc; err != nil {
		log.Fatalf("IpcGetOperation: %v", err)
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	// Report peers in e.peerSequence order, keeping only those that
	// appeared in the UAPI output.
	var peers []PeerStatus
	for _, pk := range e.peerSequence {
		if p, ok := pp[pk]; ok { // ignore idle ones not in wireguard-go's config
			peers = append(peers, *p)
		}
	}

	return &Status{
		LocalAddrs: append([]string(nil), e.endpoints...),
		Peers:      peers,
		DERPs:      derpConns,
	}, nil
}
// RequestStatus computes the engine status and delivers it to the
// registered status callback, coalescing concurrent requests through
// e.reqCh.
//
// getStatus may block inside wireguard for a while and RequestStatus
// is sometimes called from its own goroutine, so piling up callers is
// undesirable; and several simultaneous status computations would all
// report the same thing anyway.
func (e *userspaceEngine) RequestStatus() {
	// Queue at most one request. If one is already running, this
	// queues a follow-up; if one is queued but not yet started, the
	// send fails and nothing changes.
	select {
	case e.reqCh <- struct{}{}:
	default:
	}

	// Take at most one request off the queue. Another caller may have
	// already taken the one queued above; that is fine, because the
	// status it produces is at least as recent as this call.
	select {
	case <-e.reqCh:
	default:
		return
	}

	status, err := e.getStatus()
	if status == nil && err == nil {
		e.logf("RequestStatus: weird: both s and err are nil")
		return
	}
	cb := e.getStatusCallback()
	if cb == nil {
		return
	}
	cb(status, err)
}
// Close shuts the engine down. Only the first call does any work:
// later calls see e.closing already set and return immediately.
//
// The components are closed in a fixed order (resolver, magicConn,
// linkMon, router, wgdev, tundev), then the pingers, and finally
// e.waitCh is closed, which releases callers blocked in Wait.
func ( e * userspaceEngine ) Close ( ) {
// Snapshot of the active pingers, taken under e.mu so they can be
// closed later without holding the lock.
var pingers [ ] * pinger
e . mu . Lock ( )
if e . closing {
e . mu . Unlock ( )
return
}
e . closing = true
for _ , pinger := range e . pingers {
pingers = append ( pingers , pinger )
}
e . mu . Unlock ( )
// Send an empty UAPI "set" to the device before closing it; the
// returned error is ignored.
// NOTE(review): presumably this clears wireguard-go's configuration —
// confirm against the wireguard-go fork in use.
r := bufio . NewReader ( strings . NewReader ( "" ) )
e . wgdev . IpcSetOperation ( r )
e . resolver . Close ( )
e . magicConn . Close ( )
e . linkMon . Close ( )
e . router . Close ( )
e . wgdev . Close ( )
e . tundev . Close ( )
// Shut down pingers after tundev is closed (by e.wgdev.Close) so the
// synchronous close does not get stuck on InjectOutbound.
for _ , pinger := range pingers {
pinger . close ( )
}
// Signal completion to Wait.
close ( e . waitCh )
}
// Wait blocks until a receive on e.waitCh completes. Close closes
// that channel as its final step.
func (e *userspaceEngine) Wait() {
	<-e.waitCh
}
// setLinkState records st as the engine's link state and reports
// whether it differs from the state recorded before; the first state
// ever recorded counts as a change. It also returns the link change
// callback registered at that moment, read under the same lock.
//
// A nil st is ignored and yields (false, nil).
func (e *userspaceEngine) setLinkState(st *interfaces.State) (changed bool, cb func(major bool, newState *interfaces.State)) {
	if st == nil {
		return false, nil
	}

	e.mu.Lock()
	defer e.mu.Unlock()

	prev := e.linkState
	e.linkState = st
	if prev == nil {
		return true, e.linkChangeCallback
	}
	return !st.Equal(prev), e.linkChangeCallback
}
// LinkChange re-reads the link state, records it, and informs
// magicsock. isExpensive is copied into the new state's IsExpensive
// field before it is recorded.
//
// When the recorded state changed, the change is treated as major:
// magicsock is asked to Rebind before the ReSTUN. The registered link
// change callback, if any, is invoked on its own goroutine.
func (e *userspaceEngine) LinkChange(isExpensive bool) {
	st, err := getLinkState()
	if err != nil {
		e.logf("LinkChange: interfaces.GetState: %v", err)
		return
	}
	st.IsExpensive = isExpensive

	major, notify := e.setLinkState(st)
	up := st.AnyInterfaceUp()

	switch {
	case !up:
		e.logf("LinkChange: all links down; pausing: %v", st)
	case major:
		e.logf("LinkChange: major, rebinding. New state: %v", st)
	default:
		e.logf("LinkChange: minor")
	}

	e.magicConn.SetNetworkUp(up)
	if major {
		e.magicConn.Rebind()
		e.magicConn.ReSTUN("link-change-major")
	} else {
		e.magicConn.ReSTUN("link-change-minor")
	}

	if notify != nil {
		go notify(major, st)
	}
}
// SetLinkChangeCallback registers cb as the link change callback,
// replacing any previous one. Passing nil clears the callback.
//
// If a link state has already been recorded and cb is non-nil, cb is
// invoked once on a new goroutine with major=false and that state, so
// a late registrant still learns the current state.
func (e *userspaceEngine) SetLinkChangeCallback(cb func(major bool, newState *interfaces.State)) {
	e.mu.Lock()
	defer e.mu.Unlock()
	e.linkChangeCallback = cb
	// Guard on cb: starting a goroutine on a nil func would panic the
	// whole process.
	if cb != nil && e.linkState != nil {
		go cb(false, e.linkState)
	}
}
// getLinkState returns the result of interfaces.GetState after
// calling RemoveTailscaleInterfaces on it. The error from GetState is
// passed through unchanged, and a nil state is returned as-is.
func getLinkState() (*interfaces.State, error) {
	st, err := interfaces.GetState()
	if st == nil {
		return st, err
	}
	st.RemoveTailscaleInterfaces()
	return st, err
}
// SetNetInfoCallback forwards cb to magicsock's SetNetInfoCallback.
func (e *userspaceEngine) SetNetInfoCallback(cb NetInfoCallback) {
	e.magicConn.SetNetInfoCallback(cb)
}
// SetDERPMap forwards dm to magicsock's SetDERPMap.
func (e *userspaceEngine) SetDERPMap(dm *tailcfg.DERPMap) {
	e.magicConn.SetDERPMap(dm)
}
// SetNetworkMap forwards nm to magicsock's SetNetworkMap.
func (e *userspaceEngine) SetNetworkMap(nm *controlclient.NetworkMap) {
	e.magicConn.SetNetworkMap(nm)
}
// DiscoPublicKey returns the disco key reported by magicsock's
// DiscoPublicKey.
func (e *userspaceEngine) DiscoPublicKey() tailcfg.DiscoKey {
	dk := e.magicConn.DiscoPublicKey()
	return dk
}
// UpdateStatus adds the engine's per-peer wireguard statistics to sb
// and then lets magicsock add its own information.
//
// If getStatus fails, the error is logged and sb is left untouched.
// If getStatus reports no status and no error (the wireguard device
// does not exist yet), no peers are added but magicsock is still
// asked to update sb.
func (e *userspaceEngine) UpdateStatus(sb *ipnstate.StatusBuilder) {
	st, err := e.getStatus()
	if err != nil {
		e.logf("wgengine: getStatus: %v", err)
		return
	}
	// getStatus returns (nil, nil) before the wireguard device has
	// been created; there are no peers to report in that case.
	if st != nil {
		for _, ps := range st.Peers {
			sb.AddPeer(key.Public(ps.NodeKey), &ipnstate.PeerStatus{
				RxBytes:       int64(ps.RxBytes),
				TxBytes:       int64(ps.TxBytes),
				LastHandshake: ps.LastHandshake,
				InEngine:      true,
			})
		}
	}

	e.magicConn.UpdateStatus(sb)
}
// Ping forwards ip and cb to magicsock's Ping.
func (e *userspaceEngine) Ping(ip netaddr.IP, cb func(*ipnstate.PingResult)) {
	e.magicConn.Ping(ip, cb)
}
// diagnoseTUNFailure is called if tun.CreateTUN fails, to poke around
// the system and log diagnostic info that might help explain why TUN
// failed. Because TUN has already failed and the program is about to
// end, we might as well log a lot.
//
// Only Linux has diagnostics; on any other OS a single line saying so
// is logged.
func diagnoseTUNFailure(logf logger.Logf) {
	if runtime.GOOS == "linux" {
		diagnoseLinuxTUNFailure(logf)
		return
	}
	logf("no TUN failure diagnostics for OS %q", runtime.GOOS)
}
// diagnoseLinuxTUNFailure logs, via logf, whatever it can find out
// about why creating a TUN device failed on Linux:
//
//   - the running kernel version (uname -r);
//   - whether `modprobe tun` succeeds and, if so, the state of
//     /dev/net/tun;
//   - otherwise, distro-specific checks (Debian, Arch, OpenWrt) for
//     whether the tun module or its package is present on disk.
//
// It only logs; it never returns an error and changes nothing other
// than what running modprobe itself may do.
func diagnoseLinuxTUNFailure(logf logger.Logf) {
	kernel, err := exec.Command("uname", "-r").Output()
	kernel = bytes.TrimSpace(kernel)
	if err != nil {
		logf("no TUN, and failed to look up kernel version: %v", err)
		return
	}
	logf("Linux kernel version: %s", kernel)

	modprobeOut, err := exec.Command("/sbin/modprobe", "tun").CombinedOutput()
	if err == nil {
		logf("'modprobe tun' successful")
		// Either tun is currently loaded, or it's statically
		// compiled into the kernel (which modprobe checks
		// with /lib/modules/$(uname -r)/modules.builtin)
		//
		// So if there's a problem at this point, it's
		// probably because /dev/net/tun doesn't exist.
		const dev = "/dev/net/tun"
		fi, err := os.Stat(dev)
		switch {
		case err == nil:
			logf("%s: %v", dev, fi.Mode())
		case os.IsNotExist(err):
			logf("tun module loaded in kernel, but %s does not exist", dev)
		default:
			// Don't claim the device is missing when stat failed
			// for some other reason (e.g. permissions).
			logf("tun module loaded in kernel, but stat of %s failed: %v", dev, err)
		}

		// We failed to find why it failed. Just let our
		// caller report the error it got from wireguard-go.
		return
	}
	// Include err as well as the output: when modprobe cannot be run
	// at all, the output is empty and err is the only information.
	logf("is CONFIG_TUN enabled in your kernel? `modprobe tun` failed with: %v: %s", err, modprobeOut)

	// reportModuleOnDisk interprets the output of a command that
	// searches for tun.ko: no usable output means the module is not on
	// disk; output that doesn't mention the running kernel means it is
	// only present for some other kernel version.
	reportModuleOnDisk := func(out []byte, err error) {
		if err != nil || len(bytes.TrimSpace(out)) == 0 {
			logf("tun module not loaded nor found on disk")
			return
		}
		if !bytes.Contains(out, kernel) {
			logf("kernel/drivers/net/tun.ko found on disk, but not for current kernel; are you in middle of a system update and haven't rebooted? found: %s", out)
		}
	}

	switch distro.Get() {
	case distro.Debian:
		reportModuleOnDisk(exec.Command("dpkg", "-S", "kernel/drivers/net/tun.ko").CombinedOutput())
	case distro.Arch:
		reportModuleOnDisk(exec.Command("find", "/lib/modules/", "-path", "*/net/tun.ko*").CombinedOutput())
	case distro.OpenWrt:
		out, err := exec.Command("opkg", "list-installed").CombinedOutput()
		if err != nil {
			logf("error querying OpenWrt installed packages: %v: %s", err, out)
			return
		}
		for _, pkg := range []string{"kmod-tun", "ca-bundle"} {
			if !bytes.Contains(out, []byte(pkg+" - ")) {
				logf("Missing required package %s; run: opkg install %s", pkg, pkg)
			}
		}
	}
}