// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package wgengine

import (
"bufio"
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"
"io"
"log"
"net"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/tailscale/wireguard-go/device"
"github.com/tailscale/wireguard-go/tun"
"github.com/tailscale/wireguard-go/wgcfg"
"go4.org/mem"
"inet.af/netaddr"
"tailscale.com/control/controlclient"
"tailscale.com/internal/deepprint"
"tailscale.com/ipn/ipnstate"
"tailscale.com/net/interfaces"
"tailscale.com/net/tsaddr"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/version"
"tailscale.com/wgengine/filter"
"tailscale.com/wgengine/magicsock"
"tailscale.com/wgengine/monitor"
"tailscale.com/wgengine/packet"
"tailscale.com/wgengine/router"
"tailscale.com/wgengine/tsdns"
"tailscale.com/wgengine/tstun"
)
// minimalMTU is the MTU we set on tailscale's tuntap
// interface. wireguard-go defaults to 1420 bytes, which only works if
// the "outer" MTU is 1500 bytes. This breaks on DSL connections
// (typically 1492 MTU) and on GCE (1460 MTU?!).
//
// 1280 is the smallest MTU allowed for IPv6, which is a sensible
// "probably works everywhere" setting until we develop proper PMTU
// discovery.
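//
// For reference, a rough back-of-the-envelope for wireguard-go's 1420
// default (assuming the usual WireGuard framing): a transport data
// packet adds 32 bytes of WireGuard overhead (4 type + 4 receiver index
// + 8 counter + 16 Poly1305 tag) plus the outer headers, so
// 1500 - 40 (IPv6) - 8 (UDP) - 32 = 1420. On a 1492-byte DSL link the
// same arithmetic leaves only 1412 bytes for the inner packet, hence
// the breakage described above.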
const minimalMTU = 1280
const (
magicDNSIP = 0x64646464 // 100.100.100.100
magicDNSPort = 53
)
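
// (Illustrative: packet.IP stores an IPv4 address as a big-endian
// uint32, so each 100 octet of 100.100.100.100 is 0x64 and the address
// packs to 0x64646464; for example,
// packet.IP(binary.BigEndian.Uint32(net.IPv4(100, 100, 100, 100).To4()))
// would yield the same value as magicDNSIP.)
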
// Lazy wireguard-go configuration parameters.
const (
// lazyPeerIdleThreshold is the idle duration after
// which we remove a peer from the wireguard configuration.
// (This includes peers that have never been active, which
// effectively have infinite idleness)
lazyPeerIdleThreshold = 5 * time . Minute
// packetSendTimeUpdateFrequency controls how often we record
// the time that we wrote a packet to an IP address.
packetSendTimeUpdateFrequency = 10 * time . Second
// packetSendRecheckWireguardThreshold controls how long we can go
// between packet sends to an IP before checking to see
// whether this IP address needs to be added back to the
// Wireguard peer config.
packetSendRecheckWireguardThreshold = 1 * time . Minute
)
type userspaceEngine struct {
logf logger . Logf
reqCh chan struct { }
waitCh chan struct { } // chan is closed when first Close call completes; contrast with closing bool
timeNow func ( ) time . Time
tundev * tstun . TUN
wgdev * device . Device
router router . Router
resolver * tsdns . Resolver
magicConn * magicsock . Conn
linkMon * monitor . Mon
testMaybeReconfigHook func ( ) // for tests; if non-nil, fires if maybeReconfigWireguardLocked called
// localAddrs is the set of IP addresses assigned to the local
// tunnel interface. It's used to reflect local packets
// incorrectly sent to us.
localAddrs atomic . Value // of map[packet.IP]bool
wgLock sync . Mutex // serializes all wgdev operations; see lock order comment below
lastCfgFull wgcfg . Config
lastRouterSig string // of router.Config
lastEngineSigFull string // of full wireguard config
lastEngineSigTrim string // of trimmed wireguard config
recvActivityAt map [ tailcfg . DiscoKey ] time . Time
sentActivityAt map [ packet . IP ] * int64 // value is atomic int64 of unixtime
destIPActivityFuncs map [ packet . IP ] func ( )
mu sync . Mutex // guards following; see lock order comment below
closing bool // Close was called (even if we're still closing)
statusCallback StatusCallback
peerSequence [ ] wgcfg . Key
endpoints [ ] string
pingers map [ wgcfg . Key ] * pinger // legacy pingers for pre-discovery peers
linkState * interfaces . State
// Lock ordering: magicsock.Conn.mu, wgLock, then mu.
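// For example (illustrative), getStatus and Reconfig below take wgLock
// before mu and never the reverse, and getStatus fetches DERP info from
// magicsock before acquiring wgLock, keeping anything that takes
// magicsock.Conn.mu earliest in the order.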
}
// RouterGen is the signature for a function that creates a
// router.Router.
type RouterGen func ( router . InitConfig ) ( router . Router , error )
type EngineConfig struct {
// Logf is the logging function used by the engine.
Logf logger . Logf
// TUN is the tun device used by the engine.
TUN tun . Device
// RouterGen is the function used to instantiate the router.
RouterGen RouterGen
// ListenPort is the port on which the engine will listen.
ListenPort uint16
// Fake determines whether this engine is running in fake mode,
// which disables OS-level configuration such as DNS and routes, and makes
// the engine respond to all inbound ICMP Echo requests itself.
Fake bool
}
type Loggify struct {
f logger . Logf
}
func ( l * Loggify ) Write ( b [ ] byte ) ( int , error ) {
l . f ( string ( b ) )
return len ( b ) , nil
}
func NewFakeUserspaceEngine ( logf logger . Logf , listenPort uint16 ) ( Engine , error ) {
logf ( "Starting userspace wireguard engine (FAKE tuntap device)." )
conf := EngineConfig {
Logf : logf ,
TUN : tstun . NewFakeTUN ( ) ,
RouterGen : router . NewFake ,
ListenPort : listenPort ,
Fake : true ,
}
return NewUserspaceEngineAdvanced ( conf )
}
// NewUserspaceEngine creates the named tun device and returns a
// Tailscale Engine running on it.
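//
// Illustrative use (error handling elided; the interface name and port
// are examples only):
//
//	e, err := NewUserspaceEngine(log.Printf, "tailscale0", 41641)
//	if err != nil {
//		// handle error
//	}
//	defer e.Close()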
func NewUserspaceEngine ( logf logger . Logf , tunname string , listenPort uint16 ) ( Engine , error ) {
if tunname == "" {
return nil , fmt . Errorf ( "--tun name must not be blank" )
}
logf ( "Starting userspace wireguard engine with tun device %q" , tunname )
tun , err := tun . CreateTUN ( tunname , minimalMTU )
if err != nil {
diagnoseTUNFailure ( logf )
logf ( "CreateTUN: %v" , err )
return nil , err
}
logf ( "CreateTUN ok." )
conf := EngineConfig {
Logf : logf ,
TUN : tun ,
RouterGen : router . New ,
ListenPort : listenPort ,
}
e , err := NewUserspaceEngineAdvanced ( conf )
if err != nil {
return nil , err
}
return e , err
}
// NewUserspaceEngineAdvanced is like NewUserspaceEngine
// but provides control over all config fields.
func NewUserspaceEngineAdvanced ( conf EngineConfig ) ( Engine , error ) {
return newUserspaceEngineAdvanced ( conf )
}
func newUserspaceEngineAdvanced ( conf EngineConfig ) ( _ Engine , reterr error ) {
logf := conf . Logf
rconf := tsdns . ResolverConfig {
Logf : conf . Logf ,
Forward : true ,
}
e := & userspaceEngine {
timeNow : time . Now ,
logf : logf ,
reqCh : make ( chan struct { } , 1 ) ,
waitCh : make ( chan struct { } ) ,
tundev : tstun . WrapTUN ( logf , conf . TUN ) ,
resolver : tsdns . NewResolver ( rconf ) ,
pingers : make ( map [ wgcfg . Key ] * pinger ) ,
}
e . localAddrs . Store ( map [ packet . IP ] bool { } )
e . linkState , _ = getLinkState ( )
logf ( "link state: %+v" , e . linkState )
// Respond to all pings only in fake mode.
if conf . Fake {
e . tundev . PostFilterIn = echoRespondToAll
}
e . tundev . PreFilterOut = e . handleLocalPackets
mon , err := monitor . New ( logf , func ( ) { e . LinkChange ( false ) } )
if err != nil {
e . tundev . Close ( )
return nil , err
}
e . linkMon = mon
endpointsFn := func ( endpoints [ ] string ) {
e . mu . Lock ( )
e . endpoints = append ( e . endpoints [ : 0 ] , endpoints ... )
e . mu . Unlock ( )
e . RequestStatus ( )
}
magicsockOpts := magicsock . Options {
Logf : logf ,
Port : conf . ListenPort ,
EndpointsFunc : endpointsFn ,
DERPActiveFunc : e . RequestStatus ,
IdleFunc : e . tundev . IdleDuration ,
NoteRecvActivity : e . noteReceiveActivity ,
}
e . magicConn , err = magicsock . NewConn ( magicsockOpts )
if err != nil {
e . tundev . Close ( )
return nil , fmt . Errorf ( "wgengine: %v" , err )
}
// flags==0 because logf is already nested in another logger.
// The outer one can display the preferred log prefixes, etc.
dlog := log . New ( & Loggify { logf } , "" , 0 )
logger := device . Logger {
Debug : dlog ,
Info : dlog ,
Error : dlog ,
}
opts := & device . DeviceOptions {
Logger : & logger ,
HandshakeDone : func ( peerKey wgcfg . Key , peer * device . Peer , deviceAllowedIPs * device . AllowedIPs ) {
// Send an unsolicited status event every time a
// handshake completes. This makes sure our UI can
// update quickly as soon as it connects to a peer.
//
// We use a goroutine here to avoid deadlocking
// wireguard, since RequestStatus() will call back
// into it, and wireguard is what called us to get
// here.
go e . RequestStatus ( )
if e . magicConn . PeerHasDiscoKey ( tailcfg . NodeKey ( peerKey ) ) {
e . logf ( "wireguard handshake complete for %v" , peerKey . ShortString ( ) )
// This is a modern peer with discovery support. No need to send pings.
return
}
e . logf ( "wireguard handshake complete for %v; sending legacy pings" , peerKey . ShortString ( ) )
// Ping every single-IP that peer routes.
// These synthetic packets are used to traverse NATs.
var ips [ ] wgcfg . IP
allowedIPs := deviceAllowedIPs . EntriesForPeer ( peer )
for _ , ipNet := range allowedIPs {
if ones , bits := ipNet . Mask . Size ( ) ; ones == bits && ones != 0 {
var ip wgcfg . IP
copy ( ip . Addr [ : ] , ipNet . IP . To16 ( ) )
ips = append ( ips , ip )
}
}
if len ( ips ) > 0 {
go e . pinger ( peerKey , ips )
} else {
logf ( "[unexpected] peer %s has no single-IP routes: %v" , peerKey . ShortString ( ) , allowedIPs )
}
} ,
CreateBind : e . magicConn . CreateBind ,
CreateEndpoint : e . magicConn . CreateEndpoint ,
SkipBindUpdate : true ,
}
// wgdev takes ownership of tundev, will close it when closed.
e . wgdev = device . NewDevice ( e . tundev , opts )
defer func ( ) {
if reterr != nil {
e . wgdev . Close ( )
}
} ( )
routerCfg := router . InitConfig {
Logf : logf ,
Wgdev : e . wgdev ,
// Pass the unwrapped tun device (not the tstun wrapper) to the router:
// routers do not Read or Write packets, but they do need access to the
// native interface.
Tun : conf . TUN ,
Resolver : e . resolver ,
}
e . router , err = conf . RouterGen ( routerCfg )
if err != nil {
e . magicConn . Close ( )
return nil , err
}
go func ( ) {
up := false
for event := range e . tundev . Events ( ) {
if event & tun . EventMTUUpdate != 0 {
mtu , err := e . tundev . MTU ( )
e . logf ( "external route MTU: %d (%v)" , mtu , err )
}
if event & tun . EventUp != 0 && ! up {
e . logf ( "external route: up" )
e . RequestStatus ( )
up = true
}
if event & tun . EventDown != 0 && up {
e . logf ( "external route: down" )
e . RequestStatus ( )
up = false
}
}
} ( )
e . wgdev . Up ( )
if err := e . router . Up ( ) ; err != nil {
e . magicConn . Close ( )
e . wgdev . Close ( )
return nil , err
}
// TODO(danderson): we should delete this. It's pointless to apply
// a no-op config here.
if err := e . router . Set ( nil ) ; err != nil {
e . magicConn . Close ( )
e . wgdev . Close ( )
return nil , err
}
e . linkMon . Start ( )
e . magicConn . Start ( )
e . resolver . Start ( )
go e . pollResolver ( )
return e , nil
}
// echoRespondToAll is an inbound post-filter responding to all echo requests.
func echoRespondToAll ( p * packet . ParsedPacket , t * tstun . TUN ) filter . Response {
if p . IsEchoRequest ( ) {
header := p . ICMPHeader ( )
header . ToResponse ( )
packet := packet . Generate ( & header , p . Payload ( ) )
t . InjectOutbound ( packet )
// We already handled it, stop.
return filter . Drop
}
return filter . Accept
}
// handleLocalPackets inspects packets coming from the local network
// stack, and intercepts any packets that should be handled by
// tailscaled directly. Other packets are allowed to proceed into the
// main ACL filter.
func ( e * userspaceEngine ) handleLocalPackets ( p * packet . ParsedPacket , t * tstun . TUN ) filter . Response {
if verdict := e . handleDNS ( p , t ) ; verdict == filter . Drop {
// local DNS handled the packet.
return filter . Drop
}
if runtime . GOOS == "darwin" && e . isLocalAddr ( p . DstIP ) {
// macOS NetworkExtension directs packets destined to the
// tunnel's local IP address into the tunnel, instead of
// looping back within the kernel network stack. We have to
// notice that an outbound packet is actually destined for
// ourselves, and loop it back into macOS.
t . InjectInboundCopy ( p . Buffer ( ) )
return filter . Drop
}
return filter . Accept
}
func ( e * userspaceEngine ) isLocalAddr ( ip packet . IP ) bool {
localAddrs , ok := e . localAddrs . Load ( ) . ( map [ packet . IP ] bool )
if ! ok {
e . logf ( "[unexpected] e.localAddrs was nil, can't check for loopback packet" )
return false
}
return localAddrs [ ip ]
}
// handleDNS is an outbound pre-filter resolving Tailscale domains.
func ( e * userspaceEngine ) handleDNS ( p * packet . ParsedPacket , t * tstun . TUN ) filter . Response {
if p . DstIP == magicDNSIP && p . DstPort == magicDNSPort && p . IPProto == packet . UDP {
request := tsdns . Packet {
Payload : append ( [ ] byte ( nil ) , p . Payload ( ) ... ) ,
Addr : netaddr . IPPort { IP : p . SrcIP . Netaddr ( ) , Port : p . SrcPort } ,
}
err := e . resolver . EnqueueRequest ( request )
if err != nil {
e . logf ( "tsdns: enqueue: %v" , err )
}
return filter . Drop
}
return filter . Accept
}
// pollResolver reads responses from the DNS resolver and injects them inbound.
func ( e * userspaceEngine ) pollResolver ( ) {
for {
resp , err := e . resolver . NextResponse ( )
if err == tsdns . ErrClosed {
return
}
if err != nil {
e . logf ( "tsdns: error: %v" , err )
continue
}
h := packet . UDPHeader {
IPHeader : packet . IPHeader {
SrcIP : packet . IP ( magicDNSIP ) ,
DstIP : packet . IPFromNetaddr ( resp . Addr . IP ) ,
} ,
SrcPort : magicDNSPort ,
DstPort : resp . Addr . Port ,
}
hlen := h . Len ( )
// TODO(dmytro): avoid this allocation without importing tstun quirks into tsdns.
const offset = tstun . PacketStartOffset
buf := make ( [ ] byte , offset + hlen + len ( resp . Payload ) )
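// Illustrative layout of buf once the copies below run: PacketStartOffset
// bytes of headroom required by tstun, then the marshaled UDP/IP header,
// then the DNS payload:
//
//	[ offset headroom ][ header (hlen bytes) ][ resp.Payload ]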
copy ( buf [ offset + hlen : ] , resp . Payload )
h . Marshal ( buf [ offset : ] )
e . tundev . InjectInboundDirect ( buf , offset )
}
}
// pinger sends ping packets for a few seconds.
//
// These generated packets are used to ensure we trigger the spray logic in
// the magicsock package for NAT traversal.
//
// These are only used with legacy peers (before 0.100.0) that don't
// have advertised discovery keys.
type pinger struct {
e * userspaceEngine
done chan struct { } // closed after shutdown (not the ctx.Done() chan)
cancel context . CancelFunc
}
// close cleans up pinger and removes it from the userspaceEngine.pingers map.
// It cannot be called while p.e.mu is held.
func ( p * pinger ) close ( ) {
p . cancel ( )
<- p . done
}
func ( p * pinger ) run ( ctx context . Context , peerKey wgcfg . Key , ips [ ] wgcfg . IP , srcIP packet . IP ) {
defer func ( ) {
p . e . mu . Lock ( )
if p . e . pingers [ peerKey ] == p {
delete ( p . e . pingers , peerKey )
}
p . e . mu . Unlock ( )
close ( p . done )
} ( )
header := packet . ICMPHeader {
IPHeader : packet . IPHeader {
SrcIP : srcIP ,
} ,
Type : packet . ICMPEchoRequest ,
Code : packet . ICMPNoCode ,
}
// sendFreq is slightly longer than sprayFreq in magicsock to ensure
// that, if these ping packets are the only early packets sent to the
// peer, each one will be sprayed.
const sendFreq = 300 * time . Millisecond
const stopAfter = 3 * time . Second
start := time . Now ( )
var dstIPs [ ] packet . IP
for _ , ip := range ips {
dstIPs = append ( dstIPs , packet . NewIP ( ip . IP ( ) ) )
}
payload := [ ] byte ( "magicsock_spray" ) // no meaning
header . IPID = 1
t := time . NewTicker ( sendFreq )
defer t . Stop ( )
for {
select {
case <- ctx . Done ( ) :
return
case <- t . C :
}
if time . Since ( start ) > stopAfter {
return
}
for _ , dstIP := range dstIPs {
header . DstIP = dstIP
// InjectOutbound takes ownership of the packet, so we allocate.
b := packet . Generate ( & header , payload )
p . e . tundev . InjectOutbound ( b )
}
header . IPID ++
}
}
// pinger sends ping packets for a few seconds.
//
// These generated packets are used to ensure we trigger the spray logic in
// the magicsock package for NAT traversal.
//
// This is only used with legacy peers (before 0.100.0) that don't
// have advertised discovery keys.
func ( e * userspaceEngine ) pinger ( peerKey wgcfg . Key , ips [ ] wgcfg . IP ) {
e . logf ( "generating initial ping traffic to %s (%v)" , peerKey . ShortString ( ) , ips )
var srcIP packet . IP
e . wgLock . Lock ( )
if len ( e . lastCfgFull . Addresses ) > 0 {
srcIP = packet . NewIP ( e . lastCfgFull . Addresses [ 0 ] . IP . IP ( ) )
}
e . wgLock . Unlock ( )
if srcIP == 0 {
e . logf ( "generating initial ping traffic: no source IP" )
return
}
ctx , cancel := context . WithCancel ( context . Background ( ) )
p := & pinger {
e : e ,
done : make ( chan struct { } ) ,
cancel : cancel ,
}
e . mu . Lock ( )
if e . closing {
e . mu . Unlock ( )
return
}
oldPinger := e . pingers [ peerKey ]
e . pingers [ peerKey ] = p
e . mu . Unlock ( )
if oldPinger != nil {
oldPinger . close ( )
}
p . run ( ctx , peerKey , ips , srcIP )
}
var (
debugTrimWireguardEnv = os . Getenv ( "TS_DEBUG_TRIM_WIREGUARD" )
debugTrimWireguard , _ = strconv . ParseBool ( debugTrimWireguardEnv )
)
// forceFullWireguardConfig reports whether we should give wireguard
// our full network map, even for inactive peers.
//
// TODO(bradfitz): remove this after our 1.0 launch; we don't want to
// enable wireguard config trimming quite yet because it just landed
// and we haven't got enough time testing it.
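//
// The TS_DEBUG_TRIM_WIREGUARD environment variable, when set, wins over
// everything else: for example (illustrative), running tailscaled with
// TS_DEBUG_TRIM_WIREGUARD=true enables trimming (i.e. this reports
// false), TS_DEBUG_TRIM_WIREGUARD=false forces the full config, and
// leaving it unset falls through to the control-plane TrimWGConfig
// setting and the defaults below.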
func forceFullWireguardConfig ( numPeers int ) bool {
// Did the user explicitly set the trimming behavior via the environment variable knob?
if debugTrimWireguardEnv != "" {
return ! debugTrimWireguard
}
if opt := controlclient . TrimWGConfig ( ) ; opt != "" {
return ! opt . EqualBool ( true )
}
// On iOS with large networks, it's critical, so turn on trimming.
// Otherwise we run out of memory from wireguard-go goroutine stacks+buffers.
// This will be the default later for all platforms and network sizes.
iOS := runtime . GOOS == "darwin" && version . IsMobile ( )
if iOS && numPeers > 50 {
return false
}
return true
}
// isTrimmablePeer reports whether p is a peer that we can trim out of the
// network map.
//
// We can only trim peers that both a) support discovery (because we
// know who they are when we receive their data and don't need to rely
// on wireguard-go figuring it out) and b) for implementation
// simplicity, have only one IP address (an IPv4 /32), which is the
// common case for most peers. Subnet router nodes will just always be
// created in the wireguard-go config.
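//
// For example (illustrative addresses): a peer whose single endpoint
// host ends in ".disco.tailscale" and whose only allowed IP is
// 100.101.102.103/32 is trimmable; a peer that also advertises a subnet
// route such as 192.168.0.0/24 is not, and always stays in the
// wireguard-go config.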
func isTrimmablePeer ( p * wgcfg . Peer , numPeers int ) bool {
if forceFullWireguardConfig ( numPeers ) {
return false
}
if len ( p . AllowedIPs ) != 1 || len ( p . Endpoints ) != 1 {
return false
}
if ! strings . HasSuffix ( p . Endpoints [ 0 ] . Host , ".disco.tailscale" ) {
return false
}
aip := p . AllowedIPs [ 0 ]
// TODO: IPv6 support, once we support IPv6 within the tunnel. In that case,
// len(p.AllowedIPs) probably will be more than 1.
if aip . Mask != 32 || ! aip . IP . Is4 ( ) {
return false
}
return true
}
// noteReceiveActivity is called by magicsock when a packet has been received
// from the peer using discovery key dk. Magicsock calls this no more than
// every 10 seconds for a given peer.
func ( e * userspaceEngine ) noteReceiveActivity ( dk tailcfg . DiscoKey ) {
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
was , ok := e . recvActivityAt [ dk ]
if ! ok {
// Not a trimmable peer we care about tracking. (See isTrimmablePeer)
return
}
now := e . timeNow ( )
e . recvActivityAt [ dk ] = now
// If the last activity time jumped a bunch (say, at least
// half the idle timeout) then see if we need to reprogram
// Wireguard. This could probably be just
// lazyPeerIdleThreshold without the divide by 2, but
// maybeReconfigWireguardLocked is cheap enough to call every
// couple minutes (just not on every packet).
if was . IsZero ( ) || now . Sub ( was ) > lazyPeerIdleThreshold / 2 {
e . maybeReconfigWireguardLocked ( )
}
}
// isActiveSince reports whether the peer identified by (dk, ip) has
// had a packet sent to or received from it since t.
//
// e.wgLock must be held.
func ( e * userspaceEngine ) isActiveSince ( dk tailcfg . DiscoKey , ip wgcfg . IP , t time . Time ) bool {
if e . recvActivityAt [ dk ] . After ( t ) {
return true
}
pip := packet . IP ( binary . BigEndian . Uint32 ( ip . Addr [ 12 : ] ) )
timePtr , ok := e . sentActivityAt [ pip ]
if ! ok {
return false
}
unixTime := atomic . LoadInt64 ( timePtr )
return unixTime >= t . Unix ( )
}
// discoKeyFromPeer returns the DiscoKey for a wireguard config's Peer.
//
// Invariant: isTrimmablePeer(p) == true, so it should have 1 endpoint with
// Host of form "<64-hex-digits>.disco.tailscale". If invariant is violated,
// we return the zero value.
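//
// For example (hypothetical host name), an endpoint host of the form
//
//	<64 hex chars>.disco.tailscale
//
// has its first 64 characters hex-decoded into the 32-byte disco key;
// anything shorter, or with a non-hex prefix, yields the zero DiscoKey.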
func discoKeyFromPeer ( p * wgcfg . Peer ) tailcfg . DiscoKey {
host := p . Endpoints [ 0 ] . Host
if len ( host ) < 64 {
return tailcfg . DiscoKey { }
}
k , err := key . NewPublicFromHexMem ( mem . S ( host [ : 64 ] ) )
if err != nil {
return tailcfg . DiscoKey { }
}
return tailcfg . DiscoKey ( k )
}
// e.wgLock must be held.
func ( e * userspaceEngine ) maybeReconfigWireguardLocked ( ) error {
if hook := e . testMaybeReconfigHook ; hook != nil {
hook ( )
return nil
}
full := e . lastCfgFull
// Compute a minimal config to pass to wireguard-go
// based on the full config. Prune off all the peers
// and only add the active ones back.
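// (Illustrative example: with 200 peers in the full config, of which
// 190 are trimmable and 8 of those were active within
// lazyPeerIdleThreshold, the config handed to wireguard-go below ends
// up with 10 + 8 = 18 peers.)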
min := full
min . Peers = nil
// We'll only keep a peer around if it's been active in
// the past 5 minutes. That's more than WireGuard's key
// rotation time anyway so it's no harm if we remove it
// later if it's been inactive.
activeCutoff := e . timeNow ( ) . Add ( - lazyPeerIdleThreshold )
// Not all peers can be trimmed from the network map (see
// isTrimmablePeer). For those that are trimmable, keep track
// of their DiscoKey and Tailscale IPs. These are the ones
// we'll need to install tracking hooks for to watch their
// send/receive activity.
trackDisco := make ( [ ] tailcfg . DiscoKey , 0 , len ( full . Peers ) )
trackIPs := make ( [ ] wgcfg . IP , 0 , len ( full . Peers ) )
for i := range full . Peers {
p := & full . Peers [ i ]
if ! isTrimmablePeer ( p , len ( full . Peers ) ) {
min . Peers = append ( min . Peers , * p )
continue
}
tsIP := p . AllowedIPs [ 0 ] . IP
dk := discoKeyFromPeer ( p )
trackDisco = append ( trackDisco , dk )
trackIPs = append ( trackIPs , tsIP )
if e . isActiveSince ( dk , tsIP , activeCutoff ) {
min . Peers = append ( min . Peers , * p )
}
}
if ! deepprint . UpdateHash ( & e . lastEngineSigTrim , min ) {
// No changes
return nil
}
e . updateActivityMapsLocked ( trackDisco , trackIPs )
e . logf ( "wgengine: Reconfig: configuring userspace wireguard config (with %d/%d peers)" , len ( min . Peers ) , len ( full . Peers ) )
if err := e . wgdev . Reconfig ( & min ) ; err != nil {
e . logf ( "wgdev.Reconfig: %v" , err )
return err
}
return nil
}
// updateActivityMapsLocked updates the data structures used for tracking the activity
// of wireguard peers that we might add/remove dynamically from the real config
// as given to wireguard-go.
//
// e.wgLock must be held.
func ( e * userspaceEngine ) updateActivityMapsLocked ( trackDisco [ ] tailcfg . DiscoKey , trackIPs [ ] wgcfg . IP ) {
// Generate the new map of which discokeys we want to track
// receive times for.
mr := map [ tailcfg . DiscoKey ] time . Time { } // TODO: only recreate this if set of keys changed
for _ , dk := range trackDisco {
// Preserve old times in the new map, but also
// populate map entries for new trackDisco values with
// time.Time{} zero values. (Only entries in this map
// are tracked, so the Time zero values allow it to be
// tracked later)
mr [ dk ] = e . recvActivityAt [ dk ]
}
e . recvActivityAt = mr
oldTime := e . sentActivityAt
e . sentActivityAt = make ( map [ packet . IP ] * int64 , len ( oldTime ) )
oldFunc := e . destIPActivityFuncs
e . destIPActivityFuncs = make ( map [ packet . IP ] func ( ) , len ( oldFunc ) )
for _ , wip := range trackIPs {
pip := packet . IP ( binary . BigEndian . Uint32 ( wip . Addr [ 12 : ] ) )
timePtr := oldTime [ pip ]
if timePtr == nil {
timePtr = new ( int64 )
}
e . sentActivityAt [ pip ] = timePtr
fn := oldFunc [ pip ]
if fn == nil {
// This is the func that gets run on every outgoing packet for tracked IPs:
fn = func ( ) {
now := e . timeNow ( ) . Unix ( )
old := atomic . LoadInt64 ( timePtr )
// How long's it been since we last sent a packet?
// For our first packet, old is Unix epoch time 0 (1970).
elapsedSec := now - old
if elapsedSec >= int64 ( packetSendTimeUpdateFrequency / time . Second ) {
atomic . StoreInt64 ( timePtr , now )
}
// On a big jump, assume we might no longer be in the wireguard
// config and go check.
if elapsedSec >= int64 ( packetSendRecheckWireguardThreshold / time . Second ) {
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
e . maybeReconfigWireguardLocked ( )
}
}
}
e . destIPActivityFuncs [ pip ] = fn
}
e . tundev . SetDestIPActivityFuncs ( e . destIPActivityFuncs )
}
func ( e * userspaceEngine ) Reconfig ( cfg * wgcfg . Config , routerCfg * router . Config ) error {
if routerCfg == nil {
panic ( "routerCfg must not be nil" )
}
localAddrs := map [ packet . IP ] bool { }
for _ , addr := range routerCfg . LocalAddrs {
// TODO: ipv6
if ! addr . IP . Is4 ( ) {
continue
}
localAddrs [ packet . IPFromNetaddr ( addr . IP ) ] = true
}
e . localAddrs . Store ( localAddrs )
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
peerSet := make ( map [ key . Public ] struct { } , len ( cfg . Peers ) )
e . mu . Lock ( )
e . peerSequence = e . peerSequence [ : 0 ]
for _ , p := range cfg . Peers {
e . peerSequence = append ( e . peerSequence , p . PublicKey )
peerSet [ key . Public ( p . PublicKey ) ] = struct { } { }
}
e . mu . Unlock ( )
engineChanged := deepprint . UpdateHash ( & e . lastEngineSigFull , cfg )
routerChanged := deepprint . UpdateHash ( & e . lastRouterSig , routerCfg )
if ! engineChanged && ! routerChanged {
return ErrNoChanges
}
e . lastCfgFull = cfg . Copy ( )
// Tell magicsock about the new (or initial) private key
// (which is needed by DERP) before wgdev gets it, as wgdev
// will start trying to handshake, which we want to be able to
// go over DERP.
if err := e . magicConn . SetPrivateKey ( cfg . PrivateKey ) ; err != nil {
e . logf ( "wgengine: Reconfig: SetPrivateKey: %v" , err )
}
e . magicConn . UpdatePeers ( peerSet )
if err := e . maybeReconfigWireguardLocked ( ) ; err != nil {
return err
}
if routerChanged {
if routerCfg . DNS . Proxied {
if len ( routerCfg . DNS . Nameservers ) == 0 {
routerCfg . DNS . PerDomain = true
} else {
ips := routerCfg . DNS . Nameservers
upstreams := make ( [ ] net . Addr , len ( ips ) )
for i , ip := range ips {
stdIP := ip . IPAddr ( )
upstreams [ i ] = & net . UDPAddr {
IP : stdIP . IP ,
Port : 53 ,
Zone : stdIP . Zone ,
}
}
e . resolver . SetUpstreams ( upstreams )
}
routerCfg . DNS . Nameservers = [ ] netaddr . IP { tsaddr . TailscaleServiceIP ( ) }
}
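// (Illustrative outcome of the proxied case: with upstream nameservers
// 8.8.8.8 and 1.1.1.1, the OS resolver config handed to the router
// points only at 100.100.100.100, while tsdns forwards queries to
// 8.8.8.8:53 and 1.1.1.1:53 over UDP.)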
e . logf ( "wgengine: Reconfig: configuring router" )
if err := e . router . Set ( routerCfg ) ; err != nil {
return err
}
}
e . logf ( "wgengine: Reconfig done" )
return nil
}
func ( e * userspaceEngine ) GetFilter ( ) * filter . Filter {
return e . tundev . GetFilter ( )
}
func ( e * userspaceEngine ) SetFilter ( filt * filter . Filter ) {
e . tundev . SetFilter ( filt )
}
func ( e * userspaceEngine ) SetDNSMap ( dm * tsdns . Map ) {
e . resolver . SetMap ( dm )
}
func ( e * userspaceEngine ) SetStatusCallback ( cb StatusCallback ) {
e . mu . Lock ( )
defer e . mu . Unlock ( )
e . statusCallback = cb
}
func ( e * userspaceEngine ) getStatusCallback ( ) StatusCallback {
e . mu . Lock ( )
defer e . mu . Unlock ( )
return e . statusCallback
}
// TODO: this function returns an error but it's always nil, and when
// there's actually a problem it just calls log.Fatal. Why?
func ( e * userspaceEngine ) getStatus ( ) ( * Status , error ) {
// Grab derpConns before acquiring wgLock to not violate lock ordering;
// the DERPs method acquires magicsock.Conn.mu.
// (See comment in userspaceEngine's declaration.)
derpConns := e . magicConn . DERPs ( )
e . wgLock . Lock ( )
defer e . wgLock . Unlock ( )
e . mu . Lock ( )
closing := e . closing
e . mu . Unlock ( )
if closing {
return nil , errors . New ( "engine closing; no status" )
}
if e . wgdev == nil {
// RequestStatus was invoked before the wgengine has
// finished initializing. This can happen when wgengine
// provides a callback to magicsock for endpoint
// updates that calls RequestStatus.
return nil , nil
}
// lineLen is the max UAPI line we expect. The longest I see is
// len("preshared_key=")+64 hex+"\n" == 79. Add some slop.
const lineLen = 100
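// The stream parsed below is a sequence of key=value lines; the ones
// this parser cares about look like, for example (illustrative values):
//
//	public_key=<64 hex chars>
//	rx_bytes=14764
//	tx_bytes=5816
//	last_handshake_time_sec=1592862781
//	last_handshake_time_nsec=123456789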
pr , pw := io . Pipe ( )
errc := make ( chan error , 1 )
go func ( ) {
defer pw . Close ( )
bw := bufio . NewWriterSize ( pw , lineLen )
// TODO(apenwarr): get rid of silly uapi stuff for in-process comms
// FIXME: get notified of status changes instead of polling.
filter := device . IPCGetFilter {
// The allowed_ips are somewhat expensive to compute and they're
// unused below; request that they not be sent instead.
FilterAllowedIPs : true ,
}
if err := e . wgdev . IpcGetOperationFiltered ( bw , filter ) ; err != nil {
errc <- fmt . Errorf ( "IpcGetOperation: %w" , err )
return
}
errc <- bw . Flush ( )
} ( )
pp := make ( map [ wgcfg . Key ] * PeerStatus )
p := & PeerStatus { }
var hst1 , hst2 , n int64
var err error
bs := bufio . NewScanner ( pr )
bs . Buffer ( make ( [ ] byte , lineLen ) , lineLen )
for bs . Scan ( ) {
line := bs . Bytes ( )
k := line
var v mem . RO
if i := bytes . IndexByte ( line , '=' ) ; i != - 1 {
k = line [ : i ]
v = mem . B ( line [ i + 1 : ] )
}
switch string ( k ) {
case "public_key" :
pk , err := key . NewPublicFromHexMem ( v )
if err != nil {
log . Fatalf ( "IpcGetOperation: invalid key %#v" , v )
}
p = & PeerStatus { }
pp [ wgcfg . Key ( pk ) ] = p
key := tailcfg . NodeKey ( pk )
p . NodeKey = key
case "rx_bytes" :
n , err = mem . ParseInt ( v , 10 , 64 )
p . RxBytes = ByteCount ( n )
if err != nil {
log . Fatalf ( "IpcGetOperation: rx_bytes invalid: %#v" , line )
}
case "tx_bytes" :
n , err = mem . ParseInt ( v , 10 , 64 )
p . TxBytes = ByteCount ( n )
if err != nil {
log . Fatalf ( "IpcGetOperation: tx_bytes invalid: %#v" , line )
}
case "last_handshake_time_sec" :
hst1 , err = mem . ParseInt ( v , 10 , 64 )
if err != nil {
log . Fatalf ( "IpcGetOperation: hst1 invalid: %#v" , line )
}
case "last_handshake_time_nsec" :
hst2 , err = mem . ParseInt ( v , 10 , 64 )
if err != nil {
log . Fatalf ( "IpcGetOperation: hst2 invalid: %#v" , line )
}
if hst1 != 0 || hst2 != 0 {
p . LastHandshake = time . Unix ( hst1 , hst2 )
} // else leave at time.IsZero()
}
}
if err := bs . Err ( ) ; err != nil {
log . Fatalf ( "reading IpcGetOperation output: %v" , err )
}
if err := <- errc ; err != nil {
log . Fatalf ( "IpcGetOperation: %v" , err )
}
e . mu . Lock ( )
defer e . mu . Unlock ( )
var peers [ ] PeerStatus
for _ , pk := range e . peerSequence {
if p , ok := pp [ pk ] ; ok { // ignore idle ones not in wireguard-go's config
peers = append ( peers , * p )
}
}
return & Status {
LocalAddrs : append ( [ ] string ( nil ) , e . endpoints ... ) ,
Peers : peers ,
DERPs : derpConns ,
} , nil
}
func ( e * userspaceEngine ) RequestStatus ( ) {
// This is slightly tricky. e.getStatus() can theoretically get
// blocked inside wireguard for a while, and RequestStatus() is
// sometimes called from a goroutine, so we don't want a lot of
// them hanging around. On the other hand, requesting multiple
// status updates simultaneously is pointless anyway; they will
// all say the same thing.
// Enqueue at most one request. If one is in progress already, this
// adds one more to the queue. If one has been requested but not
// started, it is a no-op.
select {
case e . reqCh <- struct { } { } :
default :
}
// Dequeue at most one request. Another thread may have already
// dequeued the request we enqueued above, which is fine, since the
// information is guaranteed to be at least as recent as the current
// call to RequestStatus().
select {
case <- e . reqCh :
s , err := e . getStatus ( )
if s == nil && err == nil {
e . logf ( "RequestStatus: weird: both s and err are nil" )
return
}
if cb := e . getStatusCallback ( ) ; cb != nil {
cb ( s , err )
}
default :
}
}
func ( e * userspaceEngine ) Close ( ) {
var pingers [ ] * pinger
e . mu . Lock ( )
if e . closing {
e . mu . Unlock ( )
return
}
e . closing = true
for _ , pinger := range e . pingers {
pingers = append ( pingers , pinger )
}
e . mu . Unlock ( )
r := bufio . NewReader ( strings . NewReader ( "" ) )
e . wgdev . IpcSetOperation ( r )
e . resolver . Close ( )
e . magicConn . Close ( )
e . linkMon . Close ( )
e . router . Close ( )
e . wgdev . Close ( )
// Shut down pingers after tundev is closed (by e.wgdev.Close) so the
// synchronous close does not get stuck on InjectOutbound.
for _ , pinger := range pingers {
pinger . close ( )
}
close ( e . waitCh )
}
func ( e * userspaceEngine ) Wait ( ) {
<- e . waitCh
}
func ( e * userspaceEngine ) setLinkState ( st * interfaces . State ) ( changed bool ) {
if st == nil {
return false
}
e . mu . Lock ( )
defer e . mu . Unlock ( )
changed = e . linkState == nil || ! st . Equal ( e . linkState )
e . linkState = st
return changed
}
func ( e * userspaceEngine ) LinkChange ( isExpensive bool ) {
cur , err := getLinkState ( )
if err != nil {
e . logf ( "LinkChange: interfaces.GetState: %v" , err )
return
}
cur . IsExpensive = isExpensive
needRebind := e . setLinkState ( cur )
if needRebind {
e . logf ( "LinkChange: major, rebinding. New state: %+v" , cur )
} else {
e . logf ( "LinkChange: minor" )
}
why := "link-change-minor"
if needRebind {
why = "link-change-major"
e . magicConn . Rebind ( )
}
e . magicConn . ReSTUN ( why )
}
func getLinkState ( ) ( * interfaces . State , error ) {
s , err := interfaces . GetState ( )
if s != nil {
s . RemoveTailscaleInterfaces ( )
}
return s , err
}
func ( e * userspaceEngine ) SetNetInfoCallback ( cb NetInfoCallback ) {
e . magicConn . SetNetInfoCallback ( cb )
}
func ( e * userspaceEngine ) SetDERPMap ( dm * tailcfg . DERPMap ) {
e . magicConn . SetDERPMap ( dm )
}
func ( e * userspaceEngine ) SetNetworkMap ( nm * controlclient . NetworkMap ) {
e . magicConn . SetNetworkMap ( nm )
}
func ( e * userspaceEngine ) DiscoPublicKey ( ) tailcfg . DiscoKey {
return e . magicConn . DiscoPublicKey ( )
}
func ( e * userspaceEngine ) UpdateStatus ( sb * ipnstate . StatusBuilder ) {
st , err := e . getStatus ( )
if err != nil {
e . logf ( "wgengine: getStatus: %v" , err )
return
}
for _ , ps := range st . Peers {
sb . AddPeer ( key . Public ( ps . NodeKey ) , & ipnstate . PeerStatus {
RxBytes : int64 ( ps . RxBytes ) ,
TxBytes : int64 ( ps . TxBytes ) ,
LastHandshake : ps . LastHandshake ,
InEngine : true ,
} )
}
e . magicConn . UpdateStatus ( sb )
}
func ( e * userspaceEngine ) Ping ( ip netaddr . IP , cb func ( * ipnstate . PingResult ) ) {
e . magicConn . Ping ( ip , cb )
}
// diagnoseTUNFailure is called if tun.CreateTUN fails, to poke around
// the system and log some diagnostic info that might help debug why
// TUN failed. Because TUN's already failed and the program's about
// to end, we might as well log a lot.
func diagnoseTUNFailure ( logf logger . Logf ) {
switch runtime . GOOS {
case "linux" :
diagnoseLinuxTUNFailure ( logf )
default :
logf ( "no TUN failure diagnostics for OS %q" , runtime . GOOS )
}
}
func diagnoseLinuxTUNFailure ( logf logger . Logf ) {
kernel , err := exec . Command ( "uname" , "-r" ) . Output ( )
kernel = bytes . TrimSpace ( kernel )
if err != nil {
logf ( "no TUN, and failed to look up kernel version: %v" , err )
return
}
logf ( "Linux kernel version: %s" , kernel )
modprobeOut , err := exec . Command ( "/sbin/modprobe" , "tun" ) . CombinedOutput ( )
if err == nil {
logf ( "'modprobe tun' successful" )
// Either tun is currently loaded, or it's statically
// compiled into the kernel (which modprobe checks
// with /lib/modules/$(uname -r)/modules.builtin)
//
// So if there's a problem at this point, it's
// probably because /dev/net/tun doesn't exist.
const dev = "/dev/net/tun"
if fi , err := os . Stat ( dev ) ; err != nil {
logf ( "tun module loaded in kernel, but %s does not exist" , dev )
} else {
logf ( "%s: %v" , dev , fi . Mode ( ) )
}
// We failed to find why it failed. Just let our
// caller report the error it got from wireguard-go.
return
}
logf ( "is CONFIG_TUN enabled in your kernel? `modprobe tun` failed with: %s" , modprobeOut )
distro := linuxDistro ( )
switch distro {
case "debian" :
dpkgOut , err := exec . Command ( "dpkg" , "-S" , "kernel/drivers/net/tun.ko" ) . CombinedOutput ( )
if len ( bytes . TrimSpace ( dpkgOut ) ) == 0 || err != nil {
logf ( "tun module not loaded nor found on disk" )
return
}
if ! bytes . Contains ( dpkgOut , kernel ) {
logf ( "kernel/drivers/net/tun.ko found on disk, but not for current kernel; are you in middle of a system update and haven't rebooted? found: %s" , dpkgOut )
}
case "arch" :
findOut , err := exec . Command ( "find" , "/lib/modules/" , "-path" , "*/net/tun.ko*" ) . CombinedOutput ( )
if len ( bytes . TrimSpace ( findOut ) ) == 0 || err != nil {
logf ( "tun module not loaded nor found on disk" )
return
}
if ! bytes . Contains ( findOut , kernel ) {
logf ( "kernel/drivers/net/tun.ko found on disk, but not for current kernel; are you in middle of a system update and haven't rebooted? found: %s" , findOut )
}
}
}
func linuxDistro ( ) string {
if _ , err := os . Stat ( "/etc/debian_version" ) ; err == nil {
return "debian"
}
if _ , err := os . Stat ( "/etc/arch-release" ) ; err == nil {
return "arch"
}
return ""
}