diff --git a/control/controlclient/auto.go b/control/controlclient/auto.go index e7648ae60..59d560ab6 100644 --- a/control/controlclient/auto.go +++ b/control/controlclient/auto.go @@ -498,6 +498,15 @@ func (c *Client) SetHostinfo(hi *tailcfg.Hostinfo) { c.cancelMapSafely() } +func (c *Client) SetNetInfo(ni *tailcfg.NetInfo) { + if ni == nil { + panic("nil NetInfo") + } + c.direct.SetNetInfo(ni) + // Send new Hostinfo (which includes NetInfo) to server + c.cancelMapSafely() +} + func (c *Client) sendStatus(who string, err error, url string, nm *NetworkMap) { c.mu.Lock() state := c.state diff --git a/control/controlclient/direct.go b/control/controlclient/direct.go index aba2ee3d0..7af66a64b 100644 --- a/control/controlclient/direct.go +++ b/control/controlclient/direct.go @@ -176,6 +176,23 @@ func (c *Direct) SetHostinfo(hi *tailcfg.Hostinfo) { c.hostinfo = hi.Clone() } +// SetNetInfo clones the provided NetInfo and remembers it for the +// next update. +func (c *Direct) SetNetInfo(ni *tailcfg.NetInfo) { + if ni == nil { + panic("nil NetInfo") + } + c.mu.Lock() + defer c.mu.Unlock() + + if c.hostinfo == nil { + c.logf("[unexpected] SetNetInfo called with no HostInfo; ignoring NetInfo update: %+v", ni) + return + } + c.logf("NetInfo: %v\n", ni) + c.hostinfo.NetInfo = ni.Clone() +} + func (c *Direct) GetPersist() Persist { c.mu.Lock() defer c.mu.Unlock() @@ -650,6 +667,12 @@ func encode(v interface{}, serverKey *wgcfg.Key, mkey *wgcfg.PrivateKey) ([]byte if err != nil { return nil, err } + const debugMapRequests = false + if debugMapRequests { + if _, ok := v.(tailcfg.MapRequest); ok { + log.Printf("MapRequest: %s", b) + } + } var nonce [24]byte if _, err := io.ReadFull(rand.Reader, nonce[:]); err != nil { panic(err) diff --git a/ipn/local.go b/ipn/local.go index 83d521d1e..7422483e4 100644 --- a/ipn/local.go +++ b/ipn/local.go @@ -5,7 +5,6 @@ package ipn import ( - "context" "errors" "fmt" "log" @@ -15,7 +14,6 @@ import ( "github.com/tailscale/wireguard-go/wgcfg" "tailscale.com/control/controlclient" - "tailscale.com/netcheck" "tailscale.com/portlist" "tailscale.com/tailcfg" "tailscale.com/types/empty" @@ -84,6 +82,8 @@ func NewLocalBackend(logf logger.Logf, logid string, store StateStore, e wgengin } b.statusChanged = sync.NewCond(&b.statusLock) + e.SetNetInfoCallback(b.SetNetInfo) + if b.portpoll != nil { go b.portpoll.Run() go b.runPoller() @@ -136,7 +136,6 @@ func (b *LocalBackend) Start(opts Options) error { hi := controlclient.NewHostinfo() hi.BackendLogID = b.backendLogID hi.FrontendLogID = opts.FrontendLogID - b.populateNetworkConditions(hi) b.mu.Lock() @@ -783,33 +782,16 @@ func (b *LocalBackend) assertClientLocked() { } } -// populateNetworkConditions spends up to 2 seconds populating hi's -// network condition fields. -// -// TODO: this is currently just done once at start-up, not regularly on -// link changes. This will probably need to be moved & rethought. For now -// we're just gathering some data. -func (b *LocalBackend) populateNetworkConditions(hi *tailcfg.Hostinfo) { - logf := logger.WithPrefix(b.logf, "populateNetworkConditions: ") - ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - - report, err := netcheck.GetReport(ctx, logf) - if err != nil { - logf("GetReport: %v", err) - return +func (b *LocalBackend) SetNetInfo(ni *tailcfg.NetInfo) { + b.mu.Lock() + c := b.c + if b.hiCache != nil { + b.hiCache.NetInfo = ni.Clone() } + b.mu.Unlock() - ni := &tailcfg.NetInfo{ - DERPLatency: map[string]float64{}, - MappingVariesByDestIP: report.MappingVariesByDestIP, - HairPinning: report.HairPinning, - } - for server, d := range report.DERPLatency { - ni.DERPLatency[server] = d.Seconds() + if c == nil { + return } - ni.WorkingIPv6.Set(report.IPv6) - ni.WorkingUDP.Set(report.UDP) - - hi.NetInfo = ni + c.SetNetInfo(ni) } diff --git a/ipn/local_test.go b/ipn/local_test.go deleted file mode 100644 index 2300e1afa..000000000 --- a/ipn/local_test.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright (c) 2020 Tailscale Inc & AUTHORS All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package ipn - -import ( - "flag" - "testing" - - "tailscale.com/tailcfg" -) - -var external = flag.Bool("external", false, "run external network tests") - -func TestPopulateNetworkConditions(t *testing.T) { - if !*external { - t.Skip("skipping network test without -external flag") - } - b := &LocalBackend{logf: t.Logf} - hi := new(tailcfg.Hostinfo) - b.populateNetworkConditions(hi) - t.Logf("Got: %+v", hi) - -} diff --git a/netcheck/netcheck.go b/netcheck/netcheck.go index f0bfdef9b..dd995b89e 100644 --- a/netcheck/netcheck.go +++ b/netcheck/netcheck.go @@ -27,6 +27,7 @@ type Report struct { IPv6 bool // IPv6 works MappingVariesByDestIP opt.Bool // for IPv4 HairPinning opt.Bool // for IPv4 + PreferredDERP int // or 0 for unknown DERPLatency map[string]time.Duration // keyed by STUN host:port // TODO: update Clone when adding new fields @@ -81,6 +82,10 @@ func GetReport(ctx context.Context, logf logger.Logf) (*Report, error) { ret.IPv6 = true } gotIP[server] = ip + + if ret.PreferredDERP == 0 { + ret.PreferredDERP = derpIndexOfSTUNHostPort(server) + } } addHair := func(server, ip string, d time.Duration) { mu.Lock() @@ -204,3 +209,14 @@ func GetReport(ctx context.Context, logf logger.Logf) (*Report, error) { return ret.Clone(), nil } + +// derpIndexOfSTUNHostPort extracts the derp indes from a STUN host:port like +// "derp1-v6.tailscale.com:3478" or "derp2.tailscale.com:3478". +// It returns 0 on unexpected input. +func derpIndexOfSTUNHostPort(hp string) int { + hp = strings.TrimSuffix(hp, ".tailscale.com:3478") + hp = strings.TrimSuffix(hp, "-v6") + hp = strings.TrimPrefix(hp, "derp") + n, _ := strconv.Atoi(hp) + return n // 0 on error is okay +} diff --git a/tailcfg/tailcfg.go b/tailcfg/tailcfg.go index 2408e13d1..32224285f 100644 --- a/tailcfg/tailcfg.go +++ b/tailcfg/tailcfg.go @@ -260,6 +260,17 @@ type NetInfo struct { // WorkingUDP is whether UDP works. WorkingUDP opt.Bool + // PreferredDERP is this node's preferred DERP server + // for incoming traffic. The node might be be temporarily + // connected to multiple DERP servers (to send to other nodes) + // but PreferredDERP is the instance number that the node + // subscribes to traffic at. + // Zero means disconnected or unknown. + PreferredDERP int + + // LinkType is the current link type, if known. + LinkType string // "wired", "wifi", "mobile" (LTE, 4G, 3G, etc) + // DERPLatency is the fastest recent time to reach various // DERP STUN servers, in seconds. The map key is the DERP // server's STUN host:port. @@ -268,6 +279,25 @@ type NetInfo struct { // material change, as any change here also gets uploaded to // the control plane. DERPLatency map[string]float64 `json:",omitempty"` + + // Update Clone and BasicallyEqual when adding fields. +} + +// BasicallyEqual reports whether ni and ni2 are basically equal, ignoring +// changes in DERPLatency. +func (ni *NetInfo) BasicallyEqual(ni2 *NetInfo) bool { + if (ni == nil) != (ni2 == nil) { + return false + } + if ni == nil { + return true + } + return ni.MappingVariesByDestIP == ni2.MappingVariesByDestIP && + ni.HairPinning == ni2.HairPinning && + ni.WorkingIPv6 == ni2.WorkingIPv6 && + ni.WorkingUDP == ni2.WorkingUDP && + ni.PreferredDERP == ni2.PreferredDERP && + ni.LinkType == ni2.LinkType } func (ni *NetInfo) Clone() (res *NetInfo) { diff --git a/tailcfg/tailcfg_test.go b/tailcfg/tailcfg_test.go index e1c7ed4f3..32f30dbdb 100644 --- a/tailcfg/tailcfg_test.go +++ b/tailcfg/tailcfg_test.go @@ -304,3 +304,19 @@ func TestNodeEqual(t *testing.T) { } } } + +func TestNetInfoFields(t *testing.T) { + handled := []string{ + "MappingVariesByDestIP", + "HairPinning", + "WorkingIPv6", + "WorkingUDP", + "PreferredDERP", + "LinkType", + "DERPLatency", + } + if have := fieldsOf(reflect.TypeOf(NetInfo{})); !reflect.DeepEqual(have, handled) { + t.Errorf("NetInfo.Clone/BasicallyEqually check might be out of sync\nfields: %q\nhandled: %q\n", + have, handled) + } +} diff --git a/wgengine/magicsock/derpmap.go b/wgengine/magicsock/derpmap.go index c45659b4e..574a84591 100644 --- a/wgengine/magicsock/derpmap.go +++ b/wgengine/magicsock/derpmap.go @@ -19,8 +19,9 @@ const DerpMagicIP = "127.3.3.40" var derpMagicIP = net.ParseIP(DerpMagicIP).To4() var ( - derpHostOfIndex = map[int]string{} // index (fake port number) -> hostname + derpHostOfIndex = map[int]string{} // node ID index (fake port number) -> hostname derpIndexOfHost = map[string]int{} // derpHostOfIndex reversed + derpNodeID []int ) const ( @@ -42,6 +43,7 @@ func addDerper(i int, host string) { } derpHostOfIndex[i] = host derpIndexOfHost[host] = i + derpNodeID = append(derpNodeID, i) } // derpHost returns the hostname of a DERP server index (a fake port diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index b70f19133..dc511b213 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -11,7 +11,9 @@ import ( "encoding/binary" "errors" "fmt" + "hash/fnv" "log" + "math/rand" "net" "os" "strconv" @@ -28,9 +30,12 @@ import ( "tailscale.com/derp" "tailscale.com/derp/derphttp" "tailscale.com/interfaces" + "tailscale.com/netcheck" "tailscale.com/stun" "tailscale.com/stunner" + "tailscale.com/tailcfg" "tailscale.com/types/key" + "tailscale.com/types/logger" "tailscale.com/version" ) @@ -66,6 +71,10 @@ type Conn struct { // Its Loaded value is always non-nil. stunReceiveFunc atomic.Value // of func(p []byte, fromAddr *net.UDPAddr) + netInfoMu sync.Mutex + netInfoFunc func(*tailcfg.NetInfo) // nil until set + netInfoLast *tailcfg.NetInfo + udpRecvCh chan udpReadResult derpRecvCh chan derpReadResult @@ -204,15 +213,17 @@ func (c *Conn) epUpdate(ctx context.Context) { go func() { defer close(lastDone) - nearestDerp, endpoints, err := c.determineEndpoints(epCtx) + + c.updateNetInfo() // best effort + + endpoints, err := c.determineEndpoints(epCtx) if err != nil { c.logf("magicsock.Conn: endpoint update failed: %v", err) // TODO(crawshaw): are there any conditions under which // we should trigger a retry based on the error here? return } - derpChanged := c.setNearestDerp(nearestDerp) - if stringsEqual(endpoints, lastEndpoints) && !derpChanged { + if stringsEqual(endpoints, lastEndpoints) { return } lastEndpoints = endpoints @@ -222,6 +233,98 @@ func (c *Conn) epUpdate(ctx context.Context) { } } +func (c *Conn) updateNetInfo() { + logf := logger.WithPrefix(c.logf, "updateNetInfo: ") + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + report, err := netcheck.GetReport(ctx, logf) + if err != nil { + logf("GetReport: %v", err) + return + } + + ni := &tailcfg.NetInfo{ + DERPLatency: map[string]float64{}, + MappingVariesByDestIP: report.MappingVariesByDestIP, + HairPinning: report.HairPinning, + } + for server, d := range report.DERPLatency { + ni.DERPLatency[server] = d.Seconds() + } + ni.WorkingIPv6.Set(report.IPv6) + ni.WorkingUDP.Set(report.UDP) + ni.PreferredDERP = report.PreferredDERP + + if ni.PreferredDERP == 0 { + // Perhaps UDP is blocked. Pick a deterministic but arbitrary + // one. + ni.PreferredDERP = c.pickDERPFallback() + } + c.setNearestDerp(ni.PreferredDERP) + + // TODO: set link type + + c.callNetInfoCallback(ni) +} + +var processStartUnixNano = time.Now().UnixNano() + +// pickDERPFallback returns a non-zero but deterministic DERP node to +// connect to. This is only used if netcheck couldn't find the +// nearest one (for instance, if UDP is blocked and thus STUN latency +// checks aren't working). +func (c *Conn) pickDERPFallback() int { + c.derpMu.Lock() + defer c.derpMu.Unlock() + + if c.myDerp != 0 { + // If we already had one in the past, stay on it. + return c.myDerp + } + + if len(derpNodeID) == 0 { + // No DERP nodes registered. + return 0 + } + + h := fnv.New64() + h.Write([]byte(fmt.Sprintf("%p/%d", c, processStartUnixNano))) // arbitrary + return derpNodeID[rand.New(rand.NewSource(int64(h.Sum64()))).Intn(len(derpNodeID))] +} + +// callNetInfoCallback calls the NetInfo callback (if previously +// registered with SetNetInfoCallback) if ni has substantially changed +// since the last state. +// +// callNetInfoCallback takes ownership of ni. +func (c *Conn) callNetInfoCallback(ni *tailcfg.NetInfo) { + c.netInfoMu.Lock() + defer c.netInfoMu.Unlock() + if ni.BasicallyEqual(c.netInfoLast) { + return + } + c.netInfoLast = ni + if c.netInfoFunc != nil { + c.logf("netInfo update: %+v", ni) + go c.netInfoFunc(ni) + } +} + +func (c *Conn) SetNetInfoCallback(fn func(*tailcfg.NetInfo)) { + if fn == nil { + panic("nil NetInfoCallback") + } + c.netInfoMu.Lock() + last := c.netInfoLast + c.netInfoFunc = fn + c.netInfoMu.Unlock() + + if last != nil { + fn(last) + } +} + func (c *Conn) setNearestDerp(derpNum int) (changed bool) { c.derpMu.Lock() defer c.derpMu.Unlock() @@ -236,8 +339,7 @@ func (c *Conn) setNearestDerp(derpNum int) (changed bool) { // determineEndpoints returns the machine's endpoint addresses. It // does a STUN lookup to determine its public address. -func (c *Conn) determineEndpoints(ctx context.Context) (nearestDerp int, ipPorts []string, err error) { - nearestDerp = derpNYC // for now +func (c *Conn) determineEndpoints(ctx context.Context) (ipPorts []string, err error) { var ( alreadyMu sync.Mutex already = make(map[string]bool) // endpoint -> true @@ -265,7 +367,7 @@ func (c *Conn) determineEndpoints(ctx context.Context) (nearestDerp int, ipPorts c.stunReceiveFunc.Store(s.Receive) if err := s.Run(ctx); err != nil { - return 0, nil, err + return nil, err } c.ignoreSTUNPackets() @@ -273,7 +375,7 @@ func (c *Conn) determineEndpoints(ctx context.Context) (nearestDerp int, ipPorts if localAddr := c.pconn.LocalAddr(); localAddr.IP.IsUnspecified() { ips, loopback, err := interfaces.LocalAddresses() if err != nil { - return 0, nil, err + return nil, err } reason := "localAddresses" if len(ips) == 0 { @@ -303,7 +405,7 @@ func (c *Conn) determineEndpoints(ctx context.Context) (nearestDerp int, ipPorts // The STUN address(es) are always first so that legacy wireguard // can use eps[0] as its only known endpoint address (although that's // obviously non-ideal). - return nearestDerp, eps, nil + return eps, nil } func stringsEqual(x, y []string) bool { diff --git a/wgengine/magicsock/magicsock_test.go b/wgengine/magicsock/magicsock_test.go index cd596837d..3d3589666 100644 --- a/wgengine/magicsock/magicsock_test.go +++ b/wgengine/magicsock/magicsock_test.go @@ -80,3 +80,42 @@ func TestDerpIPConstant(t *testing.T) { t.Errorf("derpMagicIP is len %d; want 4", len(derpMagicIP)) } } + +func TestPickDERPFallback(t *testing.T) { + if len(derpNodeID) == 0 { + t.Fatal("no DERP nodes registered; this test needs an update after DERP node runtime discovery") + } + + c := new(Conn) + a := c.pickDERPFallback() + if a == 0 { + t.Fatalf("pickDERPFallback returned 0") + } + + // Test that it's consistent. + for i := 0; i < 50; i++ { + b := c.pickDERPFallback() + if a != b { + t.Fatalf("got inconsistent %d vs %d values", a, b) + } + } + + // Test that that the pointer value of c is blended in and + // distribution over nodes works. + got := map[int]int{} + for i := 0; i < 50; i++ { + c = new(Conn) + got[c.pickDERPFallback()]++ + } + t.Logf("distribution: %v", got) + if len(got) < 2 { + t.Errorf("expected more than 1 node; got %v", got) + } + + // Test that stickiness works. + const someNode = 123456 + c.myDerp = someNode + if got := c.pickDERPFallback(); got != someNode { + t.Errorf("not sticky: got %v; want %v", got, someNode) + } +} diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 7940900ac..32cfb3d34 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -603,3 +603,7 @@ func (e *userspaceEngine) LinkChange(isExpensive bool) { e.logf("IpcSetOperation: %v\n", err) } } + +func (e *userspaceEngine) SetNetInfoCallback(cb NetInfoCallback) { + e.magicConn.SetNetInfoCallback(cb) +} diff --git a/wgengine/watchdog.go b/wgengine/watchdog.go index 5172f4b29..d6a89abe8 100644 --- a/wgengine/watchdog.go +++ b/wgengine/watchdog.go @@ -69,6 +69,9 @@ func (e *watchdogEngine) SetFilter(filt *filter.Filter) { func (e *watchdogEngine) SetStatusCallback(cb StatusCallback) { e.watchdog("SetStatusCallback", func() { e.wrap.SetStatusCallback(cb) }) } +func (e *watchdogEngine) SetNetInfoCallback(cb NetInfoCallback) { + e.watchdog("SetNetInfoCallback", func() { e.wrap.SetNetInfoCallback(cb) }) +} func (e *watchdogEngine) RequestStatus() { e.watchdog("RequestStatus", func() { e.wrap.RequestStatus() }) } diff --git a/wgengine/wgengine.go b/wgengine/wgengine.go index e9c7ae73c..ab6b5747b 100644 --- a/wgengine/wgengine.go +++ b/wgengine/wgengine.go @@ -40,6 +40,9 @@ type Status struct { // Exactly one of Status or error is non-nil. type StatusCallback func(*Status, error) +// NetInfoCallback is the type used by Engine.SetNetInfoCallback. +type NetInfoCallback func(*tailcfg.NetInfo) + // RouteSettings is the full WireGuard config data (set of peers keys, // IP, etc in wgcfg.Config) plus the things that WireGuard doesn't do // itself, like DNS stuff. @@ -123,4 +126,8 @@ type Engine interface { // where sending packets uses substantial power or money, // such as mobile data on a phone. LinkChange(isExpensive bool) + + // SetNetInfoCallback sets the function to call when a + // new NetInfo summary is available. + SetNetInfoCallback(NetInfoCallback) }