diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index 62b8ad4aa..e8e189677 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -51,7 +51,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/logtail/backoff from tailscale.com/control/controlclient+ tailscale.com/metrics from tailscale.com/derp tailscale.com/net/dnscache from tailscale.com/control/controlclient+ - tailscale.com/net/flowtrack from tailscale.com/wgengine/filter + tailscale.com/net/flowtrack from tailscale.com/wgengine/filter+ 💣 tailscale.com/net/interfaces from tailscale.com/cmd/tailscale/cli+ tailscale.com/net/netcheck from tailscale.com/cmd/tailscale/cli+ tailscale.com/net/netns from tailscale.com/control/controlclient+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 9798ad6eb..811052f7d 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -86,7 +86,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/logtail/filch from tailscale.com/logpolicy tailscale.com/metrics from tailscale.com/derp tailscale.com/net/dnscache from tailscale.com/control/controlclient+ - tailscale.com/net/flowtrack from tailscale.com/wgengine/filter + tailscale.com/net/flowtrack from tailscale.com/wgengine/filter+ 💣 tailscale.com/net/interfaces from tailscale.com/ipn+ tailscale.com/net/netcheck from tailscale.com/wgengine/magicsock tailscale.com/net/netns from tailscale.com/control/controlclient+ diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index c31914a85..d80307e5f 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -710,14 +710,46 @@ func peerForIP(nm *controlclient.NetworkMap, ip netaddr.IP) (n *tailcfg.Node, ok } } } + + // TODO(bradfitz): this is O(n peers). Add ART to netaddr? + var best netaddr.IPPrefix for _, p := range nm.Peers { for _, cidr := range p.AllowedIPs { if cidr.Contains(ip) { - return p, true + if best.IsZero() || cidr.Bits > best.Bits { + n = p + best = cidr + } } } } - return nil, false + return n, n != nil +} + +// PeerForIP returns the node that ip should route to. +func (c *Conn) PeerForIP(ip netaddr.IP) (n *tailcfg.Node, ok bool) { + c.mu.Lock() + defer c.mu.Unlock() + if c.netMap == nil { + return + } + return peerForIP(c.netMap, ip) +} + +// LastRecvActivityOfDisco returns the time we last got traffic from +// this endpoint (updated every ~10 seconds). +func (c *Conn) LastRecvActivityOfDisco(dk tailcfg.DiscoKey) time.Time { + c.mu.Lock() + defer c.mu.Unlock() + de, ok := c.endpointOfDisco[dk] + if !ok { + return time.Time{} + } + unix := atomic.LoadInt64(&de.lastRecvUnixAtomic) + if unix == 0 { + return time.Time{} + } + return time.Unix(unix, 0) } // Ping handles a "tailscale ping" CLI query. diff --git a/wgengine/pendopen.go b/wgengine/pendopen.go new file mode 100644 index 000000000..dc1a21fec --- /dev/null +++ b/wgengine/pendopen.go @@ -0,0 +1,157 @@ +// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package wgengine + +import ( + "os" + "strconv" + "time" + + "tailscale.com/net/flowtrack" + "tailscale.com/net/packet" + "tailscale.com/wgengine/filter" + "tailscale.com/wgengine/tstun" +) + +const tcpTimeoutBeforeDebug = 5 * time.Second + +// debugConnectFailures reports whether the local node should track +// outgoing TCP connections and log which ones fail and why. +func debugConnectFailures() bool { + s := os.Getenv("TS_DEBUG_CONNECT_FAILURES") + if s == "" { + return true + } + v, _ := strconv.ParseBool(s) + return v +} + +type pendingOpenFlow struct { + timer *time.Timer // until giving up on the flow +} + +func (e *userspaceEngine) trackOpenPreFilterIn(pp *packet.Parsed, t *tstun.TUN) (res filter.Response) { + res = filter.Accept // always + + if pp.IPVersion == 0 || + pp.IPProto != packet.TCP || + pp.TCPFlags&(packet.TCPSyn|packet.TCPRst) == 0 { + return + } + + flow := flowtrack.Tuple{Dst: pp.Src, Src: pp.Dst} // src/dst reversed + + e.mu.Lock() + defer e.mu.Unlock() + of, ok := e.pendOpen[flow] + if !ok { + // Not a tracked flow. + return + } + of.timer.Stop() + delete(e.pendOpen, flow) + + if pp.TCPFlags&packet.TCPRst != 0 { + // TODO(bradfitz): have peer send a IP proto 99 "why" + // packet first with details and log that instead + // (e.g. ACL prohibited, shields up, etc). + e.logf("open-conn-track: flow %v got RST by peer", flow) + return + } + + return +} + +func (e *userspaceEngine) trackOpenPostFilterOut(pp *packet.Parsed, t *tstun.TUN) (res filter.Response) { + res = filter.Accept // always + + if pp.IPVersion == 0 || + pp.IPProto != packet.TCP || + pp.TCPFlags&packet.TCPSyn == 0 { + return + } + + flow := flowtrack.Tuple{Src: pp.Src, Dst: pp.Dst} + timer := time.AfterFunc(tcpTimeoutBeforeDebug, func() { + e.onOpenTimeout(flow) + }) + + e.mu.Lock() + defer e.mu.Unlock() + if e.pendOpen == nil { + e.pendOpen = make(map[flowtrack.Tuple]*pendingOpenFlow) + } + if _, dup := e.pendOpen[flow]; dup { + // Duplicates are expected when the OS retransmits. Ignore. + return + } + e.pendOpen[flow] = &pendingOpenFlow{timer: timer} + + return filter.Accept +} + +func (e *userspaceEngine) onOpenTimeout(flow flowtrack.Tuple) { + e.mu.Lock() + if _, ok := e.pendOpen[flow]; !ok { + // Not a tracked flow, or already handled & deleted. + e.mu.Unlock() + return + } + delete(e.pendOpen, flow) + e.mu.Unlock() + + // Diagnose why it might've timed out. + n, ok := e.magicConn.PeerForIP(flow.Dst.IP) + if !ok { + e.logf("open-conn-track: timeout opening %v; no associated peer node", flow) + return + } + if n.DiscoKey.IsZero() { + e.logf("open-conn-track: timeout opening %v; peer node %v running pre-0.100", flow, n.Key.ShortString()) + return + } + if n.DERP == "" { + e.logf("open-conn-track: timeout opening %v; peer node %v not connected to any DERP relay", flow, n.Key.ShortString()) + return + } + var lastSeen time.Time + if n.LastSeen != nil { + lastSeen = *n.LastSeen + } + + var ps *PeerStatus + if st, err := e.getStatus(); err == nil { + for _, v := range st.Peers { + if v.NodeKey == n.Key { + v := v // copy + ps = &v + } + } + } else { + e.logf("open-conn-track: timeout opening %v to node %v; failed to get engine status: %v", flow, n.Key.ShortString(), err) + return + } + if ps == nil { + e.logf("open-conn-track: timeout opening %v; target node %v in netmap but unknown to wireguard", flow, n.Key.ShortString()) + return + } + + // TODO(bradfitz): figure out what PeerStatus.LastHandshake + // is. It appears to be the last time we sent a handshake, + // which isn't what I expected. I thought it was when a + // handshake completed, which is what I want. + _ = ps.LastHandshake + + e.logf("open-conn-track: timeout opening %v to node %v; lastSeen=%v, lastRecv=%v", + flow, n.Key.ShortString(), + agoOrNever(lastSeen), agoOrNever(e.magicConn.LastRecvActivityOfDisco(n.DiscoKey))) +} + +func agoOrNever(t time.Time) string { + if t.IsZero() { + return "never" + } + return time.Since(t).Round(time.Second).String() +} diff --git a/wgengine/userspace.go b/wgengine/userspace.go index 6b7d23526..32ef52d47 100644 --- a/wgengine/userspace.go +++ b/wgengine/userspace.go @@ -30,6 +30,7 @@ import ( "tailscale.com/control/controlclient" "tailscale.com/internal/deepprint" "tailscale.com/ipn/ipnstate" + "tailscale.com/net/flowtrack" "tailscale.com/net/interfaces" "tailscale.com/net/packet" "tailscale.com/net/tsaddr" @@ -118,6 +119,7 @@ type userspaceEngine struct { endpoints []string pingers map[wgkey.Key]*pinger // legacy pingers for pre-discovery peers linkState *interfaces.State + pendOpen map[flowtrack.Tuple]*pendingOpenFlow // see pendopen.go // Lock ordering: magicsock.Conn.mu, wgLock, then mu. } @@ -265,6 +267,17 @@ func newUserspaceEngineAdvanced(conf EngineConfig) (_ Engine, reterr error) { } e.tundev.PreFilterOut = e.handleLocalPackets + if debugConnectFailures() { + if e.tundev.PreFilterIn != nil { + return nil, errors.New("unexpected PreFilterIn already set") + } + e.tundev.PreFilterIn = e.trackOpenPreFilterIn + if e.tundev.PostFilterOut != nil { + return nil, errors.New("unexpected PostFilterOut already set") + } + e.tundev.PostFilterOut = e.trackOpenPostFilterOut + } + // wireguard-go logs as it starts and stops routines. // Silence those; there are a lot of them, and they're just noise. allowLogf := func(s string) bool {