From 77d3ef36f4a5c934ca751990655cf4e0fcb224df Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Wed, 1 Jul 2020 12:56:17 -0700 Subject: [PATCH] wgengine/magicsock: hook up discovery messages, upgrade to LAN works Ping messages now go out somewhat regularly, pong replies are sent, and pong replies are now partially handled enough to upgrade off DERP to LAN. CallMeMaybe packets are sent & received over DERP, but aren't yet handled. That's next (and regular maintenance timers), and then WAN should work. Updates #483 --- disco/disco.go | 6 ++ go.mod | 2 +- go.sum | 2 + wgengine/magicsock/magicsock.go | 154 ++++++++++++++++++++++++++------ 4 files changed, 135 insertions(+), 29 deletions(-) diff --git a/disco/disco.go b/disco/disco.go index e8bed16fc..2ede08ca7 100644 --- a/disco/disco.go +++ b/disco/disco.go @@ -28,6 +28,12 @@ import ( "inet.af/netaddr" ) +// Magic is the 6 byte header of all discovery messages. +const Magic = "TS💬" // 6 bytes: 0x54 53 f0 9f 92 ac + +// NonceLen is the length of the nonces used by nacl secretboxes. +const NonceLen = 24 + type MessageType byte const ( diff --git a/go.mod b/go.mod index 2227aa42c..0254776af 100644 --- a/go.mod +++ b/go.mod @@ -29,6 +29,6 @@ require ( golang.org/x/sys v0.0.0-20200501052902-10377860bb8e golang.org/x/time v0.0.0-20191024005414-555d28b269f0 honnef.co/go/tools v0.0.1-2020.1.4 // indirect - inet.af/netaddr v0.0.0-20200701171350-6509743f79d9 + inet.af/netaddr v0.0.0-20200701194149-10bc159763c4 rsc.io/goversion v1.2.0 ) diff --git a/go.sum b/go.sum index d01a0c902..f19bc30af 100644 --- a/go.sum +++ b/go.sum @@ -164,5 +164,7 @@ inet.af/netaddr v0.0.0-20200629220211-f44a6d25c536 h1:XFVw2MVOtmHBidx70M+I6vIw2F inet.af/netaddr v0.0.0-20200629220211-f44a6d25c536/go.mod h1:qqYzz/2whtrbWJvt+DNWQyvekNN4ePQZcg2xc2/Yjww= inet.af/netaddr v0.0.0-20200701171350-6509743f79d9 h1:F41nQsn8UGDPDXsOPwZQiaK8BmItPzyb0PYgjJSYIzw= inet.af/netaddr v0.0.0-20200701171350-6509743f79d9/go.mod h1:qqYzz/2whtrbWJvt+DNWQyvekNN4ePQZcg2xc2/Yjww= +inet.af/netaddr v0.0.0-20200701194149-10bc159763c4 h1:dSXrLpRy86h4wfZWvzjyA2tuhMzX/lx0st4hzAh1VMA= +inet.af/netaddr v0.0.0-20200701194149-10bc159763c4/go.mod h1:qqYzz/2whtrbWJvt+DNWQyvekNN4ePQZcg2xc2/Yjww= rsc.io/goversion v1.2.0 h1:SPn+NLTiAG7w30IRK/DKp1BjvpWabYgxlLp/+kx5J8w= rsc.io/goversion v1.2.0/go.mod h1:Eih9y/uIBS3ulggl7KNJ09xGSLcuNaLgmvvqa07sgfo= diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index c24610076..d1b65a35e 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -9,6 +9,7 @@ package magicsock import ( "bufio" "context" + crand "crypto/rand" "encoding/binary" "errors" "fmt" @@ -93,6 +94,7 @@ type Conn struct { peerSet map[key.Public]struct{} discoPrivate key.Private + discoPublic tailcfg.DiscoKey // public of discoPrivate nodeOfDisco map[tailcfg.DiscoKey]*tailcfg.Node discoOfNode map[tailcfg.NodeKey]tailcfg.DiscoKey @@ -511,7 +513,8 @@ func (c *Conn) SetDiscoPrivateKey(k key.Private) { panic("unsupported") } c.discoPrivate = k - c.logf("magicsock: disco key set; public: %x", k.Public()) + c.discoPublic = tailcfg.DiscoKey(k.Public()) + c.logf("magicsock: disco key set; public: %x", c.discoPublic) } // c.mu must NOT be held. @@ -658,7 +661,9 @@ func shouldSprayPacket(b []byte) bool { return false } -var logPacketDests, _ = strconv.ParseBool(os.Getenv("DEBUG_LOG_PACKET_DESTS")) +var logPacketDests, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_LOG_PACKET_DESTS")) + +var logDisco, _ = strconv.ParseBool(os.Getenv("TS_DEBUG_DISCO")) const sprayPeriod = 3 * time.Second @@ -1367,6 +1372,25 @@ func (c *Conn) ReceiveIPv6(b []byte) (int, conn.Endpoint, *net.UDPAddr, error) { } } +func (c *Conn) sendDiscoMessage(dst netaddr.IPPort, dstKey key.Public, dstDisco tailcfg.DiscoKey, m disco.Message) error { + c.mu.Lock() + var nonce [disco.NonceLen]byte + if _, err := crand.Read(nonce[:]); err != nil { + panic(err) // worth dying for + } + pkt := make([]byte, 0, 512) // TODO: size it correctly? pool? if it matters. + pkt = append(pkt, disco.Magic...) + pkt = append(pkt, c.discoPublic[:]...) + pkt = append(pkt, nonce[:]...) + sharedKey := c.sharedDiscoKeyLocked(dstDisco) + c.mu.Unlock() + + pkt = box.SealAfterPrecomputation(pkt, m.AppendMarshal(nil), &nonce, sharedKey) + err := c.sendAddr(dst, dstKey, pkt) + c.logf("magicsock: disco: sent %T to %v; err=%v", m, dst, err) + return err +} + // handleDiscoMessage reports whether msg was a Tailscale inter-node discovery message // that was handled. // @@ -1380,19 +1404,23 @@ func (c *Conn) ReceiveIPv6(b []byte) (int, conn.Endpoint, *net.UDPAddr, error) { // For messages received over DERP, the addr will be derpMagicIP (with // port being the region) func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { - const magic = "TS💬" - const nonceLen = 24 - const headerLen = len(magic) + len(tailcfg.DiscoKey{}) + nonceLen - if len(msg) < headerLen || string(msg[:len(magic)]) != magic { + const headerLen = len(disco.Magic) + len(tailcfg.DiscoKey{}) + disco.NonceLen + if len(msg) < headerLen || string(msg[:len(disco.Magic)]) != disco.Magic { return false } var sender tailcfg.DiscoKey - copy(sender[:], msg[len(magic):]) + copy(sender[:], msg[len(disco.Magic):]) c.mu.Lock() defer c.mu.Unlock() + if logDisco { + c.logf("magicsock: disco: got disco-looking frame %v", sender) + } if c.discoPrivate.IsZero() { + if logDisco { + c.logf("magicsock: disco: ignoring disco-looking frame, no local key") + } return false } @@ -1400,14 +1428,17 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { if !ok { // Returning false keeps passing it down, to WireGuard. // WireGuard will almost surely reject it, but give it a chance. + if logDisco { + c.logf("magicsock: disco: ignoring disco-looking frame, don't know about %v", sender) + } return false } // First, do we even know (and thus care) about this sender? If not, // don't bother decrypting it. - var nonce [nonceLen]byte - copy(nonce[:], msg[len(magic)+len(key.Public{}):]) + var nonce [disco.NonceLen]byte + copy(nonce[:], msg[len(disco.Magic)+len(key.Public{}):]) sealedBox := msg[headerLen:] payload, ok := box.OpenAfterPrecomputation(nil, sealedBox, &nonce, c.sharedDiscoKeyLocked(sender)) if !ok { @@ -1416,14 +1447,20 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { // and old packets might've still been in flight (or // scheduled). This is particularly the case for LANs // or non-NATed endpoints. - // Not worth logging. Pass on to wireguard, in case + // Don't log in normal case. Pass on to wireguard, in case // it's actually a a wireguard packet (super unlikely, // but). + if logDisco { + c.logf("magicsock: disco: failed to open naclbox from %v (wrong rcpt?)", sender) + } // TODO(bradfitz): add some counter for this that logs rarely return false } dm, err := disco.Parse(payload) + if logDisco { + c.logf("magicsock: disco: disco.Parse = %T, %v", dm, err) + } if err != nil { // Couldn't parse it, but it was inside a correctly // signed box, so just ignore it, assuming it's from a @@ -1452,17 +1489,20 @@ func (c *Conn) handleDiscoMessage(msg []byte, src netaddr.IPPort) bool { } func (c *Conn) handlePongLocked(m *disco.Pong, n *tailcfg.Node, dk tailcfg.DiscoKey, from netaddr.IPPort) { + de, ok := c.endpointOfDisco[dk] + if !ok { + return + } c.logf("magicsock: disco: got pong from %s, tx=%x, disco=%x, src=%v (they saw %v)", n.Key.ShortString(), m.TxID, dk[:8], from, m.Src) - // TODO: implement + go de.handlePong(m) } func (c *Conn) handlePingLocked(m *disco.Ping, n *tailcfg.Node, dk tailcfg.DiscoKey, from netaddr.IPPort) { c.logf("magicsock: disco: got ping tx %x from %s/%x at %v", m.TxID, n.Key.ShortString(), dk[:8], from) - reply := &disco.Pong{ + go c.sendDiscoMessage(from, key.Public(n.Key), dk, &disco.Pong{ TxID: m.TxID, Src: from, - } - go c.sendAddr(from, key.Public(n.Key), reply.AppendMarshal(nil)) + }) } // handleCallMeMaybeLocked is called when a discovery message arrives @@ -1471,7 +1511,7 @@ func (c *Conn) handlePingLocked(m *disco.Ping, n *tailcfg.Node, dk tailcfg.Disco // stateful firewall should be open. Now we can Ping back and make it // through. func (c *Conn) handleCallMeMaybeLocked(n *tailcfg.Node, dk tailcfg.DiscoKey) { - c.logf("magicsock: disco: got call-me-maybe packet from %s (disco=%x)", n.Key.ShortString, dk[:8]) + c.logf("magicsock: disco: got call-me-maybe packet from %s (disco=%x)", n.Key.ShortString(), dk[:8]) // TODO: implement } @@ -2491,22 +2531,24 @@ type discoEndpoint struct { lastSend time.Time derpAddr netaddr.IPPort // fallback/bootstrap path, if non-zero (non-zero for well-behaved clients) - bestAddr netaddr.IPPort // best non-DERP path; zero if none - sentPing map[stun.TxID]sentPing - endpointState map[netaddr.IPPort]*endpointState + bestAddr netaddr.IPPort // best non-DERP path; zero if none + bestAddrLatency time.Duration + bestAddrAt time.Time // time best address re-confirmed + sentPing map[stun.TxID]sentPing + endpointState map[netaddr.IPPort]*endpointState timers map[*time.Timer]bool } type endpointState struct { - // TODO: lastPing time.Time + lastPing time.Time // TODO: lastPong time.Time index int // index in nodecfg.Node.Endpoints } type sentPing struct { - // TODO: to netaddr.IPPort - // TODO: at time.Time + to netaddr.IPPort + at time.Time } // initFakeUDPAddr populates fakeWGAddr with a globally unique fake UDPAddr. @@ -2554,15 +2596,12 @@ func (de *discoEndpoint) UpdateDst(addr *net.UDPAddr) error { func (de *discoEndpoint) send(b []byte) error { now := time.Now() - // TODO: all the disco messaging & state tracking & spraying, - // bringing over relevant AddrSet code. For now, just do DERP - // as a crutch while I work on other bits. de.mu.Lock() de.lastSend = now derpAddr := de.derpAddr bestAddr := de.bestAddr - if bestAddr.Port == 0 { - de.sendPingsLocked() + if bestAddr.IsZero() || de.bestAddrAt.Before(now.Add(-5*time.Second)) { + de.sendPingsLocked(now) } de.mu.Unlock() @@ -2575,8 +2614,36 @@ func (de *discoEndpoint) send(b []byte) error { return de.c.sendAddr(bestAddr, de.publicKey, b) } -func (de *discoEndpoint) sendPingsLocked() { - // TODO +func (de *discoEndpoint) sendPingsLocked(now time.Time) { + sent := false + for ep, st := range de.endpointState { + if !st.lastPing.IsZero() && now.Sub(st.lastPing) < 5*time.Second { + continue + } + st.lastPing = now + + txid := stun.NewTxID() + de.sentPing[txid] = sentPing{ + to: ep, + at: now, + } + sent = true + go de.sendDiscoMessage(ep, &disco.Ping{TxID: [12]byte(txid)}) + } + derpAddr := de.derpAddr + if sent && derpAddr.Port != 0 { + // In just a bit of a time (for goroutines above to schedule and run), + // send a message to peer via DERP informing them that we've sent + // so our firewall ports are probably open and now would be a good time + // for them to connect. + time.AfterFunc(5*time.Millisecond, func() { + de.sendDiscoMessage(derpAddr, disco.CallMeMaybe{}) + }) + } +} + +func (de *discoEndpoint) sendDiscoMessage(dst netaddr.IPPort, dm disco.Message) error { + return de.c.sendDiscoMessage(dst, de.publicKey, de.discoKey, dm) } func (de *discoEndpoint) updateFromNode(n *tailcfg.Node) { @@ -2629,6 +2696,37 @@ func (de *discoEndpoint) noteConnectivityChange() { // TODO: reset state } +func (de *discoEndpoint) handlePong(m *disco.Pong) { + de.mu.Lock() + defer de.mu.Unlock() + + sp, ok := de.sentPing[m.TxID] + if !ok { + // This is not a pong for a ping we sent. Ignore. + return + } + delete(de.sentPing, m.TxID) + + now := time.Now() + delay := now.Sub(sp.at) + de.c.logf("magicsock: disco: got pong reply after %v", delay) + + // Expire our best address if we haven't heard from it in awhile. + tooOld := now.Add(-15 * time.Second) + if !de.bestAddr.IsZero() && de.bestAddrAt.Before(tooOld) { + de.bestAddr = netaddr.IPPort{} + } + + // Promote this pong response to our current best address if it's lower latency. + // TODO(bradfitz): decide how latency vs. preference order affects decision + if de.bestAddr.IsZero() || delay < de.bestAddrLatency { + de.bestAddr = sp.to + de.bestAddrLatency = delay + de.bestAddrAt = now + de.c.logf("magicsock: disco: promoted %v to best address for %v", sp.to, de.publicKey.ShortString()) + } +} + // cleanup is called when a discovery endpoint is no longer present in the NetworkMap. // This is where we can do cleanup such as closing goroutines or canceling timers. func (de *discoEndpoint) cleanup() {