From f0d6228c524400090b2d19316f4fa49adb80a492 Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Wed, 8 Mar 2023 12:29:25 -0500 Subject: [PATCH] ipn/localapi: flesh out the 'debug derp' checks Updates #6526 Signed-off-by: Andrew Dunham Change-Id: Ic18d9ff288b9c7b8d5ab1bd77dd59693cd776cc4 --- ipn/localapi/debugderp.go | 143 ++++++++++++++++++++++++++++++++++---- 1 file changed, 130 insertions(+), 13 deletions(-) diff --git a/ipn/localapi/debugderp.go b/ipn/localapi/debugderp.go index fac8ebeb7..2db3e4f2f 100644 --- a/ipn/localapi/debugderp.go +++ b/ipn/localapi/debugderp.go @@ -4,13 +4,17 @@ package localapi import ( + "crypto/tls" "encoding/json" "fmt" + "net" "net/http" "strconv" + "tailscale.com/derp/derphttp" "tailscale.com/ipn/ipnstate" "tailscale.com/tailcfg" + "tailscale.com/types/key" ) func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) { @@ -51,6 +55,9 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) { return } st.Info = append(st.Info, fmt.Sprintf("Region %v == %q", reg.RegionID, reg.RegionCode)) + if len(dm.Regions) == 1 { + st.Warnings = append(st.Warnings, "Having only a single DERP region (i.e. 
removing the default Tailscale-provided regions) is a single point of failure and could hamper connectivity") + } if reg.Avoid { st.Warnings = append(st.Warnings, "Region is marked with Avoid bit") @@ -60,10 +67,120 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) { return } + ctx := r.Context() + + var ( + dialer net.Dialer + client *http.Client = http.DefaultClient + ) + checkConn := func(derpNode *tailcfg.DERPNode) bool { + port := firstNonzero(derpNode.DERPPort, 443) + + var ( + hasIPv4 bool + hasIPv6 bool + ) + + // Check IPv4 first + addr := net.JoinHostPort(firstNonzero(derpNode.IPv4, derpNode.HostName), strconv.Itoa(port)) + conn, err := dialer.DialContext(ctx, "tcp4", addr) + if err != nil { + st.Errors = append(st.Errors, fmt.Sprintf("Error connecting to node %q @ %q over IPv4: %v", derpNode.HostName, addr, err)) + } else { + defer conn.Close() + + // Upgrade to TLS and verify that works properly. + tlsConn := tls.Client(conn, &tls.Config{ + ServerName: firstNonzero(derpNode.CertName, derpNode.HostName), + }) + if err := tlsConn.HandshakeContext(ctx); err != nil { + st.Errors = append(st.Errors, fmt.Sprintf("Error upgrading connection to node %q @ %q to TLS over IPv4: %v", derpNode.HostName, addr, err)) + } else { + hasIPv4 = true + } + } + + // Check IPv6 + addr = net.JoinHostPort(firstNonzero(derpNode.IPv6, derpNode.HostName), strconv.Itoa(port)) + conn, err = dialer.DialContext(ctx, "tcp6", addr) + if err != nil { + st.Errors = append(st.Errors, fmt.Sprintf("Error connecting to node %q @ %q over IPv6: %v", derpNode.HostName, addr, err)) + } else { + defer conn.Close() + + // Upgrade to TLS and verify that works properly. 
+ tlsConn := tls.Client(conn, &tls.Config{ + ServerName: firstNonzero(derpNode.CertName, derpNode.HostName), + // TODO(andrew-d): we should print more + // detailed failure information on if/why TLS + // verification fails + }) + if err := tlsConn.HandshakeContext(ctx); err != nil { + st.Errors = append(st.Errors, fmt.Sprintf("Error upgrading connection to node %q @ %q to TLS over IPv6: %v", derpNode.HostName, addr, err)) + } else { + hasIPv6 = true + } + } + + // If we only have an IPv6 conn, then warn; we want both. + if hasIPv6 && !hasIPv4 { + st.Warnings = append(st.Warnings, fmt.Sprintf("Node %q only has IPv6 connectivity, not IPv4", derpNode.HostName)) + } else if hasIPv6 && hasIPv4 { + st.Info = append(st.Info, fmt.Sprintf("Node %q has working IPv4 and IPv6 connectivity", derpNode.HostName)) + } + + return hasIPv4 || hasIPv6 + } + + // Start by checking whether we can establish an HTTP connection + for _, derpNode := range reg.Nodes { + connSuccess := checkConn(derpNode) + + // Verify that the /generate_204 endpoint works + captivePortalURL := "http://" + derpNode.HostName + "/generate_204" + resp, err := client.Get(captivePortalURL) + if err != nil { + st.Warnings = append(st.Warnings, fmt.Sprintf("Error making request to the captive portal check %q; is port 80 blocked?", captivePortalURL)) + } else { + resp.Body.Close() + } + + if !connSuccess { + continue + } + + fakePrivKey := key.NewNode() + + // Next, repeatedly get the server key to see if the node is + // behind a load balancer (incorrectly). 
+ serverPubKeys := make(map[key.NodePublic]bool) + for i := 0; i < 5; i++ { + func() { + rc := derphttp.NewRegionClient(fakePrivKey, h.logf, func() *tailcfg.DERPRegion { + return &tailcfg.DERPRegion{ + RegionID: reg.RegionID, + RegionCode: reg.RegionCode, + RegionName: reg.RegionName, + Nodes: []*tailcfg.DERPNode{derpNode}, + } + }) + if err := rc.Connect(ctx); err != nil { + st.Errors = append(st.Errors, fmt.Sprintf("Error connecting to node %q @ try %d: %v", derpNode.HostName, i, err)) + return + } + + if len(serverPubKeys) == 0 { + st.Info = append(st.Info, fmt.Sprintf("Successfully established a DERP connection with node %q", derpNode.HostName)) + } + serverPubKeys[rc.ServerPublicKey()] = true + }() + } + if len(serverPubKeys) > 1 { + st.Errors = append(st.Errors, fmt.Sprintf("Received multiple server public keys (%d); is the DERP server behind a load balancer?", len(serverPubKeys))) + } + } + // TODO(bradfitz): finish: - // * first try TCP connection - // * reconnect 4 or 5 times; see if we ever get a different server key. - // if so, they're load balancing the wrong way. error. // * try to DERP auth with new public key. // * if rejected, add Info that it's likely the DERP server authz is on, // try with LocalBackend's node key instead. @@ -75,17 +192,17 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) { // in DERPRegion. Or maybe even list all their server pub keys that it's peered // with. // * try STUN queries - // * warn about IPv6 only // * If their certificate is bad, either expired or just wrongly // issued in the first place, tell them specifically that the // cert is bad not just that the connection failed. - // * If /generate_204 on port 80 cannot be reached, warn - // that they won't get captive portal detection and - // should allow port 80. 
- // * If they have exactly one DERP region because they - // removed all of Tailscale's DERPs, warn that they have - // a SPOF that will hamper even direct connections from - // working. (warning, not error, as that's probably a likely - // config for headscale users) - st.Info = append(st.Info, "TODO: 🦉") +} + +func firstNonzero[T comparable](items ...T) T { + var zero T + for _, item := range items { + if item != zero { + return item + } + } + return zero }