ipn/localapi: flesh out the 'debug derp' checks

Updates #6526

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: Ic18d9ff288b9c7b8d5ab1bd77dd59693cd776cc4
pull/7527/head
Andrew Dunham 1 year ago
parent 920de86cee
commit f0d6228c52

@ -4,13 +4,17 @@
package localapi
import (
"crypto/tls"
"encoding/json"
"fmt"
"net"
"net/http"
"strconv"
"tailscale.com/derp/derphttp"
"tailscale.com/ipn/ipnstate"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
)
func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) {
@ -51,6 +55,9 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) {
return
}
st.Info = append(st.Info, fmt.Sprintf("Region %v == %q", reg.RegionID, reg.RegionCode))
if len(dm.Regions) == 1 {
st.Warnings = append(st.Warnings, "Having only a single DERP region (i.e. removing the default Tailscale-provided regions) is a single point of failure and could hamper connectivity")
}
if reg.Avoid {
st.Warnings = append(st.Warnings, "Region is marked with Avoid bit")
@ -60,10 +67,120 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) {
return
}
ctx := r.Context()
var (
dialer net.Dialer
client *http.Client = http.DefaultClient
)
checkConn := func(derpNode *tailcfg.DERPNode) bool {
port := firstNonzero(derpNode.DERPPort, 443)
var (
hasIPv4 bool
hasIPv6 bool
)
// Check IPv4 first
addr := net.JoinHostPort(firstNonzero(derpNode.IPv4, derpNode.HostName), strconv.Itoa(port))
conn, err := dialer.DialContext(ctx, "tcp4", addr)
if err != nil {
st.Errors = append(st.Errors, fmt.Sprintf("Error connecting to node %q @ %q over IPv4: %v", derpNode.HostName, addr, err))
} else {
defer conn.Close()
// Upgrade to TLS and verify that works properly.
tlsConn := tls.Client(conn, &tls.Config{
ServerName: firstNonzero(derpNode.CertName, derpNode.HostName),
})
if err := tlsConn.HandshakeContext(ctx); err != nil {
st.Errors = append(st.Errors, fmt.Sprintf("Error upgrading connection to node %q @ %q to TLS over IPv4: %v", derpNode.HostName, addr, err))
} else {
hasIPv4 = true
}
}
// Check IPv6
addr = net.JoinHostPort(firstNonzero(derpNode.IPv6, derpNode.HostName), strconv.Itoa(port))
conn, err = dialer.DialContext(ctx, "tcp6", addr)
if err != nil {
st.Errors = append(st.Errors, fmt.Sprintf("Error connecting to node %q @ %q over IPv6: %v", derpNode.HostName, addr, err))
} else {
defer conn.Close()
// Upgrade to TLS and verify that works properly.
tlsConn := tls.Client(conn, &tls.Config{
ServerName: firstNonzero(derpNode.CertName, derpNode.HostName),
// TODO(andrew-d): we should print more
// detailed failure information on if/why TLS
// verification fails
})
if err := tlsConn.HandshakeContext(ctx); err != nil {
st.Errors = append(st.Errors, fmt.Sprintf("Error upgrading connection to node %q @ %q to TLS over IPv6: %v", derpNode.HostName, addr, err))
} else {
hasIPv6 = true
}
}
// If we only have an IPv6 conn, then warn; we want both.
if hasIPv6 && !hasIPv4 {
st.Warnings = append(st.Warnings, fmt.Sprintf("Node %q only has IPv6 connectivity, not IPv4", derpNode.HostName))
} else if hasIPv6 && hasIPv4 {
st.Info = append(st.Info, fmt.Sprintf("Node %q has working IPv4 and IPv6 connectivity", derpNode.HostName))
}
return hasIPv4 || hasIPv6
}
// Start by checking whether we can establish a HTTP connection
for _, derpNode := range reg.Nodes {
connSuccess := checkConn(derpNode)
// Verify that the /generate_204 endpoint works
captivePortalURL := "http://" + derpNode.HostName + "/generate_204"
resp, err := client.Get(captivePortalURL)
if err != nil {
st.Warnings = append(st.Warnings, fmt.Sprintf("Error making request to the captive portal check %q; is port 80 blocked?", captivePortalURL))
} else {
resp.Body.Close()
}
if !connSuccess {
continue
}
fakePrivKey := key.NewNode()
// Next, repeatedly get the server key to see if the node is
// behind a load balancer (incorrectly).
serverPubKeys := make(map[key.NodePublic]bool)
for i := 0; i < 5; i++ {
func() {
rc := derphttp.NewRegionClient(fakePrivKey, h.logf, func() *tailcfg.DERPRegion {
return &tailcfg.DERPRegion{
RegionID: reg.RegionID,
RegionCode: reg.RegionCode,
RegionName: reg.RegionName,
Nodes: []*tailcfg.DERPNode{derpNode},
}
})
if err := rc.Connect(ctx); err != nil {
st.Errors = append(st.Errors, fmt.Sprintf("Error connecting to node %q @ try %d: %v", derpNode.HostName, i, err))
return
}
if len(serverPubKeys) == 0 {
st.Info = append(st.Info, fmt.Sprintf("Successfully established a DERP connection with node %q", derpNode.HostName))
}
serverPubKeys[rc.ServerPublicKey()] = true
}()
}
if len(serverPubKeys) > 1 {
st.Errors = append(st.Errors, fmt.Sprintf("Received multiple server public keys (%d); is the DERP server behind a load balancer?", len(serverPubKeys)))
}
}
// TODO(bradfitz): finish:
// * first try TCP connection
// * reconnect 4 or 5 times; see if we ever get a different server key.
// if so, they're load balancing the wrong way. error.
// * try to DERP auth with new public key.
// * if rejected, add Info that it's likely the DERP server authz is on,
// try with LocalBackend's node key instead.
@ -75,17 +192,17 @@ func (h *Handler) serveDebugDERPRegion(w http.ResponseWriter, r *http.Request) {
// in DERPRegion. Or maybe even list all their server pub keys that it's peered
// with.
// * try STUN queries
// * warn about IPv6 only
// * If their certificate is bad, either expired or just wrongly
// issued in the first place, tell them specifically that the
// cert is bad not just that the connection failed.
// * If /generate_204 on port 80 cannot be reached, warn
// that they won't get captive portal detection and
// should allow port 80.
// * If they have exactly one DERP region because they
// removed all of Tailscale's DERPs, warn that they have
// a SPOF that will hamper even direct connections from
// working. (warning, not error, as that's probably a likely
// config for headscale users)
st.Info = append(st.Info, "TODO: 🦉")
}
func firstNonzero[T comparable](items ...T) T {
var zero T
for _, item := range items {
if item != zero {
return item
}
}
return zero
}

Loading…
Cancel
Save