diff --git a/cmd/derper/derpprobe/derpprobe.go b/cmd/derper/derpprobe/derpprobe.go
new file mode 100644
index 000000000..47b30bcdf
--- /dev/null
+++ b/cmd/derper/derpprobe/derpprobe.go
@@ -0,0 +1,337 @@
+// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The derpprobe binary probes derpers.
+package main // import "tailscale.com/cmd/derper/derpprobe"
+
+import (
+ "bytes"
+ "context"
+ crand "crypto/rand"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "html"
+ "io"
+ "log"
+ "net/http"
+ "sort"
+ "sync"
+ "time"
+
+ "tailscale.com/derp"
+ "tailscale.com/derp/derphttp"
+ "tailscale.com/tailcfg"
+ "tailscale.com/types/key"
+)
+
+var (
+ derpMapURL = flag.String("derp-map", "https://login.tailscale.com/derpmap/default", "URL to DERP map (https:// or file://)")
+ listen = flag.String("listen", ":8030", "HTTP listen address")
+)
+
+var (
+ mu sync.Mutex
+ state = map[nodePair]pairStatus{}
+ lastDERPMap *tailcfg.DERPMap
+ lastDERPMapAt time.Time
+)
+
+func main() {
+ flag.Parse()
+ go probeLoop()
+ log.Fatal(http.ListenAndServe(*listen, http.HandlerFunc(serve)))
+}
+
+type overallStatus struct {
+ good, bad []string
+}
+
+func (st *overallStatus) addBadf(format string, a ...interface{}) {
+ st.bad = append(st.bad, fmt.Sprintf(format, a...))
+}
+
+func (st *overallStatus) addGoodf(format string, a ...interface{}) {
+ st.good = append(st.good, fmt.Sprintf(format, a...))
+}
+
+func getOverallStatus() (o overallStatus) {
+ mu.Lock()
+ defer mu.Unlock()
+ if lastDERPMap == nil {
+ o.addBadf("no DERP map")
+ return
+ }
+ now := time.Now()
+ if age := now.Sub(lastDERPMapAt); age > time.Minute {
+ o.addBadf("DERPMap hasn't been successfully refreshed in %v", age.Round(time.Second))
+ }
+ for _, reg := range sortedRegions(lastDERPMap) {
+ for _, from := range reg.Nodes {
+ for _, to := range reg.Nodes {
+ pair := nodePair{from.Name, to.Name}
+ st, ok := state[pair]
+ age := now.Sub(st.at).Round(time.Second)
+ switch {
+ case !ok:
+ o.addBadf("no state for %v", pair)
+ case st.err != nil:
+ o.addBadf("%v: %v", pair, st.err)
+ case age > 90*time.Second:
+ o.addBadf("%v: update is %v old", pair, age)
+ default:
+ o.addGoodf("%v: %v, %v ago", pair, st.latency.Round(time.Millisecond), age)
+ }
+ }
+ }
+ }
+ return
+}
+
+func serve(w http.ResponseWriter, r *http.Request) {
+ st := getOverallStatus()
+ summary := "All good"
+ if len(st.bad) > 0 {
+ w.WriteHeader(500)
+ summary = fmt.Sprintf("%d problems", len(st.bad))
+ }
+ io.WriteString(w, "
\n")
+ fmt.Fprintf(w, "derp probe
\n%s:", summary)
+ for _, s := range st.bad {
+ fmt.Fprintf(w, "- %s
\n", html.EscapeString(s))
+ }
+ for _, s := range st.good {
+ fmt.Fprintf(w, "- %s
\n", html.EscapeString(s))
+ }
+ io.WriteString(w, "
\n")
+}
+
+func sortedRegions(dm *tailcfg.DERPMap) []*tailcfg.DERPRegion {
+ ret := make([]*tailcfg.DERPRegion, 0, len(dm.Regions))
+ for _, r := range dm.Regions {
+ ret = append(ret, r)
+ }
+ sort.Slice(ret, func(i, j int) bool { return ret[i].RegionID < ret[j].RegionID })
+ return ret
+}
+
+type nodePair struct {
+ from, to string // DERPNode.Name
+}
+
+func (p nodePair) String() string { return fmt.Sprintf("(%s→%s)", p.from, p.to) }
+
+type pairStatus struct {
+ err error
+ latency time.Duration
+ at time.Time
+}
+
+func setDERPMap(dm *tailcfg.DERPMap) {
+ mu.Lock()
+ defer mu.Unlock()
+ lastDERPMap = dm
+ lastDERPMapAt = time.Now()
+}
+
+func setState(p nodePair, latency time.Duration, err error) {
+ mu.Lock()
+ defer mu.Unlock()
+ st := pairStatus{
+ err: err,
+ latency: latency,
+ at: time.Now(),
+ }
+ state[p] = st
+ if err != nil {
+ log.Printf("%+v error: %v", p, err)
+ } else {
+ log.Printf("%+v: %v", p, latency.Round(time.Millisecond))
+ }
+}
+
+func probeLoop() {
+ ticker := time.NewTicker(15 * time.Second)
+ for {
+ err := probe()
+ if err != nil {
+ log.Printf("probe: %v", err)
+ }
+ <-ticker.C
+ }
+}
+
+func probe() error {
+ ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
+ defer cancel()
+ dm, err := getDERPMap(ctx)
+ if err != nil {
+ return err
+ }
+
+ var wg sync.WaitGroup
+ wg.Add(len(dm.Regions))
+ for _, reg := range dm.Regions {
+ reg := reg
+ go func() {
+ defer wg.Done()
+ for _, from := range reg.Nodes {
+ for _, to := range reg.Nodes {
+ latency, err := probeNodePair(ctx, dm, from, to)
+ setState(nodePair{from.Name, to.Name}, latency, err)
+ }
+ }
+ }()
+ }
+
+ wg.Wait()
+ return ctx.Err()
+}
+
+func probeNodePair(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode) (latency time.Duration, err error) {
+ fromc, err := newConn(ctx, dm, from)
+ if err != nil {
+ return 0, err
+ }
+ defer fromc.Close()
+ toc, err := newConn(ctx, dm, to)
+ if err != nil {
+ return 0, err
+ }
+ defer toc.Close()
+
+ // Make a random packet
+ pkt := make([]byte, 8)
+ crand.Read(pkt)
+
+ t0 := time.Now()
+
+ // Send the random packet.
+ sendc := make(chan error, 1)
+ go func() {
+ sendc <- fromc.Send(toc.SelfPublicKey(), pkt)
+ }()
+ select {
+ case <-ctx.Done():
+ return 0, fmt.Errorf("timeout sending via %q: %w", from.Name, ctx.Err())
+ case err := <-sendc:
+ if err != nil {
+ return 0, fmt.Errorf("error sending via %q: %w", from.Name, err)
+ }
+ }
+
+ // Receive the random packet.
+ recvc := make(chan interface{}, 1) // either derp.ReceivedPacket or error
+ go func() {
+ for {
+ m, err := toc.Recv()
+ if err != nil {
+ recvc <- err
+ return
+ }
+ switch v := m.(type) {
+ case derp.ReceivedPacket:
+ recvc <- v
+ default:
+ log.Printf("%v: ignoring Recv frame type %T", to.Name, v)
+ // Loop.
+ }
+ }
+ }()
+ select {
+ case <-ctx.Done():
+ return 0, fmt.Errorf("timeout receiving from %q: %w", to.Name, ctx.Err())
+ case v := <-recvc:
+ if err, ok := v.(error); ok {
+ return 0, fmt.Errorf("error receiving from %q: %w", to.Name, err)
+ }
+ p := v.(derp.ReceivedPacket)
+ if p.Source != fromc.SelfPublicKey() {
+ return 0, fmt.Errorf("got data packet from unexpected source, %v", p.Source)
+ }
+ if !bytes.Equal(p.Data, pkt) {
+ return 0, fmt.Errorf("unexpected data packet %q", p.Data)
+ }
+ }
+ return time.Since(t0), nil
+}
+
+func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode) (*derphttp.Client, error) {
+ priv := key.NewPrivate()
+ dc := derphttp.NewRegionClient(priv, log.Printf, func() *tailcfg.DERPRegion {
+ rid := n.RegionID
+ return &tailcfg.DERPRegion{
+ RegionID: rid,
+ RegionCode: fmt.Sprintf("%s-%s", dm.Regions[rid].RegionCode, n.Name),
+ RegionName: dm.Regions[rid].RegionName,
+ Nodes: []*tailcfg.DERPNode{n},
+ }
+ })
+ dc.IsProber = true
+ err := dc.Connect(ctx)
+ if err != nil {
+ return nil, err
+ }
+ errc := make(chan error, 1)
+ go func() {
+ m, err := dc.Recv()
+ if err != nil {
+ errc <- err
+ return
+ }
+ switch m.(type) {
+ case derp.ServerInfoMessage:
+ errc <- nil
+ default:
+ errc <- fmt.Errorf("unexpected first message type %T", errc)
+ }
+ }()
+ select {
+ case err := <-errc:
+ if err != nil {
+ go dc.Close()
+ return nil, err
+ }
+ case <-ctx.Done():
+ go dc.Close()
+ return nil, fmt.Errorf("timeout waiting for ServerInfoMessage: %w", ctx.Err())
+ }
+ return dc, nil
+}
+
+var httpOrFileClient = &http.Client{Transport: httpOrFileTransport()}
+
+func httpOrFileTransport() http.RoundTripper {
+ tr := http.DefaultTransport.(*http.Transport).Clone()
+ tr.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
+ return tr
+}
+
+func getDERPMap(ctx context.Context) (*tailcfg.DERPMap, error) {
+ req, err := http.NewRequestWithContext(ctx, "GET", *derpMapURL, nil)
+ if err != nil {
+ return nil, err
+ }
+ res, err := httpOrFileClient.Do(req)
+ if err != nil {
+ mu.Lock()
+ defer mu.Unlock()
+ if lastDERPMap != nil && time.Since(lastDERPMapAt) < 10*time.Minute {
+ // Assume that control is restarting and use
+ // the same one for a bit.
+ return lastDERPMap, nil
+ }
+ return nil, err
+ }
+ defer res.Body.Close()
+ if res.StatusCode != 200 {
+ return nil, fmt.Errorf("fetching %s: %s", *derpMapURL, res.Status)
+ }
+ dm := new(tailcfg.DERPMap)
+ if err := json.NewDecoder(res.Body).Decode(dm); err != nil {
+ return nil, fmt.Errorf("decoding %s JSON: %v", *derpMapURL, err)
+ }
+ setDERPMap(dm)
+ return dm, nil
+}
diff --git a/derp/derp_client.go b/derp/derp_client.go
index 58205a361..ba258cf2b 100644
--- a/derp/derp_client.go
+++ b/derp/derp_client.go
@@ -29,6 +29,7 @@ type Client struct {
br *bufio.Reader
meshKey string
canAckPings bool
+ isProber bool
wmu sync.Mutex // hold while writing to bw
bw *bufio.Writer
@@ -52,6 +53,7 @@ type clientOpt struct {
MeshKey string
ServerPub key.Public
CanAckPings bool
+ IsProber bool
}
// MeshKey returns a ClientOpt to pass to the DERP server during connect to get
@@ -60,6 +62,10 @@ type clientOpt struct {
// An empty key means to not use a mesh key.
func MeshKey(key string) ClientOpt { return clientOptFunc(func(o *clientOpt) { o.MeshKey = key }) }
+// IsProber returns a ClientOpt to pass to the DERP server during connect to
+// declare that this client is a a prober.
+func IsProber(v bool) ClientOpt { return clientOptFunc(func(o *clientOpt) { o.IsProber = v }) }
+
// ServerPublicKey returns a ClientOpt to declare that the server's DERP public key is known.
// If key is the zero value, the returned ClientOpt is a no-op.
func ServerPublicKey(key key.Public) ClientOpt {
@@ -93,6 +99,7 @@ func newClient(privateKey key.Private, nc Conn, brw *bufio.ReadWriter, logf logg
bw: brw.Writer,
meshKey: opt.MeshKey,
canAckPings: opt.CanAckPings,
+ isProber: opt.IsProber,
}
if opt.ServerPub.IsZero() {
if err := c.recvServerKey(); err != nil {
@@ -160,6 +167,9 @@ type clientInfo struct {
// CanAckPings is whether the client declares it's able to ack
// pings.
CanAckPings bool
+
+ // IsProber is whether this client is a prober.
+ IsProber bool `json:",omitempty"`
}
func (c *Client) sendClientKey() error {
@@ -171,6 +181,7 @@ func (c *Client) sendClientKey() error {
Version: ProtocolVersion,
MeshKey: c.meshKey,
CanAckPings: c.canAckPings,
+ IsProber: c.isProber,
})
if err != nil {
return err
diff --git a/derp/derphttp/derphttp_client.go b/derp/derphttp/derphttp_client.go
index e7b34f948..17f1ca603 100644
--- a/derp/derphttp/derphttp_client.go
+++ b/derp/derphttp/derphttp_client.go
@@ -50,6 +50,7 @@ type Client struct {
TLSConfig *tls.Config // optional; nil means default
DNSCache *dnscache.Resolver // optional; nil means no caching
MeshKey string // optional; for trusted clients
+ IsProber bool // optional; for probers to optional declare themselves as such
privateKey key.Private
logf logger.Logf
@@ -130,6 +131,11 @@ func (c *Client) ServerPublicKey() key.Public {
return c.serverPubKey
}
+// SelfPublicKey returns our own public key.
+func (c *Client) SelfPublicKey() key.Public {
+ return c.privateKey.Public()
+}
+
func urlPort(u *url.URL) string {
if p := u.Port(); p != "" {
return p
@@ -338,6 +344,7 @@ func (c *Client) connect(ctx context.Context, caller string) (client *derp.Clien
derp.MeshKey(c.MeshKey),
derp.ServerPublicKey(serverPub),
derp.CanAckPings(c.canAckPings),
+ derp.IsProber(c.IsProber),
)
if err != nil {
return nil, 0, err