mirror of https://github.com/tailscale/tailscale/
cmd/derper/derpprobe: add derp prober
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>pull/2396/head
parent
87481282eb
commit
05da2691a5
@ -0,0 +1,337 @@
|
||||
// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// The derpprobe binary probes derpers.
|
||||
package main // import "tailscale.com/cmd/derper/derpprobe"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
crand "crypto/rand"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
"fmt"
|
||||
"html"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"tailscale.com/derp"
|
||||
"tailscale.com/derp/derphttp"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/key"
|
||||
)
|
||||
|
||||
var (
|
||||
derpMapURL = flag.String("derp-map", "https://login.tailscale.com/derpmap/default", "URL to DERP map (https:// or file://)")
|
||||
listen = flag.String("listen", ":8030", "HTTP listen address")
|
||||
)
|
||||
|
||||
var (
|
||||
mu sync.Mutex
|
||||
state = map[nodePair]pairStatus{}
|
||||
lastDERPMap *tailcfg.DERPMap
|
||||
lastDERPMapAt time.Time
|
||||
)
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
go probeLoop()
|
||||
log.Fatal(http.ListenAndServe(*listen, http.HandlerFunc(serve)))
|
||||
}
|
||||
|
||||
type overallStatus struct {
|
||||
good, bad []string
|
||||
}
|
||||
|
||||
func (st *overallStatus) addBadf(format string, a ...interface{}) {
|
||||
st.bad = append(st.bad, fmt.Sprintf(format, a...))
|
||||
}
|
||||
|
||||
func (st *overallStatus) addGoodf(format string, a ...interface{}) {
|
||||
st.good = append(st.good, fmt.Sprintf(format, a...))
|
||||
}
|
||||
|
||||
func getOverallStatus() (o overallStatus) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if lastDERPMap == nil {
|
||||
o.addBadf("no DERP map")
|
||||
return
|
||||
}
|
||||
now := time.Now()
|
||||
if age := now.Sub(lastDERPMapAt); age > time.Minute {
|
||||
o.addBadf("DERPMap hasn't been successfully refreshed in %v", age.Round(time.Second))
|
||||
}
|
||||
for _, reg := range sortedRegions(lastDERPMap) {
|
||||
for _, from := range reg.Nodes {
|
||||
for _, to := range reg.Nodes {
|
||||
pair := nodePair{from.Name, to.Name}
|
||||
st, ok := state[pair]
|
||||
age := now.Sub(st.at).Round(time.Second)
|
||||
switch {
|
||||
case !ok:
|
||||
o.addBadf("no state for %v", pair)
|
||||
case st.err != nil:
|
||||
o.addBadf("%v: %v", pair, st.err)
|
||||
case age > 90*time.Second:
|
||||
o.addBadf("%v: update is %v old", pair, age)
|
||||
default:
|
||||
o.addGoodf("%v: %v, %v ago", pair, st.latency.Round(time.Millisecond), age)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func serve(w http.ResponseWriter, r *http.Request) {
|
||||
st := getOverallStatus()
|
||||
summary := "All good"
|
||||
if len(st.bad) > 0 {
|
||||
w.WriteHeader(500)
|
||||
summary = fmt.Sprintf("%d problems", len(st.bad))
|
||||
}
|
||||
io.WriteString(w, "<html><head><style>.bad { font-weight: bold; color: #700; }</style></head>\n")
|
||||
fmt.Fprintf(w, "<body><h1>derp probe</h1>\n%s:<ul>", summary)
|
||||
for _, s := range st.bad {
|
||||
fmt.Fprintf(w, "<li class=bad>%s</li>\n", html.EscapeString(s))
|
||||
}
|
||||
for _, s := range st.good {
|
||||
fmt.Fprintf(w, "<li>%s</li>\n", html.EscapeString(s))
|
||||
}
|
||||
io.WriteString(w, "</ul></body></html>\n")
|
||||
}
|
||||
|
||||
func sortedRegions(dm *tailcfg.DERPMap) []*tailcfg.DERPRegion {
|
||||
ret := make([]*tailcfg.DERPRegion, 0, len(dm.Regions))
|
||||
for _, r := range dm.Regions {
|
||||
ret = append(ret, r)
|
||||
}
|
||||
sort.Slice(ret, func(i, j int) bool { return ret[i].RegionID < ret[j].RegionID })
|
||||
return ret
|
||||
}
|
||||
|
||||
type nodePair struct {
|
||||
from, to string // DERPNode.Name
|
||||
}
|
||||
|
||||
func (p nodePair) String() string { return fmt.Sprintf("(%s→%s)", p.from, p.to) }
|
||||
|
||||
type pairStatus struct {
|
||||
err error
|
||||
latency time.Duration
|
||||
at time.Time
|
||||
}
|
||||
|
||||
func setDERPMap(dm *tailcfg.DERPMap) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
lastDERPMap = dm
|
||||
lastDERPMapAt = time.Now()
|
||||
}
|
||||
|
||||
func setState(p nodePair, latency time.Duration, err error) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
st := pairStatus{
|
||||
err: err,
|
||||
latency: latency,
|
||||
at: time.Now(),
|
||||
}
|
||||
state[p] = st
|
||||
if err != nil {
|
||||
log.Printf("%+v error: %v", p, err)
|
||||
} else {
|
||||
log.Printf("%+v: %v", p, latency.Round(time.Millisecond))
|
||||
}
|
||||
}
|
||||
|
||||
func probeLoop() {
|
||||
ticker := time.NewTicker(15 * time.Second)
|
||||
for {
|
||||
err := probe()
|
||||
if err != nil {
|
||||
log.Printf("probe: %v", err)
|
||||
}
|
||||
<-ticker.C
|
||||
}
|
||||
}
|
||||
|
||||
func probe() error {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
defer cancel()
|
||||
dm, err := getDERPMap(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(dm.Regions))
|
||||
for _, reg := range dm.Regions {
|
||||
reg := reg
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for _, from := range reg.Nodes {
|
||||
for _, to := range reg.Nodes {
|
||||
latency, err := probeNodePair(ctx, dm, from, to)
|
||||
setState(nodePair{from.Name, to.Name}, latency, err)
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
return ctx.Err()
|
||||
}
|
||||
|
||||
func probeNodePair(ctx context.Context, dm *tailcfg.DERPMap, from, to *tailcfg.DERPNode) (latency time.Duration, err error) {
|
||||
fromc, err := newConn(ctx, dm, from)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer fromc.Close()
|
||||
toc, err := newConn(ctx, dm, to)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer toc.Close()
|
||||
|
||||
// Make a random packet
|
||||
pkt := make([]byte, 8)
|
||||
crand.Read(pkt)
|
||||
|
||||
t0 := time.Now()
|
||||
|
||||
// Send the random packet.
|
||||
sendc := make(chan error, 1)
|
||||
go func() {
|
||||
sendc <- fromc.Send(toc.SelfPublicKey(), pkt)
|
||||
}()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return 0, fmt.Errorf("timeout sending via %q: %w", from.Name, ctx.Err())
|
||||
case err := <-sendc:
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("error sending via %q: %w", from.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Receive the random packet.
|
||||
recvc := make(chan interface{}, 1) // either derp.ReceivedPacket or error
|
||||
go func() {
|
||||
for {
|
||||
m, err := toc.Recv()
|
||||
if err != nil {
|
||||
recvc <- err
|
||||
return
|
||||
}
|
||||
switch v := m.(type) {
|
||||
case derp.ReceivedPacket:
|
||||
recvc <- v
|
||||
default:
|
||||
log.Printf("%v: ignoring Recv frame type %T", to.Name, v)
|
||||
// Loop.
|
||||
}
|
||||
}
|
||||
}()
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return 0, fmt.Errorf("timeout receiving from %q: %w", to.Name, ctx.Err())
|
||||
case v := <-recvc:
|
||||
if err, ok := v.(error); ok {
|
||||
return 0, fmt.Errorf("error receiving from %q: %w", to.Name, err)
|
||||
}
|
||||
p := v.(derp.ReceivedPacket)
|
||||
if p.Source != fromc.SelfPublicKey() {
|
||||
return 0, fmt.Errorf("got data packet from unexpected source, %v", p.Source)
|
||||
}
|
||||
if !bytes.Equal(p.Data, pkt) {
|
||||
return 0, fmt.Errorf("unexpected data packet %q", p.Data)
|
||||
}
|
||||
}
|
||||
return time.Since(t0), nil
|
||||
}
|
||||
|
||||
func newConn(ctx context.Context, dm *tailcfg.DERPMap, n *tailcfg.DERPNode) (*derphttp.Client, error) {
|
||||
priv := key.NewPrivate()
|
||||
dc := derphttp.NewRegionClient(priv, log.Printf, func() *tailcfg.DERPRegion {
|
||||
rid := n.RegionID
|
||||
return &tailcfg.DERPRegion{
|
||||
RegionID: rid,
|
||||
RegionCode: fmt.Sprintf("%s-%s", dm.Regions[rid].RegionCode, n.Name),
|
||||
RegionName: dm.Regions[rid].RegionName,
|
||||
Nodes: []*tailcfg.DERPNode{n},
|
||||
}
|
||||
})
|
||||
dc.IsProber = true
|
||||
err := dc.Connect(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
errc := make(chan error, 1)
|
||||
go func() {
|
||||
m, err := dc.Recv()
|
||||
if err != nil {
|
||||
errc <- err
|
||||
return
|
||||
}
|
||||
switch m.(type) {
|
||||
case derp.ServerInfoMessage:
|
||||
errc <- nil
|
||||
default:
|
||||
errc <- fmt.Errorf("unexpected first message type %T", errc)
|
||||
}
|
||||
}()
|
||||
select {
|
||||
case err := <-errc:
|
||||
if err != nil {
|
||||
go dc.Close()
|
||||
return nil, err
|
||||
}
|
||||
case <-ctx.Done():
|
||||
go dc.Close()
|
||||
return nil, fmt.Errorf("timeout waiting for ServerInfoMessage: %w", ctx.Err())
|
||||
}
|
||||
return dc, nil
|
||||
}
|
||||
|
||||
var httpOrFileClient = &http.Client{Transport: httpOrFileTransport()}
|
||||
|
||||
func httpOrFileTransport() http.RoundTripper {
|
||||
tr := http.DefaultTransport.(*http.Transport).Clone()
|
||||
tr.RegisterProtocol("file", http.NewFileTransport(http.Dir("/")))
|
||||
return tr
|
||||
}
|
||||
|
||||
func getDERPMap(ctx context.Context) (*tailcfg.DERPMap, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", *derpMapURL, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
res, err := httpOrFileClient.Do(req)
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
if lastDERPMap != nil && time.Since(lastDERPMapAt) < 10*time.Minute {
|
||||
// Assume that control is restarting and use
|
||||
// the same one for a bit.
|
||||
return lastDERPMap, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("fetching %s: %s", *derpMapURL, res.Status)
|
||||
}
|
||||
dm := new(tailcfg.DERPMap)
|
||||
if err := json.NewDecoder(res.Body).Decode(dm); err != nil {
|
||||
return nil, fmt.Errorf("decoding %s JSON: %v", *derpMapURL, err)
|
||||
}
|
||||
setDERPMap(dm)
|
||||
return dm, nil
|
||||
}
|
Loading…
Reference in New Issue