@ -8,10 +8,12 @@
package wgengine
import (
"fmt"
"log"
"net/netip"
"runtime/pprof"
"strings"
"sync"
"time"
"tailscale.com/envknob"
@ -38,21 +40,49 @@ func NewWatchdog(e Engine) Engine {
return e
}
return & watchdogEngine {
wrap : e ,
logf : log . Printf ,
fatalf : log . Fatalf ,
maxWait : 45 * time . Second ,
wrap : e ,
logf : log . Printf ,
fatalf : log . Fatalf ,
maxWait : 45 * time . Second ,
inFlight : make ( map [ inFlightKey ] time . Time ) ,
}
}
type inFlightKey struct {
op string
ctr uint64
}
type watchdogEngine struct {
wrap Engine
logf func ( format string , args ... any )
fatalf func ( format string , args ... any )
maxWait time . Duration
// Track the start time(s) of in-flight operations
inFlightMu sync . Mutex
inFlight map [ inFlightKey ] time . Time
inFlightCtr uint64
}
func ( e * watchdogEngine ) watchdogErr ( name string , fn func ( ) error ) error {
// Track all in-flight operations so we can print more useful error
// messages on watchdog failure
e . inFlightMu . Lock ( )
key := inFlightKey {
op : name ,
ctr : e . inFlightCtr ,
}
e . inFlightCtr ++
e . inFlight [ key ] = time . Now ( )
e . inFlightMu . Unlock ( )
defer func ( ) {
e . inFlightMu . Lock ( )
defer e . inFlightMu . Unlock ( )
delete ( e . inFlight , key )
} ( )
errCh := make ( chan error )
go func ( ) {
errCh <- fn ( )
@ -66,6 +96,22 @@ func (e *watchdogEngine) watchdogErr(name string, fn func() error) error {
buf := new ( strings . Builder )
pprof . Lookup ( "goroutine" ) . WriteTo ( buf , 1 )
e . logf ( "wgengine watchdog stacks:\n%s" , buf . String ( ) )
// Collect the list of in-flight operations for debugging.
var (
b [ ] byte
now = time . Now ( )
)
e . inFlightMu . Lock ( )
for k , t := range e . inFlight {
dur := now . Sub ( t ) . Round ( time . Millisecond )
b = fmt . Appendf ( b , "in-flight[%d]: name=%s duration=%v start=%s\n" , k . ctr , k . op , dur , t . Format ( time . RFC3339Nano ) )
}
e . inFlightMu . Unlock ( )
// Print everything as a single string to avoid log
// rate limits.
e . logf ( "wgengine watchdog in-flight:\n%s" , b )
e . fatalf ( "wgengine: watchdog timeout on %s" , name )
return nil
}