@ -828,7 +828,7 @@ const watchdogTimeout = 120 * time.Second
// if the context expires or the server returns an error/closes the connection
// and as such always returns a non-nil error.
//
// If nu is nil, OmitPeers will be set to true.
func ( c * Direct ) sendMapRequest ( ctx context . Context , isStreaming bool , nu NetmapUpdater ) error {
if isStreaming && nu == nil {
panic ( "cb must be non-nil if isStreaming is true" )
@ -992,7 +992,27 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
}
c . expiry = nm . Expiry
}
sess . StartWatchdog ( )
// Create a watchdog timer that breaks the connection if we don't receive a
// MapResponse from the network at least once every two minutes. The
// watchdog timer is stopped every time we receive a MapResponse (so it
// doesn't run when we're processing a MapResponse message, including any
// long-running requested operations like Debug.Sleep) and is reset whenever
// we go back to blocking on network reads.
watchdogTimer , watchdogTimedOut := c . clock . NewTimer ( watchdogTimeout )
defer watchdogTimer . Stop ( )
go func ( ) {
select {
case <- ctx . Done ( ) :
vlogf ( "netmap: ending timeout goroutine" )
return
case <- watchdogTimedOut :
c . logf ( "map response long-poll timed out!" )
cancel ( )
return
}
} ( )
// gotNonKeepAliveMessage is whether we've yet received a MapResponse message without
// KeepAlive set.
@ -1006,6 +1026,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
// We can use this same read loop either way.
var msg [ ] byte
for mapResIdx := 0 ; mapResIdx == 0 || isStreaming ; mapResIdx ++ {
watchdogTimer . Reset ( watchdogTimeout )
vlogf ( "netmap: starting size read after %v (poll %v)" , time . Since ( t0 ) . Round ( time . Millisecond ) , mapResIdx )
var siz [ 4 ] byte
if _ , err := io . ReadFull ( res . Body , siz [ : ] ) ; err != nil {
@ -1026,6 +1047,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
vlogf ( "netmap: decode error: %v" )
return err
}
watchdogTimer . Stop ( )
metricMapResponseMessages . Add ( 1 )
@ -1068,14 +1090,6 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
c . logf ( "netmap: [unexpected] new dial plan; nowhere to store it" )
}
}
select {
case sess . watchdogReset <- struct { } { } :
vlogf ( "netmap: sent timer reset" )
case <- ctx . Done ( ) :
c . logf ( "[v1] netmap: not resetting timer; context done: %v" , ctx . Err ( ) )
return ctx . Err ( )
}
if resp . KeepAlive {
metricMapResponseKeepAlives . Add ( 1 )
continue
@ -1102,7 +1116,7 @@ func (c *Direct) sendMapRequest(ctx context.Context, isStreaming bool, nu Netmap
return nil
}
func ( c * Direct ) handleDebugMessage ( ctx context . Context , debug * tailcfg . Debug , watchdogReset chan <- struct { } ) error {
func ( c * Direct ) handleDebugMessage ( ctx context . Context , debug * tailcfg . Debug ) error {
if code := debug . Exit ; code != nil {
c . logf ( "exiting process with status %v per controlplane" , * code )
os . Exit ( * code )
@ -1112,7 +1126,7 @@ func (c *Direct) handleDebugMessage(ctx context.Context, debug *tailcfg.Debug, w
envknob . SetNoLogsNoSupport ( )
}
if sleep := time . Duration ( debug . SleepSeconds * float64 ( time . Second ) ) ; sleep > 0 {
if err := sleepAsRequested ( ctx , c . logf , watchdogReset, sleep, c . clock ) ; err != nil {
if err := sleepAsRequested ( ctx , c . logf , sleep, c . clock ) ; err != nil {
return err
}
}
@ -1444,7 +1458,7 @@ func answerC2NPing(logf logger.Logf, c2nHandler http.Handler, c *http.Client, pr
// that the client sleep. The complication is that while we're sleeping (if for
// a long time), we need to periodically reset the watchdog timer before it
// expires.
func sleepAsRequested ( ctx context . Context , logf logger . Logf , watchdogReset chan <- struct { } , d time . Duration , clock tstime . Clock ) error {
func sleepAsRequested ( ctx context . Context , logf logger . Logf , d time . Duration , clock tstime . Clock ) error {
const maxSleep = 5 * time . Minute
if d > maxSleep {
logf ( "sleeping for %v, capped from server-requested %v ..." , maxSleep , d )
@ -1453,25 +1467,13 @@ func sleepAsRequested(ctx context.Context, logf logger.Logf, watchdogReset chan<
logf ( "sleeping for server-requested %v ..." , d )
}
ticker , tickerChannel := clock . NewTicker ( watchdogTimeout / 2 )
defer ticker . Stop ( )
timer , timerChannel := clock . NewTimer ( d )
defer timer . Stop ( )
for {
select {
case <- ctx . Done ( ) :
return ctx . Err ( )
case <- timerChannel :
return nil
case <- tickerChannel :
select {
case watchdogReset <- struct { } { } :
case <- timerChannel :
return nil
case <- ctx . Done ( ) :
return ctx . Err ( )
}
}
}
}