@ -7,6 +7,7 @@
package prober
import (
"cmp"
"container/ring"
"context"
"encoding/json"
@ -20,6 +21,7 @@ import (
"time"
"github.com/prometheus/client_golang/prometheus"
"tailscale.com/syncs"
"tailscale.com/tsweb"
)
@ -44,6 +46,14 @@ type ProbeClass struct {
// exposed by this probe class.
Labels Labels
// Timeout is the maximum time the probe function is allowed to run before
// its context is cancelled. Defaults to 80% of the scheduling interval.
Timeout time . Duration
// Concurrency is the maximum number of concurrent probe executions
// allowed for this probe class. Defaults to 1.
Concurrency int
// Metrics allows a probe class to export custom Metrics. Can be nil.
Metrics func ( prometheus . Labels ) [ ] prometheus . Metric
}
@ -131,9 +141,12 @@ func newProbe(p *Prober, name string, interval time.Duration, l prometheus.Label
cancel : cancel ,
stopped : make ( chan struct { } ) ,
runSema : syncs . NewSemaphore ( cmp . Or ( pc . Concurrency , 1 ) ) ,
name : name ,
probeClass : pc ,
interval : interval ,
timeout : cmp . Or ( pc . Timeout , time . Duration ( float64 ( interval ) * 0.8 ) ) ,
initialDelay : initialDelay ( name , interval ) ,
successHist : ring . New ( recentHistSize ) ,
latencyHist : ring . New ( recentHistSize ) ,
@ -226,11 +239,12 @@ type Probe struct {
ctx context . Context
cancel context . CancelFunc // run to initiate shutdown
stopped chan struct { } // closed when shutdown is complete
run Mu sync . Mutex // ensures only one probe runs at a time
run Sema syncs . Semaphore // restricts concurrency per probe
name string
probeClass ProbeClass
interval time . Duration
timeout time . Duration
initialDelay time . Duration
tick ticker
@ -282,17 +296,15 @@ func (p *Probe) loop() {
t := p . prober . newTicker ( p . initialDelay )
select {
case <- t . Chan ( ) :
p . run ( )
case <- p . ctx . Done ( ) :
t . Stop ( )
return
}
t . Stop ( )
} else {
p . run ( )
}
if p . prober . once {
p . run ( )
return
}
@ -315,9 +327,12 @@ func (p *Probe) loop() {
p . tick = p . prober . newTicker ( p . interval )
defer p . tick . Stop ( )
for {
// Run the probe in a new goroutine every tick. With the default settings
// (Concurrency of 1 and a Timeout of 80% of the interval), at most one
// execution of this probe is in flight at a time.
go p . run ( )
select {
case <- p . tick . Chan ( ) :
p . run ( )
case <- p . ctx . Done ( ) :
return
}
@ -331,8 +346,13 @@ func (p *Probe) loop() {
// that the probe either succeeds or fails before the next cycle is scheduled to
// start.
func ( p * Probe ) run ( ) ( pi ProbeInfo , err error ) {
p . runMu . Lock ( )
defer p . runMu . Unlock ( )
// Probes are scheduled each p.interval, so we don't wait longer than that.
semaCtx , cancel := context . WithTimeout ( p . ctx , p . interval )
defer cancel ( )
if ! p . runSema . AcquireContext ( semaCtx ) {
return pi , fmt . Errorf ( "probe %s: context cancelled" , p . name )
}
defer p . runSema . Release ( )
p . recordStart ( )
defer func ( ) {
@ -344,19 +364,21 @@ func (p *Probe) run() (pi ProbeInfo, err error) {
if r := recover ( ) ; r != nil {
log . Printf ( "probe %s panicked: %v" , p . name , r )
err = fmt . Errorf ( "panic: %v" , r )
p . recordEnd ( err )
p . recordEnd Locked ( err )
}
} ( )
ctx := p . ctx
if ! p . IsContinuous ( ) {
timeout := time . Duration ( float64 ( p . interval ) * 0.8 )
var cancel func ( )
ctx , cancel = context . WithTimeout ( ctx , timeout)
ctx , cancel = context . WithTimeout ( ctx , p. timeout)
defer cancel ( )
}
err = p . probeClass . Probe ( ctx )
p . recordEnd ( err )
p . mu . Lock ( )
defer p . mu . Unlock ( )
p . recordEndLocked ( err )
if err != nil {
log . Printf ( "probe %s: %v" , p . name , err )
}
@ -370,10 +392,8 @@ func (p *Probe) recordStart() {
p . mu . Unlock ( )
}
func ( p * Probe ) recordEnd ( err error ) {
func ( p * Probe ) recordEnd Locked ( err error ) {
end := p . prober . now ( )
p . mu . Lock ( )
defer p . mu . Unlock ( )
p . end = end
p . succeeded = err == nil
p . lastErr = err