ipn/ipnlocal: rate-limit diagnose logs in bugreport

We can log too quickly for logtail to catch up, even when we opt out of
log rate-limiting. When the user passes the --diagnose flag to
bugreport, we use a token bucket to control how many logs per second are
printed and sleep until we're able to write more.

Signed-off-by: Andrew Dunham <andrew@du.nham.ca>
Change-Id: If27672d66b621b589280bd0fe228de367ffcbd8f
pull/7133/head
Andrew Dunham 2 years ago
parent 3eae75b1b8
commit d1146dc701

@ -4617,8 +4617,13 @@ func (b *LocalBackend) handleQuad100Port80Conn(w http.ResponseWriter, r *http.Re
} }
func (b *LocalBackend) Doctor(ctx context.Context, logf logger.Logf) { func (b *LocalBackend) Doctor(ctx context.Context, logf logger.Logf) {
var checks []doctor.Check // We can write logs too fast for logtail to handle, even when
// opting-out of rate limits. Limit ourselves to at most one message
// per 20ms and a burst of 60 log lines, which should be fast enough to
// not block for too long but slow enough that we can upload all lines.
logf = logger.SlowLoggerWithClock(ctx, logf, 20*time.Millisecond, 60, time.Now)
var checks []doctor.Check
checks = append(checks, routetable.Check{}) checks = append(checks, routetable.Check{})
// TODO(andrew): more // TODO(andrew): more

@ -228,6 +228,53 @@ func RateLimitedFnWithClock(logf Logf, f time.Duration, burst int, maxCache int,
} }
} }
// SlowLoggerWithClock is a logger that applies rate limits similar to
// RateLimitedFnWithClock, but instead of dropping logs will sleep until they
// can be written. This should only be used for debug logs, and not in a hot path.
//
// The provided context, if canceled, will cause all logs to be dropped and
// prevent any sleeps.
func SlowLoggerWithClock(ctx context.Context, logf Logf, f time.Duration, burst int, timeNow func() time.Time) Logf {
var (
mu sync.Mutex
tb = newTokenBucket(f, burst, timeNow())
)
return func(format string, args ...any) {
if ctx.Err() != nil {
return
}
// Hold the mutex for the entire length of the check + log
// since our token bucket isn't concurrency-safe.
mu.Lock()
defer mu.Unlock()
tb.AdvanceTo(timeNow())
// If we can get a token, then do that and return.
if tb.Get() {
logf(format, args...)
return
}
// Otherwise, sleep for 2x the duration so that we don't
// immediately sleep again on the next call.
tmr := time.NewTimer(2 * f)
defer tmr.Stop()
select {
case curr := <-tmr.C:
tb.AdvanceTo(curr)
case <-ctx.Done():
return
}
if !tb.Get() {
log.Printf("[unexpected] error rate-limiting in SlowLoggerWithClock")
return
}
logf(format, args...)
}
}
// LogOnChange logs a given line only if line != lastLine, or if maxInterval has passed // LogOnChange logs a given line only if line != lastLine, or if maxInterval has passed
// since the last time this identical line was logged. // since the last time this identical line was logged.
func LogOnChange(logf Logf, maxInterval time.Duration, timeNow func() time.Time) Logf { func LogOnChange(logf Logf, maxInterval time.Duration, timeNow func() time.Time) Logf {

Loading…
Cancel
Save