From 19c3e6cc9e9c8371f2aea1c8a19fe1e455038b0b Mon Sep 17 00:00:00 2001 From: Avery Pennarun Date: Mon, 26 Apr 2021 01:08:05 -0400 Subject: [PATCH] types/logger: rate limited: more hysteresis, better messages. - Switch to our own simpler token bucket, since x/time/rate is missing necessary stuff (can't provide your own time func; can't check the current bucket contents) and it's overkill anyway. - Add tests that actually include advancing time. - Don't remove the rate limit on a message until there's enough room to print at least two more of them. When we do, we'll also print how many we dropped, as a contextual reminder that some were previously lost. (This is more like how the Linux kernel does it.) - Reformat the [RATE LIMITED] messages to be shorter, and to not corrupt original message. Instead, we print the message, then print its format string. - Use %q instead of \"%s\", for more accurate parsing later, if the format string contained quotes. Fixes #1772 Signed-off-by: Avery Pennarun --- cmd/tailscale/depaware.txt | 2 +- cmd/tailscaled/depaware.txt | 2 +- types/logger/logger.go | 88 ++++++++++++++++++++++--------------- types/logger/logger_test.go | 30 +++++++++++-- types/logger/tokenbucket.go | 63 ++++++++++++++++++++++++++ 5 files changed, 144 insertions(+), 41 deletions(-) create mode 100644 types/logger/tokenbucket.go diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index 329a2be51..1f97abce9 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -85,7 +85,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep golang.org/x/text/transform from golang.org/x/text/secure/bidirule+ golang.org/x/text/unicode/bidi from golang.org/x/net/idna+ golang.org/x/text/unicode/norm from golang.org/x/net/idna - golang.org/x/time/rate from tailscale.com/types/logger+ + golang.org/x/time/rate from tailscale.com/cmd/tailscale/cli+ bufio from compress/flate+ bytes from bufio+ compress/flate from compress/gzip+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 33b6d3db7..c1183bf57 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -188,7 +188,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de golang.org/x/text/transform from golang.org/x/text/secure/bidirule+ golang.org/x/text/unicode/bidi from golang.org/x/net/idna+ golang.org/x/text/unicode/norm from golang.org/x/net/idna - golang.org/x/time/rate from tailscale.com/types/logger+ + golang.org/x/time/rate from inet.af/netstack/tcpip/stack+ bufio from compress/flate+ bytes from bufio+ compress/flate from compress/gzip+ diff --git a/types/logger/logger.go b/types/logger/logger.go index dc79e8772..13809d31e 100644 --- a/types/logger/logger.go +++ b/types/logger/logger.go @@ -18,8 +18,6 @@ import ( "strings" "sync" "time" - - "golang.org/x/time/rate" ) // Logf is the basic Tailscale logger type: a printf-like func. @@ -57,9 +55,9 @@ func Discard(string, ...interface{}) {} // limitData is used to keep track of each format string's associated // rate-limiting data. type limitData struct { - lim *rate.Limiter // the token bucket associated with this string - msgBlocked bool // whether a "duplicate error" message has already been logged - ele *list.Element // list element used to access this string in the cache + bucket *tokenBucket // the token bucket associated with this string + nBlocked int // number of messages skipped + ele *list.Element // list element used to access this string in the cache } var disableRateLimit = os.Getenv("TS_DEBUG_LOG_RATE") == "all" @@ -71,31 +69,34 @@ var rateFree = []string{ "magicsock: CreateEndpoint:", } -// RateLimitedFn returns a rate-limiting Logf wrapping the given logf. -// Messages are allowed through at a maximum of one message every f (where f is a time.Duration), in -// bursts of up to burst messages at a time. Up to maxCache strings will be held at a time. +// RateLimitedFn is a wrapper for RateLimitedFnWithClock that includes the +// current time automatically. This is mainly for backward compatibility. func RateLimitedFn(logf Logf, f time.Duration, burst int, maxCache int) Logf { + return RateLimitedFnWithClock(logf, f, burst, maxCache, time.Now) +} + +// RateLimitedFnWithClock returns a rate-limiting Logf wrapping the given +// logf. Messages are allowed through at a maximum of one message every f +// (where f is a time.Duration), in bursts of up to burst messages at a +// time. Up to maxCache format strings will be tracked separately. +// timeNow is a function that returns the current time, used for calculating +// rate limits. +func RateLimitedFnWithClock(logf Logf, f time.Duration, burst int, maxCache int, timeNow func() time.Time) Logf { if disableRateLimit { return logf } - r := rate.Every(f) var ( mu sync.Mutex msgLim = make(map[string]*limitData) // keyed by logf format msgCache = list.New() // a rudimentary LRU that limits the size of the map ) - type verdict int - const ( - allow verdict = iota - warn - block - ) - - judge := func(format string) verdict { + return func(format string, args ...interface{}) { + // Shortcut for formats with no rate limit for _, sub := range rateFree { if strings.Contains(format, sub) { - return allow + logf(format, args...) + return } } @@ -106,8 +107,8 @@ func RateLimitedFn(logf Logf, f time.Duration, burst int, maxCache int) Logf { msgCache.MoveToFront(rl.ele) } else { rl = &limitData{ - lim: rate.NewLimiter(r, burst), - ele: msgCache.PushFront(format), + bucket: newTokenBucket(f, burst, timeNow()), + ele: msgCache.PushFront(format), } msgLim[format] = rl if msgCache.Len() > maxCache { @@ -115,24 +116,39 @@ func RateLimitedFn(logf Logf, f time.Duration, burst int, maxCache int) Logf { msgCache.Remove(msgCache.Back()) } } - if rl.lim.Allow() { - rl.msgBlocked = false - return allow - } - if !rl.msgBlocked { - rl.msgBlocked = true - return warn - } - return block - } - return func(format string, args ...interface{}) { - switch judge(format) { - case allow: + rl.bucket.AdvanceTo(timeNow()) + + // Make sure there's enough room for at least a few + // more logs before we unblock, so we don't alternate + // between blocking and unblocking. + if rl.nBlocked > 0 && rl.bucket.remaining >= 2 { + // Only print this if we dropped more than 1 + // message. Otherwise we'd *increase* the total + // number of log lines printed. + if rl.nBlocked > 1 { + logf("[RATELIMIT] format(%q) (%d dropped)", + format, rl.nBlocked-1) + } + rl.nBlocked = 0 + } + if rl.nBlocked == 0 && rl.bucket.Get() { logf(format, args...) - case warn: - // For the warning, log the specific format string - logf("[RATE LIMITED] format string \"%s\" (example: \"%s\")", format, strings.TrimSpace(fmt.Sprintf(format, args...))) + if rl.bucket.remaining == 0 { + // Enter "blocked" mode immediately after + // reaching the burst limit. We want to + // always accompany the format() message + // with an example of the format, which is + // effectively the same as printing the + // message anyway. But this way they can + // be on two separate lines and we don't + // corrupt the original message. + logf("[RATELIMIT] format(%q)", format) + rl.nBlocked = 1 + } + return + } else { + rl.nBlocked++ } } } diff --git a/types/logger/logger_test.go b/types/logger/logger_test.go index 15827e916..990e09617 100644 --- a/types/logger/logger_test.go +++ b/types/logger/logger_test.go @@ -44,16 +44,27 @@ func TestRateLimiter(t *testing.T) { "boring string with constant formatting (constant)", "templated format string no. 0", "boring string with constant formatting (constant)", + "[RATELIMIT] format(\"boring string with constant formatting %s\")", "templated format string no. 1", - "[RATE LIMITED] format string \"boring string with constant formatting %s\" (example: \"boring string with constant formatting (constant)\")", - "[RATE LIMITED] format string \"templated format string no. %d\" (example: \"templated format string no. 2\")", + "[RATELIMIT] format(\"templated format string no. %d\")", "Make sure this string makes it through the rest (that are blocked) 4", "4 shouldn't get filtered.", + "hello 1", + "hello 2", + "[RATELIMIT] format(\"hello %v\")", + "[RATELIMIT] format(\"hello %v\") (2 dropped)", + "hello 5", + "hello 6", + "[RATELIMIT] format(\"hello %v\")", + "hello 7", } + var now time.Time + nowf := func() time.Time { return now } + testsRun := 0 lgtest := logTester(want, t, &testsRun) - lg := RateLimitedFn(lgtest, 1*time.Minute, 2, 50) + lg := RateLimitedFnWithClock(lgtest, 1*time.Minute, 2, 50, nowf) var prefixed Logf for i := 0; i < 10; i++ { lg("boring string with constant formatting %s", "(constant)") @@ -64,6 +75,19 @@ func TestRateLimiter(t *testing.T) { prefixed(" shouldn't get filtered.") } } + + lg("hello %v", 1) + lg("hello %v", 2) // printed, but rate limit starts + lg("hello %v", 3) // rate limited (not printed) + now = now.Add(1 * time.Minute) + lg("hello %v", 4) // still limited (not printed) + now = now.Add(1 * time.Minute) + lg("hello %v", 5) // restriction lifted; prints drop count + message + + lg("hello %v", 6) // printed, but rate limit starts + now = now.Add(2 * time.Minute) + lg("hello %v", 7) // restriction lifted; no drop count needed + if testsRun < len(want) { t.Fatalf("Tests after %s weren't logged.", want[testsRun]) } diff --git a/types/logger/tokenbucket.go b/types/logger/tokenbucket.go new file mode 100644 index 000000000..293fe0863 --- /dev/null +++ b/types/logger/tokenbucket.go @@ -0,0 +1,63 @@ +// Copyright (c) 2021 Tailscale Inc & AUTHORS All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package logger + +import ( + "time" +) + +// tokenBucket is a simple token bucket style rate limiter. + +// It's similar in function to golang.org/x/time/rate.Limiter, which we +// can't use because: +// - It doesn't give access to the number of accumulated tokens, which we +// need for implementing hysteresis; +// - It doesn't let us provide our own time function, which we need for +// implementing proper unit tests. +// rate.Limiter is also much more complex than necessary, but that wouldn't +// be enough to disqualify it on its own. +// +// Unlike rate.Limiter, this token bucket does not attempt to +// do any locking of its own. Don't try to access it re-entrantly. +// That's fine inside this types/logger package because we already have +// locking at a higher level. +type tokenBucket struct { + remaining int + max int + tick time.Duration + t time.Time +} + +func newTokenBucket(tick time.Duration, max int, now time.Time) *tokenBucket { + return &tokenBucket{max, max, tick, now} +} + +func (tb *tokenBucket) Get() bool { + if tb.remaining > 0 { + tb.remaining-- + return true + } + return false +} + +func (tb *tokenBucket) Refund(n int) { + b := tb.remaining + n + if b > tb.max { + tb.remaining = tb.max + } else { + tb.remaining = b + } +} + +func (tb *tokenBucket) AdvanceTo(t time.Time) { + diff := t.Sub(tb.t) + + // only use up whole ticks. The remainder will be used up + // next time. + ticks := int(diff / tb.tick) + tb.t = tb.t.Add(time.Duration(ticks) * tb.tick) + + tb.Refund(ticks) +}