From 640b2fa3aebc6abf5ba4efb01f053b290886991c Mon Sep 17 00:00:00 2001 From: Andrew Dunham Date: Wed, 12 Mar 2025 17:04:57 -0400 Subject: [PATCH] net/netmon, wgengine/magicsock: be quieter with portmapper logs This adds a new helper to the netmon package that allows us to rate-limit log messages, so that they only print once per (major) LinkChange event. We then use this when constructing the portmapper, so that we don't keep spamming logs forever on the same network. Updates #13145 Signed-off-by: Andrew Dunham Change-Id: I6e7162509148abea674f96efd76be9dffb373ae4 --- net/netmon/loghelper.go | 42 ++++++++++++++++++ net/netmon/loghelper_test.go | 78 +++++++++++++++++++++++++++++++++ wgengine/magicsock/magicsock.go | 12 ++++- 3 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 net/netmon/loghelper.go create mode 100644 net/netmon/loghelper_test.go diff --git a/net/netmon/loghelper.go b/net/netmon/loghelper.go new file mode 100644 index 000000000..824faeef0 --- /dev/null +++ b/net/netmon/loghelper.go @@ -0,0 +1,42 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package netmon + +import ( + "sync" + + "tailscale.com/types/logger" +) + +// LinkChangeLogLimiter returns a new [logger.Logf] that logs each unique +// format string to the underlying logger only once per major LinkChange event. +// +// The returned function should be called when the logger is no longer needed, +// to release resources from the Monitor. +func LinkChangeLogLimiter(logf logger.Logf, nm *Monitor) (_ logger.Logf, unregister func()) { + var formatSeen sync.Map // map[string]bool + unregister = nm.RegisterChangeCallback(func(cd *ChangeDelta) { + // If we're in a major change or a time jump, clear the seen map. + if cd.Major || cd.TimeJumped { + formatSeen.Clear() + } + }) + + return func(format string, args ...any) { + // We only store 'true' in the map, so if it's present then it + // means we've already logged this format string. + _, loaded := formatSeen.LoadOrStore(format, true) + if loaded { + // TODO(andrew-d): we may still want to log this + // message every N minutes (1x/hour?) even if it's been + // seen, so that debugging doesn't require searching + // back in the logs for an unbounded amount of time. + // + // See: https://github.com/tailscale/tailscale/issues/13145 + return + } + + logf(format, args...) + }, unregister +} diff --git a/net/netmon/loghelper_test.go b/net/netmon/loghelper_test.go new file mode 100644 index 000000000..31777f4bc --- /dev/null +++ b/net/netmon/loghelper_test.go @@ -0,0 +1,78 @@ +// Copyright (c) Tailscale Inc & AUTHORS +// SPDX-License-Identifier: BSD-3-Clause + +package netmon + +import ( + "bytes" + "fmt" + "testing" +) + +func TestLinkChangeLogLimiter(t *testing.T) { + mon, err := New(t.Logf) + if err != nil { + t.Fatal(err) + } + defer mon.Close() + + var logBuffer bytes.Buffer + logf := func(format string, args ...any) { + t.Logf("captured log: "+format, args...) + + if format[len(format)-1] != '\n' { + format += "\n" + } + fmt.Fprintf(&logBuffer, format, args...) + } + + logf, unregister := LinkChangeLogLimiter(logf, mon) + defer unregister() + + // Log once, which should write to our log buffer. + logf("hello %s", "world") + if got := logBuffer.String(); got != "hello world\n" { + t.Errorf("unexpected log buffer contents: %q", got) + } + + // Log again, which should not write to our log buffer. + logf("hello %s", "andrew") + if got := logBuffer.String(); got != "hello world\n" { + t.Errorf("unexpected log buffer contents: %q", got) + } + + // Log a different message, which should write to our log buffer. + logf("other message") + if got := logBuffer.String(); got != "hello world\nother message\n" { + t.Errorf("unexpected log buffer contents: %q", got) + } + + // Synthesize a fake major change event, which should clear the format + // string cache and allow the next log to write to our log buffer. + // + // InjectEvent doesn't work because it's not a major event, so we + // instead reach into the netmon and grab the callback, and then call + // it ourselves. + mon.mu.Lock() + var cb func(*ChangeDelta) + for _, c := range mon.cbs { + cb = c + break + } + mon.mu.Unlock() + + cb(&ChangeDelta{Major: true}) + + logf("hello %s", "world") + if got := logBuffer.String(); got != "hello world\nother message\nhello world\n" { + t.Errorf("unexpected log buffer contents: %q", got) + } + + // Unregistering the callback should clear our 'cbs' set. + unregister() + mon.mu.Lock() + if len(mon.cbs) != 0 { + t.Errorf("expected no callbacks, got %v", mon.cbs) + } + mon.mu.Unlock() +} diff --git a/wgengine/magicsock/magicsock.go b/wgengine/magicsock/magicsock.go index acf7114e1..e8e966582 100644 --- a/wgengine/magicsock/magicsock.go +++ b/wgengine/magicsock/magicsock.go @@ -177,6 +177,10 @@ type Conn struct { // port mappings from NAT devices. portMapper *portmapper.Client + // portMapperLogfUnregister is the function to call to unregister + // the portmapper log limiter. + portMapperLogfUnregister func() + // derpRecvCh is used by receiveDERP to read DERP messages. // It must have buffer size > 0; see issue 3736. derpRecvCh chan derpReadResult @@ -532,10 +536,15 @@ func NewConn(opts Options) (*Conn, error) { c.idleFunc = opts.IdleFunc c.testOnlyPacketListener = opts.TestOnlyPacketListener c.noteRecvActivity = opts.NoteRecvActivity + + // Don't log the same log messages possibly every few seconds in our + // portmapper. + portmapperLogf := logger.WithPrefix(c.logf, "portmapper: ") + portmapperLogf, c.portMapperLogfUnregister = netmon.LinkChangeLogLimiter(portmapperLogf, opts.NetMon) portMapOpts := &portmapper.DebugKnobs{ DisableAll: func() bool { return opts.DisablePortMapper || c.onlyTCP443.Load() }, } - c.portMapper = portmapper.NewClient(logger.WithPrefix(c.logf, "portmapper: "), opts.NetMon, portMapOpts, opts.ControlKnobs, c.onPortMapChanged) + c.portMapper = portmapper.NewClient(portmapperLogf, opts.NetMon, portMapOpts, opts.ControlKnobs, c.onPortMapChanged) c.portMapper.SetGatewayLookupFunc(opts.NetMon.GatewayAndSelfIP) c.netMon = opts.NetMon c.health = opts.HealthTracker @@ -2481,6 +2490,7 @@ func (c *Conn) Close() error { } c.stopPeriodicReSTUNTimerLocked() c.portMapper.Close() + c.portMapperLogfUnregister() c.peerMap.forEachEndpoint(func(ep *endpoint) { ep.stopAndReset()