From edbad6d274c4434475dcc3a4df235e54b3a3e906 Mon Sep 17 00:00:00 2001 From: James Tucker Date: Sun, 18 Feb 2024 21:12:36 -0800 Subject: [PATCH] cmd/derper: add user timeout and reduce TCP keepalive The derper sends an in-protocol keepalive every 60-65s, so frequent TCP keepalives are unnecessary. With this tuning, TCP keepalives should never occur for a DERP client connection, as the derper will send an L7 keepalive often enough to always reset the TCP keepalive timer. If, however, a connection does not receive an ACK promptly, it will now be shut down, which happens sooner than it would with a normal TCP keepalive tuning. This re-tuning reduces the frequency of network traffic from derp to client, reducing battery cost. Updates tailscale/corp#17587 Updates #3363 Signed-off-by: James Tucker --- cmd/derper/depaware.txt | 1 + cmd/derper/derper.go | 29 +++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index b130f917a..fb83b65f5 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -95,6 +95,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa tailscale.com/net/dnscache from tailscale.com/derp/derphttp tailscale.com/net/flowtrack from tailscale.com/net/packet+ 💣 tailscale.com/net/interfaces from tailscale.com/net/netmon+ + tailscale.com/net/ktimeout from tailscale.com/cmd/derper tailscale.com/net/netaddr from tailscale.com/ipn+ tailscale.com/net/netknob from tailscale.com/net/netns tailscale.com/net/netmon from tailscale.com/derp/derphttp+ diff --git a/cmd/derper/derper.go b/cmd/derper/derper.go index f126c5d6c..1e79b3dcc 100644 --- a/cmd/derper/derper.go +++ b/cmd/derper/derper.go @@ -32,6 +32,7 @@ import ( "tailscale.com/derp" "tailscale.com/derp/derphttp" "tailscale.com/metrics" + "tailscale.com/net/ktimeout" "tailscale.com/net/stunserver" "tailscale.com/tsweb" "tailscale.com/types/key" @@ -59,6 +60,11 @@ var ( acceptConnLimit = 
flag.Float64("accept-connection-limit", math.Inf(+1), "rate limit for accepting new connection") acceptConnBurst = flag.Int("accept-connection-burst", math.MaxInt, "burst limit for accepting new connection") + + // tcpKeepAlive is intentionally long, to reduce battery cost. There is an L7 keepalive on a higher frequency schedule. + tcpKeepAlive = flag.Duration("tcp-keepalive-time", 10*time.Minute, "TCP keepalive time") + // tcpUserTimeout is intentionally short, so that hung connections are cleaned up promptly. DERPs should be near their users. + tcpUserTimeout = flag.Duration("tcp-user-timeout", 15*time.Second, "TCP user timeout") ) var ( @@ -220,6 +226,15 @@ func main() { })) debug.Handle("traffic", "Traffic check", http.HandlerFunc(s.ServeDebugTraffic)) + // Longer lived DERP connections send an application layer keepalive. Note + // that if the TCP keepalive is hit, the user timeout takes precedence over + // the keepalive counter, so an unanswered probe takes effect promptly. This + // is less tolerant of high loss, but high loss is unexpected. + lc := net.ListenConfig{ + Control: ktimeout.UserTimeout(*tcpUserTimeout), + KeepAlive: *tcpKeepAlive, + } + quietLogger := log.New(logFilter{}, "", 0) httpsrv := &http.Server{ Addr: *addr, @@ -296,7 +311,12 @@ func main() { // duration exceeds server's WriteTimeout". 
WriteTimeout: 5 * time.Minute, } - err := port80srv.ListenAndServe() + ln, err := lc.Listen(context.Background(), "tcp", port80srv.Addr) + if err != nil { + log.Fatal(err) + } + defer ln.Close() + err = port80srv.Serve(ln) if err != nil { if err != http.ErrServerClosed { log.Fatal(err) @@ -307,7 +327,12 @@ func main() { err = rateLimitedListenAndServeTLS(httpsrv) } else { log.Printf("derper: serving on %s", *addr) - err = httpsrv.ListenAndServe() + var ln net.Listener + ln, err = lc.Listen(context.Background(), "tcp", httpsrv.Addr) + if err != nil { + log.Fatal(err) + } + err = httpsrv.Serve(ln) } if err != nil && err != http.ErrServerClosed { log.Fatalf("derper: %v", err)