diff --git a/cmd/stunstamp/stunstamp.go b/cmd/stunstamp/stunstamp.go index 3390a9c13..5c25fc898 100644 --- a/cmd/stunstamp/stunstamp.go +++ b/cmd/stunstamp/stunstamp.go @@ -94,27 +94,36 @@ type result struct { rtt *time.Duration // nil signifies failure, e.g. timeout } -func measureRTT(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (resp []byte, rtt time.Duration, err error) { +func measureRTT(conn io.ReadWriteCloser, dst *net.UDPAddr) (rtt time.Duration, err error) { uconn, ok := conn.(*net.UDPConn) if !ok { - return nil, 0, fmt.Errorf("unexpected conn type: %T", conn) + return 0, fmt.Errorf("unexpected conn type: %T", conn) } err = uconn.SetReadDeadline(time.Now().Add(time.Second * 2)) if err != nil { - return nil, 0, fmt.Errorf("error setting read deadline: %w", err) + return 0, fmt.Errorf("error setting read deadline: %w", err) } + txID := stun.NewTxID() + req := stun.Request(txID) txAt := time.Now() _, err = uconn.WriteToUDP(req, dst) if err != nil { - return nil, 0, fmt.Errorf("error writing to udp socket: %w", err) + return 0, fmt.Errorf("error writing to udp socket: %w", err) } b := make([]byte, 1460) - n, err := uconn.Read(b) - rxAt := time.Now() - if err != nil { - return nil, 0, fmt.Errorf("error reading from udp socket: %w", err) + for { + n, err := uconn.Read(b) + rxAt := time.Now() + if err != nil { + return 0, fmt.Errorf("error reading from udp socket: %w", err) + } + gotTxID, _, err := stun.ParseResponse(b[:n]) + if err != nil || gotTxID != txID { + continue + } + return rxAt.Sub(txAt), nil } - return b[:n], rxAt.Sub(txAt), nil + } func isTemporaryOrTimeoutErr(err error) bool { @@ -134,7 +143,7 @@ type nodeMeta struct { addr netip.Addr } -type measureFn func(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (resp []byte, rtt time.Duration, err error) +type measureFn func(conn io.ReadWriteCloser, dst *net.UDPAddr) (rtt time.Duration, err error) func probe(meta nodeMeta, conn io.ReadWriteCloser, fn measureFn) (*time.Duration, error) { ua := &net.UDPAddr{ @@ -142,25 +151,14 @@ func probe(meta nodeMeta, conn io.ReadWriteCloser, fn measureFn) (*time.Duration Port: 3478, } - var ( - resp []byte - rtt time.Duration - ) - txID := stun.NewTxID() - req := stun.Request(txID) time.Sleep(time.Millisecond * time.Duration(rand.Intn(200))) // jitter across tx - resp, rtt, err := fn(conn, ua, req) + rtt, err := fn(conn, ua) if err != nil { if isTemporaryOrTimeoutErr(err) { log.Printf("temp error measuring RTT to %s(%s): %v", meta.hostname, meta.addr, err) return nil, nil } } - _, _, err = stun.ParseResponse(resp) - if err != nil { - log.Printf("invalid stun response from %s: %v", meta.hostname, err) - return nil, nil - } return &rtt, nil } diff --git a/cmd/stunstamp/stunstamp_default.go b/cmd/stunstamp/stunstamp_default.go index 74f2ca4c9..2fb69dc68 100644 --- a/cmd/stunstamp/stunstamp_default.go +++ b/cmd/stunstamp/stunstamp_default.go @@ -16,8 +16,8 @@ func getConnKernelTimestamp() (io.ReadWriteCloser, error) { return nil, errors.New("unimplemented") } -func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (resp []byte, rtt time.Duration, err error) { - return nil, 0, errors.New("unimplemented") +func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr) (rtt time.Duration, err error) { + return 0, errors.New("unimplemented") } func supportsKernelTS() bool { diff --git a/cmd/stunstamp/stunstamp_linux.go b/cmd/stunstamp/stunstamp_linux.go index b9b78b07e..f21b0d2ef 100644 --- a/cmd/stunstamp/stunstamp_linux.go +++ b/cmd/stunstamp/stunstamp_linux.go @@ -15,6 +15,7 @@ import ( "github.com/mdlayher/socket" "golang.org/x/sys/unix" + "tailscale.com/net/stun" ) const ( @@ -55,10 +56,10 @@ func parseTimestampFromCmsgs(oob []byte) (time.Time, error) { return time.Time{}, errors.New("failed to parse timestamp from cmsgs") } -func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (resp []byte, rtt time.Duration, err error) { +func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr) (rtt time.Duration, err error) { sconn, ok := conn.(*socket.Conn) if !ok { - return nil, 0, fmt.Errorf("conn of unexpected type: %T", conn) + return 0, fmt.Errorf("conn of unexpected type: %T", conn) } var to unix.Sockaddr @@ -75,9 +76,12 @@ func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (re copy(to.(*unix.SockaddrInet6).Addr[:], dst.IP) } + txID := stun.NewTxID() + req := stun.Request(txID) + err = sconn.Sendto(context.Background(), req, 0, to) if err != nil { - return nil, 0, fmt.Errorf("sendto error: %v", err) // don't wrap + return 0, fmt.Errorf("sendto error: %v", err) // don't wrap } txCtx, txCancel := context.WithTimeout(context.Background(), time.Second*2) @@ -90,7 +94,7 @@ func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (re for { n, oobn, _, _, err := sconn.Recvmsg(txCtx, buf, oob, unix.MSG_ERRQUEUE) if err != nil { - return nil, 0, fmt.Errorf("recvmsg (MSG_ERRQUEUE) error: %v", err) // don't wrap + return 0, fmt.Errorf("recvmsg (MSG_ERRQUEUE) error: %v", err) // don't wrap } buf = buf[:n] @@ -101,24 +105,37 @@ func measureRTTKernel(conn io.ReadWriteCloser, dst *net.UDPAddr, req []byte) (re } txAt, err = parseTimestampFromCmsgs(oob[:oobn]) if err != nil { - return nil, 0, fmt.Errorf("failed to get tx timestamp: %v", err) // don't wrap + return 0, fmt.Errorf("failed to get tx timestamp: %v", err) // don't wrap } break } rxCtx, rxCancel := context.WithTimeout(context.Background(), time.Second*2) defer rxCancel() - n, oobn, _, _, err := sconn.Recvmsg(rxCtx, buf, oob, 0) - if err != nil { - return nil, 0, fmt.Errorf("recvmsg error: %w", err) // wrap for timeout-related error unwrapping - } - rxAt, err := parseTimestampFromCmsgs(oob[:oobn]) - if err != nil { - return nil, 0, fmt.Errorf("failed to get rx timestamp: %v", err) // don't wrap + for { + n, oobn, _, _, err := sconn.Recvmsg(rxCtx, buf, oob, 0) + if err != nil { + return 0, fmt.Errorf("recvmsg error: %w", err) // wrap for timeout-related error unwrapping + } + + gotTxID, _, err := stun.ParseResponse(buf[:n]) + if err != nil || gotTxID != txID { + // Spin until we find the txID we sent. We may end up reading + // extremely late arriving responses from previous intervals. As + // such, we can't be certain if we're parsing the "current" + // response, so spin for parse errors too. + continue + } + + rxAt, err := parseTimestampFromCmsgs(oob[:oobn]) + if err != nil { + return 0, fmt.Errorf("failed to get rx timestamp: %v", err) // don't wrap + } + + return rxAt.Sub(txAt), nil } - return buf[:n], rxAt.Sub(txAt), nil } func supportsKernelTS() bool {