@@ -9,7 +9,6 @@ import (
 	"context"
 	"crypto/rand"
 	"encoding/binary"
-	"encoding/json"
 	"fmt"
 	"io"
 	"log"
@@ -19,11 +18,13 @@ import (
 	"os"
 	"regexp"
 	"runtime"
+	"slices"
 	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"
 
+	"github.com/go-json-experiment/json/jsontext"
 	"tailscale.com/envknob"
 	"tailscale.com/net/netmon"
 	"tailscale.com/net/sockstats"
@@ -32,9 +33,26 @@ import (
 	tslogger "tailscale.com/types/logger"
 	"tailscale.com/types/logid"
 	"tailscale.com/util/set"
+	"tailscale.com/util/truncate"
 	"tailscale.com/util/zstdframe"
 )
 
+// maxSize is the maximum size that a single log entry can be.
+// It is also the maximum body size that may be uploaded at a time.
+const maxSize = 256 << 10
+
+// maxTextSize is the maximum size for a text log message.
+// Note that JSON log messages can be as large as maxSize.
+const maxTextSize = 16 << 10
+
+// lowMemRatio reduces maxSize and maxTextSize by this ratio in lowMem mode.
+const lowMemRatio = 4
+
+// bufferSize is the typical buffer size to retain.
+// It is large enough to handle most log messages,
+// but not too large to be a notable waste of memory if retained forever.
+const bufferSize = 4 << 10
+
 // DefaultHost is the default host name to upload logs to when
 // Config.BaseURL isn't provided.
 const DefaultHost = "log.tailscale.io"
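
Note (reviewer sketch, not part of the patch): these four constants form a two-tier budget. maxSize caps both a single entry and an upload body, maxTextSize caps only plain-text messages, and lowMemRatio shrinks both caps on constrained devices. A minimal illustration, with a hypothetical `limits` helper; the real logic is inline in drainPending and appendText:

```go
package main

import "fmt"

const (
	maxSize     = 256 << 10 // per-entry and per-upload-body cap
	maxTextSize = 16 << 10  // plain-text message cap
	lowMemRatio = 4         // divisor applied in lowMem mode
)

// limits derives the effective caps; illustrative only.
func limits(lowMem bool) (batch, text int) {
	batch, text = maxSize, maxTextSize
	if lowMem {
		// Prefer several small uploads over one large allocation
		// that could OOM a memory-constrained device.
		batch /= lowMemRatio
		text /= lowMemRatio
	}
	return batch, text
}

func main() {
	b, t := limits(true)
	fmt.Println(b, t) // 65536 4096
}
```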
@@ -175,7 +193,7 @@ type Logger struct {
 	netMonitor   *netmon.Monitor
 	buffer       Buffer
 	drainWake    chan struct{}        // signal to speed up drain
-	drainBuf     bytes.Buffer         // owned by drainPending for reuse
+	drainBuf     []byte               // owned by drainPending for reuse
 	flushDelayFn func() time.Duration // negative or zero return value to upload aggressively, or >0 to batch at this delay
 	flushPending atomic.Bool
 	sentinel     chan int32
@@ -194,6 +212,8 @@ type Logger struct {
 	writeLock    sync.Mutex             // guards procSequence, flushTimer, buffer.Write calls
 	procSequence uint64
 	flushTimer   tstime.TimerController // used when flushDelay is >0
+	writeBuf     [bufferSize]byte       // owned by Write for reuse
+	jsonDec      jsontext.Decoder       // owned by appendTextOrJSONLocked for reuse
 
 	shutdownStartMu sync.Mutex    // guards the closing of shutdownStart
 	shutdownStart   chan struct{} // closed when shutdown begins
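
The new scratch fields follow the append-into-caller-slice convention used throughout this patch: each encoder takes dst, appends, and returns the grown slice, so the Logger can own fixed buffers that are reused under writeLock. A rough self-contained sketch of the pattern (`encoder` and `appendEntry` are illustrative names, not from the patch):

```go
package main

import (
	"fmt"
	"strconv"
)

const bufferSize = 4 << 10

type encoder struct {
	writeBuf [bufferSize]byte // reused across calls, like Logger.writeBuf
}

// appendEntry appends a JSON log entry to dst and returns the extended
// slice; passing e.writeBuf[:0] means steady-state calls never allocate.
func (e *encoder) appendEntry(dst []byte, msg string) []byte {
	dst = append(dst, `{"text":`...)
	dst = strconv.AppendQuote(dst, msg)
	return append(dst, "}\n"...)
}

func main() {
	var e encoder
	b := e.appendEntry(e.writeBuf[:0], "hello")
	fmt.Print(string(b)) // {"text":"hello"}
}
```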
@@ -290,42 +310,66 @@ func (l *Logger) drainBlock() (shuttingDown bool) {
 
 // drainPending drains and encodes a batch of logs from the buffer for upload.
 // If no logs are available, drainPending blocks until logs are available.
-func (l *Logger) drainPending() (res []byte) {
-	buf := &l.drainBuf
-	buf.Reset()
-	buf.WriteByte('[')
-	entries := 0
-
-	var batchDone bool
-	const maxLen = 256 << 10
-	for buf.Len() < maxLen && !batchDone {
-		b, err := l.buffer.TryReadLine()
-		if err == io.EOF {
-			break
-		} else if err != nil {
-			b = fmt.Appendf(nil, "reading ringbuffer: %v", err)
-			batchDone = true
-		} else if b == nil {
-			if entries > 0 {
-				break
-			}
+// The returned buffer is only valid until the next call to drainPending.
+func (l *Logger) drainPending() (b []byte) {
+	b = l.drainBuf[:0]
+	b = append(b, '[')
+	defer func() {
+		b = bytes.TrimRight(b, ",")
+		b = append(b, ']')
+		l.drainBuf = b
+		if len(b) <= len("[]") {
+			b = nil
+		}
+	}()
+
+	maxLen := maxSize
+	if l.lowMem {
+		// When operating in a low memory environment, it is better to upload
+		// in multiple operations than it is to allocate a large body and OOM.
+		// Even if maxLen is less than maxSize, we can still upload an entry
+		// that is up to maxSize if we happen to encounter one.
+		maxLen /= lowMemRatio
+	}
+	for len(b) < maxLen {
+		line, err := l.buffer.TryReadLine()
+		switch {
+		case err == io.EOF:
+			return b
+		case err != nil:
+			b = append(b, '{')
+			b = l.appendMetadata(b, false, true, 0, 0, "reading ringbuffer: "+err.Error(), nil, 0)
+			b = bytes.TrimRight(b, ",")
+			b = append(b, '}')
+			return b
+		case line == nil:
+			// If we read at least some log entries, return immediately.
+			if len(b) > len("[") {
+				return b
+			}
 
 			// We're about to block. If we're holding on to too much memory
 			// in our buffer from a previous large write, let it go.
-			if buf.Available() > 4<<10 {
-				cur := buf.Bytes()
-				l.drainBuf = bytes.Buffer{}
-				buf.Write(cur)
+			if cap(b) > bufferSize {
+				b = bytes.Clone(b)
+				l.drainBuf = b
 			}
 
-			batchDone = l.drainBlock()
+			if shuttingDown := l.drainBlock(); shuttingDown {
+				return b
+			}
 			continue
 		}
 
-		if len(b) == 0 {
+		switch {
+		case len(line) == 0:
 			continue
-		}
-		if b[0] != '{' || !json.Valid(b) {
+		case line[0] == '{' && jsontext.Value(line).IsValid():
+			// This is already a valid JSON object, so just append it.
+			// This may exceed maxLen, but should be no larger than maxSize
+			// so long as logic writing into the buffer enforces the limit.
+			b = append(b, line...)
+		default:
 			// This is probably a log added to stderr by filch
 			// outside of the logtail logger. Encode it.
 			if !l.explainedRaw {
@@ -336,24 +380,14 @@ func (l *Logger) drainPending() (res []byte) {
 				l.explainedRaw = true
 			}
 			fmt.Fprintf(l.stderr, "RAW-STDERR: %s", b)
-			// Do not add a client time, as it could have been
-			// been written a long time ago. Don't include instance key or ID
-			// either, since this came from a different instance.
-			b = l.encodeText(b, true, 0, 0, 0)
+			// Do not add a client time, as it could be really old.
+			// Do not include instance key or ID either,
+			// since this came from a different instance.
+			b = l.appendText(b, line, true, 0, 0, 0)
 		}
-
-		if entries > 0 {
-			buf.WriteByte(',')
-		}
-		buf.Write(b)
-		entries++
+		b = append(b, ',')
 	}
-
-	buf.WriteByte(']')
-	if buf.Len() <= len("[]") {
-		return nil
-	}
-	return buf.Bytes()
+	return b
 }
 
 // This is the goroutine that repeatedly uploads logs in the background.
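
Reviewer sketch of the framing invariant the new deferred closure maintains: every drained entry is comma-terminated as it is appended, the trailing comma is trimmed once on exit, and a bare "[]" batch is reported as nil so the uploader skips it. A standalone approximation:

```go
package main

import (
	"bytes"
	"fmt"
)

// frame mimics drainPending's deferred cleanup: trim the trailing comma,
// close the JSON array, and treat an empty batch as nil.
func frame(entries [][]byte) []byte {
	b := []byte{'['}
	for _, e := range entries {
		b = append(b, e...)
		b = append(b, ',') // every entry is comma-terminated
	}
	b = bytes.TrimRight(b, ",")
	b = append(b, ']')
	if len(b) <= len("[]") {
		return nil // nothing to upload
	}
	return b
}

func main() {
	fmt.Println(string(frame([][]byte{[]byte(`{"a":1}`), []byte(`{"b":2}`)})))
	fmt.Println(frame(nil) == nil) // true
}
```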
@@ -573,169 +607,211 @@ func (l *Logger) sendLocked(jsonBlob []byte) (int, error) {
 	return n, err
 }
 
-// TODO: instead of allocating, this should probably just append
-// directly into the output log buffer.
-func (l *Logger) encodeText(buf []byte, skipClientTime bool, procID uint32, procSequence uint64, level int) []byte {
-	now := l.clock.Now()
-
-	// Factor in JSON encoding overhead to try to only do one alloc
-	// in the make below (so appends don't resize the buffer).
-	overhead := len(`{"text": ""}\n`)
-	includeLogtail := !skipClientTime || procID != 0 || procSequence != 0
-
-	if includeLogtail {
-		overhead += len(`"logtail": {},`)
-	}
-	if !skipClientTime {
-		overhead += len(`"client_time": "2006-01-02T15:04:05.999999999Z07:00",`)
-	}
-	if procID != 0 {
-		overhead += len(`"proc_id": 4294967296,`)
-	}
-	if procSequence != 0 {
-		overhead += len(`"proc_seq": 9007199254740992,`)
-	}
-	// TODO: do a pass over buf and count how many backslashes will be needed?
-	// For now just factor in a dozen.
-	overhead += 12
-
-	// Put a sanity cap on buf's size.
-	max := 16 << 10
-	if l.lowMem {
-		max = 4 << 10
-	}
-	var nTruncated int
-	if len(buf) > max {
-		nTruncated = len(buf) - max
-		// TODO: this can break a UTF-8 character
-		// mid-encoding. We don't tend to log
-		// non-ASCII stuff ourselves, but e.g. client
-		// names might be.
-		buf = buf[:max]
-	}
-
-	b := make([]byte, 0, len(buf)+overhead)
-	b = append(b, '{')
-
-	if includeLogtail {
-		b = append(b, `"logtail": {`...)
-		if !skipClientTime {
-			b = append(b, `"client_time": "`...)
-			b = now.UTC().AppendFormat(b, time.RFC3339Nano)
-			b = append(b, `",`...)
-		}
-		if procID != 0 {
-			b = append(b, `"proc_id": `...)
-			b = strconv.AppendUint(b, uint64(procID), 10)
-			b = append(b, ',')
-		}
-		if procSequence != 0 {
-			b = append(b, `"proc_seq": `...)
-			b = strconv.AppendUint(b, procSequence, 10)
-			b = append(b, ',')
-		}
-		b = bytes.TrimRight(b, ",")
-		b = append(b, "}, "...)
-	}
-
-	if l.metricsDelta != nil {
-		if d := l.metricsDelta(); d != "" {
-			b = append(b, `"metrics": "`...)
-			b = append(b, d...)
-			b = append(b, `",`...)
-		}
-	}
-
-	// Add the log level, if non-zero. Note that we only use log
-	// levels 1 and 2 currently. It's unlikely we'll ever make it
-	// past 9.
-	if level > 0 && level < 10 {
-		b = append(b, `"v":`...)
-		b = append(b, '0'+byte(level))
-		b = append(b, ',')
-	}
-	b = append(b, "\"text\": \""...)
-	for _, c := range buf {
-		switch c {
-		case '\b':
-			b = append(b, '\\', 'b')
-		case '\f':
-			b = append(b, '\\', 'f')
-		case '\n':
-			b = append(b, '\\', 'n')
-		case '\r':
-			b = append(b, '\\', 'r')
-		case '\t':
-			b = append(b, '\\', 't')
-		case '"':
-			b = append(b, '\\', '"')
-		case '\\':
-			b = append(b, '\\', '\\')
-		default:
-			// TODO: what about binary gibberish or non UTF-8?
-			b = append(b, c)
-		}
-	}
-	if nTruncated > 0 {
-		b = append(b, "…+"...)
-		b = strconv.AppendInt(b, int64(nTruncated), 10)
-	}
-	b = append(b, "\"}\n"...)
-	return b
-}
-
-func (l *Logger) encodeLocked(buf []byte, level int) []byte {
-	if l.includeProcSequence {
-		l.procSequence++
-	}
-	if buf[0] != '{' {
-		return l.encodeText(buf, l.skipClientTime, l.procID, l.procSequence, level) // text fast-path
-	}
-
-	now := l.clock.Now()
-
-	obj := make(map[string]any)
-	if err := json.Unmarshal(buf, &obj); err != nil {
-		for k := range obj {
-			delete(obj, k)
-		}
-		obj["text"] = string(buf)
-	}
-	if txt, isStr := obj["text"].(string); l.lowMem && isStr && len(txt) > 254 {
-		// TODO(crawshaw): trim to unicode code point
-		obj["text"] = txt[:254] + "…"
-	}
-
-	hasLogtail := obj["logtail"] != nil
-	if hasLogtail {
-		obj["error_has_logtail"] = obj["logtail"]
-		obj["logtail"] = nil
-	}
-
-	if !l.skipClientTime || l.procID != 0 || l.procSequence != 0 {
-		logtail := map[string]any{}
-		if !l.skipClientTime {
-			logtail["client_time"] = now.UTC().Format(time.RFC3339Nano)
-		}
-		if l.procID != 0 {
-			logtail["proc_id"] = l.procID
-		}
-		if l.procSequence != 0 {
-			logtail["proc_seq"] = l.procSequence
-		}
-		obj["logtail"] = logtail
-	}
-
-	if level > 0 {
-		obj["v"] = level
-	}
-
-	b, err := json.Marshal(obj)
-	if err != nil {
-		fmt.Fprintf(l.stderr, "logtail: re-encoding JSON failed: %v\n", err)
-		// I know of no conditions under which this could fail.
-		// Report it very loudly.
-		panic("logtail: re-encoding JSON failed: " + err.Error())
-	}
-	b = append(b, '\n')
-	return b
-}
+// appendMetadata appends optional "logtail", "metrics", and "v" JSON members.
+// This assumes dst is already within a JSON object.
+// Each member is comma-terminated.
+func (l *Logger) appendMetadata(dst []byte, skipClientTime, skipMetrics bool, procID uint32, procSequence uint64, errDetail string, errData jsontext.Value, level int) []byte {
+	// Append optional logtail metadata.
+	if !skipClientTime || procID != 0 || procSequence != 0 || errDetail != "" || errData != nil {
+		dst = append(dst, `"logtail":{`...)
+		if !skipClientTime {
+			dst = append(dst, `"client_time":"`...)
+			dst = l.clock.Now().UTC().AppendFormat(dst, time.RFC3339Nano)
+			dst = append(dst, '"', ',')
+		}
+		if procID != 0 {
+			dst = append(dst, `"proc_id":`...)
+			dst = strconv.AppendUint(dst, uint64(procID), 10)
+			dst = append(dst, ',')
+		}
+		if procSequence != 0 {
+			dst = append(dst, `"proc_seq":`...)
+			dst = strconv.AppendUint(dst, procSequence, 10)
+			dst = append(dst, ',')
+		}
+		if errDetail != "" || errData != nil {
+			dst = append(dst, `"error":{`...)
+			if errDetail != "" {
+				dst = append(dst, `"detail":`...)
+				dst, _ = jsontext.AppendQuote(dst, errDetail)
+				dst = append(dst, ',')
+			}
+			if errData != nil {
+				dst = append(dst, `"bad_data":`...)
+				dst = append(dst, errData...)
+				dst = append(dst, ',')
+			}
+			dst = bytes.TrimRight(dst, ",")
+			dst = append(dst, '}', ',')
+		}
+		dst = bytes.TrimRight(dst, ",")
+		dst = append(dst, '}', ',')
+	}
+
+	// Append optional metrics metadata.
+	if !skipMetrics && l.metricsDelta != nil {
+		if d := l.metricsDelta(); d != "" {
+			dst = append(dst, `"metrics":"`...)
+			dst = append(dst, d...)
+			dst = append(dst, '"', ',')
+		}
+	}
+
+	// Add the optional log level, if non-zero.
+	// Note that we only use log levels 1 and 2 currently.
+	// It's unlikely we'll ever make it past 9.
+	if level > 0 && level < 10 {
+		dst = append(dst, `"v":`...)
+		dst = append(dst, '0'+byte(level))
+		dst = append(dst, ',')
+	}
+
+	return dst
+}
+
+// appendText appends a raw text message in the Tailscale JSON log entry format.
+func (l *Logger) appendText(dst, src []byte, skipClientTime bool, procID uint32, procSequence uint64, level int) []byte {
+	dst = slices.Grow(dst, len(src))
+	dst = append(dst, '{')
+	dst = l.appendMetadata(dst, skipClientTime, false, procID, procSequence, "", nil, level)
+	if len(src) == 0 {
+		dst = bytes.TrimRight(dst, ",")
+		return append(dst, "}\n"...)
+	}
+
+	// Append the text string, which may be truncated.
+	// Invalid UTF-8 will be mangled with the Unicode replacement character.
+	max := maxTextSize
+	if l.lowMem {
+		max /= lowMemRatio
+	}
+	dst = append(dst, `"text":`...)
+	dst = appendTruncatedString(dst, src, max)
+	return append(dst, "}\n"...)
+}
+
+// appendTruncatedString appends a JSON string for src,
+// truncating the src to be no larger than n.
+func appendTruncatedString(dst, src []byte, n int) []byte {
+	srcLen := len(src)
+	src = truncate.String(src, n)
+	dst, _ = jsontext.AppendQuote(dst, src) // ignore error; only occurs for invalid UTF-8
+	if srcLen > len(src) {
+		dst = dst[:len(dst)-len(`"`)] // trim off preceding double-quote
+		dst = append(dst, "…+"...)
+		dst = strconv.AppendInt(dst, int64(srcLen-len(src)), 10)
+		dst = append(dst, '"') // re-append succeeding double-quote
+	}
+	return dst
+}
+
+func (l *Logger) AppendTextOrJSONLocked(dst, src []byte) []byte {
+	l.clock = tstime.StdClock{}
+	return l.appendTextOrJSONLocked(dst, src, 0)
+}
+
+// appendTextOrJSONLocked appends a raw text message or a raw JSON object
+// in the Tailscale JSON log format.
+func (l *Logger) appendTextOrJSONLocked(dst, src []byte, level int) []byte {
+	if l.includeProcSequence {
+		l.procSequence++
+	}
+	if len(src) == 0 || src[0] != '{' {
+		return l.appendText(dst, src, l.skipClientTime, l.procID, l.procSequence, level)
+	}
+
+	// Check whether the input is a valid JSON object and
+	// whether it contains the reserved "logtail" name at the top-level.
+	var logtailKeyOffset, logtailValOffset, logtailValLength int
+	validJSON := func() bool {
+		// TODO(dsnet): Avoid allocation of bytes.Buffer struct.
+		dec := &l.jsonDec
+		dec.Reset(bytes.NewBuffer(src))
+		if tok, err := dec.ReadToken(); tok.Kind() != '{' || err != nil {
+			return false
+		}
+		for dec.PeekKind() != '}' {
+			keyOffset := dec.InputOffset()
+			tok, err := dec.ReadToken()
+			if err != nil {
+				return false
+			}
+			isLogtail := tok.String() == "logtail"
+			valOffset := dec.InputOffset()
+			if dec.SkipValue() != nil {
+				return false
+			}
+			if isLogtail {
+				logtailKeyOffset = int(keyOffset)
+				logtailValOffset = int(valOffset)
+				logtailValLength = int(dec.InputOffset()) - logtailValOffset
+			}
+		}
+		if tok, err := dec.ReadToken(); tok.Kind() != '}' || err != nil {
+			return false
+		}
+		if _, err := dec.ReadToken(); err != io.EOF {
+			return false // trailing junk after JSON object
+		}
+		return true
+	}()
+
+	// Treat invalid JSON as a raw text message.
+	if !validJSON {
+		return l.appendText(dst, src, l.skipClientTime, l.procID, l.procSequence, level)
+	}
+
+	// Check whether the JSON payload is too large.
+	// Due to logtail metadata, the formatted log entry could exceed maxSize.
+	// That's okay as the Tailscale log service limit is actually 2*maxSize.
+	// However, so long as logging applications aim to target the maxSize limit,
+	// there should be no trouble eventually uploading logs.
+	if len(src) > maxSize {
+		errDetail := fmt.Sprintf("entry too large: %d bytes", len(src))
+		errData := appendTruncatedString(nil, src, maxSize/len(`\uffff`)) // escaping could increase size
+
+		dst = append(dst, '{')
+		dst = l.appendMetadata(dst, l.skipClientTime, true, l.procID, l.procSequence, errDetail, errData, level)
+		dst = bytes.TrimRight(dst, ",")
+		return append(dst, "}\n"...)
+	}
+
+	// Check whether the reserved logtail member occurs in the log data.
+	// If so, it is moved to the logtail/error member.
+	const jsonSeperators = ",:"      // per RFC 8259, section 2
+	const jsonWhitespace = " \n\r\t" // per RFC 8259, section 2
+	var errDetail string
+	var errData jsontext.Value
+	if logtailValLength > 0 {
+		errDetail = "duplicate logtail member"
+		errData = bytes.Trim(src[logtailValOffset:][:logtailValLength], jsonSeperators+jsonWhitespace)
+	}
+
+	dst = slices.Grow(dst, len(src))
+	dst = append(dst, '{')
+	dst = l.appendMetadata(dst, l.skipClientTime, true, l.procID, l.procSequence, errDetail, errData, level)
+	if logtailValLength > 0 {
+		// Exclude original logtail member from the message.
+		dst = appendWithoutNewline(dst, src[len("{"):logtailKeyOffset])
+		dst = bytes.TrimRight(dst, jsonSeperators+jsonWhitespace)
+		dst = appendWithoutNewline(dst, src[logtailValOffset+logtailValLength:])
+	} else {
+		dst = appendWithoutNewline(dst, src[len("{"):])
+	}
+	dst = bytes.TrimRight(dst, jsonWhitespace)
+	dst = dst[:len(dst)-len("}")]
+	dst = bytes.TrimRight(dst, jsonSeperators+jsonWhitespace)
+	return append(dst, "}\n"...)
+}
+
+// appendWithoutNewline appends src to dst except that it ignores newlines
+// since newlines are used to frame individual log entries.
+func appendWithoutNewline(dst, src []byte) []byte {
+	for _, c := range src {
+		if c != '\n' {
+			dst = append(dst, c)
+		}
+	}
+	return dst
+}
 
 // Logf logs to l using the provided fmt-style format and optional arguments.
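
The "…+N" convention in appendTruncatedString splices the count of dropped bytes inside the closing quote of the JSON string. A standalone approximation using strconv in place of jsontext and util/truncate (so, unlike the real helper, the naive cut may split a UTF-8 sequence):

```go
package main

import (
	"fmt"
	"strconv"
)

// appendTruncated quotes at most n bytes of src and records how many
// bytes were dropped inside the quotes, mirroring appendTruncatedString.
func appendTruncated(dst []byte, src string, n int) []byte {
	if len(src) <= n {
		return strconv.AppendQuote(dst, src)
	}
	dst = strconv.AppendQuote(dst, src[:n])
	dst = dst[:len(dst)-1] // reopen the string: drop the closing quote
	dst = append(dst, "…+"...)
	dst = strconv.AppendInt(dst, int64(len(src)-n), 10)
	return append(dst, '"')
}

func main() {
	fmt.Println(string(appendTruncated(nil, "hello world", 5)))
	// "hello…+6"
}
```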
@@ -776,7 +852,7 @@ func (l *Logger) Write(buf []byte) (int, error) {
 	l.writeLock.Lock()
 	defer l.writeLock.Unlock()
-	b := l.encodeLocked(buf, level)
+	b := l.appendTextOrJSONLocked(l.writeBuf[:0], buf, level)
 	_, err := l.sendLocked(b)
 	return inLen, err
 }
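
For reference, hand-written examples of the wire shapes the rewritten Write path emits (field order follows the append order in appendMetadata; the values are invented):

```go
// Plain text via appendText:
//   {"logtail":{"client_time":"2006-01-02T15:04:05.999999999Z","proc_id":7,"proc_seq":1},"v":2,"text":"hello"}
//
// A JSON object is passed through with metadata spliced in after '{'.
// If the input itself carried a reserved "logtail" member, its value now
// lands under logtail.error.bad_data with detail "duplicate logtail member",
// replacing encodeLocked's old behavior of copying it to "error_has_logtail".
```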