From af931dcccd24564adebafaa8794563c620a9bcda Mon Sep 17 00:00:00 2001 From: Tom DNetto Date: Wed, 7 Feb 2024 13:15:43 -0800 Subject: [PATCH] tsweb: replace domains/emails in paths when bucketing stats Signed-off-by: Tom DNetto Updates: corp#17075 --- tsweb/tsweb.go | 22 ++++++++++++++-------- tsweb/tsweb_test.go | 3 +++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/tsweb/tsweb.go b/tsweb/tsweb.go index 5ce45226a..a45e6ebc7 100644 --- a/tsweb/tsweb.go +++ b/tsweb/tsweb.go @@ -192,24 +192,30 @@ type BucketedStatsOptions struct { Finished *expvar.Map } -var ( - hexSequenceRegex = regexp.MustCompile("[a-fA-F0-9]{9,}") -) +// normalizePathRegex matches components in an HTTP request path +// that should be replaced. +// +// See: https://regex101.com/r/WIfpaR/1 for the explainer and test cases. +var normalizePathRegex = regexp.MustCompile("([a-fA-F0-9]{9,}|([^\\/])+\\.([^\\/]){2,})") -// NormalizedPath returns the given path with any query parameters -// removed, and any hex strings of 9 or more characters replaced -// with an ellipsis. +// NormalizedPath returns the given path with the following modifications: +// +// - any query parameters are removed +// - any path component with a hex string of 9 or more characters is +// replaced by an ellipsis +// - any path component containing a period with at least two characters +// after the period (e.g. an email or domain) is replaced by an ellipsis func NormalizedPath(p string) string { // Fastpath: No hex sequences in there we might have to trim. // Avoids allocating. - if hexSequenceRegex.FindStringIndex(p) == nil { + if normalizePathRegex.FindStringIndex(p) == nil { b, _, _ := strings.Cut(p, "?") return b } // If we got here, there's at least one hex sequences we need to // replace with an ellipsis. 
- replaced := hexSequenceRegex.ReplaceAllString(p, "…") + replaced := normalizePathRegex.ReplaceAllString(p, "…") b, _, _ := strings.Cut(replaced, "?") return b } diff --git a/tsweb/tsweb_test.go b/tsweb/tsweb_test.go index 3a97191fb..0ace46236 100644 --- a/tsweb/tsweb_test.go +++ b/tsweb/tsweb_test.go @@ -681,6 +681,9 @@ func TestBucket(t *testing.T) { {"/map/a87e865a9d1c7", "/map/…"}, {"/machine/37fc1acb57f256b69b0d76749d814d91c68b241057c6b127fee3df37e4af111e", "/machine/…"}, {"/machine/37fc1acb57f256b69b0d76749d814d91c68b241057c6b127fee3df37e4af111e/map", "/machine/…/map"}, + {"/api/v2/tailnet/jeremiah@squish.com/devices", "/api/v2/tailnet/…/devices"}, + {"/machine/ssh/wait/5227109621243650/to/7111899293970143/a/a9e4e04cc01b", "/machine/ssh/wait/…/to/…/a/…"}, + {"/a/831a4bf39856?refreshed=true", "/a/…"}, } for _, tc := range tcs {