From febdac04997d0c747cb65638497f7d8715802b7d Mon Sep 17 00:00:00 2001 From: Brad Fitzpatrick Date: Sun, 5 Apr 2020 21:47:47 -0700 Subject: [PATCH] tstime: write Parse3339 parse that doesn't use time.Parse It doesn't allocate and it's half the time of time.Parse (which allocates), and 2/3rds the time of time.ParseInLocation (which doesn't). Go with a UTC time: BenchmarkGoParse3339/Z-8 2200995 534 ns/op 0 B/op 0 allocs/op BenchmarkGoParse3339/Z-8 2254816 554 ns/op 0 B/op 0 allocs/op BenchmarkGoParse3339/Z-8 2159504 522 ns/op 0 B/op 0 allocs/op Go allocates with a "-08:00" suffix instead of ending in "Z": BenchmarkGoParse3339/TZ-8 1276491 884 ns/op 144 B/op 3 allocs/op BenchmarkGoParse3339/TZ-8 1355858 942 ns/op 144 B/op 3 allocs/op BenchmarkGoParse3339/TZ-8 1385484 911 ns/op 144 B/op 3 allocs/op Go doesn't allocate if you use time.ParseInLocation, but then you need to parse the string to find the location anyway, so might as well go all the way (below). BenchmarkGoParse3339InLocation-8 1912254 597 ns/op 0 B/op 0 allocs/op BenchmarkGoParse3339InLocation-8 1980043 612 ns/op 0 B/op 0 allocs/op BenchmarkGoParse3339InLocation-8 1891366 612 ns/op 0 B/op 0 allocs/op Parsing RFC3339 ourselves, UTC: BenchmarkParse3339/Z-8 3889220 307 ns/op 0 B/op 0 allocs/op BenchmarkParse3339/Z-8 3718500 309 ns/op 0 B/op 0 allocs/op BenchmarkParse3339/Z-8 3621231 303 ns/op 0 B/op 0 allocs/op Parsing RFC3339 ourselves, with timezone (w/ *time.Location fetched from sync.Map) BenchmarkParse3339/TZ-8 3019612 418 ns/op 0 B/op 0 allocs/op BenchmarkParse3339/TZ-8 2921618 401 ns/op 0 B/op 0 allocs/op BenchmarkParse3339/TZ-8 3031671 408 ns/op 0 B/op 0 allocs/op Signed-off-by: Brad Fitzpatrick --- tstime/tstime.go | 80 +++++++++++++++++++++++----- tstime/tstime_test.go | 120 ++++++++++++++++++++++++++++++------------ 2 files changed, 154 insertions(+), 46 deletions(-) diff --git a/tstime/tstime.go b/tstime/tstime.go index 4867b7ff7..93ad3ef52 100644 --- a/tstime/tstime.go +++ b/tstime/tstime.go @@ -6,16 +6,20 @@ package tstime import ( + "errors" + "fmt" + "strconv" "strings" "sync" "time" ) -// zoneOf returns the RFC3339 zone suffix, or the empty string -// if it's invalid or not something we want to cache. +// zoneOf returns the RFC3339 zone suffix (either "Z" or like +// "+08:30"), or the empty string if it's invalid or not something we +// want to cache. func zoneOf(s string) string { if strings.HasSuffix(s, "Z") { - return "" + return "Z" } if len(s) < len("2020-04-05T15:56:00+08:00") { // Too short, invalid? Let time.Parse fail on it. @@ -36,23 +40,75 @@ func zoneOf(s string) string { // *time.Location (from FixedLocation). var locCache sync.Map +func getLocation(zone, timeValue string) (*time.Location, error) { + if zone == "Z" { + return time.UTC, nil + } + if loci, ok := locCache.Load(zone); ok { + return loci.(*time.Location), nil + } + // TODO(bradfitz): just parse it and call time.FixedLocation. + // For now, just have time.Parse do it once: + t, err := time.Parse(time.RFC3339Nano, timeValue) + if err != nil { + return nil, err + } + loc := t.Location() + locCache.LoadOrStore(zone, loc) + return loc, nil + +} + // Parse3339 is a wrapper around time.Parse(time.RFC3339Nano, s) that caches // timezone Locations for future parses. func Parse3339(s string) (time.Time, error) { zone := zoneOf(s) if zone == "" { + // Invalid or weird timezone offset. Use slow path, + // which'll probably return an error. return time.Parse(time.RFC3339Nano, s) } - loci, ok := locCache.Load(zone) - if ok { - // TODO(bradfitz): just rewrite this do the trivial parsing by hand - // which will be faster than Go's format-driven one. RFC3339 is trivial. - return time.ParseInLocation(time.RFC3339Nano, s, loci.(*time.Location)) - } - t, err := time.Parse(time.RFC3339Nano, s) + loc, err := getLocation(zone, s) if err != nil { return time.Time{}, err } - locCache.LoadOrStore(zone, t.Location()) - return t, nil + s = s[:len(s)-len(zone)] // remove zone suffix + var year, mon, day, hr, min, sec, nsec int + const baseLen = len("2020-04-05T15:56:00") + if len(s) < baseLen || + !parseInt(s[:4], &year) || + s[4] != '-' || + !parseInt(s[5:7], &mon) || + s[7] != '-' || + !parseInt(s[8:10], &day) || + s[10] != 'T' || + !parseInt(s[11:13], &hr) || + s[13] != ':' || + !parseInt(s[14:16], &min) || + s[16] != ':' || + !parseInt(s[17:19], &sec) { + return time.Time{}, errors.New("invalid time") + } + nsStr := s[baseLen:] + if nsStr != "" { + if nsStr[0] != '.' { + return time.Time{}, errors.New("invalid optional nanosecond prefix") + } + if !parseInt(nsStr[1:], &nsec) { + return time.Time{}, fmt.Errorf("invalid optional nanosecond number %q", nsStr[1:]) + } + for i := 0; i < len("999999999")-(len(nsStr)-1); i++ { + nsec *= 10 + } + } + return time.Date(year, time.Month(mon), day, hr, min, sec, nsec, loc), nil +} + +func parseInt(s string, dst *int) bool { + n, err := strconv.ParseInt(s, 10, 0) + if err != nil || n < 0 { + return false + } + *dst = int(n) + return true } diff --git a/tstime/tstime_test.go b/tstime/tstime_test.go index a598cec42..94804be89 100644 --- a/tstime/tstime_test.go +++ b/tstime/tstime_test.go @@ -9,6 +9,51 @@ import ( "time" ) +func TestParse3339(t *testing.T) { + tests := []string{ + "2020-04-05T15:56:00Z", + "2020-04-05T15:56:00.1234Z", + "2020-04-05T15:56:00+08:00", + + "2020-04-05T15:56:00.1+08:00", + "2020-04-05T15:56:00.12+08:00", + "2020-04-05T15:56:00.012+08:00", + "2020-04-05T15:56:00.0012+08:00", + "2020-04-05T15:56:00.148487491+08:00", + + "2020x04-05T15:56:00.1234+08:00", + "2020-04x05T15:56:00.1234+08:00", + "2020-04-05x15:56:00.1234+08:00", + "2020-04-05T15x56:00.1234+08:00", + "2020-04-05T15:56x00.1234+08:00", + "2020-04-05T15:56:00x1234+08:00", + } + for _, s := range tests { + t.Run(s, func(t *testing.T) { + goTime, goErr := time.Parse(time.RFC3339Nano, s) + + Parse3339(s) // prime the tz cache so next parse use fast path + got, err := Parse3339(s) + + if (err == nil) != (goErr == nil) { + t.Errorf("for %q, go err = %v; our err = %v", s, goErr, err) + return + } + if err != nil { + return + } + if !goTime.Equal(got) { + t.Errorf("for %q, times not Equal: we got %v, want go's %v", s, got, goTime) + } + if goStr, ourStr := goTime.Format(time.RFC3339Nano), got.Format(time.RFC3339Nano); goStr != ourStr { + t.Errorf("for %q, strings not equal: we got %q, want go's %q", s, ourStr, goStr) + } + + }) + } + +} + func TestZoneOf(t *testing.T) { tests := []struct { in, want string @@ -16,17 +61,14 @@ func TestZoneOf(t *testing.T) { {"2020-04-05T15:56:00+08:00", "+08:00"}, {"2020-04-05T15:56:00-08:00", "-08:00"}, {"2020-04-05T15:56:00.12345-08:00", "-08:00"}, - // don't cache weird offsets, only 00 15, 30: {"2020-04-05T15:56:00.12345-08:00", "-08:00"}, {"2020-04-05T15:56:00.12345-08:30", "-08:30"}, {"2020-04-05T15:56:00.12345-08:15", "-08:15"}, - {"2020-04-05T15:56:00.12345-08:17", ""}, - // don't cache UTC: - {"2020-04-05T15:56:00.12345Z", ""}, - {"2020-04-05T15:56:00Z", ""}, - // too short: - {"123+08:00", ""}, - {"+08:00", ""}, + {"2020-04-05T15:56:00.12345-08:17", ""}, // don't cache weird offsets + {"2020-04-05T15:56:00.12345Z", "Z"}, + {"2020-04-05T15:56:00Z", "Z"}, + {"123+08:00", ""}, // too short + {"+08:00", ""}, // too short } for _, tt := range tests { if got := zoneOf(tt.in); got != tt.want { @@ -36,14 +78,19 @@ func TestZoneOf(t *testing.T) { } func BenchmarkGoParse3339(b *testing.B) { - b.ReportAllocs() - const in = `2020-04-05T15:56:00.148487491+08:00` - for i := 0; i < b.N; i++ { - _, err := time.Parse(time.RFC3339Nano, in) - if err != nil { - b.Fatal(err) + run := func(in string) func(*testing.B) { + return func(b *testing.B) { + b.ReportAllocs() + for i := 0; i < b.N; i++ { + _, err := time.Parse(time.RFC3339Nano, in) + if err != nil { + b.Fatal(err) + } + } } } + b.Run("Z", run("2020-04-05T15:56:00.148487491Z")) + b.Run("TZ", run("2020-04-05T15:56:00.148487491+08:00")) } func BenchmarkGoParse3339InLocation(b *testing.B) { @@ -76,29 +123,34 @@ func BenchmarkGoParse3339InLocation(b *testing.B) { } func BenchmarkParse3339(b *testing.B) { - b.ReportAllocs() - const in = `2020-04-05T15:56:00.148487491+08:00` + run := func(in string) func(*testing.B) { + return func(b *testing.B) { + b.ReportAllocs() - t, err := time.Parse(time.RFC3339Nano, in) - if err != nil { - b.Fatal(err) - } + t, err := time.Parse(time.RFC3339Nano, in) + if err != nil { + b.Fatal(err) + } - t2, err := Parse3339(in) - if err != nil { - b.Fatal(err) - } - if !t.Equal(t2) { - b.Fatal("not equal") - } - if s1, s2 := t.Format(time.RFC3339Nano), t2.Format(time.RFC3339Nano); s1 != s2 { - b.Fatalf("times don't stringify the same: %q vs %q", s1, s2) - } + t2, err := Parse3339(in) + if err != nil { + b.Fatal(err) + } + if !t.Equal(t2) { + b.Fatal("not equal") + } + if s1, s2 := t.Format(time.RFC3339Nano), t2.Format(time.RFC3339Nano); s1 != s2 { + b.Fatalf("times don't stringify the same: %q vs %q", s1, s2) + } - for i := 0; i < b.N; i++ { - _, err := Parse3339(in) - if err != nil { - b.Fatal(err) + for i := 0; i < b.N; i++ { + _, err := Parse3339(in) + if err != nil { + b.Fatal(err) + } + } } } + b.Run("Z", run("2020-04-05T15:56:00.148487491Z")) + b.Run("TZ", run("2020-04-05T15:56:00.148487491+08:00")) }