// Bytes is a byte slice in a JSON-encoded struct.
// encoding/json assumes that []byte fields are base64-encoded strings.
// Bytes are not base64-encoded; the raw bytes of the JSON string are stored
// as-is, the same way they would be for a string field.
// This can avoid unnecessary allocations due to a round trip through strings.
//
// Only unmarshalling is customized; no MarshalText method is defined here.
type Bytes []byte

// UnmarshalText implements encoding.TextUnmarshaler.
//
// It replaces the contents of b with a copy of text and always returns nil.
// Any existing capacity of b is reused, so decoding repeatedly into the same
// value does not allocate once the buffer is large enough. As a consequence,
// a slice previously read from b may be overwritten by a later call.
func (b *Bytes) UnmarshalText(text []byte) error {
	// Copy the contents of text: the caller owns text and may reuse it after
	// this call returns. Truncating to length zero first makes the new value
	// replace the old one instead of being appended to it.
	*b = append((*b)[:0], text...)
	return nil
}
// decoder is a re-usable JSON decoder.
// dec is constructed over r, so resetting r to a new byte slice
// feeds dec new input without allocating a new json.Decoder.
type decoder struct {
	dec *json.Decoder
	r   *bytes.Reader
}

// readerPool holds spare bytes.Readers.
// A reader is returned here when the decoder wrapping it has to be abandoned;
// decoderPool's New function draws from this pool when building a decoder.
var readerPool = sync.Pool{
	New: func() interface{} {
		return bytes.NewReader(nil)
	},
}

// decoderPool holds decoders whose last use completed without error.
var decoderPool = sync.Pool{
	New: func() interface{} {
		var d decoder
		d.r = readerPool.Get().(*bytes.Reader)
		d.dec = json.NewDecoder(d.r)
		return &d
	},
}

// hasTrailingData reports whether dec has any non-whitespace input left
// after the value it just decoded.
func hasTrailingData(dec *json.Decoder) bool {
	if dec.More() {
		return true
	}
	// More also reports false when the next byte is ']' or '}', so on its own
	// it would accept inputs such as `{}}`. Token tells the cases apart:
	// at end of input it returns a non-syntax error (io.EOF), while a stray
	// closing delimiter at top level yields a *json.SyntaxError.
	_, err := dec.Token()
	_, stray := err.(*json.SyntaxError)
	return err == nil || stray
}

// Unmarshal is similar to encoding/json.Unmarshal.
// There are three major differences:
//
// On error, encoding/json.Unmarshal zeros v.
// This Unmarshal may leave partial data in v.
// Always check the error before using v!
// (Future improvements may remove this bug.)
//
// The errors they return don't always match perfectly.
// If you do error matching more precise than err != nil,
// don't use this Unmarshal.
//
// This Unmarshal allocates considerably less memory.
//
// It decodes the JSON value in b into v and returns a non-nil error if b is
// not valid JSON, cannot be stored in v, or contains anything other than
// whitespace after the first value.
func Unmarshal(b []byte, v interface{}) error {
	d := decoderPool.Get().(*decoder)
	d.r.Reset(b)
	off := d.dec.InputOffset()
	err := d.dec.Decode(v)
	// In case of error, report the offset in this byte slice,
	// instead of in the totality of all bytes this decoder has processed.
	// It is not possible to make all errors match json.Unmarshal exactly,
	// but we can at least try.
	switch jsonerr := err.(type) {
	case *json.SyntaxError:
		jsonerr.Offset -= off
	case *json.UnmarshalTypeError:
		jsonerr.Offset -= off
	case nil:
		// json.Unmarshal fails if there's any extra junk in the input.
		// json.Decoder does not; see https://github.com/golang/go/issues/36225.
		// We need to check for anything left over, both in the decoder's
		// buffer and in the part of b it has not read yet. The reader must
		// therefore still be pointing at b while this check runs.
		if hasTrailingData(d.dec) {
			// TODO: Provide a better error message.
			// Unfortunately, we can't set the msg field.
			// The offset may not match json.Unmarshal's exactly.
			// Close enough, though.
			err = &json.SyntaxError{Offset: d.dec.InputOffset() - off}

			// TODO: zero v. This is hard; see encoding/json.indirect.
		}
	}
	d.r.Reset(nil) // don't keep a reference to b
	if err != nil {
		// There might be junk left in the decoder's buffer.
		// There's no way to flush it, no Reset method.
		// Abandon the decoder but reuse the reader.
		readerPool.Put(d.r)
		return err
	}
	decoderPool.Put(d)
	return nil
}
+ +package jsonutil + +import ( + "encoding/json" + "reflect" + "testing" +) + +func TestCompareToStd(t *testing.T) { + tests := []string{ + `{}`, + `{"a": 1}`, + `{]`, + `"abc"`, + `5`, + `{"a": 1} `, + `{"a": 1} {}`, + `{} bad data`, + `{"a": 1} "hello"`, + `[]`, + ` {"x": {"t": [3,4,5]}}`, + } + + for _, test := range tests { + b := []byte(test) + var ourV, stdV interface{} + ourErr := Unmarshal(b, &ourV) + stdErr := json.Unmarshal(b, &stdV) + if (ourErr == nil) != (stdErr == nil) { + t.Errorf("Unmarshal(%q): our err = %#[2]v (%[2]T), std err = %#[3]v (%[3]T)", test, ourErr, stdErr) + } + // if !reflect.DeepEqual(ourErr, stdErr) { + // t.Logf("Unmarshal(%q): our err = %#[2]v (%[2]T), std err = %#[3]v (%[3]T)", test, ourErr, stdErr) + // } + if ourErr != nil { + // TODO: if we zero ourV on error, remove this continue. + continue + } + if !reflect.DeepEqual(ourV, stdV) { + t.Errorf("Unmarshal(%q): our val = %v, std val = %v", test, ourV, stdV) + } + } +} + +func BenchmarkUnmarshal(b *testing.B) { + var m interface{} + j := []byte("5") + b.ReportAllocs() + for i := 0; i < b.N; i++ { + Unmarshal(j, &m) + } +} + +func BenchmarkStdUnmarshal(b *testing.B) { + var m interface{} + j := []byte("5") + b.ReportAllocs() + for i := 0; i < b.N; i++ { + json.Unmarshal(j, &m) + } +}