diff --git a/util/deephash/deephash.go b/util/deephash/deephash.go index 6424fe481..9b5f70410 100644 --- a/util/deephash/deephash.go +++ b/util/deephash/deephash.go @@ -8,8 +8,15 @@ // The hash is sufficiently strong and unique such that // Hash(x) == Hash(y) is an appropriate replacement for x == y. // -// This package, like most of the tailscale.com Go module, should be -// considered Tailscale-internal; we make no API promises. +// The definition of equality is identical to reflect.DeepEqual except: +// * Floating-point values are compared based on the raw bits, +// which means that NaNs (with the same bit pattern) are treated as equal. +// * Types which implement interface { AppendTo([]byte) []byte } use +// the AppendTo method to produce a textual representation of the value. +// Thus, two values are equal if AppendTo produces the same bytes. +// +// WARNING: This package, like most of the tailscale.com Go module, +// should be considered Tailscale-internal; we make no API promises. package deephash import ( @@ -26,6 +33,33 @@ import ( "unsafe" ) +// There is much overlap between the theory of serialization and hashing. +// A hash (useful for determining equality) can be produced by printing a value +// and hashing the output. The format must: +// * be deterministic such that the same value hashes to the same output, and +// * be parsable such that the same value can be reproduced by the output. +// +// The logic below hashes a value by printing it to a hash.Hash. +// To be parsable, it assumes that we know the Go type of each value: +// * scalar types (e.g., bool or int32) are printed as fixed-width fields. +// * list types (e.g., strings, slices, and AppendTo buffers) are prefixed +// by a fixed-width length field, followed by the contents of the list. +// * slices, arrays, and structs print each element/field consecutively. +// * interfaces print with a 1-byte prefix indicating whether it is nil. 
+// If non-nil, it is followed by a fixed-width field of the type index, +// followed by the format of the underlying value. +// * pointers print with a 1-byte prefix indicating whether the pointer is +// 1) nil, 2) previously seen, or 3) newly seen. Previously seen pointers are +// followed by a fixed-width field with the index of the previous pointer. +// Newly seen pointers are followed by the format of the underlying value. +// * maps print with a 1-byte prefix indicating whether the map pointer is +// 1) nil, 2) previously seen, or 3) newly seen. Previously seen pointers +// are followed by a fixed-width field of the index of the previous pointer. +// Newly seen maps are printed as a fixed-width field with the XOR of the +// hash of every map entry. With a sufficiently strong hash, this value is +// theoretically "parsable" by looking up the hash in a magical map that +// returns the set of entries for that given hash. + const scratchSize = 128 // hasher is reusable state for hashing a value. @@ -174,10 +208,7 @@ func (h *hasher) hashValue(v reflect.Value) { h.hashUint8(1) // indicates visiting a pointer h.hashValue(v.Elem()) case reflect.Struct: - w.WriteString("struct") - h.hashUint64(uint64(v.NumField())) for i, n := 0, v.NumField(); i < n; i++ { - h.hashUint64(uint64(i)) h.hashValue(v.Field(i)) } case reflect.Slice, reflect.Array: @@ -202,7 +233,6 @@ func (h *hasher) hashValue(v reflect.Value) { // TODO(dsnet): Perform cycle detection for slices, // which is functionally a list of pointers. 
// See https://github.com/google/go-cmp/blob/402949e8139bb890c71a707b6faf6dd05c92f4e5/cmp/compare.go#L438-L450 - h.hashUint64(uint64(i)) h.hashValue(v.Index(i)) } case reflect.Interface: diff --git a/util/deephash/deephash_test.go b/util/deephash/deephash_test.go index e2be3c13a..235b8207d 100644 --- a/util/deephash/deephash_test.go +++ b/util/deephash/deephash_test.go @@ -269,11 +269,7 @@ func TestPrintArray(t *testing.T) { h := &hasher{bw: bw} h.hashValue(reflect.ValueOf(x)) bw.Flush() - const want = "struct" + - "\x01\x00\x00\x00\x00\x00\x00\x00" + // 1 field - "\x00\x00\x00\x00\x00\x00\x00\x00" + // 0th field - // the 32 bytes: - "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f" + const want = "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f" if got := got.Bytes(); string(got) != want { t.Errorf("wrong:\n got: %q\nwant: %q\n", got, want) }