From 60657ac83f415555c19027b0b18c0fe2d15bf40a Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 1 Feb 2024 17:07:41 -0800 Subject: [PATCH] util/deephash: tighten up SelfHasher API (#11012) Providing a hash.Block512 is an implementation detail of how deephash works today, but providing an opaque type with mostly equivalent API (i.e., HashUint8, HashBytes, etc. methods) is still sensible. Thus, define a public Hasher type that exposes exactly the API that an implementation of SelfHasher would want to call. This gives us freedom to change the hashing algorithm of deephash at some point in the future. Also, this type is likely going to be called by types that are going to memoize their own hash results, we additionally add a HashSum method to simplify this use case. Add documentation to SelfHasher on how a type might implement it. Updates: corp#16409 Signed-off-by: Joe Tsai --- util/deephash/deephash.go | 71 ++++++++++++++++++++++++++++++++-- util/deephash/deephash_test.go | 24 ++++++++---- util/deephash/types.go | 13 ------- util/deephash/types_test.go | 13 +------ 4 files changed, 85 insertions(+), 36 deletions(-) diff --git a/util/deephash/deephash.go b/util/deephash/deephash.go index c1997a4b5..bff0fc435 100644 --- a/util/deephash/deephash.go +++ b/util/deephash/deephash.go @@ -104,6 +104,70 @@ import ( // by looking up the hash in a magical map that returns the set of entries // for that given hash. +// SelfHasher is implemented by types that can compute their own hash +// by writing values through the provided [Hasher] parameter. +// Implementations must not leak the provided [Hasher]. +// +// If the implementation of SelfHasher recursively calls [deephash.Hash], +// then infinite recursion is quite likely to occur. +// To avoid this, use a type definition to drop methods before calling [deephash.Hash]: +// +// func (v *MyType) Hash(h deephash.Hasher) { +// v.hashMu.Lock() +// defer v.hashMu.Unlock() +// if v.dirtyHash { +// type MyTypeWithoutMethods MyType // type define MyType to drop Hash method +// v.dirtyHash = false // clear out dirty bit to avoid hashing over it +// v.hashSum = deephash.Sum{} // clear out hashSum to avoid hashing over it +// v.hashSum = deephash.Hash((*MyTypeWithoutMethods)(v)) +// } +// h.HashSum(v.hashSum) +// } +// +// In the above example, we acquire a lock since it is possible that deephash +// is called in a concurrent manner, which implies that MyType.Hash may also +// be called in a concurrent manner. Whether this lock is necessary is +// application-dependent and left as an exercise to the reader. +// Also, the example assumes that dirtyHash is set elsewhere by application +// logic whenever a mutation is made to MyType that would alter the hash. +type SelfHasher interface { + Hash(Hasher) +} + +// Hasher is a value passed to [SelfHasher.Hash] that allow implementations +// to hash themselves in a structured manner. +type Hasher struct{ h *hashx.Block512 } + +// HashBytes hashes a sequence of bytes b. +// The length of b is not explicitly hashed. +func (h Hasher) HashBytes(b []byte) { h.h.HashBytes(b) } + +// HashString hashes the string data of s +// The length of s is not explicitly hashed. +func (h Hasher) HashString(s string) { h.h.HashString(s) } + +// HashUint8 hashes a uint8. +func (h Hasher) HashUint8(n uint8) { h.h.HashUint8(n) } + +// HashUint16 hashes a uint16. +func (h Hasher) HashUint16(n uint16) { h.h.HashUint16(n) } + +// HashUint32 hashes a uint32. +func (h Hasher) HashUint32(n uint32) { h.h.HashUint32(n) } + +// HashUint64 hashes a uint64. +func (h Hasher) HashUint64(n uint64) { h.h.HashUint64(n) } + +// HashSum hashes a [Sum]. +func (h Hasher) HashSum(s Sum) { + // NOTE: Avoid calling h.HashBytes since it escapes b, + // which would force s to be heap allocated. + h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[0:8])) + h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[8:16])) + h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[16:24])) + h.h.HashUint64(binary.LittleEndian.Uint64(s.sum[24:32])) +} + // hasher is reusable state for hashing a value. // Get one via hasherPool. type hasher struct { @@ -339,7 +403,7 @@ func makeTypeHasher(t reflect.Type) typeHasherFunc { if t.Kind() != reflect.Pointer && t.Kind() != reflect.Interface { // A method can be implemented on either the value receiver or pointer receiver. if t.Implements(selfHasherType) || reflect.PointerTo(t).Implements(selfHasherType) { - return makeSelfHasherImpl(t) + return makeSelfHasher(t) } } @@ -401,10 +465,9 @@ func hashAddr(h *hasher, p pointer) { } } -func makeSelfHasherImpl(t reflect.Type) typeHasherFunc { +func makeSelfHasher(t reflect.Type) typeHasherFunc { return func(h *hasher, p pointer) { - e := p.asValue(t) - e.Interface().(SelfHasher).Hash(&h.Block512) + p.asValue(t).Interface().(SelfHasher).Hash(Hasher{&h.Block512}) } } diff --git a/util/deephash/deephash_test.go b/util/deephash/deephash_test.go index 05efa451f..7a2d65ac6 100644 --- a/util/deephash/deephash_test.go +++ b/util/deephash/deephash_test.go @@ -42,11 +42,19 @@ func (p appendBytes) AppendTo(b []byte) []byte { return append(b, p...) } -type implsSelfHasherValueRecv struct { +type selfHasherValueRecv struct { emit uint64 } -func (s implsSelfHasherValueRecv) Hash(h *hashx.Block512) { +func (s selfHasherValueRecv) Hash(h *hashx.Block512) { + h.HashUint64(s.emit) +} + +type selfHasherPointerRecv struct { + emit uint64 +} + +func (s *selfHasherPointerRecv) Hash(h *hashx.Block512) { h.HashUint64(s.emit) } @@ -178,12 +186,12 @@ func TestHash(t *testing.T) { b[0] = 1 return b }()))}, wantEq: false}, - {in: tuple{&implsSelfHasher{}, &implsSelfHasher{}}, wantEq: true}, - {in: tuple{(*implsSelfHasher)(nil), (*implsSelfHasher)(nil)}, wantEq: true}, - {in: tuple{(*implsSelfHasher)(nil), &implsSelfHasher{}}, wantEq: false}, - {in: tuple{&implsSelfHasher{emit: 1}, &implsSelfHasher{emit: 2}}, wantEq: false}, - {in: tuple{implsSelfHasherValueRecv{emit: 1}, implsSelfHasherValueRecv{emit: 2}}, wantEq: false}, - {in: tuple{implsSelfHasherValueRecv{emit: 2}, implsSelfHasherValueRecv{emit: 2}}, wantEq: true}, + {in: tuple{&selfHasherPointerRecv{}, &selfHasherPointerRecv{}}, wantEq: true}, + {in: tuple{(*selfHasherPointerRecv)(nil), (*selfHasherPointerRecv)(nil)}, wantEq: true}, + {in: tuple{(*selfHasherPointerRecv)(nil), &selfHasherPointerRecv{}}, wantEq: false}, + {in: tuple{&selfHasherPointerRecv{emit: 1}, &selfHasherPointerRecv{emit: 2}}, wantEq: false}, + {in: tuple{selfHasherValueRecv{emit: 1}, selfHasherValueRecv{emit: 2}}, wantEq: false}, + {in: tuple{selfHasherValueRecv{emit: 2}, selfHasherValueRecv{emit: 2}}, wantEq: true}, } for _, tt := range tests { diff --git a/util/deephash/types.go b/util/deephash/types.go index 5a2045dcc..e7ea54eb1 100644 --- a/util/deephash/types.go +++ b/util/deephash/types.go @@ -7,21 +7,8 @@ import ( "net/netip" "reflect" "time" - - "tailscale.com/util/hashx" ) -// SelfHasher is the interface implemented by types that can compute their own hash -// by writing values through the given parameter. -// -// Implementations of Hash MUST NOT call `Reset` or `Sum` on the provided argument. -// -// This interface should not be considered stable and is likely to change in the -// future. -type SelfHasher interface { - Hash(*hashx.Block512) -} - var ( timeTimeType = reflect.TypeOf((*time.Time)(nil)).Elem() netipAddrType = reflect.TypeOf((*netip.Addr)(nil)).Elem() diff --git a/util/deephash/types_test.go b/util/deephash/types_test.go index cce5b52a5..78b40d88e 100644 --- a/util/deephash/types_test.go +++ b/util/deephash/types_test.go @@ -12,17 +12,8 @@ import ( "tailscale.com/tailcfg" "tailscale.com/types/structs" - "tailscale.com/util/hashx" ) -type implsSelfHasher struct { - emit uint64 -} - -func (s *implsSelfHasher) Hash(h *hashx.Block512) { - h.HashUint64(s.emit) -} - func TestTypeIsMemHashable(t *testing.T) { tests := []struct { val any @@ -76,7 +67,7 @@ func TestTypeIsMemHashable(t *testing.T) { false}, {[0]chan bool{}, true}, {struct{ f [0]func() }{}, true}, - {&implsSelfHasher{}, false}, + {&selfHasherPointerRecv{}, false}, } for _, tt := range tests { got := typeIsMemHashable(reflect.TypeOf(tt.val)) @@ -112,7 +103,7 @@ func TestTypeIsRecursive(t *testing.T) { {val: unsafe.Pointer(nil), want: false}, {val: make(RecursiveChan), want: true}, {val: make(chan int), want: false}, - {val: (*implsSelfHasher)(nil), want: false}, + {val: (*selfHasherPointerRecv)(nil), want: false}, } for _, tt := range tests { got := typeIsRecursive(reflect.TypeOf(tt.val))