diff --git a/util/deephash/deephash.go b/util/deephash/deephash.go index 63f06f871..027532dec 100644 --- a/util/deephash/deephash.go +++ b/util/deephash/deephash.go @@ -6,7 +6,7 @@ // without looping. The hash is only valid within the lifetime of a program. // Users should not store the hash on disk or send it over the network. // The hash is sufficiently strong and unique such that -// Hash(x) == Hash(y) is an appropriate replacement for x == y. +// Hash(&x) == Hash(&y) is an appropriate replacement for x == y. // // The definition of equality is identical to reflect.DeepEqual except: // - Floating-point values are compared based on the raw bits, @@ -65,6 +65,33 @@ type hasher struct { visitStack visitStack } +var hasherPool = &sync.Pool{ + New: func() any { return new(hasher) }, +} + +func (h *hasher) reset() { + if h.Block512.Hash == nil { + h.Block512.Hash = sha256.New() + } + h.Block512.Reset() +} + +// hashType hashes a reflect.Type. +// The hash is only consistent within the lifetime of a program. +func (h *hasher) hashType(t reflect.Type) { + // This approach relies on reflect.Type always being backed by a unique + // *reflect.rtype pointer. A safer approach is to use a global sync.Map + // that maps reflect.Type to some arbitrary and unique index. + // While safer, it requires global state with memory that can never be GC'd. + rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype + h.HashUint64(uint64(rtypeAddr)) +} + +func (h *hasher) sum() (s Sum) { + h.Sum(s.sum[:0]) + return s +} + // Sum is an opaque checksum type that is comparable. type Sum struct { sum [sha256.Size]byte @@ -89,97 +116,57 @@ func initSeed() { seed = uint64(time.Now().UnixNano()) } -func (h *hasher) Reset() { - if h.Block512.Hash == nil { - h.Block512.Hash = sha256.New() - } - h.Block512.Reset() -} - -func (h *hasher) sum() (s Sum) { - h.Sum(s.sum[:0]) - return s -} - -var hasherPool = &sync.Pool{ - New: func() any { return new(hasher) }, -} - // Hash returns the hash of v. -// For performance, this should be a non-nil pointer. -func Hash(v any) (s Sum) { +func Hash[T any](v *T) Sum { h := hasherPool.Get().(*hasher) defer hasherPool.Put(h) - h.Reset() + h.reset() seedOnce.Do(initSeed) h.HashUint64(seed) - rv := reflect.ValueOf(v) - if rv.IsValid() { - var t reflect.Type - var p pointer - if rv.Kind() == reflect.Pointer && !rv.IsNil() { - t = rv.Type().Elem() - p = pointerOf(rv) - } else { - t = rv.Type() - va := reflect.New(t).Elem() - va.Set(rv) - p = pointerOf(va.Addr()) - } - - // Always treat the Hash input as an interface (it is), including hashing - // its type, otherwise two Hash calls of different types could hash to the - // same bytes off the different types and get equivalent Sum values. This is - // the same thing that we do for reflect.Kind Interface in hashValue, but - // the initial reflect.ValueOf from an interface value effectively strips - // the interface box off so we have to do it at the top level by hand. - h.hashType(t) - ti := getTypeInfo(t) - ti.hasher()(h, p) + // Always treat the Hash input as if it were an interface by including + // a hash of the type. This ensures that hashing of two different types + // but with the same value structure produces different hashes. + t := reflect.TypeOf(v).Elem() + h.hashType(t) + if v == nil { + h.HashUint8(0) // indicates nil + } else { + h.HashUint8(1) // indicates visiting pointer element + p := pointerOf(reflect.ValueOf(v)) + hash := getTypeInfo(t).hasher() + hash(h, p) } return h.sum() } -// HasherForType is like Hash, but it returns a Hash func that's specialized for -// the provided reflect type, avoiding a map lookup per value. -func HasherForType[T any]() func(T) Sum { - var zeroT T - t := reflect.TypeOf(zeroT) - ti := getTypeInfo(t) - var tiElem *typeInfo - if t.Kind() == reflect.Pointer { - tiElem = getTypeInfo(t.Elem()) - } +// HasherForType returns a hash that is specialized for the provided type. +func HasherForType[T any]() func(*T) Sum { + var v *T seedOnce.Do(initSeed) - - return func(v T) (s Sum) { + t := reflect.TypeOf(v).Elem() + hash := getTypeInfo(t).hasher() + return func(v *T) (s Sum) { + // This logic is identical to Hash, but pull out a few statements. h := hasherPool.Get().(*hasher) defer hasherPool.Put(h) - h.Reset() + h.reset() h.HashUint64(seed) - rv := reflect.ValueOf(v) - - if rv.IsValid() { - if rv.Kind() == reflect.Pointer && !rv.IsNil() { - p := pointerOf(rv) - h.hashType(t.Elem()) - tiElem.hasher()(h, p) - } else { - va := reflect.New(t).Elem() - va.Set(rv) - p := pointerOf(va.Addr()) - h.hashType(t) - ti.hasher()(h, p) - } + h.hashType(t) + if v == nil { + h.HashUint8(0) // indicates nil + } else { + h.HashUint8(1) // indicates visiting pointer element + p := pointerOf(reflect.ValueOf(v)) + hash(h, p) } return h.sum() } } // Update sets last to the hash of v and reports whether its value changed. -func Update(last *Sum, v any) (changed bool) { +func Update[T any](last *Sum, v *T) (changed bool) { sum := Hash(v) changed = sum != *last if changed { @@ -233,9 +220,9 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc { // Types with specific hashing. switch t { case timeTimeType: - return (*hasher).hashTimev + return hashTime case netipAddrType: - return (*hasher).hashAddrv + return hashAddr } // Types that can have their memory representation directly hashed. @@ -245,7 +232,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc { switch t.Kind() { case reflect.String: - return (*hasher).hashString + return hashString case reflect.Array: return makeArrayHasher(t) case reflect.Slice: @@ -263,14 +250,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc { } } -func (h *hasher) hashString(p pointer) { - s := *p.asString() - h.HashUint64(uint64(len(s))) - h.HashString(s) -} - -// hashTimev hashes v, of kind time.Time. -func (h *hasher) hashTimev(p pointer) { +func hashTime(h *hasher, p pointer) { // Include the zone offset (but not the name) to keep // Hash(t1) == Hash(t2) being semantically equivalent to // t1.Format(time.RFC3339Nano) == t2.Format(time.RFC3339Nano). @@ -281,8 +261,7 @@ func (h *hasher) hashTimev(p pointer) { h.HashUint32(uint32(offset)) } -// hashAddrv hashes v, of type netip.Addr. -func (h *hasher) hashAddrv(p pointer) { +func hashAddr(h *hasher, p pointer) { // The formatting of netip.Addr covers the // IP version, the address, and the optional zone name (for v6). // This is equivalent to a1.MarshalBinary() == a2.MarshalBinary(). @@ -304,6 +283,12 @@ func (h *hasher) hashAddrv(p pointer) { } } +func hashString(h *hasher, p pointer) { + s := *p.asString() + h.HashUint64(uint64(len(s))) + h.HashString(s) +} + func makeMemHasher(n uintptr) typeHasherFunc { return func(h *hasher, p pointer) { h.HashBytes(p.asMemory(n)) @@ -448,7 +433,7 @@ func makeMapHasher(t reflect.Type) typeHasherFunc { for iter := v.MapRange(); iter.Next(); { k.SetIterKey(iter) e.SetIterValue(iter) - mh.h.Reset() + mh.h.reset() hashKey(&mh.h, pointerOf(k.Addr())) hashValue(&mh.h, pointerOf(e.Addr())) mh.sum.xor(mh.h.sum()) @@ -567,14 +552,3 @@ func (c *valueCache) get(t reflect.Type) reflect.Value { } return v } - -// hashType hashes a reflect.Type. -// The hash is only consistent within the lifetime of a program. -func (h *hasher) hashType(t reflect.Type) { - // This approach relies on reflect.Type always being backed by a unique - // *reflect.rtype pointer. A safer approach is to use a global sync.Map - // that maps reflect.Type to some arbitrary and unique index. - // While safer, it requires global state with memory that can never be GC'd. - rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype - h.HashUint64(uint64(rtypeAddr)) -} diff --git a/util/deephash/deephash_test.go b/util/deephash/deephash_test.go index e7066b963..1dca9304f 100644 --- a/util/deephash/deephash_test.go +++ b/util/deephash/deephash_test.go @@ -160,7 +160,7 @@ func TestHash(t *testing.T) { } for _, tt := range tests { - gotEq := Hash(tt.in[0]) == Hash(tt.in[1]) + gotEq := Hash(&tt.in[0]) == Hash(&tt.in[1]) if gotEq != tt.wantEq { t.Errorf("(Hash(%T %v) == Hash(%T %v)) = %v, want %v", tt.in[0], tt.in[0], tt.in[1], tt.in[1], gotEq, tt.wantEq) } @@ -171,11 +171,11 @@ func TestDeepHash(t *testing.T) { // v contains the types of values we care about for our current callers. // Mostly we're just testing that we don't panic on handled types. v := getVal() - hash1 := Hash(v) t.Logf("hash: %v", hash1) for i := 0; i < 20; i++ { - hash2 := Hash(getVal()) + v := getVal() + hash2 := Hash(v) if hash1 != hash2 { t.Error("second hash didn't match") } @@ -186,7 +186,7 @@ func TestDeepHash(t *testing.T) { func TestIssue4868(t *testing.T) { m1 := map[int]string{1: "foo"} m2 := map[int]string{1: "bar"} - if Hash(m1) == Hash(m2) { + if Hash(&m1) == Hash(&m2) { t.Error("bogus") } } @@ -194,7 +194,7 @@ func TestIssue4868(t *testing.T) { func TestIssue4871(t *testing.T) { m1 := map[string]string{"": "", "x": "foo"} m2 := map[string]string{} - if h1, h2 := Hash(m1), Hash(m2); h1 == h2 { + if h1, h2 := Hash(&m1), Hash(&m2); h1 == h2 { t.Errorf("bogus: h1=%x, h2=%x", h1, h2) } } @@ -202,7 +202,7 @@ func TestIssue4871(t *testing.T) { func TestNilVsEmptymap(t *testing.T) { m1 := map[string]string(nil) m2 := map[string]string{} - if h1, h2 := Hash(m1), Hash(m2); h1 == h2 { + if h1, h2 := Hash(&m1), Hash(&m2); h1 == h2 { t.Errorf("bogus: h1=%x, h2=%x", h1, h2) } } @@ -210,7 +210,7 @@ func TestNilVsEmptymap(t *testing.T) { func TestMapFraming(t *testing.T) { m1 := map[string]string{"foo": "", "fo": "o"} m2 := map[string]string{} - if h1, h2 := Hash(m1), Hash(m2); h1 == h2 { + if h1, h2 := Hash(&m1), Hash(&m2); h1 == h2 { t.Errorf("bogus: h1=%x, h2=%x", h1, h2) } } @@ -218,23 +218,25 @@ func TestMapFraming(t *testing.T) { func TestQuick(t *testing.T) { initSeed() err := quick.Check(func(v, w map[string]string) bool { - return (Hash(v) == Hash(w)) == reflect.DeepEqual(v, w) + return (Hash(&v) == Hash(&w)) == reflect.DeepEqual(v, w) }, &quick.Config{MaxCount: 1000, Rand: rand.New(rand.NewSource(int64(seed)))}) if err != nil { t.Fatalf("seed=%v, err=%v", seed, err) } } -func getVal() any { - return &struct { - WGConfig *wgcfg.Config - RouterConfig *router.Config - MapFQDNAddrs map[dnsname.FQDN][]netip.Addr - MapFQDNAddrPorts map[dnsname.FQDN][]netip.AddrPort - MapDiscoPublics map[key.DiscoPublic]bool - MapResponse *tailcfg.MapResponse - FilterMatch filter.Match - }{ +type tailscaleTypes struct { + WGConfig *wgcfg.Config + RouterConfig *router.Config + MapFQDNAddrs map[dnsname.FQDN][]netip.Addr + MapFQDNAddrPorts map[dnsname.FQDN][]netip.AddrPort + MapDiscoPublics map[key.DiscoPublic]bool + MapResponse *tailcfg.MapResponse + FilterMatch filter.Match +} + +func getVal() *tailscaleTypes { + return &tailscaleTypes{ &wgcfg.Config{ Name: "foo", Addresses: []netip.Prefix{netip.PrefixFrom(netip.AddrFrom16([16]byte{3: 3}).Unmap(), 5)}, @@ -600,23 +602,23 @@ func TestMapCycle(t *testing.T) { a["self"] = a b := make(M) // cylic graph of 1 node b["self"] = b - ha := Hash(a) - hb := Hash(b) + ha := Hash(&a) + hb := Hash(&b) c.Assert(ha, qt.Equals, hb) c1 := make(M) // cyclic graph of 2 nodes c2 := make(M) // cyclic graph of 2 nodes c1["peer"] = c2 c2["peer"] = c1 - hc1 := Hash(c1) - hc2 := Hash(c2) + hc1 := Hash(&c1) + hc2 := Hash(&c2) c.Assert(hc1, qt.Equals, hc2) c.Assert(ha, qt.Not(qt.Equals), hc1) c.Assert(hb, qt.Not(qt.Equals), hc2) c3 := make(M) // graph of 1 node pointing to cyclic graph of 2 nodes c3["child"] = c1 - hc3 := Hash(c3) + hc3 := Hash(&c3) c.Assert(hc1, qt.Not(qt.Equals), hc3) } @@ -732,9 +734,8 @@ var filterRules = []tailcfg.FilterRule{ func BenchmarkHashPacketFilter(b *testing.B) { b.ReportAllocs() - hash := HasherForType[*[]tailcfg.FilterRule]() for i := 0; i < b.N; i++ { - sink = hash(&filterRules) + sink = Hash(&filterRules) } } @@ -815,7 +816,7 @@ func BenchmarkTailcfgNode(b *testing.B) { func TestExhaustive(t *testing.T) { seen := make(map[Sum]bool) for i := 0; i < 100000; i++ { - s := Hash(i) + s := Hash(&i) if seen[s] { t.Fatalf("hash collision %v", i) }