@ -6,7 +6,7 @@
// without looping. The hash is only valid within the lifetime of a program.
// without looping. The hash is only valid within the lifetime of a program.
// Users should not store the hash on disk or send it over the network.
// Users should not store the hash on disk or send it over the network.
// The hash is sufficiently strong and unique such that
// The hash is sufficiently strong and unique such that
// Hash(x) == Hash(y) is an appropriate replacement for x == y.
// Hash(&x) == Hash(&y) is an appropriate replacement for x == y.
//
//
// The definition of equality is identical to reflect.DeepEqual except:
// The definition of equality is identical to reflect.DeepEqual except:
// - Floating-point values are compared based on the raw bits,
// - Floating-point values are compared based on the raw bits,
@ -65,6 +65,33 @@ type hasher struct {
visitStack visitStack
visitStack visitStack
}
}
// hasherPool holds reusable *hasher values. When the pool has nothing to
// hand out, New supplies a freshly allocated, zero-valued hasher.
var hasherPool = &sync.Pool{
	New: func() any {
		return &hasher{}
	},
}
// reset prepares h for hashing a new value.
// The SHA-256 backend of h.Block512 is installed lazily the first time
// reset runs on a given hasher; after that only the block state is reset.
func (h *hasher) reset() {
	blk := &h.Block512
	if blk.Hash == nil {
		blk.Hash = sha256.New()
	}
	blk.Reset()
}
// hashType mixes the identity of the reflect.Type t into h.
// The value that gets hashed is a memory address, so the result is only
// stable within the lifetime of a single program.
func (h *hasher) hashType(t reflect.Type) {
	// Every reflect.Type is assumed to be backed by one unique
	// *reflect.rtype pointer, so that pointer's address identifies the type.
	// A global sync.Map assigning each reflect.Type an arbitrary unique index
	// would be safer, but it would hold global memory that can never be GC'd.
	addr := uint64(reflect.ValueOf(t).Pointer()) // address of *reflect.rtype
	h.HashUint64(addr)
}
// sum returns the digest currently held by h as a Sum.
// The digest is appended to a zero-length slice of s.sum, so it is
// presumably written straight into the array of the returned value.
func (h *hasher) sum() (s Sum) {
	dst := s.sum[:0]
	h.Sum(dst)
	return s
}
// Sum is an opaque checksum type that is comparable.
// Sum is an opaque checksum type that is comparable.
type Sum struct {
type Sum struct {
sum [ sha256 . Size ] byte
sum [ sha256 . Size ] byte
@ -89,97 +116,57 @@ func initSeed() {
seed = uint64 ( time . Now ( ) . UnixNano ( ) )
seed = uint64 ( time . Now ( ) . UnixNano ( ) )
}
}
// Reset readies h for a fresh hash computation.
// If h.Block512 has no hash backend yet, a SHA-256 one is created first;
// the block state is then reset in either case.
func (h *hasher) Reset() {
	state := &h.Block512
	if state.Hash == nil {
		state.Hash = sha256.New()
	}
	state.Reset()
}
// sum extracts the digest accumulated in h and returns it as a Sum.
// The named result's array is handed to h.Sum as a zero-length slice to
// append into.
func (h *hasher) sum() (s Sum) {
	out := s.sum[:0]
	h.Sum(out)
	return s
}
// hasherPool is a pool of *hasher values shared by the hashing entry points.
// New allocates a zero-valued hasher whenever the pool is empty.
var hasherPool = &sync.Pool{
	New: func() any {
		var h hasher
		return &h
	},
}
// Hash returns the hash of v.
// Hash returns the hash of v.
// For performance, this should be a non-nil pointer.
func Hash [ T any ] ( v * T ) Sum {
func Hash ( v any ) ( s Sum ) {
h := hasherPool . Get ( ) . ( * hasher )
h := hasherPool . Get ( ) . ( * hasher )
defer hasherPool . Put ( h )
defer hasherPool . Put ( h )
h . R eset( )
h . reset ( )
seedOnce . Do ( initSeed )
seedOnce . Do ( initSeed )
h . HashUint64 ( seed )
h . HashUint64 ( seed )
rv := reflect . ValueOf ( v )
// Always treat the Hash input as if it were an interface by including
if rv . IsValid ( ) {
// a hash of the type. This ensures that hashing of two different types
var t reflect . Type
// but with the same value structure produces different hashes.
var p pointer
t := reflect . TypeOf ( v ) . Elem ( )
if rv . Kind ( ) == reflect . Pointer && ! rv . IsNil ( ) {
h . hashType ( t )
t = rv . Type ( ) . Elem ( )
if v == nil {
p = pointerOf ( rv )
h . HashUint8 ( 0 ) // indicates nil
} else {
} else {
t = rv . Type ( )
h . HashUint8 ( 1 ) // indicates visiting pointer element
va := reflect . New ( t ) . Elem ( )
p := pointerOf ( reflect . ValueOf ( v ) )
va . Set ( rv )
hash := getTypeInfo ( t ) . hasher ( )
p = pointerOf ( va . Addr ( ) )
hash ( h , p )
}
// Always treat the Hash input as an interface (which it is) and hash its
// type as well; otherwise two Hash calls on values of different types whose
// contents hash to the same bytes would produce equal Sum values. This is
// the same thing that we do for reflect.Kind Interface in hashValue, but
// the initial reflect.ValueOf from an interface value effectively strips
// the interface box off, so we have to do it at the top level by hand.
h . hashType ( t )
ti := getTypeInfo ( t )
ti . hasher ( ) ( h , p )
}
}
return h . sum ( )
return h . sum ( )
}
}
// HasherForType is like Hash, but it returns a Hash func that's specialized for
// HasherForType returns a hash that is specialized for the provided type.
// the provided reflect type, avoiding a map lookup per value.
func HasherForType [ T any ] ( ) func ( * T ) Sum {
func HasherForType [ T any ] ( ) func ( T ) Sum {
var v * T
var zeroT T
t := reflect . TypeOf ( zeroT )
ti := getTypeInfo ( t )
var tiElem * typeInfo
if t . Kind ( ) == reflect . Pointer {
tiElem = getTypeInfo ( t . Elem ( ) )
}
seedOnce . Do ( initSeed )
seedOnce . Do ( initSeed )
t := reflect . TypeOf ( v ) . Elem ( )
return func ( v T ) ( s Sum ) {
hash := getTypeInfo ( t ) . hasher ( )
return func ( v * T ) ( s Sum ) {
// This logic is identical to Hash, but with a few statements pulled out.
h := hasherPool . Get ( ) . ( * hasher )
h := hasherPool . Get ( ) . ( * hasher )
defer hasherPool . Put ( h )
defer hasherPool . Put ( h )
h . R eset( )
h . r eset( )
h . HashUint64 ( seed )
h . HashUint64 ( seed )
rv := reflect . ValueOf ( v )
h . hashType ( t )
if v == nil {
if rv . IsValid ( ) {
h . HashUint8 ( 0 ) // indicates nil
if rv . Kind ( ) == reflect . Pointer && ! rv . IsNil ( ) {
} else {
p := pointerOf ( rv )
h . HashUint8 ( 1 ) // indicates visiting pointer element
h . hashType ( t . Elem ( ) )
p := pointerOf ( reflect . ValueOf ( v ) )
tiElem . hasher ( ) ( h , p )
hash ( h , p )
} else {
va := reflect . New ( t ) . Elem ( )
va . Set ( rv )
p := pointerOf ( va . Addr ( ) )
h . hashType ( t )
ti . hasher ( ) ( h , p )
}
}
}
return h . sum ( )
return h . sum ( )
}
}
}
}
// Update sets last to the hash of v and reports whether its value changed.
// Update sets last to the hash of v and reports whether its value changed.
func Update ( last * Sum , v any ) ( changed bool ) {
func Update [ T any ] ( last * Sum , v * T ) ( changed bool ) {
sum := Hash ( v )
sum := Hash ( v )
changed = sum != * last
changed = sum != * last
if changed {
if changed {
@ -233,9 +220,9 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
// Types with specific hashing.
// Types with specific hashing.
switch t {
switch t {
case timeTimeType :
case timeTimeType :
return ( * hasher ) . hashTime v
return hashTime
case netipAddrType :
case netipAddrType :
return ( * hasher ) . hashAddr v
return hashAddr
}
}
// Types that can have their memory representation directly hashed.
// Types that can have their memory representation directly hashed.
@ -245,7 +232,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
switch t . Kind ( ) {
switch t . Kind ( ) {
case reflect . String :
case reflect . String :
return ( * hasher ) . hashString
return hashString
case reflect . Array :
case reflect . Array :
return makeArrayHasher ( t )
return makeArrayHasher ( t )
case reflect . Slice :
case reflect . Slice :
@ -263,14 +250,7 @@ func genTypeHasher(ti *typeInfo) typeHasherFunc {
}
}
}
}
func ( h * hasher ) hashString ( p pointer ) {
func hashTime ( h * hasher , p pointer ) {
s := * p . asString ( )
h . HashUint64 ( uint64 ( len ( s ) ) )
h . HashString ( s )
}
// hashTimev hashes v, of kind time.Time.
func ( h * hasher ) hashTimev ( p pointer ) {
// Include the zone offset (but not the name) to keep
// Include the zone offset (but not the name) to keep
// Hash(t1) == Hash(t2) being semantically equivalent to
// Hash(t1) == Hash(t2) being semantically equivalent to
// t1.Format(time.RFC3339Nano) == t2.Format(time.RFC3339Nano).
// t1.Format(time.RFC3339Nano) == t2.Format(time.RFC3339Nano).
@ -281,8 +261,7 @@ func (h *hasher) hashTimev(p pointer) {
h . HashUint32 ( uint32 ( offset ) )
h . HashUint32 ( uint32 ( offset ) )
}
}
// hashAddrv hashes v, of type netip.Addr.
func hashAddr ( h * hasher , p pointer ) {
func ( h * hasher ) hashAddrv ( p pointer ) {
// The formatting of netip.Addr covers the
// The formatting of netip.Addr covers the
// IP version, the address, and the optional zone name (for v6).
// IP version, the address, and the optional zone name (for v6).
// This is equivalent to a1.MarshalBinary() == a2.MarshalBinary().
// This is equivalent to a1.MarshalBinary() == a2.MarshalBinary().
@ -304,6 +283,12 @@ func (h *hasher) hashAddrv(p pointer) {
}
}
}
}
// hashString hashes the string that p points to.
// The byte length is written before the contents, presumably so that the
// boundaries of consecutive strings remain part of the hash.
func hashString(h *hasher, p pointer) {
	str := *p.asString()
	n := uint64(len(str))
	h.HashUint64(n)
	h.HashString(str)
}
func makeMemHasher ( n uintptr ) typeHasherFunc {
func makeMemHasher ( n uintptr ) typeHasherFunc {
return func ( h * hasher , p pointer ) {
return func ( h * hasher , p pointer ) {
h . HashBytes ( p . asMemory ( n ) )
h . HashBytes ( p . asMemory ( n ) )
@ -448,7 +433,7 @@ func makeMapHasher(t reflect.Type) typeHasherFunc {
for iter := v . MapRange ( ) ; iter . Next ( ) ; {
for iter := v . MapRange ( ) ; iter . Next ( ) ; {
k . SetIterKey ( iter )
k . SetIterKey ( iter )
e . SetIterValue ( iter )
e . SetIterValue ( iter )
mh . h . R eset( )
mh . h . r eset( )
hashKey ( & mh . h , pointerOf ( k . Addr ( ) ) )
hashKey ( & mh . h , pointerOf ( k . Addr ( ) ) )
hashValue ( & mh . h , pointerOf ( e . Addr ( ) ) )
hashValue ( & mh . h , pointerOf ( e . Addr ( ) ) )
mh . sum . xor ( mh . h . sum ( ) )
mh . sum . xor ( mh . h . sum ( ) )
@ -567,14 +552,3 @@ func (c *valueCache) get(t reflect.Type) reflect.Value {
}
}
return v
return v
}
}
// hashType feeds an identifier for the reflect.Type t into h.
// Because the identifier is a pointer address, the resulting hash is only
// consistent within the lifetime of a program.
func (h *hasher) hashType(t reflect.Type) {
	// The address of the *reflect.rtype behind t is used as the identifier,
	// which relies on each reflect.Type having exactly one such pointer.
	// Mapping each reflect.Type to an arbitrary unique index in a global
	// sync.Map would be safer, at the cost of global state whose memory can
	// never be GC'd.
	h.HashUint64(uint64(reflect.ValueOf(t).Pointer()))
}