From d8fbce7eefe6151fa9f09425c453ddc6e5c55bba Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Mon, 2 Aug 2021 21:44:13 -0700 Subject: [PATCH] util/deephash: hash uint{8,16,32,64} explicitly (#2502) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of hashing the humanly formatted forms of a number, hash the native machine bits of the integers themselves. There is a small performance gain for this: name old time/op new time/op delta Hash-8 75.7µs ± 1% 76.0µs ± 2% ~ (p=0.315 n=10+9) HashMapAcyclic-8 63.1µs ± 3% 61.3µs ± 1% -2.77% (p=0.000 n=10+10) TailcfgNode-8 10.3µs ± 1% 10.2µs ± 1% -1.48% (p=0.000 n=10+10) HashArray-8 1.07µs ± 1% 1.05µs ± 1% -1.79% (p=0.000 n=10+10) Signed-off-by: Joe Tsai --- util/deephash/deephash.go | 110 +++++++++++++++++++++------------ util/deephash/deephash_test.go | 52 ++++++++++++++-- 2 files changed, 116 insertions(+), 46 deletions(-) diff --git a/util/deephash/deephash.go b/util/deephash/deephash.go index ab4ad77ef..6424fe481 100644 --- a/util/deephash/deephash.go +++ b/util/deephash/deephash.go @@ -21,7 +21,6 @@ import ( "hash" "math" "reflect" - "strconv" "sync" "time" "unsafe" @@ -91,8 +90,8 @@ func Hash(v interface{}) (s Sum) { once.Do(func() { seed = uint64(time.Now().UnixNano()) }) - h.uint(seed) - h.print(reflect.ValueOf(v)) + h.hashUint64(seed) + h.hashValue(reflect.ValueOf(v)) return h.sum() } @@ -113,19 +112,25 @@ type appenderTo interface { AppendTo([]byte) []byte } -func (h *hasher) uint(i uint64) { - binary.BigEndian.PutUint64(h.scratch[:8], i) - h.bw.Write(h.scratch[:8]) +func (h *hasher) hashUint8(i uint8) { + h.bw.WriteByte(i) } - -func (h *hasher) int(i int) { - binary.BigEndian.PutUint64(h.scratch[:8], uint64(i)) +func (h *hasher) hashUint16(i uint16) { + binary.LittleEndian.PutUint16(h.scratch[:2], i) + h.bw.Write(h.scratch[:2]) +} +func (h *hasher) hashUint32(i uint32) { + binary.LittleEndian.PutUint32(h.scratch[:4], i) + h.bw.Write(h.scratch[:4]) +} +func (h *hasher) hashUint64(i uint64) { + binary.LittleEndian.PutUint64(h.scratch[:8], i) h.bw.Write(h.scratch[:8]) } var uint8Type = reflect.TypeOf(byte(0)) -func (h *hasher) print(v reflect.Value) { +func (h *hasher) hashValue(v reflect.Value) { if !v.IsValid() { return } @@ -152,33 +157,33 @@ func (h *hasher) print(v reflect.Value) { panic(fmt.Sprintf("unhandled kind %v for type %v", v.Kind(), v.Type())) case reflect.Ptr: if v.IsNil() { - w.WriteByte(0) // indicates nil + h.hashUint8(0) // indicates nil return } // Check for cycle. ptr := pointerOf(v) if idx, ok := h.visitStack.seen(ptr); ok { - w.WriteByte(2) // indicates cycle - h.uint(uint64(idx)) + h.hashUint8(2) // indicates cycle + h.hashUint64(uint64(idx)) return } h.visitStack.push(ptr) defer h.visitStack.pop(ptr) - w.WriteByte(1) // indicates visiting a pointer - h.print(v.Elem()) + h.hashUint8(1) // indicates visiting a pointer + h.hashValue(v.Elem()) case reflect.Struct: w.WriteString("struct") - h.int(v.NumField()) + h.hashUint64(uint64(v.NumField())) for i, n := 0, v.NumField(); i < n; i++ { - h.int(i) - h.print(v.Field(i)) + h.hashUint64(uint64(i)) + h.hashValue(v.Field(i)) } case reflect.Slice, reflect.Array: vLen := v.Len() if v.Kind() == reflect.Slice { - h.int(vLen) + h.hashUint64(uint64(vLen)) } if v.Type().Elem() == uint8Type && v.CanInterface() { if vLen > 0 && vLen <= scratchSize { @@ -197,45 +202,68 @@ func (h *hasher) print(v reflect.Value) { // TODO(dsnet): Perform cycle detection for slices, // which is functionally a list of pointers. // See https://github.com/google/go-cmp/blob/402949e8139bb890c71a707b6faf6dd05c92f4e5/cmp/compare.go#L438-L450 - h.int(i) - h.print(v.Index(i)) + h.hashUint64(uint64(i)) + h.hashValue(v.Index(i)) } case reflect.Interface: if v.IsNil() { - w.WriteByte(0) // indicates nil + h.hashUint8(0) // indicates nil return } v = v.Elem() - w.WriteByte(1) // indicates visiting interface value + h.hashUint8(1) // indicates visiting interface value h.hashType(v.Type()) - h.print(v) + h.hashValue(v) case reflect.Map: // Check for cycle. ptr := pointerOf(v) if idx, ok := h.visitStack.seen(ptr); ok { - w.WriteByte(2) // indicates cycle - h.uint(uint64(idx)) + h.hashUint8(2) // indicates cycle + h.hashUint64(uint64(idx)) return } h.visitStack.push(ptr) defer h.visitStack.pop(ptr) - w.WriteByte(1) // indicates visiting a map + h.hashUint8(1) // indicates visiting a map h.hashMap(v) case reflect.String: - h.int(v.Len()) - w.WriteString(v.String()) + s := v.String() + h.hashUint64(uint64(len(s))) + w.WriteString(s) case reflect.Bool: - w.Write(strconv.AppendBool(h.scratch[:0], v.Bool())) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - w.Write(strconv.AppendInt(h.scratch[:0], v.Int(), 10)) - case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: - h.uint(v.Uint()) - case reflect.Float32, reflect.Float64: - w.Write(strconv.AppendUint(h.scratch[:0], math.Float64bits(v.Float()), 10)) - case reflect.Complex64, reflect.Complex128: - fmt.Fprintf(w, "%v", v.Complex()) + if v.Bool() { + h.hashUint8(1) + } else { + h.hashUint8(0) + } + case reflect.Int8: + h.hashUint8(uint8(v.Int())) + case reflect.Int16: + h.hashUint16(uint16(v.Int())) + case reflect.Int32: + h.hashUint32(uint32(v.Int())) + case reflect.Int64, reflect.Int: + h.hashUint64(uint64(v.Int())) + case reflect.Uint8: + h.hashUint8(uint8(v.Uint())) + case reflect.Uint16: + h.hashUint16(uint16(v.Uint())) + case reflect.Uint32: + h.hashUint32(uint32(v.Uint())) + case reflect.Uint64, reflect.Uint, reflect.Uintptr: + h.hashUint64(uint64(v.Uint())) + case reflect.Float32: + h.hashUint32(math.Float32bits(float32(v.Float()))) + case reflect.Float64: + h.hashUint64(math.Float64bits(float64(v.Float()))) + case reflect.Complex64: + h.hashUint32(math.Float32bits(real(complex64(v.Complex())))) + h.hashUint32(math.Float32bits(imag(complex64(v.Complex())))) + case reflect.Complex128: + h.hashUint64(math.Float64bits(real(complex128(v.Complex())))) + h.hashUint64(math.Float64bits(imag(complex128(v.Complex())))) } } @@ -281,8 +309,8 @@ func (h *hasher) hashMap(v reflect.Value) { key := iterKey(iter, k) val := iterVal(iter, e) mh.h.reset() - mh.h.print(key) - mh.h.print(val) + mh.h.hashValue(key) + mh.h.hashValue(val) sum.xor(mh.h.sum()) } h.bw.Write(append(h.scratch[:0], sum.sum[:]...)) // append into scratch to avoid heap allocation @@ -327,5 +355,5 @@ func (h *hasher) hashType(t reflect.Type) { // that maps reflect.Type to some arbitrary and unique index. // While safer, it requires global state with memory that can never be GC'd. rtypeAddr := reflect.ValueOf(t).Pointer() // address of *reflect.rtype - h.uint(uint64(rtypeAddr)) + h.hashUint64(uint64(rtypeAddr)) } diff --git a/util/deephash/deephash_test.go b/util/deephash/deephash_test.go index a60b229b7..e2be3c13a 100644 --- a/util/deephash/deephash_test.go +++ b/util/deephash/deephash_test.go @@ -9,6 +9,7 @@ import ( "bufio" "bytes" "fmt" + "math" "reflect" "testing" @@ -31,12 +32,56 @@ func (p appendBytes) AppendTo(b []byte) []byte { func TestHash(t *testing.T) { type tuple [2]interface{} type iface struct{ X interface{} } + type scalars struct { + I8 int8 + I16 int16 + I32 int32 + I64 int64 + I int + U8 uint8 + U16 uint16 + U32 uint32 + U64 uint64 + U uint + UP uintptr + F32 float32 + F64 float64 + C64 complex64 + C128 complex128 + } type MyBool bool type MyHeader tar.Header tests := []struct { in tuple wantEq bool }{ + {in: tuple{false, true}, wantEq: false}, + {in: tuple{true, true}, wantEq: true}, + {in: tuple{false, false}, wantEq: true}, + { + in: tuple{ + scalars{-8, -16, -32, -64, -1234, 8, 16, 32, 64, 1234, 5678, 32.32, 64.64, 32 + 32i, 64 + 64i}, + scalars{-8, -16, -32, -64, -1234, 8, 16, 32, 64, 1234, 5678, 32.32, 64.64, 32 + 32i, 64 + 64i}, + }, + wantEq: true, + }, + {in: tuple{scalars{I8: math.MinInt8}, scalars{I8: math.MinInt8 / 2}}, wantEq: false}, + {in: tuple{scalars{I16: math.MinInt16}, scalars{I16: math.MinInt16 / 2}}, wantEq: false}, + {in: tuple{scalars{I32: math.MinInt32}, scalars{I32: math.MinInt32 / 2}}, wantEq: false}, + {in: tuple{scalars{I64: math.MinInt64}, scalars{I64: math.MinInt64 / 2}}, wantEq: false}, + {in: tuple{scalars{I: -1234}, scalars{I: -1234 / 2}}, wantEq: false}, + {in: tuple{scalars{U8: math.MaxUint8}, scalars{U8: math.MaxUint8 / 2}}, wantEq: false}, + {in: tuple{scalars{U16: math.MaxUint16}, scalars{U16: math.MaxUint16 / 2}}, wantEq: false}, + {in: tuple{scalars{U32: math.MaxUint32}, scalars{U32: math.MaxUint32 / 2}}, wantEq: false}, + {in: tuple{scalars{U64: math.MaxUint64}, scalars{U64: math.MaxUint64 / 2}}, wantEq: false}, + {in: tuple{scalars{U: 1234}, scalars{U: 1234 / 2}}, wantEq: false}, + {in: tuple{scalars{UP: 5678}, scalars{UP: 5678 / 2}}, wantEq: false}, + {in: tuple{scalars{F32: 32.32}, scalars{F32: math.Nextafter32(32.32, 0)}}, wantEq: false}, + {in: tuple{scalars{F64: 64.64}, scalars{F64: math.Nextafter(64.64, 0)}}, wantEq: false}, + {in: tuple{scalars{F32: float32(math.NaN())}, scalars{F32: float32(math.NaN())}}, wantEq: true}, + {in: tuple{scalars{F64: float64(math.NaN())}, scalars{F64: float64(math.NaN())}}, wantEq: true}, + {in: tuple{scalars{C64: 32 + 32i}, scalars{C64: complex(math.Nextafter32(32, 0), 32)}}, wantEq: false}, + {in: tuple{scalars{C128: 64 + 64i}, scalars{C128: complex(math.Nextafter(64, 0), 64)}}, wantEq: false}, {in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}}, wantEq: true}, {in: tuple{[]appendBytes{{}, {0, 0, 0, 0, 0, 0, 0, 1}}, []appendBytes{{0, 0, 0, 0, 0, 0, 0, 1}, {}}}, wantEq: false}, {in: tuple{iface{MyBool(true)}, iface{MyBool(true)}}, wantEq: true}, @@ -47,9 +92,6 @@ func TestHash(t *testing.T) { {in: tuple{iface{&MyHeader{}}, iface{&tar.Header{}}}, wantEq: false}, {in: tuple{iface{[]map[string]MyBool{}}, iface{[]map[string]MyBool{}}}, wantEq: true}, {in: tuple{iface{[]map[string]bool{}}, iface{[]map[string]MyBool{}}}, wantEq: false}, - {in: tuple{false, true}, wantEq: false}, - {in: tuple{true, true}, wantEq: true}, - {in: tuple{false, false}, wantEq: true}, { in: func() tuple { i1 := 1 @@ -225,10 +267,10 @@ func TestPrintArray(t *testing.T) { var got bytes.Buffer bw := bufio.NewWriter(&got) h := &hasher{bw: bw} - h.print(reflect.ValueOf(x)) + h.hashValue(reflect.ValueOf(x)) bw.Flush() const want = "struct" + - "\x00\x00\x00\x00\x00\x00\x00\x01" + // 1 field + "\x01\x00\x00\x00\x00\x00\x00\x00" + // 1 field "\x00\x00\x00\x00\x00\x00\x00\x00" + // 0th field // the 32 bytes: "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1f"