diff --git a/util/deephash/deephash.go b/util/deephash/deephash.go index 5d03e8539..c1997a4b5 100644 --- a/util/deephash/deephash.go +++ b/util/deephash/deephash.go @@ -17,8 +17,51 @@ // // WARNING: This package, like most of the tailscale.com Go module, // should be considered Tailscale-internal; we make no API promises. +// +// # Cycle detection +// +// This package correctly handles cycles in the value graph, +// but in a way that is potentially pathological in some situations. +// +// The algorithm for cycle detection operates by +// pushing a pointer onto a stack whenever deephash is visiting a pointer and +// popping the pointer from the stack after deephash is leaving the pointer. +// Before visiting a new pointer, deephash checks whether it has already been +// visited on the pointer stack. If so, it hashes the index of the pointer +// on the stack and avoids visiting the pointer. +// +// This algorithm is guaranteed to detect cycles, but may expand pointers +// more often than a potential alternate algorithm that remembers all pointers +// ever visited in a map. The current algorithm uses O(D) memory, where D +// is the maximum depth of the recursion, while the alternate algorithm +// would use O(P) memory where P is all pointers ever seen, which can be a lot, +// and most of which may have nothing to do with cycles. +// Also, the alternate algorithm has to deal with challenges of producing +// deterministic results when pointers are visited in non-deterministic ways +// such as when iterating through a Go map. The stack-based algorithm avoids +// this challenge since the stack is always deterministic regardless of +// non-deterministic iteration order of Go maps. +// +// To concretely see how this algorithm can be pathological, +// consider the following data structure: +// +// var big *Item = ... // some large data structure that is slow to hash +// var manyBig []*Item +// for i := 0; i < 1000; i++ { +// manyBig = append(manyBig, &big) +// } +// deephash.Hash(manyBig) +// +// Here, the manyBig data structure is not even cyclic. +// We have the same big *Item being stored multiple times in a []*Item. +// When deephash hashes []*Item, it hashes each individual *Item +// not realizing that it had just done the computation earlier. +// To avoid the pathological situation, Item should implement [SelfHasher] and +// memoize attempts to hash itself. package deephash +// TODO: Add option to teach deephash to memoize the Hash result of particular types? + import ( "crypto/sha256" "encoding/binary"