From 1a38d2a3b4185acac37a8b3ed04297ea4d50a3d0 Mon Sep 17 00:00:00 2001 From: Joe Tsai Date: Thu, 4 Apr 2024 10:46:20 -0700 Subject: [PATCH] util/zstdframe: support specifying a MaxWindowSize (#11595) Specifying a smaller window size during compression provides a knob to tweak the tradeoff between memory usage and the compression ratio. Updates tailscale/corp#18514 Signed-off-by: Joe Tsai --- util/zstdframe/options.go | 90 +++++++++++++++++++++++++++++++------ util/zstdframe/zstd_test.go | 29 ++++++++++++ 2 files changed, 106 insertions(+), 13 deletions(-) diff --git a/util/zstdframe/options.go b/util/zstdframe/options.go index 0a8665c84..b4b0f2b85 100644 --- a/util/zstdframe/options.go +++ b/util/zstdframe/options.go @@ -5,6 +5,7 @@ package zstdframe import ( "math/bits" + "strconv" "sync" "github.com/klauspost/compress/zstd" @@ -52,14 +53,48 @@ type maxDecodedSize uint64 func (maxDecodedSize) isOption() {} +type maxDecodedSizeLog2 uint8 // uint8 avoids allocation when storing into interface + +func (maxDecodedSizeLog2) isOption() {} + // MaxDecodedSize specifies the maximum decoded size and // is used to protect against hostile content. // By default, there is no limit. // This option is ignored when encoding. func MaxDecodedSize(maxSize uint64) Option { + if bits.OnesCount64(maxSize) == 1 { + return maxDecodedSizeLog2(log2(maxSize)) + } return maxDecodedSize(maxSize) } +type maxWindowSizeLog2 uint8 // uint8 avoids allocation when storing into interface + +func (maxWindowSizeLog2) isOption() {} + +// MaxWindowSize specifies the maximum window size, which must be a power-of-two +// and be in the range of [[zstd.MinWindowSize], [zstd.MaxWindowSize]]. +// +// The compression or decompression algorithm will use a LZ77 rolling window +// no larger than the specified size. The compression ratio will be +// adversely affected, but memory requirements will be lower. 
+// When decompressing, an error is reported if a LZ77 back reference exceeds +// the specified maximum window size. +// +// For decompression, [MaxDecodedSize] is generally more useful. +func MaxWindowSize(maxSize uint64) Option { + switch { + case maxSize < zstd.MinWindowSize: + panic("maximum window size cannot be less than " + strconv.FormatUint(zstd.MinWindowSize, 10)) + case bits.OnesCount64(maxSize) != 1: + panic("maximum window size must be a power-of-two") + case maxSize > zstd.MaxWindowSize: + panic("maximum window size cannot be greater than " + strconv.FormatUint(zstd.MaxWindowSize, 10)) + default: + return maxWindowSizeLog2(log2(maxSize)) + } +} + type lowMemory bool func (lowMemory) isOption() {} @@ -72,9 +107,10 @@ func LowMemory(low bool) Option { return lowMemory(low) } var encoderPools sync.Map // map[encoderOptions]*sync.Pool -> *zstd.Encoder type encoderOptions struct { - level zstd.EncoderLevel - checksum bool - lowMemory bool + level zstd.EncoderLevel + maxWindowLog2 uint8 + checksum bool + lowMemory bool } type encoder struct { @@ -88,6 +124,8 @@ func getEncoder(opts ...Option) encoder { switch opt := opt.(type) { case encoderLevel: eopts.level = zstd.EncoderLevel(opt) + case maxWindowSizeLog2: + eopts.maxWindowLog2 = uint8(opt) case withChecksum: eopts.checksum = bool(opt) case lowMemory: @@ -102,7 +140,8 @@ func getEncoder(opts ...Option) encoder { pool := vpool.(*sync.Pool) enc, _ := pool.Get().(*zstd.Encoder) if enc == nil { - enc = must.Get(zstd.NewWriter(nil, + var noopts int + zopts := [...]zstd.EOption{ // Set concurrency=1 to ensure synchronous operation. 
zstd.WithEncoderConcurrency(1), // In stateless compression, the data is already in a single buffer, @@ -115,7 +154,15 @@ func getEncoder(opts ...Option) encoder { zstd.WithZeroFrames(true), zstd.WithEncoderLevel(eopts.level), zstd.WithEncoderCRC(eopts.checksum), - zstd.WithLowerEncoderMem(eopts.lowMemory))) + zstd.WithLowerEncoderMem(eopts.lowMemory), + nil, // reserved for zstd.WithWindowSize + } + if eopts.maxWindowLog2 > 0 { + zopts[len(zopts)-noopts-1] = zstd.WithWindowSize(1 << eopts.maxWindowLog2) + } else { + noopts++ + } + enc = must.Get(zstd.NewWriter(nil, zopts[:len(zopts)-noopts]...)) } return encoder{pool, enc} } @@ -125,9 +172,10 @@ func putEncoder(e encoder) { e.pool.Put(e.Encoder) } var decoderPools sync.Map // map[decoderOptions]*sync.Pool -> *zstd.Decoder type decoderOptions struct { - maxSizeLog2 int - checksum bool - lowMemory bool + maxSizeLog2 uint8 + maxWindowLog2 uint8 + checksum bool + lowMemory bool } type decoder struct { @@ -142,10 +190,14 @@ func getDecoder(opts ...Option) decoder { dopts := decoderOptions{maxSizeLog2: 63, checksum: true} for _, opt := range opts { switch opt := opt.(type) { + case maxDecodedSizeLog2: + maxSize = 1 << uint8(opt) + dopts.maxSizeLog2 = uint8(opt) case maxDecodedSize: maxSize = uint64(opt) - dopts.maxSizeLog2 = 64 - bits.LeadingZeros64(maxSize-1) - dopts.maxSizeLog2 = min(max(10, dopts.maxSizeLog2), 63) + dopts.maxSizeLog2 = uint8(log2(maxSize)) + case maxWindowSizeLog2: + dopts.maxWindowLog2 = uint8(opt) case withChecksum: dopts.checksum = bool(opt) case lowMemory: @@ -160,12 +212,21 @@ func getDecoder(opts ...Option) decoder { pool := vpool.(*sync.Pool) dec, _ := pool.Get().(*zstd.Decoder) if dec == nil { - dec = must.Get(zstd.NewReader(nil, + var noopts int + zopts := [...]zstd.DOption{ // Set concurrency=1 to ensure synchronous operation. 
zstd.WithDecoderConcurrency(1), - zstd.WithDecoderMaxMemory(1<<dopts.maxSizeLog2), + zstd.WithDecoderMaxMemory(1 << min(max(10, dopts.maxSizeLog2), 63)), zstd.IgnoreChecksum(!dopts.checksum), - zstd.WithDecoderLowmem(dopts.lowMemory))) + zstd.WithDecoderLowmem(dopts.lowMemory), + nil, // reserved for zstd.WithDecoderMaxWindow + } + if dopts.maxWindowLog2 > 0 { + zopts[len(zopts)-noopts-1] = zstd.WithDecoderMaxWindow(1 << dopts.maxWindowLog2) + } else { + noopts++ + } + dec = must.Get(zstd.NewReader(nil, zopts[:len(zopts)-noopts]...)) } return decoder{pool, dec, maxSize} } @@ -181,3 +242,6 @@ func (d decoder) DecodeAll(src, dst []byte) ([]byte, error) { } return dst2, err } + +// log2 computes log2 of x rounded up to the nearest integer. +func log2(x uint64) int { return 64 - bits.LeadingZeros64(x-1) } diff --git a/util/zstdframe/zstd_test.go b/util/zstdframe/zstd_test.go index db7b7801f..fb012cde7 100644 --- a/util/zstdframe/zstd_test.go +++ b/util/zstdframe/zstd_test.go @@ -121,6 +121,7 @@ func BenchmarkEncode(b *testing.B) { {name: "Default", opts: []Option{DefaultCompression}}, {name: "Fastest", opts: []Option{FastestCompression}}, {name: "FastestLowMemory", opts: []Option{FastestCompression, LowMemory(true)}}, + {name: "FastestWindowSize", opts: []Option{FastestCompression, MaxWindowSize(1 << 10)}}, {name: "FastestNoChecksum", opts: []Option{FastestCompression, WithChecksum(false)}}, } for _, bb := range options { @@ -207,3 +208,31 @@ func BenchmarkDecodeParallel(b *testing.B) { }) } } + +var opt Option + +func TestOptionAllocs(t *testing.T) { + t.Run("EncoderLevel", func(t *testing.T) { + t.Log(testing.AllocsPerRun(1e3, func() { opt = EncoderLevel(zstd.SpeedFastest) })) + }) + t.Run("MaxDecodedSize/PowerOfTwo", func(t *testing.T) { + t.Log(testing.AllocsPerRun(1e3, func() { opt = MaxDecodedSize(1024) })) + }) + t.Run("MaxDecodedSize/Prime", func(t *testing.T) { + t.Log(testing.AllocsPerRun(1e3, func() { opt = MaxDecodedSize(1021) })) + }) + t.Run("MaxWindowSize", func(t *testing.T) { + t.Log(testing.AllocsPerRun(1e3, func() { opt = MaxWindowSize(1024) })) + }) + t.Run("LowMemory", func(t *testing.T) { + t.Log(testing.AllocsPerRun(1e3, func() { opt = LowMemory(true) })) + }) +} + +func TestGetDecoderAllocs(t *testing.T) { + 
t.Log(testing.AllocsPerRun(1e3, func() { getDecoder() })) +} + +func TestGetEncoderAllocs(t *testing.T) { + t.Log(testing.AllocsPerRun(1e3, func() { getEncoder() })) +}