blob: a7c5e1aac432316f778376c0963721966a63d402 [file] [log] [blame]
khenaidoo7d3c5582021-08-11 18:09:44 -04001package zstd
2
3import (
4 "errors"
5 "fmt"
6 "runtime"
7 "strings"
8)
9
10// EOption is an option for creating a encoder.
11type EOption func(*encoderOptions) error
12
13// options retains accumulated state of multiple options.
14type encoderOptions struct {
15 concurrent int
16 level EncoderLevel
17 single *bool
18 pad int
19 blockSize int
20 windowSize int
21 crc bool
22 fullZero bool
23 noEntropy bool
24 allLitEntropy bool
25 customWindow bool
26 customALEntropy bool
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053027 customBlockSize bool
khenaidoo7d3c5582021-08-11 18:09:44 -040028 lowMem bool
29 dict *dict
30}
31
32func (o *encoderOptions) setDefault() {
33 *o = encoderOptions{
34 concurrent: runtime.GOMAXPROCS(0),
35 crc: true,
36 single: nil,
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053037 blockSize: maxCompressedBlockSize,
khenaidoo7d3c5582021-08-11 18:09:44 -040038 windowSize: 8 << 20,
39 level: SpeedDefault,
40 allLitEntropy: true,
41 lowMem: false,
42 }
43}
44
45// encoder returns an encoder with the selected options.
46func (o encoderOptions) encoder() encoder {
47 switch o.level {
48 case SpeedFastest:
49 if o.dict != nil {
50 return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
51 }
52 return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
53
54 case SpeedDefault:
55 if o.dict != nil {
56 return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
57 }
58 return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
59 case SpeedBetterCompression:
60 if o.dict != nil {
61 return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
62 }
63 return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
64 case SpeedBestCompression:
65 return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
66 }
67 panic("unknown compression level")
68}
69
70// WithEncoderCRC will add CRC value to output.
71// Output will be 4 bytes larger.
72func WithEncoderCRC(b bool) EOption {
73 return func(o *encoderOptions) error { o.crc = b; return nil }
74}
75
76// WithEncoderConcurrency will set the concurrency,
77// meaning the maximum number of encoders to run concurrently.
78// The value supplied must be at least 1.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053079// For streams, setting a value of 1 will disable async compression.
khenaidoo7d3c5582021-08-11 18:09:44 -040080// By default this will be set to GOMAXPROCS.
81func WithEncoderConcurrency(n int) EOption {
82 return func(o *encoderOptions) error {
83 if n <= 0 {
84 return fmt.Errorf("concurrency must be at least 1")
85 }
86 o.concurrent = n
87 return nil
88 }
89}
90
91// WithWindowSize will set the maximum allowed back-reference distance.
92// The value must be a power of two between MinWindowSize and MaxWindowSize.
93// A larger value will enable better compression but allocate more memory and,
94// for above-default values, take considerably longer.
95// The default value is determined by the compression level.
96func WithWindowSize(n int) EOption {
97 return func(o *encoderOptions) error {
98 switch {
99 case n < MinWindowSize:
100 return fmt.Errorf("window size must be at least %d", MinWindowSize)
101 case n > MaxWindowSize:
102 return fmt.Errorf("window size must be at most %d", MaxWindowSize)
103 case (n & (n - 1)) != 0:
104 return errors.New("window size must be a power of 2")
105 }
106
107 o.windowSize = n
108 o.customWindow = true
109 if o.blockSize > o.windowSize {
110 o.blockSize = o.windowSize
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530111 o.customBlockSize = true
khenaidoo7d3c5582021-08-11 18:09:44 -0400112 }
113 return nil
114 }
115}
116
117// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
118// This can be used to obfuscate the exact output size or make blocks of a certain size.
119// The contents will be a skippable frame, so it will be invisible by the decoder.
120// n must be > 0 and <= 1GB, 1<<30 bytes.
121// The padded area will be filled with data from crypto/rand.Reader.
122// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
123func WithEncoderPadding(n int) EOption {
124 return func(o *encoderOptions) error {
125 if n <= 0 {
126 return fmt.Errorf("padding must be at least 1")
127 }
128 // No need to waste our time.
129 if n == 1 {
130 o.pad = 0
131 }
132 if n > 1<<30 {
133 return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
134 }
135 o.pad = n
136 return nil
137 }
138}
139
// EncoderLevel identifies one of the predefined encoder compression levels.
// Only use the exported constants: the numeric values behind them are very
// likely to change, and relying on them could change your compression
// unpredictably when upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value; it is not a valid compression level.
	speedNotSet EncoderLevel = iota

	// SpeedFastest selects the fastest reasonable compression.
	// It is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// It is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression yields better compression than the default.
	// It is currently about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// Note that CPU usage for this level may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression selects the best available compression option,
	// no matter the CPU cost.
	SpeedBestCompression

	// speedLast must be kept after the last actual compression option.
	// It is not for external usage; it is used to keep track of the valid
	// options.
	speedLast
)
170
171// EncoderLevelFromString will convert a string representation of an encoding level back
172// to a compression level. The compare is not case sensitive.
173// If the string wasn't recognized, (false, SpeedDefault) will be returned.
174func EncoderLevelFromString(s string) (bool, EncoderLevel) {
175 for l := speedNotSet + 1; l < speedLast; l++ {
176 if strings.EqualFold(s, l.String()) {
177 return true, l
178 }
179 }
180 return false, SpeedDefault
181}
182
183// EncoderLevelFromZstd will return an encoder level that closest matches the compression
184// ratio of a specific zstd compression level.
185// Many input values will provide the same compression level.
186func EncoderLevelFromZstd(level int) EncoderLevel {
187 switch {
188 case level < 3:
189 return SpeedFastest
190 case level >= 3 && level < 6:
191 return SpeedDefault
192 case level >= 6 && level < 10:
193 return SpeedBetterCompression
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530194 default:
195 return SpeedBestCompression
khenaidoo7d3c5582021-08-11 18:09:44 -0400196 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400197}
198
199// String provides a string representation of the compression level.
200func (e EncoderLevel) String() string {
201 switch e {
202 case SpeedFastest:
203 return "fastest"
204 case SpeedDefault:
205 return "default"
206 case SpeedBetterCompression:
207 return "better"
208 case SpeedBestCompression:
209 return "best"
210 default:
211 return "invalid"
212 }
213}
214
215// WithEncoderLevel specifies a predefined compression level.
216func WithEncoderLevel(l EncoderLevel) EOption {
217 return func(o *encoderOptions) error {
218 switch {
219 case l <= speedNotSet || l >= speedLast:
220 return fmt.Errorf("unknown encoder level")
221 }
222 o.level = l
223 if !o.customWindow {
224 switch o.level {
225 case SpeedFastest:
226 o.windowSize = 4 << 20
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530227 if !o.customBlockSize {
228 o.blockSize = 1 << 16
229 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400230 case SpeedDefault:
231 o.windowSize = 8 << 20
232 case SpeedBetterCompression:
233 o.windowSize = 16 << 20
234 case SpeedBestCompression:
235 o.windowSize = 32 << 20
236 }
237 }
238 if !o.customALEntropy {
239 o.allLitEntropy = l > SpeedFastest
240 }
241
242 return nil
243 }
244}
245
246// WithZeroFrames will encode 0 length input as full frames.
247// This can be needed for compatibility with zstandard usage,
248// but is not needed for this package.
249func WithZeroFrames(b bool) EOption {
250 return func(o *encoderOptions) error {
251 o.fullZero = b
252 return nil
253 }
254}
255
256// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
257// Disabling this will skip incompressible data faster, but in cases with no matches but
258// skewed character distribution compression is lost.
259// Default value depends on the compression level selected.
260func WithAllLitEntropyCompression(b bool) EOption {
261 return func(o *encoderOptions) error {
262 o.customALEntropy = true
263 o.allLitEntropy = b
264 return nil
265 }
266}
267
268// WithNoEntropyCompression will always skip entropy compression of literals.
269// This can be useful if content has matches, but unlikely to benefit from entropy
270// compression. Usually the slight speed improvement is not worth enabling this.
271func WithNoEntropyCompression(b bool) EOption {
272 return func(o *encoderOptions) error {
273 o.noEntropy = b
274 return nil
275 }
276}
277
278// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
279// If this flag is set, data must be regenerated within a single continuous memory segment.
280// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
281// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
282// In order to preserve the decoder from unreasonable memory requirements,
283// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
284// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
285// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530286// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
khenaidoo7d3c5582021-08-11 18:09:44 -0400287// This setting has no effect on streamed encodes.
288func WithSingleSegment(b bool) EOption {
289 return func(o *encoderOptions) error {
290 o.single = &b
291 return nil
292 }
293}
294
295// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
296// slower encoding speed.
297// This will not change the window size which is the primary function for reducing
298// memory usage. See WithWindowSize.
299func WithLowerEncoderMem(b bool) EOption {
300 return func(o *encoderOptions) error {
301 o.lowMem = b
302 return nil
303 }
304}
305
306// WithEncoderDict allows to register a dictionary that will be used for the encode.
307// The encoder *may* choose to use no dictionary instead for certain payloads.
308func WithEncoderDict(dict []byte) EOption {
309 return func(o *encoderOptions) error {
310 d, err := loadDict(dict)
311 if err != nil {
312 return err
313 }
314 o.dict = d
315 return nil
316 }
317}