blob: 16d4ab63c19d8cc1b6a70a5e2e55fde39a4228a2 [file] [log] [blame]
Scott Bakered4efab2020-01-13 19:12:25 -08001package zstd
2
3import (
4 "errors"
5 "fmt"
6 "runtime"
7 "strings"
8)
9
10// EOption is an option for creating a encoder.
11type EOption func(*encoderOptions) error
12
13// options retains accumulated state of multiple options.
14type encoderOptions struct {
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000015 concurrent int
16 level EncoderLevel
17 single *bool
18 pad int
19 blockSize int
20 windowSize int
21 crc bool
22 fullZero bool
23 noEntropy bool
24 allLitEntropy bool
25 customWindow bool
26 customALEntropy bool
27 lowMem bool
28 dict *dict
Scott Bakered4efab2020-01-13 19:12:25 -080029}
30
31func (o *encoderOptions) setDefault() {
32 *o = encoderOptions{
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000033 concurrent: runtime.GOMAXPROCS(0),
34 crc: true,
35 single: nil,
36 blockSize: 1 << 16,
37 windowSize: 8 << 20,
38 level: SpeedDefault,
39 allLitEntropy: true,
40 lowMem: false,
Scott Bakered4efab2020-01-13 19:12:25 -080041 }
42}
43
44// encoder returns an encoder with the selected options.
45func (o encoderOptions) encoder() encoder {
46 switch o.level {
Scott Bakered4efab2020-01-13 19:12:25 -080047 case SpeedFastest:
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000048 if o.dict != nil {
49 return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
50 }
51 return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
52
53 case SpeedDefault:
54 if o.dict != nil {
55 return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
56 }
57 return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
58 case SpeedBetterCompression:
59 if o.dict != nil {
60 return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
61 }
62 return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
63 case SpeedBestCompression:
64 return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
Scott Bakered4efab2020-01-13 19:12:25 -080065 }
66 panic("unknown compression level")
67}
68
69// WithEncoderCRC will add CRC value to output.
70// Output will be 4 bytes larger.
71func WithEncoderCRC(b bool) EOption {
72 return func(o *encoderOptions) error { o.crc = b; return nil }
73}
74
75// WithEncoderConcurrency will set the concurrency,
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000076// meaning the maximum number of encoders to run concurrently.
Scott Bakered4efab2020-01-13 19:12:25 -080077// The value supplied must be at least 1.
78// By default this will be set to GOMAXPROCS.
79func WithEncoderConcurrency(n int) EOption {
80 return func(o *encoderOptions) error {
81 if n <= 0 {
82 return fmt.Errorf("concurrency must be at least 1")
83 }
84 o.concurrent = n
85 return nil
86 }
87}
88
89// WithWindowSize will set the maximum allowed back-reference distance.
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000090// The value must be a power of two between MinWindowSize and MaxWindowSize.
Scott Bakered4efab2020-01-13 19:12:25 -080091// A larger value will enable better compression but allocate more memory and,
92// for above-default values, take considerably longer.
93// The default value is determined by the compression level.
94func WithWindowSize(n int) EOption {
95 return func(o *encoderOptions) error {
96 switch {
97 case n < MinWindowSize:
98 return fmt.Errorf("window size must be at least %d", MinWindowSize)
99 case n > MaxWindowSize:
100 return fmt.Errorf("window size must be at most %d", MaxWindowSize)
101 case (n & (n - 1)) != 0:
102 return errors.New("window size must be a power of 2")
103 }
104
105 o.windowSize = n
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000106 o.customWindow = true
Scott Bakered4efab2020-01-13 19:12:25 -0800107 if o.blockSize > o.windowSize {
108 o.blockSize = o.windowSize
109 }
110 return nil
111 }
112}
113
114// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
115// This can be used to obfuscate the exact output size or make blocks of a certain size.
116// The contents will be a skippable frame, so it will be invisible by the decoder.
117// n must be > 0 and <= 1GB, 1<<30 bytes.
118// The padded area will be filled with data from crypto/rand.Reader.
119// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
120func WithEncoderPadding(n int) EOption {
121 return func(o *encoderOptions) error {
122 if n <= 0 {
123 return fmt.Errorf("padding must be at least 1")
124 }
125 // No need to waste our time.
126 if n == 1 {
127 o.pad = 0
128 }
129 if n > 1<<30 {
130 return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
131 }
132 o.pad = n
133 return nil
134 }
135}
136
// EncoderLevel identifies one of the predefined encoder compression levels.
// Only use the exported constants: the actual mapping of these values is
// very likely to change, and compression could change unpredictably when
// upgrading the library if raw numbers are used instead.
type EncoderLevel int

const (
	// speedNotSet is the zero value; it is not a valid compression level.
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// Be aware that the CPU usage of this level may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept as the last actual compression option.
	// This is not for external usage, but is used to keep track of the valid options.
	speedLast
)
167
168// EncoderLevelFromString will convert a string representation of an encoding level back
169// to a compression level. The compare is not case sensitive.
170// If the string wasn't recognized, (false, SpeedDefault) will be returned.
171func EncoderLevelFromString(s string) (bool, EncoderLevel) {
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000172 for l := speedNotSet + 1; l < speedLast; l++ {
Scott Bakered4efab2020-01-13 19:12:25 -0800173 if strings.EqualFold(s, l.String()) {
174 return true, l
175 }
176 }
177 return false, SpeedDefault
178}
179
180// EncoderLevelFromZstd will return an encoder level that closest matches the compression
181// ratio of a specific zstd compression level.
182// Many input values will provide the same compression level.
183func EncoderLevelFromZstd(level int) EncoderLevel {
184 switch {
185 case level < 3:
186 return SpeedFastest
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000187 case level >= 3 && level < 6:
Scott Bakered4efab2020-01-13 19:12:25 -0800188 return SpeedDefault
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000189 case level >= 6 && level < 10:
190 return SpeedBetterCompression
191 case level >= 10:
192 return SpeedBetterCompression
Scott Bakered4efab2020-01-13 19:12:25 -0800193 }
194 return SpeedDefault
195}
196
197// String provides a string representation of the compression level.
198func (e EncoderLevel) String() string {
199 switch e {
200 case SpeedFastest:
201 return "fastest"
202 case SpeedDefault:
203 return "default"
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000204 case SpeedBetterCompression:
205 return "better"
206 case SpeedBestCompression:
207 return "best"
Scott Bakered4efab2020-01-13 19:12:25 -0800208 default:
209 return "invalid"
210 }
211}
212
213// WithEncoderLevel specifies a predefined compression level.
214func WithEncoderLevel(l EncoderLevel) EOption {
215 return func(o *encoderOptions) error {
216 switch {
217 case l <= speedNotSet || l >= speedLast:
218 return fmt.Errorf("unknown encoder level")
219 }
220 o.level = l
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000221 if !o.customWindow {
222 switch o.level {
223 case SpeedFastest:
224 o.windowSize = 4 << 20
225 case SpeedDefault:
226 o.windowSize = 8 << 20
227 case SpeedBetterCompression:
228 o.windowSize = 16 << 20
229 case SpeedBestCompression:
230 o.windowSize = 32 << 20
231 }
232 }
233 if !o.customALEntropy {
234 o.allLitEntropy = l > SpeedFastest
235 }
236
Scott Bakered4efab2020-01-13 19:12:25 -0800237 return nil
238 }
239}
240
241// WithZeroFrames will encode 0 length input as full frames.
242// This can be needed for compatibility with zstandard usage,
243// but is not needed for this package.
244func WithZeroFrames(b bool) EOption {
245 return func(o *encoderOptions) error {
246 o.fullZero = b
247 return nil
248 }
249}
250
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000251// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
252// Disabling this will skip incompressible data faster, but in cases with no matches but
253// skewed character distribution compression is lost.
254// Default value depends on the compression level selected.
255func WithAllLitEntropyCompression(b bool) EOption {
256 return func(o *encoderOptions) error {
257 o.customALEntropy = true
258 o.allLitEntropy = b
259 return nil
260 }
261}
262
Scott Bakered4efab2020-01-13 19:12:25 -0800263// WithNoEntropyCompression will always skip entropy compression of literals.
264// This can be useful if content has matches, but unlikely to benefit from entropy
265// compression. Usually the slight speed improvement is not worth enabling this.
266func WithNoEntropyCompression(b bool) EOption {
267 return func(o *encoderOptions) error {
268 o.noEntropy = b
269 return nil
270 }
271}
272
273// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
274// If this flag is set, data must be regenerated within a single continuous memory segment.
275// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
276// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
277// In order to preserve the decoder from unreasonable memory requirements,
278// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
279// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
280// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
281// If this is not specified, block encodes will automatically choose this based on the input size.
282// This setting has no effect on streamed encodes.
283func WithSingleSegment(b bool) EOption {
284 return func(o *encoderOptions) error {
285 o.single = &b
286 return nil
287 }
288}
David K. Bainbridgebd6b2882021-08-26 13:31:02 +0000289
290// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
291// slower encoding speed.
292// This will not change the window size which is the primary function for reducing
293// memory usage. See WithWindowSize.
294func WithLowerEncoderMem(b bool) EOption {
295 return func(o *encoderOptions) error {
296 o.lowMem = b
297 return nil
298 }
299}
300
301// WithEncoderDict allows to register a dictionary that will be used for the encode.
302// The encoder *may* choose to use no dictionary instead for certain payloads.
303func WithEncoderDict(dict []byte) EOption {
304 return func(o *encoderOptions) error {
305 d, err := loadDict(dict)
306 if err != nil {
307 return err
308 }
309 o.dict = d
310 return nil
311 }
312}