blob: 5f2e1d020eec4c949e0748bedf932df6c8c65155 [file] [log] [blame]
kesavandc71914f2022-03-25 11:19:03 +05301package zstd
2
3import (
4 "errors"
5 "fmt"
6 "runtime"
7 "strings"
8)
9
10// EOption is an option for creating a encoder.
11type EOption func(*encoderOptions) error
12
13// options retains accumulated state of multiple options.
14type encoderOptions struct {
15 concurrent int
16 level EncoderLevel
17 single *bool
18 pad int
19 blockSize int
20 windowSize int
21 crc bool
22 fullZero bool
23 noEntropy bool
24 allLitEntropy bool
25 customWindow bool
26 customALEntropy bool
27 customBlockSize bool
28 lowMem bool
29 dict *dict
30}
31
32func (o *encoderOptions) setDefault() {
33 *o = encoderOptions{
34 concurrent: runtime.GOMAXPROCS(0),
35 crc: true,
36 single: nil,
37 blockSize: maxCompressedBlockSize,
38 windowSize: 8 << 20,
39 level: SpeedDefault,
40 allLitEntropy: true,
41 lowMem: false,
42 }
43}
44
45// encoder returns an encoder with the selected options.
46func (o encoderOptions) encoder() encoder {
47 switch o.level {
48 case SpeedFastest:
49 if o.dict != nil {
50 return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
51 }
52 return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
53
54 case SpeedDefault:
55 if o.dict != nil {
56 return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
57 }
58 return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
59 case SpeedBetterCompression:
60 if o.dict != nil {
61 return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
62 }
63 return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
64 case SpeedBestCompression:
65 return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
66 }
67 panic("unknown compression level")
68}
69
70// WithEncoderCRC will add CRC value to output.
71// Output will be 4 bytes larger.
72func WithEncoderCRC(b bool) EOption {
73 return func(o *encoderOptions) error { o.crc = b; return nil }
74}
75
76// WithEncoderConcurrency will set the concurrency,
77// meaning the maximum number of encoders to run concurrently.
78// The value supplied must be at least 1.
79// By default this will be set to GOMAXPROCS.
80func WithEncoderConcurrency(n int) EOption {
81 return func(o *encoderOptions) error {
82 if n <= 0 {
83 return fmt.Errorf("concurrency must be at least 1")
84 }
85 o.concurrent = n
86 return nil
87 }
88}
89
90// WithWindowSize will set the maximum allowed back-reference distance.
91// The value must be a power of two between MinWindowSize and MaxWindowSize.
92// A larger value will enable better compression but allocate more memory and,
93// for above-default values, take considerably longer.
94// The default value is determined by the compression level.
95func WithWindowSize(n int) EOption {
96 return func(o *encoderOptions) error {
97 switch {
98 case n < MinWindowSize:
99 return fmt.Errorf("window size must be at least %d", MinWindowSize)
100 case n > MaxWindowSize:
101 return fmt.Errorf("window size must be at most %d", MaxWindowSize)
102 case (n & (n - 1)) != 0:
103 return errors.New("window size must be a power of 2")
104 }
105
106 o.windowSize = n
107 o.customWindow = true
108 if o.blockSize > o.windowSize {
109 o.blockSize = o.windowSize
110 o.customBlockSize = true
111 }
112 return nil
113 }
114}
115
116// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
117// This can be used to obfuscate the exact output size or make blocks of a certain size.
118// The contents will be a skippable frame, so it will be invisible by the decoder.
119// n must be > 0 and <= 1GB, 1<<30 bytes.
120// The padded area will be filled with data from crypto/rand.Reader.
121// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
122func WithEncoderPadding(n int) EOption {
123 return func(o *encoderOptions) error {
124 if n <= 0 {
125 return fmt.Errorf("padding must be at least 1")
126 }
127 // No need to waste our time.
128 if n == 1 {
129 o.pad = 0
130 }
131 if n > 1<<30 {
132 return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
133 }
134 o.pad = n
135 return nil
136 }
137}
138
// EncoderLevel predefines encoder compression levels.
// Only use the exported constants: the actual mapping of these values is
// very likely to change, and your compression could change unpredictably
// when upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value; it marks the lower bound of the valid
	// levels and is not itself a selectable level.
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// When using this, be aware that CPU usage may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept as the last actual compression option.
	// This is not for external usage, but is used to keep track of the valid options.
	speedLast
)
169
170// EncoderLevelFromString will convert a string representation of an encoding level back
171// to a compression level. The compare is not case sensitive.
172// If the string wasn't recognized, (false, SpeedDefault) will be returned.
173func EncoderLevelFromString(s string) (bool, EncoderLevel) {
174 for l := speedNotSet + 1; l < speedLast; l++ {
175 if strings.EqualFold(s, l.String()) {
176 return true, l
177 }
178 }
179 return false, SpeedDefault
180}
181
182// EncoderLevelFromZstd will return an encoder level that closest matches the compression
183// ratio of a specific zstd compression level.
184// Many input values will provide the same compression level.
185func EncoderLevelFromZstd(level int) EncoderLevel {
186 switch {
187 case level < 3:
188 return SpeedFastest
189 case level >= 3 && level < 6:
190 return SpeedDefault
191 case level >= 6 && level < 10:
192 return SpeedBetterCompression
193 default:
194 return SpeedBestCompression
195 }
196}
197
198// String provides a string representation of the compression level.
199func (e EncoderLevel) String() string {
200 switch e {
201 case SpeedFastest:
202 return "fastest"
203 case SpeedDefault:
204 return "default"
205 case SpeedBetterCompression:
206 return "better"
207 case SpeedBestCompression:
208 return "best"
209 default:
210 return "invalid"
211 }
212}
213
214// WithEncoderLevel specifies a predefined compression level.
215func WithEncoderLevel(l EncoderLevel) EOption {
216 return func(o *encoderOptions) error {
217 switch {
218 case l <= speedNotSet || l >= speedLast:
219 return fmt.Errorf("unknown encoder level")
220 }
221 o.level = l
222 if !o.customWindow {
223 switch o.level {
224 case SpeedFastest:
225 o.windowSize = 4 << 20
226 if !o.customBlockSize {
227 o.blockSize = 1 << 16
228 }
229 case SpeedDefault:
230 o.windowSize = 8 << 20
231 case SpeedBetterCompression:
232 o.windowSize = 16 << 20
233 case SpeedBestCompression:
234 o.windowSize = 32 << 20
235 }
236 }
237 if !o.customALEntropy {
238 o.allLitEntropy = l > SpeedFastest
239 }
240
241 return nil
242 }
243}
244
245// WithZeroFrames will encode 0 length input as full frames.
246// This can be needed for compatibility with zstandard usage,
247// but is not needed for this package.
248func WithZeroFrames(b bool) EOption {
249 return func(o *encoderOptions) error {
250 o.fullZero = b
251 return nil
252 }
253}
254
255// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
256// Disabling this will skip incompressible data faster, but in cases with no matches but
257// skewed character distribution compression is lost.
258// Default value depends on the compression level selected.
259func WithAllLitEntropyCompression(b bool) EOption {
260 return func(o *encoderOptions) error {
261 o.customALEntropy = true
262 o.allLitEntropy = b
263 return nil
264 }
265}
266
267// WithNoEntropyCompression will always skip entropy compression of literals.
268// This can be useful if content has matches, but unlikely to benefit from entropy
269// compression. Usually the slight speed improvement is not worth enabling this.
270func WithNoEntropyCompression(b bool) EOption {
271 return func(o *encoderOptions) error {
272 o.noEntropy = b
273 return nil
274 }
275}
276
277// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
278// If this flag is set, data must be regenerated within a single continuous memory segment.
279// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
280// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
281// In order to preserve the decoder from unreasonable memory requirements,
282// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
283// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
284// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
285// If this is not specified, block encodes will automatically choose this based on the input size.
286// This setting has no effect on streamed encodes.
287func WithSingleSegment(b bool) EOption {
288 return func(o *encoderOptions) error {
289 o.single = &b
290 return nil
291 }
292}
293
294// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
295// slower encoding speed.
296// This will not change the window size which is the primary function for reducing
297// memory usage. See WithWindowSize.
298func WithLowerEncoderMem(b bool) EOption {
299 return func(o *encoderOptions) error {
300 o.lowMem = b
301 return nil
302 }
303}
304
305// WithEncoderDict allows to register a dictionary that will be used for the encode.
306// The encoder *may* choose to use no dictionary instead for certain payloads.
307func WithEncoderDict(dict []byte) EOption {
308 return func(o *encoderOptions) error {
309 d, err := loadDict(dict)
310 if err != nil {
311 return err
312 }
313 o.dict = d
314 return nil
315 }
316}