blob: 7eed729be2ad3325f01ed865d757ee86a49b9534 [file] [log] [blame]
khenaidoo7d3c5582021-08-11 18:09:44 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +05308 "bytes"
9 "encoding/binary"
khenaidoo7d3c5582021-08-11 18:09:44 -040010 "errors"
11 "fmt"
12 "io"
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053013 "io/ioutil"
14 "os"
15 "path/filepath"
khenaidoo7d3c5582021-08-11 18:09:44 -040016 "sync"
17
18 "github.com/klauspost/compress/huff0"
19 "github.com/klauspost/compress/zstd/internal/xxhash"
20)
21
22type blockType uint8
23
24//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex
25
26const (
27 blockTypeRaw blockType = iota
28 blockTypeRLE
29 blockTypeCompressed
30 blockTypeReserved
31)
32
33type literalsBlockType uint8
34
35const (
36 literalsBlockRaw literalsBlockType = iota
37 literalsBlockRLE
38 literalsBlockCompressed
39 literalsBlockTreeless
40)
41
42const (
43 // maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
44 maxCompressedBlockSize = 128 << 10
45
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053046 compressedBlockOverAlloc = 16
47 maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
48
khenaidoo7d3c5582021-08-11 18:09:44 -040049 // Maximum possible block size (all Raw+Uncompressed).
50 maxBlockSize = (1 << 21) - 1
51
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053052 maxMatchLen = 131074
53 maxSequences = 0x7f00 + 0xffff
khenaidoo7d3c5582021-08-11 18:09:44 -040054
55 // We support slightly less than the reference decoder to be able to
56 // use ints on 32 bit archs.
57 maxOffsetBits = 30
58)
59
60var (
61 huffDecoderPool = sync.Pool{New: func() interface{} {
62 return &huff0.Scratch{}
63 }}
64
65 fseDecoderPool = sync.Pool{New: func() interface{} {
66 return &fseDecoder{}
67 }}
68)
69
70type blockDec struct {
71 // Raw source data of the block.
72 data []byte
73 dataStorage []byte
74
75 // Destination of the decoded data.
76 dst []byte
77
78 // Buffer for literals data.
79 literalBuf []byte
80
81 // Window size of the block.
82 WindowSize uint64
83
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053084 err error
85
86 // Check against this crc
87 checkCRC []byte
khenaidoo7d3c5582021-08-11 18:09:44 -040088
89 // Frame to use for singlethreaded decoding.
90 // Should not be used by the decoder itself since parent may be another frame.
91 localFrame *frameDec
92
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053093 sequence []seqVals
94
95 async struct {
96 newHist *history
97 literals []byte
98 seqData []byte
99 seqSize int // Size of uncompressed sequences
100 fcs uint64
101 }
102
khenaidoo7d3c5582021-08-11 18:09:44 -0400103 // Block is RLE, this is the size.
104 RLESize uint32
khenaidoo7d3c5582021-08-11 18:09:44 -0400105
106 Type blockType
107
108 // Is this the last block of a frame?
109 Last bool
110
111 // Use less memory
112 lowMem bool
113}
114
115func (b *blockDec) String() string {
116 if b == nil {
117 return "<nil>"
118 }
119 return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize)
120}
121
122func newBlockDec(lowMem bool) *blockDec {
123 b := blockDec{
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530124 lowMem: lowMem,
khenaidoo7d3c5582021-08-11 18:09:44 -0400125 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400126 return &b
127}
128
129// reset will reset the block.
130// Input must be a start of a block and will be at the end of the block when returned.
131func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
132 b.WindowSize = windowSize
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530133 tmp, err := br.readSmall(3)
134 if err != nil {
135 println("Reading block header:", err)
136 return err
khenaidoo7d3c5582021-08-11 18:09:44 -0400137 }
138 bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
139 b.Last = bh&1 != 0
140 b.Type = blockType((bh >> 1) & 3)
141 // find size.
142 cSize := int(bh >> 3)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530143 maxSize := maxCompressedBlockSizeAlloc
khenaidoo7d3c5582021-08-11 18:09:44 -0400144 switch b.Type {
145 case blockTypeReserved:
146 return ErrReservedBlockType
147 case blockTypeRLE:
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530148 if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
149 if debugDecoder {
150 printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
151 }
152 return ErrWindowSizeExceeded
153 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400154 b.RLESize = uint32(cSize)
155 if b.lowMem {
156 maxSize = cSize
157 }
158 cSize = 1
159 case blockTypeCompressed:
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530160 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400161 println("Data size on stream:", cSize)
162 }
163 b.RLESize = 0
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530164 maxSize = maxCompressedBlockSizeAlloc
khenaidoo7d3c5582021-08-11 18:09:44 -0400165 if windowSize < maxCompressedBlockSize && b.lowMem {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530166 maxSize = int(windowSize) + compressedBlockOverAlloc
khenaidoo7d3c5582021-08-11 18:09:44 -0400167 }
168 if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530169 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400170 printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
171 }
172 return ErrCompressedSizeTooBig
173 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530174 // Empty compressed blocks must at least be 2 bytes
175 // for Literals_Block_Type and one for Sequences_Section_Header.
176 if cSize < 2 {
177 return ErrBlockTooSmall
178 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400179 case blockTypeRaw:
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530180 if cSize > maxCompressedBlockSize || cSize > int(b.WindowSize) {
181 if debugDecoder {
182 printf("rle block too big: csize:%d block: %+v\n", uint64(cSize), b)
183 }
184 return ErrWindowSizeExceeded
185 }
186
khenaidoo7d3c5582021-08-11 18:09:44 -0400187 b.RLESize = 0
188 // We do not need a destination for raw blocks.
189 maxSize = -1
190 default:
191 panic("Invalid block type")
192 }
193
194 // Read block data.
195 if cap(b.dataStorage) < cSize {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530196 if b.lowMem || cSize > maxCompressedBlockSize {
197 b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
khenaidoo7d3c5582021-08-11 18:09:44 -0400198 } else {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530199 b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
khenaidoo7d3c5582021-08-11 18:09:44 -0400200 }
201 }
202 if cap(b.dst) <= maxSize {
203 b.dst = make([]byte, 0, maxSize+1)
204 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400205 b.data, err = br.readBig(cSize, b.dataStorage)
206 if err != nil {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530207 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400208 println("Reading block:", err, "(", cSize, ")", len(b.data))
209 printf("%T", br)
210 }
211 return err
212 }
213 return nil
214}
215
216// sendEOF will make the decoder send EOF on this frame.
217func (b *blockDec) sendErr(err error) {
218 b.Last = true
219 b.Type = blockTypeReserved
220 b.err = err
khenaidoo7d3c5582021-08-11 18:09:44 -0400221}
222
223// Close will release resources.
224// Closed blockDec cannot be reset.
225func (b *blockDec) Close() {
khenaidoo7d3c5582021-08-11 18:09:44 -0400226}
227
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530228// decodeBuf
khenaidoo7d3c5582021-08-11 18:09:44 -0400229func (b *blockDec) decodeBuf(hist *history) error {
230 switch b.Type {
231 case blockTypeRLE:
232 if cap(b.dst) < int(b.RLESize) {
233 if b.lowMem {
234 b.dst = make([]byte, b.RLESize)
235 } else {
236 b.dst = make([]byte, maxBlockSize)
237 }
238 }
239 b.dst = b.dst[:b.RLESize]
240 v := b.data[0]
241 for i := range b.dst {
242 b.dst[i] = v
243 }
244 hist.appendKeep(b.dst)
245 return nil
246 case blockTypeRaw:
247 hist.appendKeep(b.data)
248 return nil
249 case blockTypeCompressed:
250 saved := b.dst
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530251 // Append directly to history
252 if hist.ignoreBuffer == 0 {
253 b.dst = hist.b
254 hist.b = nil
255 } else {
256 b.dst = b.dst[:0]
257 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400258 err := b.decodeCompressed(hist)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530259 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400260 println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
261 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530262 if hist.ignoreBuffer == 0 {
263 hist.b = b.dst
264 b.dst = saved
265 } else {
266 hist.appendKeep(b.dst)
267 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400268 return err
269 case blockTypeReserved:
270 // Used for returning errors.
271 return b.err
272 default:
273 panic("Invalid block type")
274 }
275}
276
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530277func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err error) {
khenaidoo7d3c5582021-08-11 18:09:44 -0400278 // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
279 if len(in) < 2 {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530280 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400281 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530282
khenaidoo7d3c5582021-08-11 18:09:44 -0400283 litType := literalsBlockType(in[0] & 3)
284 var litRegenSize int
285 var litCompSize int
286 sizeFormat := (in[0] >> 2) & 3
287 var fourStreams bool
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530288 var literals []byte
khenaidoo7d3c5582021-08-11 18:09:44 -0400289 switch litType {
290 case literalsBlockRaw, literalsBlockRLE:
291 switch sizeFormat {
292 case 0, 2:
293 // Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte.
294 litRegenSize = int(in[0] >> 3)
295 in = in[1:]
296 case 1:
297 // Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes.
298 litRegenSize = int(in[0]>>4) + (int(in[1]) << 4)
299 in = in[2:]
300 case 3:
301 // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
302 if len(in) < 3 {
303 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530304 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400305 }
306 litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
307 in = in[3:]
308 }
309 case literalsBlockCompressed, literalsBlockTreeless:
310 switch sizeFormat {
311 case 0, 1:
312 // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
313 if len(in) < 3 {
314 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530315 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400316 }
317 n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
318 litRegenSize = int(n & 1023)
319 litCompSize = int(n >> 10)
320 fourStreams = sizeFormat == 1
321 in = in[3:]
322 case 2:
323 fourStreams = true
324 if len(in) < 4 {
325 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530326 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400327 }
328 n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
329 litRegenSize = int(n & 16383)
330 litCompSize = int(n >> 14)
331 in = in[4:]
332 case 3:
333 fourStreams = true
334 if len(in) < 5 {
335 println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530336 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400337 }
338 n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
339 litRegenSize = int(n & 262143)
340 litCompSize = int(n >> 18)
341 in = in[5:]
342 }
343 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530344 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400345 println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
346 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530347 if litRegenSize > int(b.WindowSize) || litRegenSize > maxCompressedBlockSize {
348 return in, ErrWindowSizeExceeded
349 }
350
khenaidoo7d3c5582021-08-11 18:09:44 -0400351 switch litType {
352 case literalsBlockRaw:
353 if len(in) < litRegenSize {
354 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530355 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400356 }
357 literals = in[:litRegenSize]
358 in = in[litRegenSize:]
359 //printf("Found %d uncompressed literals\n", litRegenSize)
360 case literalsBlockRLE:
361 if len(in) < 1 {
362 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530363 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400364 }
365 if cap(b.literalBuf) < litRegenSize {
366 if b.lowMem {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530367 b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
khenaidoo7d3c5582021-08-11 18:09:44 -0400368 } else {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530369 b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
khenaidoo7d3c5582021-08-11 18:09:44 -0400370 }
371 }
372 literals = b.literalBuf[:litRegenSize]
373 v := in[0]
374 for i := range literals {
375 literals[i] = v
376 }
377 in = in[1:]
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530378 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400379 printf("Found %d RLE compressed literals\n", litRegenSize)
380 }
381 case literalsBlockTreeless:
382 if len(in) < litCompSize {
383 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530384 return in, ErrBlockTooSmall
khenaidoo7d3c5582021-08-11 18:09:44 -0400385 }
386 // Store compressed literals, so we defer decoding until we get history.
387 literals = in[:litCompSize]
388 in = in[litCompSize:]
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530389 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400390 printf("Found %d compressed literals\n", litCompSize)
391 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530392 huff := hist.huffTree
393 if huff == nil {
394 return in, errors.New("literal block was treeless, but no history was defined")
khenaidoo7d3c5582021-08-11 18:09:44 -0400395 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400396 // Ensure we have space to store it.
397 if cap(b.literalBuf) < litRegenSize {
398 if b.lowMem {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530399 b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
khenaidoo7d3c5582021-08-11 18:09:44 -0400400 } else {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530401 b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
khenaidoo7d3c5582021-08-11 18:09:44 -0400402 }
403 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530404 var err error
405 // Use our out buffer.
406 huff.MaxDecodedSize = litRegenSize
407 if fourStreams {
408 literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
409 } else {
410 literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
khenaidoo7d3c5582021-08-11 18:09:44 -0400411 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530412 // Make sure we don't leak our literals buffer
413 if err != nil {
414 println("decompressing literals:", err)
415 return in, err
416 }
417 if len(literals) != litRegenSize {
418 return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
419 }
420
421 case literalsBlockCompressed:
422 if len(in) < litCompSize {
423 println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
424 return in, ErrBlockTooSmall
425 }
426 literals = in[:litCompSize]
427 in = in[litCompSize:]
428 // Ensure we have space to store it.
429 if cap(b.literalBuf) < litRegenSize {
430 if b.lowMem {
431 b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
432 } else {
433 b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
434 }
435 }
436 huff := hist.huffTree
437 if huff == nil || (hist.dict != nil && huff == hist.dict.litEnc) {
438 huff = huffDecoderPool.Get().(*huff0.Scratch)
439 if huff == nil {
440 huff = &huff0.Scratch{}
441 }
442 }
443 var err error
khenaidoo7d3c5582021-08-11 18:09:44 -0400444 huff, literals, err = huff0.ReadTable(literals, huff)
445 if err != nil {
446 println("reading huffman table:", err)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530447 return in, err
khenaidoo7d3c5582021-08-11 18:09:44 -0400448 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530449 hist.huffTree = huff
450 huff.MaxDecodedSize = litRegenSize
khenaidoo7d3c5582021-08-11 18:09:44 -0400451 // Use our out buffer.
452 if fourStreams {
453 literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
454 } else {
455 literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
456 }
457 if err != nil {
458 println("decoding compressed literals:", err)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530459 return in, err
khenaidoo7d3c5582021-08-11 18:09:44 -0400460 }
461 // Make sure we don't leak our literals buffer
462 if len(literals) != litRegenSize {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530463 return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
khenaidoo7d3c5582021-08-11 18:09:44 -0400464 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530465 // Re-cap to get extra size.
466 literals = b.literalBuf[:len(literals)]
467 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400468 printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
469 }
470 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530471 hist.decoders.literals = literals
472 return in, nil
473}
khenaidoo7d3c5582021-08-11 18:09:44 -0400474
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530475// decodeCompressed will start decompressing a block.
476func (b *blockDec) decodeCompressed(hist *history) error {
477 in := b.data
478 in, err := b.decodeLiterals(in, hist)
479 if err != nil {
480 return err
481 }
482 err = b.prepareSequences(in, hist)
483 if err != nil {
484 return err
485 }
486 if hist.decoders.nSeqs == 0 {
487 b.dst = append(b.dst, hist.decoders.literals...)
488 return nil
489 }
490 before := len(hist.decoders.out)
491 err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
492 if err != nil {
493 return err
494 }
495 if hist.decoders.maxSyncLen > 0 {
496 hist.decoders.maxSyncLen += uint64(before)
497 hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
498 }
499 b.dst = hist.decoders.out
500 hist.recentOffsets = hist.decoders.prevOffset
501 return nil
502}
503
504func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
505 if debugDecoder {
506 printf("prepareSequences: %d byte(s) input\n", len(in))
507 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400508 // Decode Sequences
509 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
510 if len(in) < 1 {
511 return ErrBlockTooSmall
512 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530513 var nSeqs int
khenaidoo7d3c5582021-08-11 18:09:44 -0400514 seqHeader := in[0]
khenaidoo7d3c5582021-08-11 18:09:44 -0400515 switch {
khenaidoo7d3c5582021-08-11 18:09:44 -0400516 case seqHeader < 128:
517 nSeqs = int(seqHeader)
518 in = in[1:]
519 case seqHeader < 255:
520 if len(in) < 2 {
521 return ErrBlockTooSmall
522 }
523 nSeqs = int(seqHeader-128)<<8 | int(in[1])
524 in = in[2:]
525 case seqHeader == 255:
526 if len(in) < 3 {
527 return ErrBlockTooSmall
528 }
529 nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
530 in = in[3:]
531 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530532 if nSeqs == 0 && len(in) != 0 {
533 // When no sequences, there should not be any more data...
534 if debugDecoder {
535 printf("prepareSequences: 0 sequences, but %d byte(s) left on stream\n", len(in))
khenaidoo7d3c5582021-08-11 18:09:44 -0400536 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530537 return ErrUnexpectedBlockSize
khenaidoo7d3c5582021-08-11 18:09:44 -0400538 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530539
540 var seqs = &hist.decoders
541 seqs.nSeqs = nSeqs
khenaidoo7d3c5582021-08-11 18:09:44 -0400542 if nSeqs > 0 {
543 if len(in) < 1 {
544 return ErrBlockTooSmall
545 }
546 br := byteReader{b: in, off: 0}
547 compMode := br.Uint8()
548 br.advance(1)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530549 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400550 printf("Compression modes: 0b%b", compMode)
551 }
552 for i := uint(0); i < 3; i++ {
553 mode := seqCompMode((compMode >> (6 - i*2)) & 3)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530554 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400555 println("Table", tableIndex(i), "is", mode)
556 }
557 var seq *sequenceDec
558 switch tableIndex(i) {
559 case tableLiteralLengths:
560 seq = &seqs.litLengths
561 case tableOffsets:
562 seq = &seqs.offsets
563 case tableMatchLengths:
564 seq = &seqs.matchLengths
565 default:
566 panic("unknown table")
567 }
568 switch mode {
569 case compModePredefined:
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530570 if seq.fse != nil && !seq.fse.preDefined {
571 fseDecoderPool.Put(seq.fse)
572 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400573 seq.fse = &fsePredef[i]
574 case compModeRLE:
575 if br.remain() < 1 {
576 return ErrBlockTooSmall
577 }
578 v := br.Uint8()
579 br.advance(1)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530580 if seq.fse == nil || seq.fse.preDefined {
581 seq.fse = fseDecoderPool.Get().(*fseDecoder)
582 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400583 symb, err := decSymbolValue(v, symbolTableX[i])
584 if err != nil {
585 printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
586 return err
587 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530588 seq.fse.setRLE(symb)
589 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400590 printf("RLE set to %+v, code: %v", symb, v)
591 }
592 case compModeFSE:
593 println("Reading table for", tableIndex(i))
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530594 if seq.fse == nil || seq.fse.preDefined {
595 seq.fse = fseDecoderPool.Get().(*fseDecoder)
596 }
597 err := seq.fse.readNCount(&br, uint16(maxTableSymbol[i]))
khenaidoo7d3c5582021-08-11 18:09:44 -0400598 if err != nil {
599 println("Read table error:", err)
600 return err
601 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530602 err = seq.fse.transform(symbolTableX[i])
khenaidoo7d3c5582021-08-11 18:09:44 -0400603 if err != nil {
604 println("Transform table error:", err)
605 return err
606 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530607 if debugDecoder {
608 println("Read table ok", "symbolLen:", seq.fse.symbolLen)
khenaidoo7d3c5582021-08-11 18:09:44 -0400609 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400610 case compModeRepeat:
611 seq.repeat = true
612 }
613 if br.overread() {
614 return io.ErrUnexpectedEOF
615 }
616 }
617 in = br.unread()
618 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530619 if debugDecoder {
620 println("Literals:", len(seqs.literals), "hash:", xxhash.Sum64(seqs.literals), "and", seqs.nSeqs, "sequences.")
khenaidoo7d3c5582021-08-11 18:09:44 -0400621 }
622
623 if nSeqs == 0 {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530624 if len(b.sequence) > 0 {
625 b.sequence = b.sequence[:0]
khenaidoo7d3c5582021-08-11 18:09:44 -0400626 }
627 return nil
628 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530629 br := seqs.br
630 if br == nil {
631 br = &bitReader{}
khenaidoo7d3c5582021-08-11 18:09:44 -0400632 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400633 if err := br.init(in); err != nil {
634 return err
635 }
636
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530637 if err := seqs.initialize(br, hist, b.dst); err != nil {
638 println("initializing sequences:", err)
639 return err
640 }
641 // Extract blocks...
642 if false && hist.dict == nil {
643 fatalErr := func(err error) {
644 if err != nil {
645 panic(err)
646 }
647 }
648 fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
649 var buf bytes.Buffer
650 fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
651 fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
652 fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
653 buf.Write(in)
654 ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
655 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400656
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530657 return nil
658}
659
660func (b *blockDec) decodeSequences(hist *history) error {
661 if cap(b.sequence) < hist.decoders.nSeqs {
662 if b.lowMem {
663 b.sequence = make([]seqVals, 0, hist.decoders.nSeqs)
664 } else {
665 b.sequence = make([]seqVals, 0, 0x7F00+0xffff)
666 }
667 }
668 b.sequence = b.sequence[:hist.decoders.nSeqs]
669 if hist.decoders.nSeqs == 0 {
670 hist.decoders.seqSize = len(hist.decoders.literals)
671 return nil
672 }
673 hist.decoders.windowSize = hist.windowSize
674 hist.decoders.prevOffset = hist.recentOffsets
675
676 err := hist.decoders.decode(b.sequence)
677 hist.recentOffsets = hist.decoders.prevOffset
678 return err
679}
680
681func (b *blockDec) executeSequences(hist *history) error {
khenaidoo7d3c5582021-08-11 18:09:44 -0400682 hbytes := hist.b
683 if len(hbytes) > hist.windowSize {
684 hbytes = hbytes[len(hbytes)-hist.windowSize:]
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530685 // We do not need history anymore.
khenaidoo7d3c5582021-08-11 18:09:44 -0400686 if hist.dict != nil {
687 hist.dict.content = nil
688 }
689 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530690 hist.decoders.windowSize = hist.windowSize
691 hist.decoders.out = b.dst[:0]
692 err := hist.decoders.execute(b.sequence, hbytes)
khenaidoo7d3c5582021-08-11 18:09:44 -0400693 if err != nil {
694 return err
695 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530696 return b.updateHistory(hist)
697}
khenaidoo7d3c5582021-08-11 18:09:44 -0400698
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530699func (b *blockDec) updateHistory(hist *history) error {
khenaidoo7d3c5582021-08-11 18:09:44 -0400700 if len(b.data) > maxCompressedBlockSize {
701 return fmt.Errorf("compressed block size too large (%d)", len(b.data))
702 }
703 // Set output and release references.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530704 b.dst = hist.decoders.out
705 hist.recentOffsets = hist.decoders.prevOffset
khenaidoo7d3c5582021-08-11 18:09:44 -0400706
khenaidoo7d3c5582021-08-11 18:09:44 -0400707 if b.Last {
708 // if last block we don't care about history.
709 println("Last block, no history returned")
710 hist.b = hist.b[:0]
711 return nil
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530712 } else {
713 hist.append(b.dst)
714 if debugDecoder {
715 println("Finished block with ", len(b.sequence), "sequences. Added", len(b.dst), "to history, now length", len(hist.b))
716 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400717 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530718 hist.decoders.out, hist.decoders.literals = nil, nil
khenaidoo7d3c5582021-08-11 18:09:44 -0400719
720 return nil
721}