blob: 9568a4ba314ed698b0d21fb8cf6460c87ade9add [file] [log] [blame]
khenaidoo7d3c5582021-08-11 18:09:44 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
8 "bytes"
9 "encoding/hex"
10 "errors"
khenaidoo7d3c5582021-08-11 18:09:44 -040011 "io"
khenaidoo7d3c5582021-08-11 18:09:44 -040012
13 "github.com/klauspost/compress/zstd/internal/xxhash"
14)
15
16type frameDec struct {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053017 o decoderOptions
18 crc *xxhash.Digest
khenaidoo7d3c5582021-08-11 18:09:44 -040019
20 WindowSize uint64
21
khenaidoo7d3c5582021-08-11 18:09:44 -040022 // Frame history passed between blocks
23 history history
24
25 rawInput byteBuffer
26
27 // Byte buffer that can be reused for small input blocks.
28 bBuf byteBuf
29
30 FrameContentSize uint64
khenaidoo7d3c5582021-08-11 18:09:44 -040031
32 DictionaryID *uint32
33 HasCheckSum bool
34 SingleSegment bool
khenaidoo7d3c5582021-08-11 18:09:44 -040035}
36
37const (
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053038 // MinWindowSize is the minimum Window Size, which is 1 KB.
khenaidoo7d3c5582021-08-11 18:09:44 -040039 MinWindowSize = 1 << 10
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053040
41 // MaxWindowSize is the maximum encoder window size
42 // and the default decoder maximum window size.
khenaidoo7d3c5582021-08-11 18:09:44 -040043 MaxWindowSize = 1 << 29
44)
45
46var (
47 frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
48 skippableFrameMagic = []byte{0x2a, 0x4d, 0x18}
49)
50
51func newFrameDec(o decoderOptions) *frameDec {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053052 if o.maxWindowSize > o.maxDecodedSize {
53 o.maxWindowSize = o.maxDecodedSize
khenaidoo7d3c5582021-08-11 18:09:44 -040054 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053055 d := frameDec{
56 o: o,
khenaidoo7d3c5582021-08-11 18:09:44 -040057 }
58 return &d
59}
60
61// reset will read the frame header and prepare for block decoding.
62// If nothing can be read from the input, io.EOF will be returned.
63// Any other error indicated that the stream contained data, but
64// there was a problem.
65func (d *frameDec) reset(br byteBuffer) error {
66 d.HasCheckSum = false
67 d.WindowSize = 0
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053068 var signature [4]byte
khenaidoo7d3c5582021-08-11 18:09:44 -040069 for {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053070 var err error
71 // Check if we can read more...
72 b, err := br.readSmall(1)
73 switch err {
74 case io.EOF, io.ErrUnexpectedEOF:
khenaidoo7d3c5582021-08-11 18:09:44 -040075 return io.EOF
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053076 default:
77 return err
78 case nil:
79 signature[0] = b[0]
khenaidoo7d3c5582021-08-11 18:09:44 -040080 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053081 // Read the rest, don't allow io.ErrUnexpectedEOF
82 b, err = br.readSmall(3)
83 switch err {
84 case io.EOF:
85 return io.EOF
86 default:
87 return err
88 case nil:
89 copy(signature[1:], b)
90 }
91
92 if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
93 if debugDecoder {
94 println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
khenaidoo7d3c5582021-08-11 18:09:44 -040095 }
96 // Break if not skippable frame.
97 break
98 }
99 // Read size to skip
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530100 b, err = br.readSmall(4)
101 if err != nil {
102 if debugDecoder {
103 println("Reading Frame Size", err)
104 }
105 return err
khenaidoo7d3c5582021-08-11 18:09:44 -0400106 }
107 n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
108 println("Skipping frame with", n, "bytes.")
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530109 err = br.skipN(int64(n))
khenaidoo7d3c5582021-08-11 18:09:44 -0400110 if err != nil {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530111 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400112 println("Reading discarded frame", err)
113 }
114 return err
115 }
116 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530117 if !bytes.Equal(signature[:], frameMagic) {
118 if debugDecoder {
119 println("Got magic numbers: ", signature, "want:", frameMagic)
120 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400121 return ErrMagicMismatch
122 }
123
124 // Read Frame_Header_Descriptor
125 fhd, err := br.readByte()
126 if err != nil {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530127 if debugDecoder {
128 println("Reading Frame_Header_Descriptor", err)
129 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400130 return err
131 }
132 d.SingleSegment = fhd&(1<<5) != 0
133
134 if fhd&(1<<3) != 0 {
135 return errors.New("reserved bit set on frame header")
136 }
137
138 // Read Window_Descriptor
139 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
140 d.WindowSize = 0
141 if !d.SingleSegment {
142 wd, err := br.readByte()
143 if err != nil {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530144 if debugDecoder {
145 println("Reading Window_Descriptor", err)
146 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400147 return err
148 }
149 printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
150 windowLog := 10 + (wd >> 3)
151 windowBase := uint64(1) << windowLog
152 windowAdd := (windowBase / 8) * uint64(wd&0x7)
153 d.WindowSize = windowBase + windowAdd
154 }
155
156 // Read Dictionary_ID
157 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
158 d.DictionaryID = nil
159 if size := fhd & 3; size != 0 {
160 if size == 3 {
161 size = 4
162 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530163
164 b, err := br.readSmall(int(size))
165 if err != nil {
166 println("Reading Dictionary_ID", err)
167 return err
khenaidoo7d3c5582021-08-11 18:09:44 -0400168 }
169 var id uint32
170 switch size {
171 case 1:
172 id = uint32(b[0])
173 case 2:
174 id = uint32(b[0]) | (uint32(b[1]) << 8)
175 case 4:
176 id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
177 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530178 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400179 println("Dict size", size, "ID:", id)
180 }
181 if id > 0 {
182 // ID 0 means "sorry, no dictionary anyway".
183 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
184 d.DictionaryID = &id
185 }
186 }
187
188 // Read Frame_Content_Size
189 // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
190 var fcsSize int
191 v := fhd >> 6
192 switch v {
193 case 0:
194 if d.SingleSegment {
195 fcsSize = 1
196 }
197 default:
198 fcsSize = 1 << v
199 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530200 d.FrameContentSize = fcsUnknown
khenaidoo7d3c5582021-08-11 18:09:44 -0400201 if fcsSize > 0 {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530202 b, err := br.readSmall(fcsSize)
203 if err != nil {
204 println("Reading Frame content", err)
205 return err
khenaidoo7d3c5582021-08-11 18:09:44 -0400206 }
207 switch fcsSize {
208 case 1:
209 d.FrameContentSize = uint64(b[0])
210 case 2:
211 // When FCS_Field_Size is 2, the offset of 256 is added.
212 d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
213 case 4:
214 d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
215 case 8:
216 d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
217 d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
218 d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
219 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530220 if debugDecoder {
221 println("Read FCS:", d.FrameContentSize)
khenaidoo7d3c5582021-08-11 18:09:44 -0400222 }
223 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530224
khenaidoo7d3c5582021-08-11 18:09:44 -0400225 // Move this to shared.
226 d.HasCheckSum = fhd&(1<<2) != 0
227 if d.HasCheckSum {
228 if d.crc == nil {
229 d.crc = xxhash.New()
230 }
231 d.crc.Reset()
232 }
233
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530234 if d.WindowSize > d.o.maxWindowSize {
235 if debugDecoder {
236 printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
237 }
238 return ErrWindowSizeExceeded
239 }
240
khenaidoo7d3c5582021-08-11 18:09:44 -0400241 if d.WindowSize == 0 && d.SingleSegment {
242 // We may not need window in this case.
243 d.WindowSize = d.FrameContentSize
244 if d.WindowSize < MinWindowSize {
245 d.WindowSize = MinWindowSize
246 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530247 if d.WindowSize > d.o.maxDecodedSize {
248 if debugDecoder {
249 printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
250 }
251 return ErrDecoderSizeExceeded
252 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400253 }
254
khenaidoo7d3c5582021-08-11 18:09:44 -0400255 // The minimum Window_Size is 1 KB.
256 if d.WindowSize < MinWindowSize {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530257 if debugDecoder {
258 println("got window size: ", d.WindowSize)
259 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400260 return ErrWindowSizeTooSmall
261 }
262 d.history.windowSize = int(d.WindowSize)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530263 if !d.o.lowMem || d.history.windowSize < maxBlockSize {
264 // Alloc 2x window size if not low-mem, or very small window size.
265 d.history.allocFrameBuffer = d.history.windowSize * 2
khenaidoo7d3c5582021-08-11 18:09:44 -0400266 } else {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530267 // Alloc with one additional block
268 d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
khenaidoo7d3c5582021-08-11 18:09:44 -0400269 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530270
271 if debugDecoder {
272 println("Frame: Dict:", d.DictionaryID, "FrameContentSize:", d.FrameContentSize, "singleseg:", d.SingleSegment, "window:", d.WindowSize, "crc:", d.HasCheckSum)
273 }
274
khenaidoo7d3c5582021-08-11 18:09:44 -0400275 // history contains input - maybe we do something
276 d.rawInput = br
277 return nil
278}
279
280// next will start decoding the next block from stream.
281func (d *frameDec) next(block *blockDec) error {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530282 if debugDecoder {
283 println("decoding new block")
khenaidoo7d3c5582021-08-11 18:09:44 -0400284 }
285 err := block.reset(d.rawInput, d.WindowSize)
286 if err != nil {
287 println("block error:", err)
288 // Signal the frame decoder we have a problem.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530289 block.sendErr(err)
khenaidoo7d3c5582021-08-11 18:09:44 -0400290 return err
291 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400292 return nil
293}
294
khenaidoo7d3c5582021-08-11 18:09:44 -0400295// checkCRC will check the checksum if the frame has one.
296// Will return ErrCRCMismatch if crc check failed, otherwise nil.
297func (d *frameDec) checkCRC() error {
298 if !d.HasCheckSum {
299 return nil
300 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530301
302 // We can overwrite upper tmp now
303 want, err := d.rawInput.readSmall(4)
304 if err != nil {
305 println("CRC missing?", err)
306 return err
307 }
308
309 if d.o.ignoreChecksum {
310 return nil
311 }
312
khenaidoo7d3c5582021-08-11 18:09:44 -0400313 var tmp [4]byte
314 got := d.crc.Sum64()
315 // Flip to match file order.
316 tmp[0] = byte(got >> 0)
317 tmp[1] = byte(got >> 8)
318 tmp[2] = byte(got >> 16)
319 tmp[3] = byte(got >> 24)
320
khenaidoo7d3c5582021-08-11 18:09:44 -0400321 if !bytes.Equal(tmp[:], want) {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530322 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400323 println("CRC Check Failed:", tmp[:], "!=", want)
324 }
325 return ErrCRCMismatch
326 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530327 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400328 println("CRC ok", tmp[:])
329 }
330 return nil
331}
332
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530333// consumeCRC reads the checksum data if the frame has one.
334func (d *frameDec) consumeCRC() error {
335 if d.HasCheckSum {
336 _, err := d.rawInput.readSmall(4)
337 if err != nil {
338 println("CRC missing?", err)
339 return err
340 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400341 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400342
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530343 return nil
khenaidoo7d3c5582021-08-11 18:09:44 -0400344}
345
346// runDecoder will create a sync decoder that will decode a block of data.
347func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
348 saved := d.history.b
349
350 // We use the history for output to avoid copying it.
351 d.history.b = dst
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530352 d.history.ignoreBuffer = len(dst)
khenaidoo7d3c5582021-08-11 18:09:44 -0400353 // Store input length, so we only check new data.
354 crcStart := len(dst)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530355 d.history.decoders.maxSyncLen = 0
356 if d.FrameContentSize != fcsUnknown {
357 d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
358 if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
359 return dst, ErrDecoderSizeExceeded
360 }
361 if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
362 // Alloc for output
363 dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
364 copy(dst2, dst)
365 dst = dst2
366 }
367 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400368 var err error
369 for {
370 err = dec.reset(d.rawInput, d.WindowSize)
371 if err != nil {
372 break
373 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530374 if debugDecoder {
khenaidoo7d3c5582021-08-11 18:09:44 -0400375 println("next block:", dec)
376 }
377 err = dec.decodeBuf(&d.history)
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530378 if err != nil {
khenaidoo7d3c5582021-08-11 18:09:44 -0400379 break
380 }
381 if uint64(len(d.history.b)) > d.o.maxDecodedSize {
382 err = ErrDecoderSizeExceeded
383 break
384 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530385 if uint64(len(d.history.b)-crcStart) > d.FrameContentSize {
386 println("runDecoder: FrameContentSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
khenaidoo7d3c5582021-08-11 18:09:44 -0400387 err = ErrFrameSizeExceeded
388 break
389 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530390 if dec.Last {
391 break
392 }
393 if debugDecoder {
394 println("runDecoder: FrameContentSize", uint64(len(d.history.b)-crcStart), "<=", d.FrameContentSize)
395 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400396 }
397 dst = d.history.b
398 if err == nil {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530399 if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
400 err = ErrFrameSizeMismatch
401 } else if d.HasCheckSum {
402 if d.o.ignoreChecksum {
403 err = d.consumeCRC()
404 } else {
405 var n int
406 n, err = d.crc.Write(dst[crcStart:])
407 if err == nil {
408 if n != len(dst)-crcStart {
409 err = io.ErrShortWrite
410 } else {
411 err = d.checkCRC()
412 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400413 }
414 }
415 }
416 }
417 d.history.b = saved
418 return dst, err
419}