blob: df044720302467aa7dc1efde8a06773edcdb8d07 [file] [log] [blame]
khenaidoo7d3c5582021-08-11 18:09:44 -04001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
8 "errors"
9 "fmt"
10 "io"
11)
12
13type seq struct {
14 litLen uint32
15 matchLen uint32
16 offset uint32
17
18 // Codes are stored here for the encoder
19 // so they only have to be looked up once.
20 llCode, mlCode, ofCode uint8
21}
22
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053023type seqVals struct {
24 ll, ml, mo int
25}
26
khenaidoo7d3c5582021-08-11 18:09:44 -040027func (s seq) String() string {
28 if s.offset <= 3 {
29 if s.offset == 0 {
30 return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset: INVALID (0)")
31 }
32 return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset, " (repeat)")
33 }
34 return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)")
35}
36
37type seqCompMode uint8
38
39const (
40 compModePredefined seqCompMode = iota
41 compModeRLE
42 compModeFSE
43 compModeRepeat
44)
45
46type sequenceDec struct {
47 // decoder keeps track of the current state and updates it from the bitstream.
48 fse *fseDecoder
49 state fseState
50 repeat bool
51}
52
53// init the state of the decoder with input from stream.
54func (s *sequenceDec) init(br *bitReader) error {
55 if s.fse == nil {
56 return errors.New("sequence decoder not defined")
57 }
58 s.state.init(br, s.fse.actualTableLog, s.fse.dt[:1<<s.fse.actualTableLog])
59 return nil
60}
61
62// sequenceDecs contains all 3 sequence decoders and their state.
63type sequenceDecs struct {
64 litLengths sequenceDec
65 offsets sequenceDec
66 matchLengths sequenceDec
67 prevOffset [3]int
khenaidoo7d3c5582021-08-11 18:09:44 -040068 dict []byte
69 literals []byte
70 out []byte
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053071 nSeqs int
72 br *bitReader
73 seqSize int
khenaidoo7d3c5582021-08-11 18:09:44 -040074 windowSize int
75 maxBits uint8
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053076 maxSyncLen uint64
khenaidoo7d3c5582021-08-11 18:09:44 -040077}
78
79// initialize all 3 decoders from the stream input.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053080func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) error {
khenaidoo7d3c5582021-08-11 18:09:44 -040081 if err := s.litLengths.init(br); err != nil {
82 return errors.New("litLengths:" + err.Error())
83 }
84 if err := s.offsets.init(br); err != nil {
85 return errors.New("offsets:" + err.Error())
86 }
87 if err := s.matchLengths.init(br); err != nil {
88 return errors.New("matchLengths:" + err.Error())
89 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +053090 s.br = br
khenaidoo7d3c5582021-08-11 18:09:44 -040091 s.prevOffset = hist.recentOffsets
92 s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
93 s.windowSize = hist.windowSize
94 s.out = out
95 s.dict = nil
96 if hist.dict != nil {
97 s.dict = hist.dict.content
98 }
99 return nil
100}
101
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530102// execute will execute the decoded sequence with the provided history.
103// The sequence must be evaluated before being sent.
104func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
105 if len(s.dict) == 0 {
106 return s.executeSimple(seqs, hist)
107 }
108
109 // Ensure we have enough output size...
110 if len(s.out)+s.seqSize > cap(s.out) {
111 addBytes := s.seqSize + len(s.out)
112 s.out = append(s.out, make([]byte, addBytes)...)
113 s.out = s.out[:len(s.out)-addBytes]
114 }
115
116 if debugDecoder {
117 printf("Execute %d seqs with hist %d, dict %d, literals: %d into %d bytes\n", len(seqs), len(hist), len(s.dict), len(s.literals), s.seqSize)
118 }
119
120 var t = len(s.out)
121 out := s.out[:t+s.seqSize]
122
123 for _, seq := range seqs {
124 // Add literals
125 copy(out[t:], s.literals[:seq.ll])
126 t += seq.ll
127 s.literals = s.literals[seq.ll:]
128
129 // Copy from dictionary...
130 if seq.mo > t+len(hist) || seq.mo > s.windowSize {
131 if len(s.dict) == 0 {
132 return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
133 }
134
135 // we may be in dictionary.
136 dictO := len(s.dict) - (seq.mo - (t + len(hist)))
137 if dictO < 0 || dictO >= len(s.dict) {
138 return fmt.Errorf("match offset (%d) bigger than current history+dict (%d)", seq.mo, t+len(hist)+len(s.dict))
139 }
140 end := dictO + seq.ml
141 if end > len(s.dict) {
142 n := len(s.dict) - dictO
143 copy(out[t:], s.dict[dictO:])
144 t += n
145 seq.ml -= n
146 } else {
147 copy(out[t:], s.dict[dictO:end])
148 t += end - dictO
149 continue
150 }
151 }
152
153 // Copy from history.
154 if v := seq.mo - t; v > 0 {
155 // v is the start position in history from end.
156 start := len(hist) - v
157 if seq.ml > v {
158 // Some goes into current block.
159 // Copy remainder of history
160 copy(out[t:], hist[start:])
161 t += v
162 seq.ml -= v
163 } else {
164 copy(out[t:], hist[start:start+seq.ml])
165 t += seq.ml
166 continue
167 }
168 }
169 // We must be in current buffer now
170 if seq.ml > 0 {
171 start := t - seq.mo
172 if seq.ml <= t-start {
173 // No overlap
174 copy(out[t:], out[start:start+seq.ml])
175 t += seq.ml
176 continue
177 } else {
178 // Overlapping copy
179 // Extend destination slice and copy one byte at the time.
180 src := out[start : start+seq.ml]
181 dst := out[t:]
182 dst = dst[:len(src)]
183 t += len(src)
184 // Destination is the space we just added.
185 for i := range src {
186 dst[i] = src[i]
187 }
188 }
189 }
190 }
191
192 // Add final literals
193 copy(out[t:], s.literals)
194 if debugDecoder {
195 t += len(s.literals)
196 if t != len(out) {
197 panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
198 }
199 }
200 s.out = out
201
202 return nil
203}
204
khenaidoo7d3c5582021-08-11 18:09:44 -0400205// decode sequences from the stream with the provided history.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530206func (s *sequenceDecs) decodeSync(hist []byte) error {
207 supported, err := s.decodeSyncSimple(hist)
208 if supported {
209 return err
210 }
211
212 br := s.br
213 seqs := s.nSeqs
khenaidoo7d3c5582021-08-11 18:09:44 -0400214 startSize := len(s.out)
215 // Grab full sizes tables, to avoid bounds checks.
216 llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
217 llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530218 out := s.out
219 maxBlockSize := maxCompressedBlockSize
220 if s.windowSize < maxBlockSize {
221 maxBlockSize = s.windowSize
222 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400223
224 for i := seqs - 1; i >= 0; i-- {
225 if br.overread() {
226 printf("reading sequence %d, exceeded available data\n", seqs-i)
227 return io.ErrUnexpectedEOF
228 }
229 var ll, mo, ml int
230 if br.off > 4+((maxOffsetBits+16+16)>>3) {
231 // inlined function:
232 // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
233
234 // Final will not read from stream.
235 var llB, mlB, moB uint8
236 ll, llB = llState.final()
237 ml, mlB = mlState.final()
238 mo, moB = ofState.final()
239
240 // extra bits are stored in reverse order.
241 br.fillFast()
242 mo += br.getBits(moB)
243 if s.maxBits > 32 {
244 br.fillFast()
245 }
246 ml += br.getBits(mlB)
247 ll += br.getBits(llB)
248
249 if moB > 1 {
250 s.prevOffset[2] = s.prevOffset[1]
251 s.prevOffset[1] = s.prevOffset[0]
252 s.prevOffset[0] = mo
253 } else {
254 // mo = s.adjustOffset(mo, ll, moB)
255 // Inlined for rather big speedup
256 if ll == 0 {
257 // There is an exception though, when current sequence's literals_length = 0.
258 // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
259 // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
260 mo++
261 }
262
263 if mo == 0 {
264 mo = s.prevOffset[0]
265 } else {
266 var temp int
267 if mo == 3 {
268 temp = s.prevOffset[0] - 1
269 } else {
270 temp = s.prevOffset[mo]
271 }
272
273 if temp == 0 {
274 // 0 is not valid; input is corrupted; force offset to 1
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530275 println("WARNING: temp was 0")
khenaidoo7d3c5582021-08-11 18:09:44 -0400276 temp = 1
277 }
278
279 if mo != 1 {
280 s.prevOffset[2] = s.prevOffset[1]
281 }
282 s.prevOffset[1] = s.prevOffset[0]
283 s.prevOffset[0] = temp
284 mo = temp
285 }
286 }
287 br.fillFast()
288 } else {
289 ll, mo, ml = s.next(br, llState, mlState, ofState)
290 br.fill()
291 }
292
293 if debugSequences {
294 println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
295 }
296
297 if ll > len(s.literals) {
298 return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
299 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530300 size := ll + ml + len(out)
khenaidoo7d3c5582021-08-11 18:09:44 -0400301 if size-startSize > maxBlockSize {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530302 return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
khenaidoo7d3c5582021-08-11 18:09:44 -0400303 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530304 if size > cap(out) {
khenaidoo7d3c5582021-08-11 18:09:44 -0400305 // Not enough size, which can happen under high volume block streaming conditions
306 // but could be if destination slice is too small for sync operations.
307 // over-allocating here can create a large amount of GC pressure so we try to keep
308 // it as contained as possible
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530309 used := len(out) - startSize
khenaidoo7d3c5582021-08-11 18:09:44 -0400310 addBytes := 256 + ll + ml + used>>2
311 // Clamp to max block size.
312 if used+addBytes > maxBlockSize {
313 addBytes = maxBlockSize - used
314 }
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530315 out = append(out, make([]byte, addBytes)...)
316 out = out[:len(out)-addBytes]
khenaidoo7d3c5582021-08-11 18:09:44 -0400317 }
318 if ml > maxMatchLen {
319 return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
320 }
321
322 // Add literals
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530323 out = append(out, s.literals[:ll]...)
khenaidoo7d3c5582021-08-11 18:09:44 -0400324 s.literals = s.literals[ll:]
khenaidoo7d3c5582021-08-11 18:09:44 -0400325
326 if mo == 0 && ml > 0 {
327 return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
328 }
329
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530330 if mo > len(out)+len(hist) || mo > s.windowSize {
khenaidoo7d3c5582021-08-11 18:09:44 -0400331 if len(s.dict) == 0 {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530332 return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
khenaidoo7d3c5582021-08-11 18:09:44 -0400333 }
334
335 // we may be in dictionary.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530336 dictO := len(s.dict) - (mo - (len(out) + len(hist)))
khenaidoo7d3c5582021-08-11 18:09:44 -0400337 if dictO < 0 || dictO >= len(s.dict) {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530338 return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
khenaidoo7d3c5582021-08-11 18:09:44 -0400339 }
340 end := dictO + ml
341 if end > len(s.dict) {
342 out = append(out, s.dict[dictO:]...)
khenaidoo7d3c5582021-08-11 18:09:44 -0400343 ml -= len(s.dict) - dictO
344 } else {
345 out = append(out, s.dict[dictO:end]...)
346 mo = 0
347 ml = 0
348 }
349 }
350
351 // Copy from history.
352 // TODO: Blocks without history could be made to ignore this completely.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530353 if v := mo - len(out); v > 0 {
khenaidoo7d3c5582021-08-11 18:09:44 -0400354 // v is the start position in history from end.
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530355 start := len(hist) - v
khenaidoo7d3c5582021-08-11 18:09:44 -0400356 if ml > v {
357 // Some goes into current block.
358 // Copy remainder of history
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530359 out = append(out, hist[start:]...)
khenaidoo7d3c5582021-08-11 18:09:44 -0400360 ml -= v
361 } else {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530362 out = append(out, hist[start:start+ml]...)
khenaidoo7d3c5582021-08-11 18:09:44 -0400363 ml = 0
364 }
365 }
366 // We must be in current buffer now
367 if ml > 0 {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530368 start := len(out) - mo
369 if ml <= len(out)-start {
khenaidoo7d3c5582021-08-11 18:09:44 -0400370 // No overlap
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530371 out = append(out, out[start:start+ml]...)
khenaidoo7d3c5582021-08-11 18:09:44 -0400372 } else {
373 // Overlapping copy
374 // Extend destination slice and copy one byte at the time.
375 out = out[:len(out)+ml]
376 src := out[start : start+ml]
377 // Destination is the space we just added.
378 dst := out[len(out)-ml:]
379 dst = dst[:len(src)]
380 for i := range src {
381 dst[i] = src[i]
382 }
383 }
384 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400385 if i == 0 {
386 // This is the last sequence, so we shouldn't update state.
387 break
388 }
389
390 // Manually inlined, ~ 5-20% faster
391 // Update all 3 states at once. Approx 20% faster.
392 nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
393 if nBits == 0 {
394 llState = llTable[llState.newState()&maxTableMask]
395 mlState = mlTable[mlState.newState()&maxTableMask]
396 ofState = ofTable[ofState.newState()&maxTableMask]
397 } else {
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530398 bits := br.get32BitsFast(nBits)
399
khenaidoo7d3c5582021-08-11 18:09:44 -0400400 lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
401 llState = llTable[(llState.newState()+lowBits)&maxTableMask]
402
403 lowBits = uint16(bits >> (ofState.nbBits() & 31))
404 lowBits &= bitMask[mlState.nbBits()&15]
405 mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
406
407 lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
408 ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
409 }
410 }
411
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530412 // Check if space for literals
413 if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
414 return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
415 }
khenaidoo7d3c5582021-08-11 18:09:44 -0400416
Akash Reddy Kankanalac28f0e22025-06-16 11:00:55 +0530417 // Add final literals
418 s.out = append(out, s.literals...)
419 return br.close()
khenaidoo7d3c5582021-08-11 18:09:44 -0400420}
421
422var bitMask [16]uint16
423
424func init() {
425 for i := range bitMask[:] {
426 bitMask[i] = uint16((1 << uint(i)) - 1)
427 }
428}
429
khenaidoo7d3c5582021-08-11 18:09:44 -0400430func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
431 // Final will not read from stream.
432 ll, llB := llState.final()
433 ml, mlB := mlState.final()
434 mo, moB := ofState.final()
435
436 // extra bits are stored in reverse order.
437 br.fill()
438 if s.maxBits <= 32 {
439 mo += br.getBits(moB)
440 ml += br.getBits(mlB)
441 ll += br.getBits(llB)
442 } else {
443 mo += br.getBits(moB)
444 br.fill()
445 // matchlength+literal length, max 32 bits
446 ml += br.getBits(mlB)
447 ll += br.getBits(llB)
448
449 }
450 mo = s.adjustOffset(mo, ll, moB)
451 return
452}
453
454func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
455 if offsetB > 1 {
456 s.prevOffset[2] = s.prevOffset[1]
457 s.prevOffset[1] = s.prevOffset[0]
458 s.prevOffset[0] = offset
459 return offset
460 }
461
462 if litLen == 0 {
463 // There is an exception though, when current sequence's literals_length = 0.
464 // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
465 // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
466 offset++
467 }
468
469 if offset == 0 {
470 return s.prevOffset[0]
471 }
472 var temp int
473 if offset == 3 {
474 temp = s.prevOffset[0] - 1
475 } else {
476 temp = s.prevOffset[offset]
477 }
478
479 if temp == 0 {
480 // 0 is not valid; input is corrupted; force offset to 1
481 println("temp was 0")
482 temp = 1
483 }
484
485 if offset != 1 {
486 s.prevOffset[2] = s.prevOffset[1]
487 }
488 s.prevOffset[1] = s.prevOffset[0]
489 s.prevOffset[0] = temp
490 return temp
491}