blob: 5442061b18df547665b84995cb1f7ad03299586c [file] [log] [blame]
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
8 "errors"
9 "fmt"
10 "math"
11)
12
13const (
14 // For encoding we only support up to
15 maxEncTableLog = 8
16 maxEncTablesize = 1 << maxTableLog
17 maxEncTableMask = (1 << maxTableLog) - 1
18 minEncTablelog = 5
19 maxEncSymbolValue = maxMatchLengthSymbol
20)
21
22// Scratch provides temporary storage for compression and decompression.
23type fseEncoder struct {
24 symbolLen uint16 // Length of active part of the symbol table.
25 actualTableLog uint8 // Selected tablelog.
26 ct cTable // Compression tables.
27 maxCount int // count of the most probable symbol
28 zeroBits bool // no bits has prob > 50%.
29 clearCount bool // clear count
30 useRLE bool // This encoder is for RLE
31 preDefined bool // This encoder is predefined.
32 reUsed bool // Set to know when the encoder has been reused.
33 rleVal uint8 // RLE Symbol
34 maxBits uint8 // Maximum output bits after transform.
35
36 // TODO: Technically zstd should be fine with 64 bytes.
37 count [256]uint32
38 norm [256]int16
39}
40
41// cTable contains tables used for compression.
42type cTable struct {
43 tableSymbol []byte
44 stateTable []uint16
45 symbolTT []symbolTransform
46}
47
// symbolTransform holds the per-symbol values used to advance the encoder state.
type symbolTransform struct {
	deltaNbBits    uint32 // Added to the state; the upper 16 bits of the sum give the number of bits to emit.
	deltaFindState int16  // Offset added to the shifted state to index the state table.
	outBits        uint8  // Output bit count assigned by setBits.
}

// String returns the fields formatted as a human readable string.
func (s symbolTransform) String() string {
	const layout = "{deltabits: %08x, findstate:%d outbits:%d}"
	return fmt.Sprintf(layout, s.deltaNbBits, s.deltaFindState, s.outBits)
}
59
60// Histogram allows to populate the histogram and skip that step in the compression,
61// It otherwise allows to inspect the histogram when compression is done.
62// To indicate that you have populated the histogram call HistogramFinished
63// with the value of the highest populated symbol, as well as the number of entries
64// in the most populated entry. These are accepted at face value.
65func (s *fseEncoder) Histogram() *[256]uint32 {
66 return &s.count
67}
68
69// HistogramFinished can be called to indicate that the histogram has been populated.
70// maxSymbol is the index of the highest set symbol of the next data segment.
71// maxCount is the number of entries in the most populated entry.
72// These are accepted at face value.
73func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
74 s.maxCount = maxCount
75 s.symbolLen = uint16(maxSymbol) + 1
76 s.clearCount = maxCount != 0
77}
78
79// prepare will prepare and allocate scratch tables used for both compression and decompression.
80func (s *fseEncoder) prepare() (*fseEncoder, error) {
81 if s == nil {
82 s = &fseEncoder{}
83 }
84 s.useRLE = false
85 if s.clearCount && s.maxCount == 0 {
86 for i := range s.count {
87 s.count[i] = 0
88 }
89 s.clearCount = false
90 }
91 return s, nil
92}
93
94// allocCtable will allocate tables needed for compression.
95// If existing tables a re big enough, they are simply re-used.
96func (s *fseEncoder) allocCtable() {
97 tableSize := 1 << s.actualTableLog
98 // get tableSymbol that is big enough.
99 if cap(s.ct.tableSymbol) < tableSize {
100 s.ct.tableSymbol = make([]byte, tableSize)
101 }
102 s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
103
104 ctSize := tableSize
105 if cap(s.ct.stateTable) < ctSize {
106 s.ct.stateTable = make([]uint16, ctSize)
107 }
108 s.ct.stateTable = s.ct.stateTable[:ctSize]
109
110 if cap(s.ct.symbolTT) < 256 {
111 s.ct.symbolTT = make([]symbolTransform, 256)
112 }
113 s.ct.symbolTT = s.ct.symbolTT[:256]
114}
115
116// buildCTable will populate the compression table so it is ready to be used.
117func (s *fseEncoder) buildCTable() error {
118 tableSize := uint32(1 << s.actualTableLog)
119 highThreshold := tableSize - 1
120 var cumul [256]int16
121
122 s.allocCtable()
123 tableSymbol := s.ct.tableSymbol[:tableSize]
124 // symbol start positions
125 {
126 cumul[0] = 0
127 for ui, v := range s.norm[:s.symbolLen-1] {
128 u := byte(ui) // one less than reference
129 if v == -1 {
130 // Low proba symbol
131 cumul[u+1] = cumul[u] + 1
132 tableSymbol[highThreshold] = u
133 highThreshold--
134 } else {
135 cumul[u+1] = cumul[u] + v
136 }
137 }
138 // Encode last symbol separately to avoid overflowing u
139 u := int(s.symbolLen - 1)
140 v := s.norm[s.symbolLen-1]
141 if v == -1 {
142 // Low proba symbol
143 cumul[u+1] = cumul[u] + 1
144 tableSymbol[highThreshold] = byte(u)
145 highThreshold--
146 } else {
147 cumul[u+1] = cumul[u] + v
148 }
149 if uint32(cumul[s.symbolLen]) != tableSize {
150 return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
151 }
152 cumul[s.symbolLen] = int16(tableSize) + 1
153 }
154 // Spread symbols
155 s.zeroBits = false
156 {
157 step := tableStep(tableSize)
158 tableMask := tableSize - 1
159 var position uint32
160 // if any symbol > largeLimit, we may have 0 bits output.
161 largeLimit := int16(1 << (s.actualTableLog - 1))
162 for ui, v := range s.norm[:s.symbolLen] {
163 symbol := byte(ui)
164 if v > largeLimit {
165 s.zeroBits = true
166 }
167 for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
168 tableSymbol[position] = symbol
169 position = (position + step) & tableMask
170 for position > highThreshold {
171 position = (position + step) & tableMask
172 } /* Low proba area */
173 }
174 }
175
176 // Check if we have gone through all positions
177 if position != 0 {
178 return errors.New("position!=0")
179 }
180 }
181
182 // Build table
183 table := s.ct.stateTable
184 {
185 tsi := int(tableSize)
186 for u, v := range tableSymbol {
187 // TableU16 : sorted by symbol order; gives next state value
188 table[cumul[v]] = uint16(tsi + u)
189 cumul[v]++
190 }
191 }
192
193 // Build Symbol Transformation Table
194 {
195 total := int16(0)
196 symbolTT := s.ct.symbolTT[:s.symbolLen]
197 tableLog := s.actualTableLog
198 tl := (uint32(tableLog) << 16) - (1 << tableLog)
199 for i, v := range s.norm[:s.symbolLen] {
200 switch v {
201 case 0:
202 case -1, 1:
203 symbolTT[i].deltaNbBits = tl
204 symbolTT[i].deltaFindState = total - 1
205 total++
206 default:
207 maxBitsOut := uint32(tableLog) - highBit(uint32(v-1))
208 minStatePlus := uint32(v) << maxBitsOut
209 symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
210 symbolTT[i].deltaFindState = total - v
211 total += v
212 }
213 }
214 if total != int16(tableSize) {
215 return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
216 }
217 }
218 return nil
219}
220
// rtbTable holds the rounding thresholds normalizeCount compares against,
// indexed by the truncated probability (0 through 7).
var rtbTable = [8]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
222
223func (s *fseEncoder) setRLE(val byte) {
224 s.allocCtable()
225 s.actualTableLog = 0
226 s.ct.stateTable = s.ct.stateTable[:1]
227 s.ct.symbolTT[val] = symbolTransform{
228 deltaFindState: 0,
229 deltaNbBits: 0,
230 }
231 if debugEncoder {
232 println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
233 }
234 s.rleVal = val
235 s.useRLE = true
236}
237
238// setBits will set output bits for the transform.
239// if nil is provided, the number of bits is equal to the index.
240func (s *fseEncoder) setBits(transform []byte) {
241 if s.reUsed || s.preDefined {
242 return
243 }
244 if s.useRLE {
245 if transform == nil {
246 s.ct.symbolTT[s.rleVal].outBits = s.rleVal
247 s.maxBits = s.rleVal
248 return
249 }
250 s.maxBits = transform[s.rleVal]
251 s.ct.symbolTT[s.rleVal].outBits = s.maxBits
252 return
253 }
254 if transform == nil {
255 for i := range s.ct.symbolTT[:s.symbolLen] {
256 s.ct.symbolTT[i].outBits = uint8(i)
257 }
258 s.maxBits = uint8(s.symbolLen - 1)
259 return
260 }
261 s.maxBits = 0
262 for i, v := range transform[:s.symbolLen] {
263 s.ct.symbolTT[i].outBits = v
264 if v > s.maxBits {
265 // We could assume bits always going up, but we play safe.
266 s.maxBits = v
267 }
268 }
269}
270
271// normalizeCount will normalize the count of the symbols so
272// the total is equal to the table size.
273// If successful, compression tables will also be made ready.
274func (s *fseEncoder) normalizeCount(length int) error {
275 if s.reUsed {
276 return nil
277 }
278 s.optimalTableLog(length)
279 var (
280 tableLog = s.actualTableLog
281 scale = 62 - uint64(tableLog)
282 step = (1 << 62) / uint64(length)
283 vStep = uint64(1) << (scale - 20)
284 stillToDistribute = int16(1 << tableLog)
285 largest int
286 largestP int16
287 lowThreshold = (uint32)(length >> tableLog)
288 )
289 if s.maxCount == length {
290 s.useRLE = true
291 return nil
292 }
293 s.useRLE = false
294 for i, cnt := range s.count[:s.symbolLen] {
295 // already handled
296 // if (count[s] == s.length) return 0; /* rle special case */
297
298 if cnt == 0 {
299 s.norm[i] = 0
300 continue
301 }
302 if cnt <= lowThreshold {
303 s.norm[i] = -1
304 stillToDistribute--
305 } else {
306 proba := (int16)((uint64(cnt) * step) >> scale)
307 if proba < 8 {
308 restToBeat := vStep * uint64(rtbTable[proba])
309 v := uint64(cnt)*step - (uint64(proba) << scale)
310 if v > restToBeat {
311 proba++
312 }
313 }
314 if proba > largestP {
315 largestP = proba
316 largest = i
317 }
318 s.norm[i] = proba
319 stillToDistribute -= proba
320 }
321 }
322
323 if -stillToDistribute >= (s.norm[largest] >> 1) {
324 // corner case, need another normalization method
325 err := s.normalizeCount2(length)
326 if err != nil {
327 return err
328 }
329 if debugAsserts {
330 err = s.validateNorm()
331 if err != nil {
332 return err
333 }
334 }
335 return s.buildCTable()
336 }
337 s.norm[largest] += stillToDistribute
338 if debugAsserts {
339 err := s.validateNorm()
340 if err != nil {
341 return err
342 }
343 }
344 return s.buildCTable()
345}
346
347// Secondary normalization method.
348// To be used when primary method fails.
349func (s *fseEncoder) normalizeCount2(length int) error {
350 const notYetAssigned = -2
351 var (
352 distributed uint32
353 total = uint32(length)
354 tableLog = s.actualTableLog
355 lowThreshold = total >> tableLog
356 lowOne = (total * 3) >> (tableLog + 1)
357 )
358 for i, cnt := range s.count[:s.symbolLen] {
359 if cnt == 0 {
360 s.norm[i] = 0
361 continue
362 }
363 if cnt <= lowThreshold {
364 s.norm[i] = -1
365 distributed++
366 total -= cnt
367 continue
368 }
369 if cnt <= lowOne {
370 s.norm[i] = 1
371 distributed++
372 total -= cnt
373 continue
374 }
375 s.norm[i] = notYetAssigned
376 }
377 toDistribute := (1 << tableLog) - distributed
378
379 if (total / toDistribute) > lowOne {
380 // risk of rounding to zero
381 lowOne = (total * 3) / (toDistribute * 2)
382 for i, cnt := range s.count[:s.symbolLen] {
383 if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
384 s.norm[i] = 1
385 distributed++
386 total -= cnt
387 continue
388 }
389 }
390 toDistribute = (1 << tableLog) - distributed
391 }
392 if distributed == uint32(s.symbolLen)+1 {
393 // all values are pretty poor;
394 // probably incompressible data (should have already been detected);
395 // find max, then give all remaining points to max
396 var maxV int
397 var maxC uint32
398 for i, cnt := range s.count[:s.symbolLen] {
399 if cnt > maxC {
400 maxV = i
401 maxC = cnt
402 }
403 }
404 s.norm[maxV] += int16(toDistribute)
405 return nil
406 }
407
408 if total == 0 {
409 // all of the symbols were low enough for the lowOne or lowThreshold
410 for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
411 if s.norm[i] > 0 {
412 toDistribute--
413 s.norm[i]++
414 }
415 }
416 return nil
417 }
418
419 var (
420 vStepLog = 62 - uint64(tableLog)
421 mid = uint64((1 << (vStepLog - 1)) - 1)
422 rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
423 tmpTotal = mid
424 )
425 for i, cnt := range s.count[:s.symbolLen] {
426 if s.norm[i] == notYetAssigned {
427 var (
428 end = tmpTotal + uint64(cnt)*rStep
429 sStart = uint32(tmpTotal >> vStepLog)
430 sEnd = uint32(end >> vStepLog)
431 weight = sEnd - sStart
432 )
433 if weight < 1 {
434 return errors.New("weight < 1")
435 }
436 s.norm[i] = int16(weight)
437 tmpTotal = end
438 }
439 }
440 return nil
441}
442
443// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
444func (s *fseEncoder) optimalTableLog(length int) {
445 tableLog := uint8(maxEncTableLog)
446 minBitsSrc := highBit(uint32(length)) + 1
447 minBitsSymbols := highBit(uint32(s.symbolLen-1)) + 2
448 minBits := uint8(minBitsSymbols)
449 if minBitsSrc < minBitsSymbols {
450 minBits = uint8(minBitsSrc)
451 }
452
453 maxBitsSrc := uint8(highBit(uint32(length-1))) - 2
454 if maxBitsSrc < tableLog {
455 // Accuracy can be reduced
456 tableLog = maxBitsSrc
457 }
458 if minBits > tableLog {
459 tableLog = minBits
460 }
461 // Need a minimum to safely represent all symbol values
462 if tableLog < minEncTablelog {
463 tableLog = minEncTablelog
464 }
465 if tableLog > maxEncTableLog {
466 tableLog = maxEncTableLog
467 }
468 s.actualTableLog = tableLog
469}
470
471// validateNorm validates the normalized histogram table.
472func (s *fseEncoder) validateNorm() (err error) {
473 var total int
474 for _, v := range s.norm[:s.symbolLen] {
475 if v >= 0 {
476 total += int(v)
477 } else {
478 total -= int(v)
479 }
480 }
481 defer func() {
482 if err == nil {
483 return
484 }
485 fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
486 for i, v := range s.norm[:s.symbolLen] {
487 fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
488 }
489 }()
490 if total != (1 << s.actualTableLog) {
491 return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
492 }
493 for i, v := range s.count[s.symbolLen:] {
494 if v != 0 {
495 return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
496 }
497 }
498 return nil
499}
500
501// writeCount will write the normalized histogram count to header.
502// This is read back by readNCount.
503func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
504 if s.useRLE {
505 return append(out, s.rleVal), nil
506 }
507 if s.preDefined || s.reUsed {
508 // Never write predefined.
509 return out, nil
510 }
511
512 var (
513 tableLog = s.actualTableLog
514 tableSize = 1 << tableLog
515 previous0 bool
516 charnum uint16
517
518 // maximum header size plus 2 extra bytes for final output if bitCount == 0.
519 maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2
520
521 // Write Table Size
522 bitStream = uint32(tableLog - minEncTablelog)
523 bitCount = uint(4)
524 remaining = int16(tableSize + 1) /* +1 for extra accuracy */
525 threshold = int16(tableSize)
526 nbBits = uint(tableLog + 1)
527 outP = len(out)
528 )
529 if cap(out) < outP+maxHeaderSize {
530 out = append(out, make([]byte, maxHeaderSize*3)...)
531 out = out[:len(out)-maxHeaderSize*3]
532 }
533 out = out[:outP+maxHeaderSize]
534
535 // stops at 1
536 for remaining > 1 {
537 if previous0 {
538 start := charnum
539 for s.norm[charnum] == 0 {
540 charnum++
541 }
542 for charnum >= start+24 {
543 start += 24
544 bitStream += uint32(0xFFFF) << bitCount
545 out[outP] = byte(bitStream)
546 out[outP+1] = byte(bitStream >> 8)
547 outP += 2
548 bitStream >>= 16
549 }
550 for charnum >= start+3 {
551 start += 3
552 bitStream += 3 << bitCount
553 bitCount += 2
554 }
555 bitStream += uint32(charnum-start) << bitCount
556 bitCount += 2
557 if bitCount > 16 {
558 out[outP] = byte(bitStream)
559 out[outP+1] = byte(bitStream >> 8)
560 outP += 2
561 bitStream >>= 16
562 bitCount -= 16
563 }
564 }
565
566 count := s.norm[charnum]
567 charnum++
568 max := (2*threshold - 1) - remaining
569 if count < 0 {
570 remaining += count
571 } else {
572 remaining -= count
573 }
574 count++ // +1 for extra accuracy
575 if count >= threshold {
576 count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
577 }
578 bitStream += uint32(count) << bitCount
579 bitCount += nbBits
580 if count < max {
581 bitCount--
582 }
583
584 previous0 = count == 1
585 if remaining < 1 {
586 return nil, errors.New("internal error: remaining < 1")
587 }
588 for remaining < threshold {
589 nbBits--
590 threshold >>= 1
591 }
592
593 if bitCount > 16 {
594 out[outP] = byte(bitStream)
595 out[outP+1] = byte(bitStream >> 8)
596 outP += 2
597 bitStream >>= 16
598 bitCount -= 16
599 }
600 }
601
602 if outP+2 > len(out) {
603 return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen])
604 }
605 out[outP] = byte(bitStream)
606 out[outP+1] = byte(bitStream >> 8)
607 outP += int((bitCount + 7) / 8)
608
609 if charnum > s.symbolLen {
610 return nil, errors.New("internal error: charnum > s.symbolLen")
611 }
612 return out[:outP], nil
613}
614
615// Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
616// note 1 : assume symbolValue is valid (<= maxSymbolValue)
617// note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits *
618func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 {
619 minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16
620 threshold := (minNbBits + 1) << 16
621 if debugAsserts {
622 if !(s.actualTableLog < 16) {
623 panic("!s.actualTableLog < 16")
624 }
625 // ensure enough room for renormalization double shift
626 if !(uint8(accuracyLog) < 31-s.actualTableLog) {
627 panic("!uint8(accuracyLog) < 31-s.actualTableLog")
628 }
629 }
630 tableSize := uint32(1) << s.actualTableLog
631 deltaFromThreshold := threshold - (s.ct.symbolTT[symbolValue].deltaNbBits + tableSize)
632 // linear interpolation (very approximate)
633 normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog
634 bitMultiplier := uint32(1) << accuracyLog
635 if debugAsserts {
636 if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold {
637 panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold")
638 }
639 if normalizedDeltaFromThreshold > bitMultiplier {
640 panic("normalizedDeltaFromThreshold > bitMultiplier")
641 }
642 }
643 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold
644}
645
646// Returns the cost in bits of encoding the distribution in count using ctable.
647// Histogram should only be up to the last non-zero symbol.
648// Returns an -1 if ctable cannot represent all the symbols in count.
649func (s *fseEncoder) approxSize(hist []uint32) uint32 {
650 if int(s.symbolLen) < len(hist) {
651 // More symbols than we have.
652 return math.MaxUint32
653 }
654 if s.useRLE {
655 // We will never reuse RLE encoders.
656 return math.MaxUint32
657 }
658 const kAccuracyLog = 8
659 badCost := (uint32(s.actualTableLog) + 1) << kAccuracyLog
660 var cost uint32
661 for i, v := range hist {
662 if v == 0 {
663 continue
664 }
665 if s.norm[i] == 0 {
666 return math.MaxUint32
667 }
668 bitCost := s.bitCost(uint8(i), kAccuracyLog)
669 if bitCost > badCost {
670 return math.MaxUint32
671 }
672 cost += v * bitCost
673 }
674 return cost >> kAccuracyLog
675}
676
677// maxHeaderSize returns the maximum header size in bits.
678// This is not exact size, but we want a penalty for new tables anyway.
679func (s *fseEncoder) maxHeaderSize() uint32 {
680 if s.preDefined {
681 return 0
682 }
683 if s.useRLE {
684 return 8
685 }
686 return (((uint32(s.symbolLen) * uint32(s.actualTableLog)) >> 3) + 3) * 8
687}
688
689// cState contains the compression state of a stream.
690type cState struct {
691 bw *bitWriter
692 stateTable []uint16
693 state uint16
694}
695
696// init will initialize the compression state to the first symbol of the stream.
697func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
698 c.bw = bw
699 c.stateTable = ct.stateTable
700 if len(c.stateTable) == 1 {
701 // RLE
702 c.stateTable[0] = uint16(0)
703 c.state = 0
704 return
705 }
706 nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
707 im := int32((nbBitsOut << 16) - first.deltaNbBits)
708 lu := (im >> nbBitsOut) + int32(first.deltaFindState)
709 c.state = c.stateTable[lu]
710}
711
712// encode the output symbol provided and write it to the bitstream.
713func (c *cState) encode(symbolTT symbolTransform) {
714 nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
715 dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
716 c.bw.addBits16NC(c.state, uint8(nbBitsOut))
717 c.state = c.stateTable[dstState]
718}
719
720// flush will write the tablelog to the output and flush the remaining full bytes.
721func (c *cState) flush(tableLog uint8) {
722 c.bw.flush32()
723 c.bw.addBits16NC(c.state, tableLog)
724}