blob: b69237c9b8f56e86f5c2e49111d86f907300ecb1 [file] [log] [blame]
Scott Bakered4efab2020-01-13 19:12:25 -08001// Copyright 2018 Klaus Post. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5
6package fse
7
8import (
9 "errors"
10 "fmt"
11)
12
13// Compress the input bytes. Input must be < 2GB.
14// Provide a Scratch buffer to avoid memory allocations.
15// Note that the output is also kept in the scratch buffer.
16// If input is too hard to compress, ErrIncompressible is returned.
17// If input is a single byte value repeated ErrUseRLE is returned.
18func Compress(in []byte, s *Scratch) ([]byte, error) {
19 if len(in) <= 1 {
20 return nil, ErrIncompressible
21 }
22 if len(in) > (2<<30)-1 {
23 return nil, errors.New("input too big, must be < 2GB")
24 }
25 s, err := s.prepare(in)
26 if err != nil {
27 return nil, err
28 }
29
30 // Create histogram, if none was provided.
31 maxCount := s.maxCount
32 if maxCount == 0 {
33 maxCount = s.countSimple(in)
34 }
35 // Reset for next run.
36 s.clearCount = true
37 s.maxCount = 0
38 if maxCount == len(in) {
39 // One symbol, use RLE
40 return nil, ErrUseRLE
41 }
42 if maxCount == 1 || maxCount < (len(in)>>7) {
43 // Each symbol present maximum once or too well distributed.
44 return nil, ErrIncompressible
45 }
46 s.optimalTableLog()
47 err = s.normalizeCount()
48 if err != nil {
49 return nil, err
50 }
51 err = s.writeCount()
52 if err != nil {
53 return nil, err
54 }
55
56 if false {
57 err = s.validateNorm()
58 if err != nil {
59 return nil, err
60 }
61 }
62
63 err = s.buildCTable()
64 if err != nil {
65 return nil, err
66 }
67 err = s.compress(in)
68 if err != nil {
69 return nil, err
70 }
71 s.Out = s.bw.out
72 // Check if we compressed.
73 if len(s.Out) >= len(in) {
74 return nil, ErrIncompressible
75 }
76 return s.Out, nil
77}
78
79// cState contains the compression state of a stream.
80type cState struct {
81 bw *bitWriter
82 stateTable []uint16
83 state uint16
84}
85
86// init will initialize the compression state to the first symbol of the stream.
87func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
88 c.bw = bw
89 c.stateTable = ct.stateTable
90
91 nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
92 im := int32((nbBitsOut << 16) - first.deltaNbBits)
93 lu := (im >> nbBitsOut) + first.deltaFindState
94 c.state = c.stateTable[lu]
95 return
96}
97
98// encode the output symbol provided and write it to the bitstream.
99func (c *cState) encode(symbolTT symbolTransform) {
100 nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
101 dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
102 c.bw.addBits16NC(c.state, uint8(nbBitsOut))
103 c.state = c.stateTable[dstState]
104}
105
106// encode the output symbol provided and write it to the bitstream.
107func (c *cState) encodeZero(symbolTT symbolTransform) {
108 nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
109 dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
110 c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut))
111 c.state = c.stateTable[dstState]
112}
113
114// flush will write the tablelog to the output and flush the remaining full bytes.
115func (c *cState) flush(tableLog uint8) {
116 c.bw.flush32()
117 c.bw.addBits16NC(c.state, tableLog)
118 c.bw.flush()
119}
120
// compress is the main compression loop that will encode the input from the last byte to the first.
//
// Symbols are looked up in s.ct.symbolTT and written through s.bw, which is
// first reset with s.Out. Two states, c1 and c2, alternate so that each of
// them encodes every second byte.
//
// It returns an error if src is 2 bytes or shorter; otherwise it returns the
// result of s.bw.close().
func (s *Scratch) compress(src []byte) error {
	if len(src) <= 2 {
		return errors.New("compress: src too small")
	}
	// Fixed length of 256 entries, so any byte value is a valid index.
	tt := s.ct.symbolTT[:256]
	// NOTE(review): s.Out presumably already holds the header produced by
	// writeCount, so the payload follows it - confirm against bitWriter.reset.
	s.bw.reset(s.Out)

	// Our two states each encodes every second byte.
	// Last byte encoded (first byte decoded) will always be encoded by c1.
	var c1, c2 cState

	// Encode so remaining size is divisible by 4.
	// An odd length consumes 3 bytes here (two initialize the states, one is
	// encoded), an even length consumes 2 (both initialize the states).
	ip := len(src)
	if ip&1 == 1 {
		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
		c1.encodeZero(tt[src[ip-3]])
		ip -= 3
	} else {
		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
		ip -= 2
	}
	// ip is even at this point; encode one more pair if it is not a multiple of 4.
	if ip&2 != 0 {
		c2.encodeZero(tt[src[ip-1]])
		c1.encodeZero(tt[src[ip-2]])
		ip -= 2
	}

	// Main compression loop.
	// The four variants differ only in whether encode or encodeZero is used
	// (selected by s.zeroBits) and in whether a second flush32 is issued
	// between the two symbol pairs (needed when the table log exceeds 8).
	switch {
	case !s.zeroBits && s.actualTableLog <= 8:
		// We can encode 4 symbols without requiring a flush.
		// We do not need to check if any output is 0 bits.
		for ip >= 4 {
			s.bw.flush32()
			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
			c2.encode(tt[v0])
			c1.encode(tt[v1])
			c2.encode(tt[v2])
			c1.encode(tt[v3])
			ip -= 4
		}
	case !s.zeroBits:
		// We do not need to check if any output is 0 bits.
		for ip >= 4 {
			s.bw.flush32()
			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
			c2.encode(tt[v0])
			c1.encode(tt[v1])
			s.bw.flush32()
			c2.encode(tt[v2])
			c1.encode(tt[v3])
			ip -= 4
		}
	case s.actualTableLog <= 8:
		// We can encode 4 symbols without requiring a flush
		for ip >= 4 {
			s.bw.flush32()
			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
			c2.encodeZero(tt[v0])
			c1.encodeZero(tt[v1])
			c2.encodeZero(tt[v2])
			c1.encodeZero(tt[v3])
			ip -= 4
		}
	default:
		// Symbols may produce 0 bits and the table log is above 8:
		// use encodeZero and flush after every pair.
		for ip >= 4 {
			s.bw.flush32()
			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
			c2.encodeZero(tt[v0])
			c1.encodeZero(tt[v1])
			s.bw.flush32()
			c2.encodeZero(tt[v2])
			c1.encodeZero(tt[v3])
			ip -= 4
		}
	}

	// Flush final state.
	// Used to initialize state when decoding.
	c2.flush(s.actualTableLog)
	c1.flush(s.actualTableLog)

	return s.bw.close()
}
208
209// writeCount will write the normalized histogram count to header.
210// This is read back by readNCount.
211func (s *Scratch) writeCount() error {
212 var (
213 tableLog = s.actualTableLog
214 tableSize = 1 << tableLog
215 previous0 bool
216 charnum uint16
217
218 maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
219
220 // Write Table Size
221 bitStream = uint32(tableLog - minTablelog)
222 bitCount = uint(4)
223 remaining = int16(tableSize + 1) /* +1 for extra accuracy */
224 threshold = int16(tableSize)
225 nbBits = uint(tableLog + 1)
226 )
227 if cap(s.Out) < maxHeaderSize {
228 s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
229 }
230 outP := uint(0)
231 out := s.Out[:maxHeaderSize]
232
233 // stops at 1
234 for remaining > 1 {
235 if previous0 {
236 start := charnum
237 for s.norm[charnum] == 0 {
238 charnum++
239 }
240 for charnum >= start+24 {
241 start += 24
242 bitStream += uint32(0xFFFF) << bitCount
243 out[outP] = byte(bitStream)
244 out[outP+1] = byte(bitStream >> 8)
245 outP += 2
246 bitStream >>= 16
247 }
248 for charnum >= start+3 {
249 start += 3
250 bitStream += 3 << bitCount
251 bitCount += 2
252 }
253 bitStream += uint32(charnum-start) << bitCount
254 bitCount += 2
255 if bitCount > 16 {
256 out[outP] = byte(bitStream)
257 out[outP+1] = byte(bitStream >> 8)
258 outP += 2
259 bitStream >>= 16
260 bitCount -= 16
261 }
262 }
263
264 count := s.norm[charnum]
265 charnum++
266 max := (2*threshold - 1) - remaining
267 if count < 0 {
268 remaining += count
269 } else {
270 remaining -= count
271 }
272 count++ // +1 for extra accuracy
273 if count >= threshold {
274 count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
275 }
276 bitStream += uint32(count) << bitCount
277 bitCount += nbBits
278 if count < max {
279 bitCount--
280 }
281
282 previous0 = count == 1
283 if remaining < 1 {
284 return errors.New("internal error: remaining<1")
285 }
286 for remaining < threshold {
287 nbBits--
288 threshold >>= 1
289 }
290
291 if bitCount > 16 {
292 out[outP] = byte(bitStream)
293 out[outP+1] = byte(bitStream >> 8)
294 outP += 2
295 bitStream >>= 16
296 bitCount -= 16
297 }
298 }
299
300 out[outP] = byte(bitStream)
301 out[outP+1] = byte(bitStream >> 8)
302 outP += (bitCount + 7) / 8
303
304 if uint16(charnum) > s.symbolLen {
305 return errors.New("internal error: charnum > s.symbolLen")
306 }
307 s.Out = out[:outP]
308 return nil
309}
310
// symbolTransform holds the per-symbol values used to move the encoder
// from one state to the next.
type symbolTransform struct {
	deltaFindState int32
	deltaNbBits    uint32
}

// String returns a human readable rendering of the transform:
// deltaNbBits as 8 hex digits followed by deltaFindState in decimal.
func (t symbolTransform) String() string {
	nbBits, findState := t.deltaNbBits, t.deltaFindState
	return fmt.Sprintf("dnbits: %08x, fs:%d", nbBits, findState)
}
321
// cTable contains tables used for compression.
// The slices are sized by allocCtable and filled by buildCTable.
type cTable struct {
	// tableSymbol holds the symbol assigned to each table position
	// after the symbols have been spread; it has 1<<actualTableLog entries.
	tableSymbol []byte
	// stateTable gives the next state value, sorted by symbol order;
	// it has 1<<actualTableLog entries.
	stateTable []uint16
	// symbolTT holds one state transform per symbol value (256 entries).
	symbolTT []symbolTransform
}
328
329// allocCtable will allocate tables needed for compression.
330// If existing tables a re big enough, they are simply re-used.
331func (s *Scratch) allocCtable() {
332 tableSize := 1 << s.actualTableLog
333 // get tableSymbol that is big enough.
334 if cap(s.ct.tableSymbol) < int(tableSize) {
335 s.ct.tableSymbol = make([]byte, tableSize)
336 }
337 s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
338
339 ctSize := tableSize
340 if cap(s.ct.stateTable) < ctSize {
341 s.ct.stateTable = make([]uint16, ctSize)
342 }
343 s.ct.stateTable = s.ct.stateTable[:ctSize]
344
345 if cap(s.ct.symbolTT) < 256 {
346 s.ct.symbolTT = make([]symbolTransform, 256)
347 }
348 s.ct.symbolTT = s.ct.symbolTT[:256]
349}
350
// buildCTable will populate the compression table so it is ready to be used.
//
// It reads s.norm[:s.symbolLen] and s.actualTableLog, (re)allocates the
// tables through allocCtable and fills s.ct.tableSymbol, s.ct.stateTable and
// s.ct.symbolTT. s.zeroBits is set when any normalized count is larger than
// half the table size.
//
// An error is returned when the normalized counts do not add up to the
// table size or when spreading the symbols does not end at position 0.
func (s *Scratch) buildCTable() error {
	tableSize := uint32(1 << s.actualTableLog)
	// Low probability symbols are placed from the end of the table downwards.
	highThreshold := tableSize - 1
	var cumul [maxSymbolValue + 2]int16

	s.allocCtable()
	tableSymbol := s.ct.tableSymbol[:tableSize]
	// symbol start positions
	{
		cumul[0] = 0
		for ui, v := range s.norm[:s.symbolLen-1] {
			u := byte(ui) // one less than reference
			if v == -1 {
				// Low proba symbol
				cumul[u+1] = cumul[u] + 1
				tableSymbol[highThreshold] = u
				highThreshold--
			} else {
				cumul[u+1] = cumul[u] + v
			}
		}
		// Encode last symbol separately to avoid overflowing u
		u := int(s.symbolLen - 1)
		v := s.norm[s.symbolLen-1]
		if v == -1 {
			// Low proba symbol
			cumul[u+1] = cumul[u] + 1
			tableSymbol[highThreshold] = byte(u)
			highThreshold--
		} else {
			cumul[u+1] = cumul[u] + v
		}
		// The cumulative count of all symbols must equal the table size.
		if uint32(cumul[s.symbolLen]) != tableSize {
			return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
		}
		cumul[s.symbolLen] = int16(tableSize) + 1
	}
	// Spread symbols
	s.zeroBits = false
	{
		step := tableStep(tableSize)
		tableMask := tableSize - 1
		var position uint32
		// if any symbol > largeLimit, we may have 0 bits output.
		largeLimit := int16(1 << (s.actualTableLog - 1))
		for ui, v := range s.norm[:s.symbolLen] {
			symbol := byte(ui)
			if v > largeLimit {
				s.zeroBits = true
			}
			// Entries with v <= 0 (absent or low probability symbols) are
			// skipped by this loop.
			for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
				tableSymbol[position] = symbol
				position = (position + step) & tableMask
				// Skip positions already taken by low probability symbols.
				for position > highThreshold {
					position = (position + step) & tableMask
				} /* Low proba area */
			}
		}

		// Check if we have gone through all positions
		if position != 0 {
			return errors.New("position!=0")
		}
	}

	// Build table
	table := s.ct.stateTable
	{
		tsi := int(tableSize)
		for u, v := range tableSymbol {
			// TableU16 : sorted by symbol order; gives next state value
			table[cumul[v]] = uint16(tsi + u)
			cumul[v]++
		}
	}

	// Build Symbol Transformation Table
	{
		// total is the running sum of table slots used by the symbols so far.
		total := int16(0)
		symbolTT := s.ct.symbolTT[:s.symbolLen]
		tableLog := s.actualTableLog
		// Transform for symbols occupying a single slot: always tableLog bits.
		tl := (uint32(tableLog) << 16) - (1 << tableLog)
		for i, v := range s.norm[:s.symbolLen] {
			switch v {
			case 0:
				// Symbol not present; its transform is left untouched.
			case -1, 1:
				symbolTT[i].deltaNbBits = tl
				symbolTT[i].deltaFindState = int32(total - 1)
				total++
			default:
				maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
				minStatePlus := uint32(v) << maxBitsOut
				symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
				symbolTT[i].deltaFindState = int32(total - v)
				total += v
			}
		}
		if total != int16(tableSize) {
			return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
		}
	}
	return nil
}
455
456// countSimple will create a simple histogram in s.count.
457// Returns the biggest count.
458// Does not update s.clearCount.
459func (s *Scratch) countSimple(in []byte) (max int) {
460 for _, v := range in {
461 s.count[v]++
462 }
463 m := uint32(0)
464 for i, v := range s.count[:] {
465 if v > m {
466 m = v
467 }
468 if v > 0 {
469 s.symbolLen = uint16(i) + 1
470 }
471 }
472 return int(m)
473}
474
475// minTableLog provides the minimum logSize to safely represent a distribution.
476func (s *Scratch) minTableLog() uint8 {
477 minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1
478 minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2
479 if minBitsSrc < minBitsSymbols {
480 return uint8(minBitsSrc)
481 }
482 return uint8(minBitsSymbols)
483}
484
485// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
486func (s *Scratch) optimalTableLog() {
487 tableLog := s.TableLog
488 minBits := s.minTableLog()
489 maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2
490 if maxBitsSrc < tableLog {
491 // Accuracy can be reduced
492 tableLog = maxBitsSrc
493 }
494 if minBits > tableLog {
495 tableLog = minBits
496 }
497 // Need a minimum to safely represent all symbol values
498 if tableLog < minTablelog {
499 tableLog = minTablelog
500 }
501 if tableLog > maxTableLog {
502 tableLog = maxTableLog
503 }
504 s.actualTableLog = tableLog
505}
506
// rtbTable holds the "rest to beat" rounding thresholds used by
// normalizeCount, indexed by a symbol's truncated probability (0 to 7).
// normalizeCount scales the entry by vStep and rounds the probability up
// when the fractional remainder is larger than the scaled threshold.
var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
508
// normalizeCount will normalize the count of the symbols so
// the total is equal to the table size.
//
// It reads s.count[:s.symbolLen] and writes s.norm: 0 for absent symbols,
// -1 for symbols at or below the low threshold, otherwise a scaled
// probability. The rounding difference is added to the most probable symbol.
// When that difference is too large compared to the biggest entry,
// normalizeCount2 is used instead and its result is returned.
func (s *Scratch) normalizeCount() error {
	var (
		tableLog = s.actualTableLog
		// Counts are scaled with 62 bit fixed point arithmetic.
		scale             = 62 - uint64(tableLog)
		step              = (1 << 62) / uint64(s.br.remain())
		vStep             = uint64(1) << (scale - 20)
		stillToDistribute = int16(1 << tableLog)
		// Index and value of the biggest probability assigned so far.
		largest  int
		largestP int16
		// Counts at or below this get the low probability marker -1.
		lowThreshold = (uint32)(s.br.remain() >> tableLog)
	)

	for i, cnt := range s.count[:s.symbolLen] {
		// already handled
		//  if (count[s] == s.length) return 0;   /* rle special case */

		if cnt == 0 {
			s.norm[i] = 0
			continue
		}
		if cnt <= lowThreshold {
			// Low probability symbol: takes one slot of the table.
			s.norm[i] = -1
			stillToDistribute--
		} else {
			proba := (int16)((uint64(cnt) * step) >> scale)
			if proba < 8 {
				// Small probabilities are rounded up when the truncated
				// fraction is above the threshold from rtbTable.
				restToBeat := vStep * uint64(rtbTable[proba])
				v := uint64(cnt)*step - (uint64(proba) << scale)
				if v > restToBeat {
					proba++
				}
			}
			if proba > largestP {
				largestP = proba
				largest = i
			}
			s.norm[i] = proba
			stillToDistribute -= proba
		}
	}

	// stillToDistribute is negative when too many slots were handed out.
	if -stillToDistribute >= (s.norm[largest] >> 1) {
		// corner case, need another normalization method
		return s.normalizeCount2()
	}
	// Give (or take) the difference to the most probable symbol.
	s.norm[largest] += stillToDistribute
	return nil
}
559
// Secondary normalization method.
// To be used when primary method fails.
//
// It reads s.count[:s.symbolLen] and rewrites s.norm. Rare symbols get -1 or
// 1 first; the table slots that remain are then shared between the other
// symbols in proportion to their counts.
// An error is returned if a symbol would end up with a weight below 1.
func (s *Scratch) normalizeCount2() error {
	// Marker for symbols that still need a weight.
	const notYetAssigned = -2
	var (
		// Number of table slots handed out so far.
		distributed uint32
		// Sum of the counts of the symbols that have no weight yet.
		total        = uint32(s.br.remain())
		tableLog     = s.actualTableLog
		lowThreshold = uint32(total >> tableLog)
		lowOne       = uint32((total * 3) >> (tableLog + 1))
	)
	for i, cnt := range s.count[:s.symbolLen] {
		if cnt == 0 {
			s.norm[i] = 0
			continue
		}
		if cnt <= lowThreshold {
			s.norm[i] = -1
			distributed++
			total -= cnt
			continue
		}
		if cnt <= lowOne {
			s.norm[i] = 1
			distributed++
			total -= cnt
			continue
		}
		s.norm[i] = notYetAssigned
	}
	toDistribute := (1 << tableLog) - distributed

	if (total / toDistribute) > lowOne {
		// risk of rounding to zero
		lowOne = uint32((total * 3) / (toDistribute * 2))
		for i, cnt := range s.count[:s.symbolLen] {
			if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
				s.norm[i] = 1
				distributed++
				total -= cnt
				continue
			}
		}
		toDistribute = (1 << tableLog) - distributed
	}
	// NOTE(review): distributed is incremented at most once per symbol in
	// the loops above, so it cannot exceed s.symbolLen and this condition
	// appears to never be true. Confirm whether s.symbolLen was intended.
	if distributed == uint32(s.symbolLen)+1 {
		// all values are pretty poor;
		//   probably incompressible data (should have already been detected);
		//   find max, then give all remaining points to max
		var maxV int
		var maxC uint32
		for i, cnt := range s.count[:s.symbolLen] {
			if cnt > maxC {
				maxV = i
				maxC = cnt
			}
		}
		s.norm[maxV] += int16(toDistribute)
		return nil
	}

	if total == 0 {
		// all of the symbols were low enough for the lowOne or lowThreshold
		// Hand out the remaining slots one at a time, cycling over the
		// symbols that have a positive weight.
		for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
			if s.norm[i] > 0 {
				toDistribute--
				s.norm[i]++
			}
		}
		return nil
	}

	var (
		// Fixed point scale used to share the remaining slots.
		vStepLog = 62 - uint64(tableLog)
		mid      = uint64((1 << (vStepLog - 1)) - 1)
		rStep    = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
		tmpTotal = mid
	)
	for i, cnt := range s.count[:s.symbolLen] {
		if s.norm[i] == notYetAssigned {
			// The weight is the number of whole slots covered by this
			// symbol's share of the running total.
			var (
				end    = tmpTotal + uint64(cnt)*rStep
				sStart = uint32(tmpTotal >> vStepLog)
				sEnd   = uint32(end >> vStepLog)
				weight = sEnd - sStart
			)
			if weight < 1 {
				return errors.New("weight < 1")
			}
			s.norm[i] = int16(weight)
			tmpTotal = end
		}
	}
	return nil
}
655
656// validateNorm validates the normalized histogram table.
657func (s *Scratch) validateNorm() (err error) {
658 var total int
659 for _, v := range s.norm[:s.symbolLen] {
660 if v >= 0 {
661 total += int(v)
662 } else {
663 total -= int(v)
664 }
665 }
666 defer func() {
667 if err == nil {
668 return
669 }
670 fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
671 for i, v := range s.norm[:s.symbolLen] {
672 fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
673 }
674 }()
675 if total != (1 << s.actualTableLog) {
676 return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
677 }
678 for i, v := range s.count[s.symbolLen:] {
679 if v != 0 {
680 return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
681 }
682 }
683 return nil
684}