vendor/github.com/klauspost/compress/fse/compress.go - voltha-go - Gitiles

 // Copyright 2018 Klaus Post. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 // Based on work Copyright (c) 2013, Yann Collet, released under BSD License.

 package fse

 import (
 	"errors"
 	"fmt"
 )

 // Compress entropy-codes in with FSE and returns the encoded bytes.
 // The input must be smaller than 2GB.
 // Supplying a Scratch lets buffers be reused between calls; the returned
 // slice is also kept in the Scratch as s.Out.
 // ErrIncompressible is returned when the input is too short, too evenly
 // distributed, or does not get smaller. ErrUseRLE is returned when the
 // input is a single byte value repeated.
 func Compress(in []byte, s *Scratch) ([]byte, error) {
 	switch {
 	case len(in) <= 1:
 		return nil, ErrIncompressible
 	case len(in) > (2<<30)-1:
 		return nil, errors.New("input too big, must be < 2GB")
 	}
 	s, err := s.prepare(in)
 	if err != nil {
 		return nil, err
 	}

 	// Reuse a histogram that was provided up front, otherwise build one now.
 	largest := s.maxCount
 	if largest == 0 {
 		largest = s.countSimple(in)
 	}
 	// Reset for the next run.
 	s.clearCount = true
 	s.maxCount = 0

 	switch {
 	case largest == len(in):
 		// Only one distinct symbol, RLE is the better choice.
 		return nil, ErrUseRLE
 	case largest == 1, largest < (len(in) >> 7):
 		// Every symbol occurs at most once, or the distribution is too flat.
 		return nil, ErrIncompressible
 	}

 	s.optimalTableLog()
 	if err = s.normalizeCount(); err != nil {
 		return nil, err
 	}
 	if err = s.writeCount(); err != nil {
 		return nil, err
 	}

 	// Disabled consistency check of the normalized counts.
 	if false {
 		if err = s.validateNorm(); err != nil {
 			return nil, err
 		}
 	}

 	if err = s.buildCTable(); err != nil {
 		return nil, err
 	}
 	if err = s.compress(in); err != nil {
 		return nil, err
 	}

 	// The bit writer owns the final output; publish it before the size check.
 	s.Out = s.bw.out
 	if len(s.Out) >= len(in) {
 		return nil, ErrIncompressible
 	}
 	return s.Out, nil
 }

 // cState contains the compression state of a stream.
 // It is set up by init and advanced one symbol at a time by encode or
 // encodeZero; flush writes the final state to the bit writer.
 type cState struct {
 	// bw receives the bits emitted while encoding.
 	bw         *bitWriter
 	// stateTable is the next-state lookup, copied from cTable.stateTable by init.
 	stateTable []uint16
 	// state is the current encoder state.
 	state      uint16
 }

 // init prepares the state for encoding: it stores the bit writer and the
 // state table of ct, and selects the starting state from the transform of
 // the first symbol of the stream. No bits are written.
 // tableLog is accepted but not used.
 func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
 	c.bw, c.stateTable = bw, ct.stateTable

 	delta := first.deltaNbBits
 	bits := (delta + (1 << 15)) >> 16
 	base := int32((bits << 16) - delta)
 	c.state = c.stateTable[(base>>bits)+first.deltaFindState]
 }

 // encode writes the bits for the supplied symbol transform to the bitstream
 // using addBits16NC and moves on to the next state.
 func (c *cState) encode(symbolTT symbolTransform) {
 	cur := c.state
 	n := (uint32(cur) + symbolTT.deltaNbBits) >> 16
 	next := int32(cur>>(n&15)) + symbolTT.deltaFindState
 	c.bw.addBits16NC(cur, uint8(n))
 	c.state = c.stateTable[next]
 }

 // encodeZero does the same as encode, but writes the bits with
 // addBits16ZeroNC. compress uses it for the symbols encoded before the main
 // loop and whenever s.zeroBits indicates a symbol may be coded with 0 bits.
 func (c *cState) encodeZero(symbolTT symbolTransform) {
 	cur := c.state
 	n := (uint32(cur) + symbolTT.deltaNbBits) >> 16
 	next := int32(cur>>(n&15)) + symbolTT.deltaFindState
 	c.bw.addBits16ZeroNC(cur, uint8(n))
 	c.state = c.stateTable[next]
 }

 // flush writes the current state to the output using tableLog bits.
 // The bit writer is flushed with flush32 before the state is added and with
 // flush afterwards.
 func (c *cState) flush(tableLog uint8) {
 	w := c.bw
 	w.flush32()
 	w.addBits16NC(c.state, tableLog)
 	w.flush()
 }

 // compress is the main compression loop that will encode the input from the last byte to the first.
 // It reads the symbol transforms from s.ct.symbolTT and writes through s.bw,
 // which is reset onto s.Out first.
 // NOTE(review): presumably the reset keeps the header written by writeCount
 // and appends after it - confirm against bitWriter.reset.
 // An error is returned when src has 2 bytes or fewer; otherwise the result
 // of closing the bit writer is returned.
 func (s *Scratch) compress(src []byte) error {
 	if len(src) <= 2 {
 		return errors.New("compress: src too small")
 	}
 	// Re-slice to 256 entries so every byte value is a valid index.
 	tt := s.ct.symbolTT[:256]
 	s.bw.reset(s.Out)

 	// Our two states each encodes every second byte.
 	// Last byte encoded (first byte decoded) will always be encoded by c1.
 	var c1, c2 cState

 	// Encode so remaining size is divisible by 4.
 	ip := len(src)
 	if ip&1 == 1 {
 		// Odd length: the last two bytes initialize the states and a third
 		// byte is encoded, leaving an even number of bytes.
 		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
 		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
 		c1.encodeZero(tt[src[ip-3]])
 		ip -= 3
 	} else {
 		// Even length: the last two bytes only initialize the states.
 		c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
 		c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
 		ip -= 2
 	}
 	if ip&2 != 0 {
 		// Encode two more bytes so ip becomes a multiple of 4.
 		c2.encodeZero(tt[src[ip-1]])
 		c1.encodeZero(tt[src[ip-2]])
 		ip -= 2
 	}

 	// Main compression loop.
 	// All four variants encode 4 bytes per iteration, alternating c2 and c1.
 	// They differ in whether encode or encodeZero is used and in whether an
 	// extra flush32 is done after the first two symbols.
 	switch {
 	case !s.zeroBits && s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush.
 		// We do not need to check if any output is 0 bits.
 		for ip >= 4 {
 			s.bw.flush32()
 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
 			ip -= 4
 		}
 	case !s.zeroBits:
 		// We do not need to check if any output is 0 bits.
 		for ip >= 4 {
 			s.bw.flush32()
 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
 			c2.encode(tt[v0])
 			c1.encode(tt[v1])
 			s.bw.flush32()
 			c2.encode(tt[v2])
 			c1.encode(tt[v3])
 			ip -= 4
 		}
 	case s.actualTableLog <= 8:
 		// We can encode 4 symbols without requiring a flush
 		for ip >= 4 {
 			s.bw.flush32()
 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
 			ip -= 4
 		}
 	default:
 		// Symbols may use 0 bits and the table log is above 8.
 		for ip >= 4 {
 			s.bw.flush32()
 			v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
 			c2.encodeZero(tt[v0])
 			c1.encodeZero(tt[v1])
 			s.bw.flush32()
 			c2.encodeZero(tt[v2])
 			c1.encodeZero(tt[v3])
 			ip -= 4
 		}
 	}

 	// Flush final state.
 	// Used to initialize state when decoding.
 	c2.flush(s.actualTableLog)
 	c1.flush(s.actualTableLog)

 	return s.bw.close()
 }

 // writeCount will write the normalized histogram count to header.
 // This is read back by readNCount.
 // The header is written at the start of s.Out, which is reallocated when its
 // capacity is below the computed bound; on success s.Out holds only the
 // header bytes.
 // An error is returned if the normalized counts are inconsistent or if the
 // header would not fit inside the computed bound.
 func (s *Scratch) writeCount() error {
 	var (
 		tableLog  = s.actualTableLog
 		tableSize = 1 << tableLog
 		previous0 bool
 		charnum   uint16

 		// Upper bound for the header size in bytes. On top of the symbol
 		// bits it accounts for the 4 bits holding the table log and 2 bits
 		// because the first two symbols may each use one additional bit,
 		// plus 1 byte for rounding up and 2 bytes for the final flush.
 		maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3

 		// Write Table Size
 		bitStream = uint32(tableLog - minTablelog)
 		bitCount  = uint(4)
 		remaining = int16(tableSize + 1) /* +1 for extra accuracy */
 		threshold = int16(tableSize)
 		nbBits    = uint(tableLog + 1)
 	)
 	if cap(s.Out) < maxHeaderSize {
 		s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
 	}
 	outP := uint(0)
 	out := s.Out[:maxHeaderSize]

 	// stops at 1
 	for remaining > 1 {
 		if previous0 {
 			// The previous symbol had a count of 0: write the length of
 			// the run of zero counts that follows.
 			start := charnum
 			for s.norm[charnum] == 0 {
 				charnum++
 			}
 			// Runs of 24 are written as 16 set bits.
 			for charnum >= start+24 {
 				start += 24
 				bitStream += uint32(0xFFFF) << bitCount
 				out[outP] = byte(bitStream)
 				out[outP+1] = byte(bitStream >> 8)
 				outP += 2
 				bitStream >>= 16
 			}
 			// Runs of 3 are written as 2 set bits.
 			for charnum >= start+3 {
 				start += 3
 				bitStream += 3 << bitCount
 				bitCount += 2
 			}
 			// The remainder (0-2) is written in 2 bits.
 			bitStream += uint32(charnum-start) << bitCount
 			bitCount += 2
 			if bitCount > 16 {
 				out[outP] = byte(bitStream)
 				out[outP+1] = byte(bitStream >> 8)
 				outP += 2
 				bitStream >>= 16
 				bitCount -= 16
 			}
 		}

 		count := s.norm[charnum]
 		charnum++
 		max := (2*threshold - 1) - remaining
 		if count < 0 {
 			remaining += count
 		} else {
 			remaining -= count
 		}
 		count++ // +1 for extra accuracy
 		if count >= threshold {
 			count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
 		}
 		bitStream += uint32(count) << bitCount
 		bitCount += nbBits
 		if count < max {
 			bitCount--
 		}

 		previous0 = count == 1
 		if remaining < 1 {
 			return errors.New("internal error: remaining<1")
 		}
 		// Fewer bits are needed once the remaining total drops below the threshold.
 		for remaining < threshold {
 			nbBits--
 			threshold >>= 1
 		}

 		if bitCount > 16 {
 			out[outP] = byte(bitStream)
 			out[outP+1] = byte(bitStream >> 8)
 			outP += 2
 			bitStream >>= 16
 			bitCount -= 16
 		}
 	}

 	// The final flush always stores two bytes; report an error instead of
 	// indexing past the buffer.
 	if outP+2 > uint(len(out)) {
 		return errors.New("internal error: header exceeds maxHeaderSize")
 	}
 	out[outP] = byte(bitStream)
 	out[outP+1] = byte(bitStream >> 8)
 	outP += (bitCount + 7) / 8

 	if charnum > s.symbolLen {
 		return errors.New("internal error: charnum > s.symbolLen")
 	}
 	s.Out = out[:outP]
 	return nil
 }

 // symbolTransform holds the two per-symbol values the encoder uses to move
 // from one state to the next.
 type symbolTransform struct {
 	deltaFindState int32
 	deltaNbBits    uint32
 }

 // String returns the transform as a human readable string: deltaNbBits as
 // 8 hex digits followed by deltaFindState in decimal.
 func (s symbolTransform) String() string {
 	const layout = "dnbits: %08x, fs:%d"
 	return fmt.Sprintf(layout, s.deltaNbBits, s.deltaFindState)
 }

 // cTable contains tables used for compression.
 // The slices are sized by allocCtable and filled in by buildCTable.
 type cTable struct {
 	// tableSymbol holds the symbol placed at each table position.
 	tableSymbol []byte
 	// stateTable holds next state values, sorted by symbol order.
 	stateTable  []uint16
 	// symbolTT holds the transform of each symbol; allocCtable keeps it at 256 entries.
 	symbolTT    []symbolTransform
 }

 // allocCtable makes sure the compression tables have the size required for
 // the current table log. Tables with enough capacity are re-sliced and
 // re-used, otherwise new ones are allocated. The symbol transform table
 // always gets 256 entries.
 func (s *Scratch) allocCtable() {
 	ct := &s.ct
 	n := 1 << s.actualTableLog

 	// One symbol per table position.
 	if cap(ct.tableSymbol) >= n {
 		ct.tableSymbol = ct.tableSymbol[:n]
 	} else {
 		ct.tableSymbol = make([]byte, n)
 	}

 	// One next-state entry per table position.
 	if cap(ct.stateTable) >= n {
 		ct.stateTable = ct.stateTable[:n]
 	} else {
 		ct.stateTable = make([]uint16, n)
 	}

 	const symbols = 256
 	if cap(ct.symbolTT) >= symbols {
 		ct.symbolTT = ct.symbolTT[:symbols]
 	} else {
 		ct.symbolTT = make([]symbolTransform, symbols)
 	}
 }

 // buildCTable will populate the compression table so it is ready to be used.
 // It reads the normalized counts in s.norm and fills s.ct.tableSymbol,
 // s.ct.stateTable and s.ct.symbolTT. s.zeroBits is set when any symbol has a
 // normalized count above half the table size.
 // An error is returned when the normalized counts do not add up to the
 // table size or the symbol spreading does not end at position 0.
 func (s *Scratch) buildCTable() error {
 	tableSize := uint32(1 << s.actualTableLog)
 	// Highest free table position; -1 symbols are placed from here downwards.
 	highThreshold := tableSize - 1
 	// cumul[i] is the first stateTable index used by symbol i.
 	var cumul [maxSymbolValue + 2]int16

 	s.allocCtable()
 	tableSymbol := s.ct.tableSymbol[:tableSize]
 	// symbol start positions
 	{
 		cumul[0] = 0
 		for ui, v := range s.norm[:s.symbolLen-1] {
 			u := byte(ui) // one less than reference
 			if v == -1 {
 				// Low proba symbol
 				cumul[u+1] = cumul[u] + 1
 				tableSymbol[highThreshold] = u
 				highThreshold--
 			} else {
 				cumul[u+1] = cumul[u] + v
 			}
 		}
 		// Encode last symbol separately to avoid overflowing u
 		u := int(s.symbolLen - 1)
 		v := s.norm[s.symbolLen-1]
 		if v == -1 {
 			// Low proba symbol
 			cumul[u+1] = cumul[u] + 1
 			tableSymbol[highThreshold] = byte(u)
 			highThreshold--
 		} else {
 			cumul[u+1] = cumul[u] + v
 		}
 		// The start positions must cover the table exactly.
 		if uint32(cumul[s.symbolLen]) != tableSize {
 			return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
 		}
 		cumul[s.symbolLen] = int16(tableSize) + 1
 	}
 	// Spread symbols
 	s.zeroBits = false
 	{
 		step := tableStep(tableSize)
 		tableMask := tableSize - 1
 		var position uint32
 		// if any symbol > largeLimit, we may have 0 bits output.
 		largeLimit := int16(1 << (s.actualTableLog - 1))
 		for ui, v := range s.norm[:s.symbolLen] {
 			symbol := byte(ui)
 			if v > largeLimit {
 				s.zeroBits = true
 			}
 			// Place v occurrences of the symbol, stepping through the table
 			// and skipping the positions reserved above highThreshold.
 			for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
 				tableSymbol[position] = symbol
 				position = (position + step) & tableMask
 				for position > highThreshold {
 					position = (position + step) & tableMask
 				} /* Low proba area */
 			}
 		}

 		// Check if we have gone through all positions
 		if position != 0 {
 			return errors.New("position!=0")
 		}
 	}

 	// Build table
 	table := s.ct.stateTable
 	{
 		tsi := int(tableSize)
 		for u, v := range tableSymbol {
 			// TableU16 : sorted by symbol order; gives next state value
 			table[cumul[v]] = uint16(tsi + u)
 			// Advance to the next slot belonging to symbol v.
 			cumul[v]++
 		}
 	}

 	// Build Symbol Transformation Table
 	{
 		// total is the running sum of table slots used by earlier symbols.
 		total := int16(0)
 		symbolTT := s.ct.symbolTT[:s.symbolLen]
 		tableLog := s.actualTableLog
 		// deltaNbBits shared by all symbols that occupy a single slot.
 		tl := (uint32(tableLog) << 16) - (1 << tableLog)
 		for i, v := range s.norm[:s.symbolLen] {
 			switch v {
 			case 0:
 				// Symbol not present; its transform is left untouched.
 			case -1, 1:
 				symbolTT[i].deltaNbBits = tl
 				symbolTT[i].deltaFindState = int32(total - 1)
 				total++
 			default:
 				maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
 				minStatePlus := uint32(v) << maxBitsOut
 				symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
 				symbolTT[i].deltaFindState = int32(total - v)
 				total += v
 			}
 		}
 		if total != int16(tableSize) {
 			return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
 		}
 	}
 	return nil
 }

 // countSimple builds a histogram of in by adding to s.count and returns the
 // biggest count found in s.count.
 // s.symbolLen is set to one more than the highest symbol with a non-zero
 // count. s.clearCount is not updated.
 func (s *Scratch) countSimple(in []byte) (max int) {
 	for i := range in {
 		s.count[in[i]]++
 	}
 	var biggest uint32
 	for sym := range s.count {
 		c := s.count[sym]
 		if c == 0 {
 			continue
 		}
 		s.symbolLen = uint16(sym) + 1
 		if c > biggest {
 			biggest = c
 		}
 	}
 	return int(biggest)
 }

 // minTableLog returns the minimum table log that can safely represent the
 // distribution: the smaller of the value derived from the remaining input
 // size and the value derived from the number of symbols.
 func (s *Scratch) minTableLog() uint8 {
 	fromSrc := highBits(uint32(s.br.remain()-1)) + 1
 	fromSymbols := highBits(uint32(s.symbolLen-1)) + 2
 	smallest := fromSymbols
 	if fromSrc < fromSymbols {
 		smallest = fromSrc
 	}
 	return uint8(smallest)
 }

 // optimalTableLog picks the table log for the current input and stores it in
 // s.actualTableLog. It starts from s.TableLog, lowers it for small inputs,
 // raises it to the minimum from minTableLog and finally keeps it within
 // minTablelog and maxTableLog.
 func (s *Scratch) optimalTableLog() {
 	chosen := s.TableLog
 	lowest := s.minTableLog()
 	// The subtraction is done in uint8 and wraps around for tiny inputs, in
 	// which case the comparison below leaves chosen unchanged.
 	srcLimit := uint8(highBits(uint32(s.br.remain()-1))) - 2
 	if chosen > srcLimit {
 		// Accuracy can be reduced.
 		chosen = srcLimit
 	}
 	if chosen < lowest {
 		chosen = lowest
 	}
 	// A minimum is needed to safely represent all symbol values.
 	if minTablelog > chosen {
 		chosen = minTablelog
 	}
 	if maxTableLog < chosen {
 		chosen = maxTableLog
 	}
 	s.actualTableLog = chosen
 }

 // rtbTable holds the rounding thresholds ("rest to beat") used by
 // normalizeCount, indexed by the truncated probability (0-7). An entry is
 // multiplied by vStep and the probability is rounded up when the truncated
 // remainder exceeds that value.
 var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}

 // normalizeCount scales the histogram in s.count into s.norm so the
 // normalized counts add up to the table size (1 << s.actualTableLog).
 // Symbols that occur at most lowThreshold times get -1. When the correction
 // that would have to be applied to the largest symbol is too big,
 // normalizeCount2 is used instead.
 func (s *Scratch) normalizeCount() error {
 	tableLog := s.actualTableLog
 	scale := 62 - uint64(tableLog)
 	step := (1 << 62) / uint64(s.br.remain())
 	vStep := uint64(1) << (scale - 20)
 	left := int16(1 << tableLog)
 	lowThreshold := uint32(s.br.remain() >> tableLog)

 	var (
 		topSym   int
 		topProba int16
 	)
 	for sym, cnt := range s.count[:s.symbolLen] {
 		// The single-symbol (RLE) case has already been handled by the caller.
 		switch {
 		case cnt == 0:
 			s.norm[sym] = 0
 		case cnt <= lowThreshold:
 			s.norm[sym] = -1
 			left--
 		default:
 			scaled := uint64(cnt) * step
 			proba := int16(scaled >> scale)
 			// Small probabilities are rounded up when the remainder beats
 			// the threshold from rtbTable.
 			if proba < 8 && scaled-(uint64(proba)<<scale) > vStep*uint64(rtbTable[proba]) {
 				proba++
 			}
 			if proba > topProba {
 				topProba, topSym = proba, sym
 			}
 			s.norm[sym] = proba
 			left -= proba
 		}
 	}

 	if -left >= (s.norm[topSym] >> 1) {
 		// Corner case, another normalization method is needed.
 		return s.normalizeCount2()
 	}
 	// Give the rounding difference to the most frequent symbol.
 	s.norm[topSym] += left
 	return nil
 }

 // normalizeCount2 is the secondary normalization method.
 // It is called by normalizeCount when the primary method fails.
 // Counts at or below lowThreshold get -1 and counts at or below lowOne get
 // 1; the table slots that are left are shared between the other symbols in
 // proportion to their counts.
 // An error is returned if one of those symbols would get a weight below 1.
 func (s *Scratch) normalizeCount2() error {
 	// Marker for symbols that still need a normalized count.
 	const notYetAssigned = -2
 	var (
 		distributed  uint32
 		total        = uint32(s.br.remain())
 		tableLog     = s.actualTableLog
 		lowThreshold = total >> tableLog
 		lowOne       = (total * 3) >> (tableLog + 1)
 	)
 	// First pass: settle the rare symbols; total keeps the sum of the
 	// counts that are still unassigned.
 	for i, cnt := range s.count[:s.symbolLen] {
 		if cnt == 0 {
 			s.norm[i] = 0
 			continue
 		}
 		if cnt <= lowThreshold {
 			s.norm[i] = -1
 			distributed++
 			total -= cnt
 			continue
 		}
 		if cnt <= lowOne {
 			s.norm[i] = 1
 			distributed++
 			total -= cnt
 			continue
 		}
 		s.norm[i] = notYetAssigned
 	}
 	toDistribute := (1 << tableLog) - distributed

 	if (total / toDistribute) > lowOne {
 		// risk of rounding to zero
 		lowOne = (total * 3) / (toDistribute * 2)
 		for i, cnt := range s.count[:s.symbolLen] {
 			if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
 				s.norm[i] = 1
 				distributed++
 				total -= cnt
 				continue
 			}
 		}
 		toDistribute = (1 << tableLog) - distributed
 	}
 	// NOTE(review): distributed is incremented at most once per symbol, so it
 	// cannot exceed s.symbolLen and this branch looks unreachable. The C
 	// reference appears to compare against maxSymbolValue+1 - confirm the
 	// intended bound.
 	if distributed == uint32(s.symbolLen)+1 {
 		// all values are pretty poor;
 		//   probably incompressible data (should have already been detected);
 		//   find max, then give all remaining points to max
 		var maxV int
 		var maxC uint32
 		for i, cnt := range s.count[:s.symbolLen] {
 			if cnt > maxC {
 				maxV = i
 				maxC = cnt
 			}
 		}
 		s.norm[maxV] += int16(toDistribute)
 		return nil
 	}

 	if total == 0 {
 		// all of the symbols were low enough for the lowOne or lowThreshold
 		// Hand out the remaining slots one at a time to symbols with a
 		// positive count, wrapping around until none are left.
 		for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
 			if s.norm[i] > 0 {
 				toDistribute--
 				s.norm[i]++
 			}
 		}
 		return nil
 	}

 	var (
 		vStepLog = 62 - uint64(tableLog)
 		mid      = uint64((1 << (vStepLog - 1)) - 1)
 		rStep    = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
 		tmpTotal = mid
 	)
 	// Each remaining symbol gets the number of whole steps its count adds
 	// to the running fixed-point total.
 	for i, cnt := range s.count[:s.symbolLen] {
 		if s.norm[i] == notYetAssigned {
 			var (
 				end    = tmpTotal + uint64(cnt)*rStep
 				sStart = uint32(tmpTotal >> vStepLog)
 				sEnd   = uint32(end >> vStepLog)
 				weight = sEnd - sStart
 			)
 			if weight < 1 {
 				return errors.New("weight < 1")
 			}
 			s.norm[i] = int16(weight)
 			tmpTotal = end
 		}
 	}
 	return nil
 }

 // validateNorm checks the normalized histogram: the absolute values in
 // s.norm must add up to the table size and no count may be present beyond
 // s.symbolLen. When a check fails, the table is printed to standard output
 // before the error is returned.
 func (s *Scratch) validateNorm() (err error) {
 	sum := 0
 	for _, n := range s.norm[:s.symbolLen] {
 		w := int(n)
 		if w < 0 {
 			w = -w
 		}
 		sum += w
 	}
 	// Dump the table whenever an error is returned below.
 	defer func() {
 		if err != nil {
 			fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
 			for sym, n := range s.norm[:s.symbolLen] {
 				fmt.Printf("%3d: %5d -> %4d \n", sym, s.count[sym], n)
 			}
 		}
 	}()
 	if want := 1 << s.actualTableLog; sum != want {
 		return fmt.Errorf("warning: Total == %d != %d", sum, want)
 	}
 	for off, c := range s.count[s.symbolLen:] {
 		if c != 0 {
 			return fmt.Errorf("warning: Found symbol out of range, %d after cut", off)
 		}
 	}
 	return nil
 }
	// Copyright 2018 Klaus Post. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.
	// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.

	package fse

	import (
	"errors"
	"fmt"
	)

	// Compress the input bytes. Input must be < 2GB.
	// Provide a Scratch buffer to avoid memory allocations.
	// Note that the output is also kept in the scratch buffer.
	// If input is too hard to compress, ErrIncompressible is returned.
	// If input is a single byte value repeated ErrUseRLE is returned.
	func Compress(in []byte, s *Scratch) ([]byte, error) {
	if len(in) <= 1 {
	return nil, ErrIncompressible
	}
	if len(in) > (2<<30)-1 {
	return nil, errors.New("input too big, must be < 2GB")
	}
	s, err := s.prepare(in)
	if err != nil {
	return nil, err
	}

	// Create histogram, if none was provided.
	maxCount := s.maxCount
	if maxCount == 0 {
	maxCount = s.countSimple(in)
	}
	// Reset for next run.
	s.clearCount = true
	s.maxCount = 0
	if maxCount == len(in) {
	// One symbol, use RLE
	return nil, ErrUseRLE
	}
	if maxCount == 1 \|\| maxCount < (len(in)>>7) {
	// Each symbol present maximum once or too well distributed.
	return nil, ErrIncompressible
	}
	s.optimalTableLog()
	err = s.normalizeCount()
	if err != nil {
	return nil, err
	}
	err = s.writeCount()
	if err != nil {
	return nil, err
	}

	if false {
	err = s.validateNorm()
	if err != nil {
	return nil, err
	}
	}

	err = s.buildCTable()
	if err != nil {
	return nil, err
	}
	err = s.compress(in)
	if err != nil {
	return nil, err
	}
	s.Out = s.bw.out
	// Check if we compressed.
	if len(s.Out) >= len(in) {
	return nil, ErrIncompressible
	}
	return s.Out, nil
	}

	// cState contains the compression state of a stream.
	// It is set up by init and advanced one symbol at a time by encode or
	// encodeZero; flush writes the final state to the bit writer.
	type cState struct {
	// bw receives the bits emitted while encoding.
	bw *bitWriter
	// stateTable is the next-state lookup, copied from cTable.stateTable by init.
	stateTable []uint16
	// state is the current encoder state.
	state uint16
	}

	// init will initialize the compression state to the first symbol of the stream.
	// It stores the bit writer and the state table of ct in the state and
	// selects the starting state from the transform of the first symbol.
	// No bits are written. tableLog is accepted but not used.
	// The receiver and bw are pointers: the cState.bw field is a *bitWriter,
	// and the state set here must be visible to the later encode calls.
	func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
		c.bw = bw
		c.stateTable = ct.stateTable

		nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
		im := int32((nbBitsOut << 16) - first.deltaNbBits)
		lu := (im >> nbBitsOut) + first.deltaFindState
		c.state = c.stateTable[lu]
	}

	// encode writes the bits for the supplied symbol transform to the bitstream
	// using addBits16NC and moves on to the next state.
	func (c *cState) encode(symbolTT symbolTransform) {
		cur := c.state
		n := (uint32(cur) + symbolTT.deltaNbBits) >> 16
		next := int32(cur>>(n&15)) + symbolTT.deltaFindState
		c.bw.addBits16NC(cur, uint8(n))
		c.state = c.stateTable[next]
	}

	// encodeZero does the same as encode, but writes the bits with
	// addBits16ZeroNC. compress uses it for the symbols encoded before the main
	// loop and whenever s.zeroBits indicates a symbol may be coded with 0 bits.
	func (c *cState) encodeZero(symbolTT symbolTransform) {
		cur := c.state
		n := (uint32(cur) + symbolTT.deltaNbBits) >> 16
		next := int32(cur>>(n&15)) + symbolTT.deltaFindState
		c.bw.addBits16ZeroNC(cur, uint8(n))
		c.state = c.stateTable[next]
	}

	// flush writes the current state to the output using tableLog bits.
	// The bit writer is flushed with flush32 before the state is added and with
	// flush afterwards.
	func (c *cState) flush(tableLog uint8) {
		w := c.bw
		w.flush32()
		w.addBits16NC(c.state, tableLog)
		w.flush()
	}

	// compress is the main compression loop that will encode the input from the last byte to the first.
	// It reads the symbol transforms from s.ct.symbolTT and writes through s.bw,
	// which is reset onto s.Out first.
	// NOTE(review): presumably the reset keeps the header written by writeCount
	// and appends after it - confirm against bitWriter.reset.
	// An error is returned when src has 2 bytes or fewer; otherwise the result
	// of closing the bit writer is returned.
	func (s *Scratch) compress(src []byte) error {
	if len(src) <= 2 {
	return errors.New("compress: src too small")
	}
	// Re-slice to 256 entries so every byte value is a valid index.
	tt := s.ct.symbolTT[:256]
	s.bw.reset(s.Out)

	// Our two states each encodes every second byte.
	// Last byte encoded (first byte decoded) will always be encoded by c1.
	var c1, c2 cState

	// Encode so remaining size is divisible by 4.
	ip := len(src)
	if ip&1 == 1 {
	// Odd length: the last two bytes initialize the states and a third
	// byte is encoded, leaving an even number of bytes.
	c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
	c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
	c1.encodeZero(tt[src[ip-3]])
	ip -= 3
	} else {
	// Even length: the last two bytes only initialize the states.
	c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
	c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
	ip -= 2
	}
	if ip&2 != 0 {
	// Encode two more bytes so ip becomes a multiple of 4.
	c2.encodeZero(tt[src[ip-1]])
	c1.encodeZero(tt[src[ip-2]])
	ip -= 2
	}

	// Main compression loop.
	// All four variants encode 4 bytes per iteration, alternating c2 and c1.
	// They differ in whether encode or encodeZero is used and in whether an
	// extra flush32 is done after the first two symbols.
	switch {
	case !s.zeroBits && s.actualTableLog <= 8:
	// We can encode 4 symbols without requiring a flush.
	// We do not need to check if any output is 0 bits.
	for ip >= 4 {
	s.bw.flush32()
	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
	c2.encode(tt[v0])
	c1.encode(tt[v1])
	c2.encode(tt[v2])
	c1.encode(tt[v3])
	ip -= 4
	}
	case !s.zeroBits:
	// We do not need to check if any output is 0 bits.
	for ip >= 4 {
	s.bw.flush32()
	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
	c2.encode(tt[v0])
	c1.encode(tt[v1])
	s.bw.flush32()
	c2.encode(tt[v2])
	c1.encode(tt[v3])
	ip -= 4
	}
	case s.actualTableLog <= 8:
	// We can encode 4 symbols without requiring a flush
	for ip >= 4 {
	s.bw.flush32()
	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
	c2.encodeZero(tt[v0])
	c1.encodeZero(tt[v1])
	c2.encodeZero(tt[v2])
	c1.encodeZero(tt[v3])
	ip -= 4
	}
	default:
	// Symbols may use 0 bits and the table log is above 8.
	for ip >= 4 {
	s.bw.flush32()
	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
	c2.encodeZero(tt[v0])
	c1.encodeZero(tt[v1])
	s.bw.flush32()
	c2.encodeZero(tt[v2])
	c1.encodeZero(tt[v3])
	ip -= 4
	}
	}

	// Flush final state.
	// Used to initialize state when decoding.
	c2.flush(s.actualTableLog)
	c1.flush(s.actualTableLog)

	return s.bw.close()
	}

	// writeCount will write the normalized histogram count to header.
	// This is read back by readNCount.
	// The header is written at the start of s.Out, which is reallocated when its
	// capacity is below the computed bound; on success s.Out holds only the
	// header bytes.
	// An error is returned if the normalized counts are inconsistent or if the
	// header would not fit inside the computed bound.
	func (s *Scratch) writeCount() error {
		var (
			tableLog  = s.actualTableLog
			tableSize = 1 << tableLog
			previous0 bool
			charnum   uint16

			// Upper bound for the header size in bytes. On top of the symbol
			// bits it accounts for the 4 bits holding the table log and 2 bits
			// because the first two symbols may each use one additional bit,
			// plus 1 byte for rounding up and 2 bytes for the final flush.
			maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3

			// Write Table Size
			bitStream = uint32(tableLog - minTablelog)
			bitCount  = uint(4)
			remaining = int16(tableSize + 1) /* +1 for extra accuracy */
			threshold = int16(tableSize)
			nbBits    = uint(tableLog + 1)
		)
		if cap(s.Out) < maxHeaderSize {
			s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
		}
		outP := uint(0)
		out := s.Out[:maxHeaderSize]

		// stops at 1
		for remaining > 1 {
			if previous0 {
				// The previous symbol had a count of 0: write the length of
				// the run of zero counts that follows.
				start := charnum
				for s.norm[charnum] == 0 {
					charnum++
				}
				// Runs of 24 are written as 16 set bits.
				for charnum >= start+24 {
					start += 24
					bitStream += uint32(0xFFFF) << bitCount
					out[outP] = byte(bitStream)
					out[outP+1] = byte(bitStream >> 8)
					outP += 2
					bitStream >>= 16
				}
				// Runs of 3 are written as 2 set bits.
				for charnum >= start+3 {
					start += 3
					bitStream += 3 << bitCount
					bitCount += 2
				}
				// The remainder (0-2) is written in 2 bits.
				bitStream += uint32(charnum-start) << bitCount
				bitCount += 2
				if bitCount > 16 {
					out[outP] = byte(bitStream)
					out[outP+1] = byte(bitStream >> 8)
					outP += 2
					bitStream >>= 16
					bitCount -= 16
				}
			}

			count := s.norm[charnum]
			charnum++
			max := (2*threshold - 1) - remaining
			if count < 0 {
				remaining += count
			} else {
				remaining -= count
			}
			count++ // +1 for extra accuracy
			if count >= threshold {
				count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
			}
			bitStream += uint32(count) << bitCount
			bitCount += nbBits
			if count < max {
				bitCount--
			}

			previous0 = count == 1
			if remaining < 1 {
				return errors.New("internal error: remaining<1")
			}
			// Fewer bits are needed once the remaining total drops below the threshold.
			for remaining < threshold {
				nbBits--
				threshold >>= 1
			}

			if bitCount > 16 {
				out[outP] = byte(bitStream)
				out[outP+1] = byte(bitStream >> 8)
				outP += 2
				bitStream >>= 16
				bitCount -= 16
			}
		}

		// The final flush always stores two bytes; report an error instead of
		// indexing past the buffer.
		if outP+2 > uint(len(out)) {
			return errors.New("internal error: header exceeds maxHeaderSize")
		}
		out[outP] = byte(bitStream)
		out[outP+1] = byte(bitStream >> 8)
		outP += (bitCount + 7) / 8

		if charnum > s.symbolLen {
			return errors.New("internal error: charnum > s.symbolLen")
		}
		s.Out = out[:outP]
		return nil
	}

	// symbolTransform holds the two per-symbol values the encoder uses to move
	// from one state to the next.
	type symbolTransform struct {
		deltaFindState int32
		deltaNbBits    uint32
	}

	// String returns the transform as a human readable string: deltaNbBits as
	// 8 hex digits followed by deltaFindState in decimal.
	func (s symbolTransform) String() string {
		const layout = "dnbits: %08x, fs:%d"
		return fmt.Sprintf(layout, s.deltaNbBits, s.deltaFindState)
	}

	// cTable contains tables used for compression.
	// The slices are sized by allocCtable and filled in by buildCTable.
	type cTable struct {
	// tableSymbol holds the symbol placed at each table position.
	tableSymbol []byte
	// stateTable holds next state values, sorted by symbol order.
	stateTable []uint16
	// symbolTT holds the transform of each symbol; allocCtable keeps it at 256 entries.
	symbolTT []symbolTransform
	}

	// allocCtable makes sure the compression tables have the size required for
	// the current table log. Tables with enough capacity are re-sliced and
	// re-used, otherwise new ones are allocated. The symbol transform table
	// always gets 256 entries.
	func (s *Scratch) allocCtable() {
		ct := &s.ct
		n := 1 << s.actualTableLog

		// One symbol per table position.
		if cap(ct.tableSymbol) >= n {
			ct.tableSymbol = ct.tableSymbol[:n]
		} else {
			ct.tableSymbol = make([]byte, n)
		}

		// One next-state entry per table position.
		if cap(ct.stateTable) >= n {
			ct.stateTable = ct.stateTable[:n]
		} else {
			ct.stateTable = make([]uint16, n)
		}

		const symbols = 256
		if cap(ct.symbolTT) >= symbols {
			ct.symbolTT = ct.symbolTT[:symbols]
		} else {
			ct.symbolTT = make([]symbolTransform, symbols)
		}
	}

	// buildCTable will populate the compression table so it is ready to be used.
	// It reads the normalized counts in s.norm and fills s.ct.tableSymbol,
	// s.ct.stateTable and s.ct.symbolTT. s.zeroBits is set when any symbol has a
	// normalized count above half the table size.
	// An error is returned when the normalized counts do not add up to the
	// table size or the symbol spreading does not end at position 0.
	func (s *Scratch) buildCTable() error {
	tableSize := uint32(1 << s.actualTableLog)
	// Highest free table position; -1 symbols are placed from here downwards.
	highThreshold := tableSize - 1
	// cumul[i] is the first stateTable index used by symbol i.
	var cumul [maxSymbolValue + 2]int16

	s.allocCtable()
	tableSymbol := s.ct.tableSymbol[:tableSize]
	// symbol start positions
	{
	cumul[0] = 0
	for ui, v := range s.norm[:s.symbolLen-1] {
	u := byte(ui) // one less than reference
	if v == -1 {
	// Low proba symbol
	cumul[u+1] = cumul[u] + 1
	tableSymbol[highThreshold] = u
	highThreshold--
	} else {
	cumul[u+1] = cumul[u] + v
	}
	}
	// Encode last symbol separately to avoid overflowing u
	u := int(s.symbolLen - 1)
	v := s.norm[s.symbolLen-1]
	if v == -1 {
	// Low proba symbol
	cumul[u+1] = cumul[u] + 1
	tableSymbol[highThreshold] = byte(u)
	highThreshold--
	} else {
	cumul[u+1] = cumul[u] + v
	}
	// The start positions must cover the table exactly.
	if uint32(cumul[s.symbolLen]) != tableSize {
	return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
	}
	cumul[s.symbolLen] = int16(tableSize) + 1
	}
	// Spread symbols
	s.zeroBits = false
	{
	step := tableStep(tableSize)
	tableMask := tableSize - 1
	var position uint32
	// if any symbol > largeLimit, we may have 0 bits output.
	largeLimit := int16(1 << (s.actualTableLog - 1))
	for ui, v := range s.norm[:s.symbolLen] {
	symbol := byte(ui)
	if v > largeLimit {
	s.zeroBits = true
	}
	// Place v occurrences of the symbol, stepping through the table
	// and skipping the positions reserved above highThreshold.
	for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
	tableSymbol[position] = symbol
	position = (position + step) & tableMask
	for position > highThreshold {
	position = (position + step) & tableMask
	} /* Low proba area */
	}
	}

	// Check if we have gone through all positions
	if position != 0 {
	return errors.New("position!=0")
	}
	}

	// Build table
	table := s.ct.stateTable
	{
	tsi := int(tableSize)
	for u, v := range tableSymbol {
	// TableU16 : sorted by symbol order; gives next state value
	table[cumul[v]] = uint16(tsi + u)
	// Advance to the next slot belonging to symbol v.
	cumul[v]++
	}
	}

	// Build Symbol Transformation Table
	{
	// total is the running sum of table slots used by earlier symbols.
	total := int16(0)
	symbolTT := s.ct.symbolTT[:s.symbolLen]
	tableLog := s.actualTableLog
	// deltaNbBits shared by all symbols that occupy a single slot.
	tl := (uint32(tableLog) << 16) - (1 << tableLog)
	for i, v := range s.norm[:s.symbolLen] {
	switch v {
	case 0:
	// Symbol not present; its transform is left untouched.
	case -1, 1:
	symbolTT[i].deltaNbBits = tl
	symbolTT[i].deltaFindState = int32(total - 1)
	total++
	default:
	maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
	minStatePlus := uint32(v) << maxBitsOut
	symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
	symbolTT[i].deltaFindState = int32(total - v)
	total += v
	}
	}
	if total != int16(tableSize) {
	return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
	}
	}
	return nil
	}

	// countSimple builds a histogram of in by adding to s.count and returns the
	// biggest count found in s.count.
	// s.symbolLen is set to one more than the highest symbol with a non-zero
	// count. s.clearCount is not updated.
	func (s *Scratch) countSimple(in []byte) (max int) {
		for i := range in {
			s.count[in[i]]++
		}
		var biggest uint32
		for sym := range s.count {
			c := s.count[sym]
			if c == 0 {
				continue
			}
			s.symbolLen = uint16(sym) + 1
			if c > biggest {
				biggest = c
			}
		}
		return int(biggest)
	}

	// minTableLog returns the minimum table log that can safely represent the
	// distribution: the smaller of the value derived from the remaining input
	// size and the value derived from the number of symbols.
	func (s *Scratch) minTableLog() uint8 {
		fromSrc := highBits(uint32(s.br.remain()-1)) + 1
		fromSymbols := highBits(uint32(s.symbolLen-1)) + 2
		smallest := fromSymbols
		if fromSrc < fromSymbols {
			smallest = fromSrc
		}
		return uint8(smallest)
	}

	// optimalTableLog picks the table log for the current input and stores it in
	// s.actualTableLog. It starts from s.TableLog, lowers it for small inputs,
	// raises it to the minimum from minTableLog and finally keeps it within
	// minTablelog and maxTableLog.
	func (s *Scratch) optimalTableLog() {
		chosen := s.TableLog
		lowest := s.minTableLog()
		// The subtraction is done in uint8 and wraps around for tiny inputs, in
		// which case the comparison below leaves chosen unchanged.
		srcLimit := uint8(highBits(uint32(s.br.remain()-1))) - 2
		if chosen > srcLimit {
			// Accuracy can be reduced.
			chosen = srcLimit
		}
		if chosen < lowest {
			chosen = lowest
		}
		// A minimum is needed to safely represent all symbol values.
		if minTablelog > chosen {
			chosen = minTablelog
		}
		if maxTableLog < chosen {
			chosen = maxTableLog
		}
		s.actualTableLog = chosen
	}

	// rtbTable holds the rounding thresholds ("rest to beat") used by
	// normalizeCount, indexed by the truncated probability (0-7). An entry is
	// multiplied by vStep and the probability is rounded up when the truncated
	// remainder exceeds that value.
	var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}

	// normalizeCount scales the histogram in s.count into s.norm so the
	// normalized counts add up to the table size (1 << s.actualTableLog).
	// Symbols that occur at most lowThreshold times get -1. When the correction
	// that would have to be applied to the largest symbol is too big,
	// normalizeCount2 is used instead.
	func (s *Scratch) normalizeCount() error {
		tableLog := s.actualTableLog
		scale := 62 - uint64(tableLog)
		step := (1 << 62) / uint64(s.br.remain())
		vStep := uint64(1) << (scale - 20)
		left := int16(1 << tableLog)
		lowThreshold := uint32(s.br.remain() >> tableLog)

		var (
			topSym   int
			topProba int16
		)
		for sym, cnt := range s.count[:s.symbolLen] {
			// The single-symbol (RLE) case has already been handled by the caller.
			switch {
			case cnt == 0:
				s.norm[sym] = 0
			case cnt <= lowThreshold:
				s.norm[sym] = -1
				left--
			default:
				scaled := uint64(cnt) * step
				proba := int16(scaled >> scale)
				// Small probabilities are rounded up when the remainder beats
				// the threshold from rtbTable.
				if proba < 8 && scaled-(uint64(proba)<<scale) > vStep*uint64(rtbTable[proba]) {
					proba++
				}
				if proba > topProba {
					topProba, topSym = proba, sym
				}
				s.norm[sym] = proba
				left -= proba
			}
		}

		if -left >= (s.norm[topSym] >> 1) {
			// Corner case, another normalization method is needed.
			return s.normalizeCount2()
		}
		// Give the rounding difference to the most frequent symbol.
		s.norm[topSym] += left
		return nil
	}

	// normalizeCount2 is the secondary normalization method.
	// It is called by normalizeCount when the primary method fails.
	// Counts at or below lowThreshold get -1 and counts at or below lowOne get
	// 1; the table slots that are left are shared between the other symbols in
	// proportion to their counts.
	// An error is returned if one of those symbols would get a weight below 1.
	func (s *Scratch) normalizeCount2() error {
	// Marker for symbols that still need a normalized count.
	const notYetAssigned = -2
	var (
	distributed uint32
	total = uint32(s.br.remain())
	tableLog = s.actualTableLog
	lowThreshold = total >> tableLog
	lowOne = (total * 3) >> (tableLog + 1)
	)
	// First pass: settle the rare symbols; total keeps the sum of the
	// counts that are still unassigned.
	for i, cnt := range s.count[:s.symbolLen] {
	if cnt == 0 {
	s.norm[i] = 0
	continue
	}
	if cnt <= lowThreshold {
	s.norm[i] = -1
	distributed++
	total -= cnt
	continue
	}
	if cnt <= lowOne {
	s.norm[i] = 1
	distributed++
	total -= cnt
	continue
	}
	s.norm[i] = notYetAssigned
	}
	toDistribute := (1 << tableLog) - distributed

	if (total / toDistribute) > lowOne {
	// risk of rounding to zero
	lowOne = (total * 3) / (toDistribute * 2)
	for i, cnt := range s.count[:s.symbolLen] {
	if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
	s.norm[i] = 1
	distributed++
	total -= cnt
	continue
	}
	}
	toDistribute = (1 << tableLog) - distributed
	}
	// NOTE(review): distributed is incremented at most once per symbol, so it
	// cannot exceed s.symbolLen and this branch looks unreachable. The C
	// reference appears to compare against maxSymbolValue+1 - confirm the
	// intended bound.
	if distributed == uint32(s.symbolLen)+1 {
	// all values are pretty poor;
	// probably incompressible data (should have already been detected);
	// find max, then give all remaining points to max
	var maxV int
	var maxC uint32
	for i, cnt := range s.count[:s.symbolLen] {
	if cnt > maxC {
	maxV = i
	maxC = cnt
	}
	}
	s.norm[maxV] += int16(toDistribute)
	return nil
	}

	if total == 0 {
	// all of the symbols were low enough for the lowOne or lowThreshold
	// Hand out the remaining slots one at a time to symbols with a
	// positive count, wrapping around until none are left.
	for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
	if s.norm[i] > 0 {
	toDistribute--
	s.norm[i]++
	}
	}
	return nil
	}

	var (
	vStepLog = 62 - uint64(tableLog)
	mid = uint64((1 << (vStepLog - 1)) - 1)
	rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
	tmpTotal = mid
	)
	// Each remaining symbol gets the number of whole steps its count adds
	// to the running fixed-point total.
	for i, cnt := range s.count[:s.symbolLen] {
	if s.norm[i] == notYetAssigned {
	var (
	end = tmpTotal + uint64(cnt)*rStep
	sStart = uint32(tmpTotal >> vStepLog)
	sEnd = uint32(end >> vStepLog)
	weight = sEnd - sStart
	)
	if weight < 1 {
	return errors.New("weight < 1")
	}
	s.norm[i] = int16(weight)
	tmpTotal = end
	}
	}
	return nil
	}

	// validateNorm checks the normalized histogram: the absolute values in
	// s.norm must add up to the table size and no count may be present beyond
	// s.symbolLen. When a check fails, the table is printed to standard output
	// before the error is returned.
	func (s *Scratch) validateNorm() (err error) {
		sum := 0
		for _, n := range s.norm[:s.symbolLen] {
			w := int(n)
			if w < 0 {
				w = -w
			}
			sum += w
		}
		// Dump the table whenever an error is returned below.
		defer func() {
			if err != nil {
				fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
				for sym, n := range s.norm[:s.symbolLen] {
					fmt.Printf("%3d: %5d -> %4d \n", sym, s.count[sym], n)
				}
			}
		}()
		if want := 1 << s.actualTableLog; sum != want {
			return fmt.Errorf("warning: Total == %d != %d", sum, want)
		}
		for off, c := range s.count[s.symbolLen:] {
			if c != 0 {
				return fmt.Errorf("warning: Found symbol out of range, %d after cut", off)
			}
		}
		return nil
	}