| package huff0 |
| |
| import ( |
| "fmt" |
| "runtime" |
| "sync" |
| ) |
| |
| // Compress1X will compress the input. |
| // The output can be decoded using Decompress1X. |
| // Supply a Scratch object. The scratch object contains state about re-use, |
| // so when sharing across independent encodes, be sure to set the re-use policy. |
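| // |
| // A minimal usage sketch (illustrative only; the data variable and the |
| // error handling shown are assumptions, not part of this package): |
| // |
| //    var s huff0.Scratch |
| //    out, _, err := huff0.Compress1X(data, &s) |
| //    switch err { |
| //    case huff0.ErrIncompressible: |
| //        // Store data uncompressed. |
| //    case huff0.ErrUseRLE: |
| //        // A single repeated symbol; RLE will be smaller. |
| //    case nil: |
| //        // Use out. |
| //    } |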
| func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { |
| s, err = s.prepare(in) |
| if err != nil { |
| return nil, false, err |
| } |
| return compress(in, s, s.compress1X) |
| } |
| |
| // Compress4X will compress the input. The input is split into 4 independent blocks |
| // and compressed similarly to Compress1X. |
| // The output can be decoded using Decompress4X. |
| // Supply a Scratch object. The scratch object contains state about re-use, |
| // so when sharing across independent encodes, be sure to set the re-use policy. |
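| // |
| // A sketch of choosing between the two encoders (the 4 KiB cutoff is an |
| // illustrative assumption, not a tuned value): |
| // |
| //    var s huff0.Scratch |
| //    var out []byte |
| //    var err error |
| //    if len(data) < 4<<10 { |
| //        out, _, err = huff0.Compress1X(data, &s) |
| //    } else { |
| //        // 4X emits four independent streams that can be decoded concurrently. |
| //        out, _, err = huff0.Compress4X(data, &s) |
| //    } |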
| func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { |
| s, err = s.prepare(in) |
| if err != nil { |
| return nil, false, err |
| } |
| if false { |
| // TODO: compress4Xp is only slightly faster; keep it disabled for now. |
| const parallelThreshold = 8 << 10 |
| if len(in) < parallelThreshold || runtime.GOMAXPROCS(0) == 1 { |
| return compress(in, s, s.compress4X) |
| } |
| return compress(in, s, s.compress4Xp) |
| } |
| return compress(in, s, s.compress4X) |
| } |
| |
| func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) { |
| // Nuke previous table if we cannot reuse anyway. |
| if s.Reuse == ReusePolicyNone { |
| s.prevTable = s.prevTable[:0] |
| } |
| |
| // Create histogram, if none was provided. |
| maxCount := s.maxCount |
| canReuse := false |
| if maxCount == 0 { |
| maxCount, canReuse = s.countSimple(in) |
| } else { |
| canReuse = s.canUseTable(s.prevTable) |
| } |
| |
| // We want the output size to be less than this: |
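| // E.g. WantLogLess == 4 requires the output to be at least 1/16 smaller |
| // than the input: a 1000-byte input must compress to at most |
| // 1000-(1000>>4) = 938 bytes. |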
| wantSize := len(in) |
| if s.WantLogLess > 0 { |
| wantSize -= wantSize >> s.WantLogLess |
| } |
| |
| // Reset for next run. |
| s.clearCount = true |
| s.maxCount = 0 |
| if maxCount >= len(in) { |
| if maxCount > len(in) { |
| return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) |
| } |
| if len(in) == 1 { |
| return nil, false, ErrIncompressible |
| } |
| // One symbol, use RLE |
| return nil, false, ErrUseRLE |
| } |
| if maxCount == 1 || maxCount < (len(in)>>7) { |
| // Each symbol is present at most once, or the distribution is too flat |
| // (most common symbol below 1/128 of the input) for coding to gain. |
| return nil, false, ErrIncompressible |
| } |
| |
| if s.Reuse == ReusePolicyPrefer && canReuse { |
| keepTable := s.cTable |
| keepTL := s.actualTableLog |
| s.cTable = s.prevTable |
| s.actualTableLog = s.prevTableLog |
| s.Out, err = compressor(in) |
| s.cTable = keepTable |
| s.actualTableLog = keepTL |
| if err == nil && len(s.Out) < wantSize { |
| s.OutData = s.Out |
| return s.Out, true, nil |
| } |
| // Do not attempt to re-use later. |
| s.prevTable = s.prevTable[:0] |
| } |
| |
| // Calculate new table. |
| err = s.buildCTable() |
| if err != nil { |
| return nil, false, err |
| } |
| |
| // Optional sanity check, disabled by default. |
| if false && !s.canUseTable(s.cTable) { |
| panic("invalid table generated") |
| } |
| |
| if s.Reuse == ReusePolicyAllow && canReuse { |
| hSize := len(s.Out) |
| oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen]) |
| newSize := s.cTable.estimateSize(s.count[:s.symbolLen]) |
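| // Re-use only if encoding with the previous table is estimated to be no |
| // larger than the output so far plus encoding with the new table, or if |
| // the output so far plus a minimal table could not meet wantSize anyway. |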
| if oldSize <= hSize+newSize || hSize+12 >= wantSize { |
| // Retain cTable even if we re-use. |
| keepTable := s.cTable |
| keepTL := s.actualTableLog |
| |
| s.cTable = s.prevTable |
| s.actualTableLog = s.prevTableLog |
| s.Out, err = compressor(in) |
| |
| // Restore cTable. |
| s.cTable = keepTable |
| s.actualTableLog = keepTL |
| if err != nil { |
| return nil, false, err |
| } |
| if len(s.Out) >= wantSize { |
| return nil, false, ErrIncompressible |
| } |
| s.OutData = s.Out |
| return s.Out, true, nil |
| } |
| } |
| |
| // Use new table |
| err = s.cTable.write(s) |
| if err != nil { |
| s.OutTable = nil |
| return nil, false, err |
| } |
| s.OutTable = s.Out |
| |
| // Compress using new table |
| s.Out, err = compressor(in) |
| if err != nil { |
| s.OutTable = nil |
| return nil, false, err |
| } |
| if len(s.Out) >= wantSize { |
| s.OutTable = nil |
| return nil, false, ErrIncompressible |
| } |
| // Move current table into previous. |
| s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0] |
| s.OutData = s.Out[len(s.OutTable):] |
| return s.Out, false, nil |
| } |
| |
| func (s *Scratch) compress1X(src []byte) ([]byte, error) { |
| return s.compress1xDo(s.Out, src) |
| } |
| |
| func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { |
| var bw = bitWriter{out: dst} |
| |
| // n is the input length rounded down to a multiple of 4. |
| n := len(src) |
| n -= n & 3 |
| cTable := s.cTable[:256] |
| |
| // Encode the up to 3 trailing bytes first; symbols are written in reverse order. |
| for i := len(src) & 3; i > 0; i-- { |
| bw.encSymbol(cTable, src[n+i-1]) |
| } |
| n -= 4 |
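| // With tableLog <= 8, four symbols occupy at most 32 bits, so a single |
| // flush32 per iteration keeps the bit accumulator from overflowing. |
| // Larger tables may exceed that margin, so they flush after every two |
| // symbols instead. |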
| if s.actualTableLog <= 8 { |
| for ; n >= 0; n -= 4 { |
| tmp := src[n : n+4] |
| // tmp is always 4 bytes; the slicing helps eliminate bounds checks. |
| bw.flush32() |
| bw.encTwoSymbols(cTable, tmp[3], tmp[2]) |
| bw.encTwoSymbols(cTable, tmp[1], tmp[0]) |
| } |
| } else { |
| for ; n >= 0; n -= 4 { |
| tmp := src[n : n+4] |
| // tmp is always 4 bytes; the slicing helps eliminate bounds checks. |
| bw.flush32() |
| bw.encTwoSymbols(cTable, tmp[3], tmp[2]) |
| bw.flush32() |
| bw.encTwoSymbols(cTable, tmp[1], tmp[0]) |
| } |
| } |
| err := bw.close() |
| return bw.out, err |
| } |
| |
| var sixZeros [6]byte |
| |
| func (s *Scratch) compress4X(src []byte) ([]byte, error) { |
| if len(src) < 12 { |
| return nil, ErrIncompressible |
| } |
| segmentSize := (len(src) + 3) / 4 |
| |
| // Reserve a 6-byte jump table: three little-endian uint16 stream lengths. |
| offsetIdx := len(s.Out) |
| s.Out = append(s.Out, sixZeros[:]...) |
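| // Final layout: [len0][len1][len2][stream0][stream1][stream2][stream3], |
| // where each lenN is a little-endian uint16; the length of stream3 is |
| // implicit. |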
| |
| for i := 0; i < 4; i++ { |
| toDo := src |
| if len(toDo) > segmentSize { |
| toDo = toDo[:segmentSize] |
| } |
| src = src[len(toDo):] |
| |
| var err error |
| idx := len(s.Out) |
| s.Out, err = s.compress1xDo(s.Out, toDo) |
| if err != nil { |
| return nil, err |
| } |
| // Write the compressed length into the jump table, little endian. |
| if i < 3 { |
| // The last stream's length is implicit and not stored. |
| length := len(s.Out) - idx |
| s.Out[i*2+offsetIdx] = byte(length) |
| s.Out[i*2+offsetIdx+1] = byte(length >> 8) |
| } |
| } |
| |
| return s.Out, nil |
| } |
| |
| // compress4Xp will compress 4 streams using separate goroutines. |
| func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { |
| if len(src) < 12 { |
| return nil, ErrIncompressible |
| } |
| // Reserve the 6-byte jump table. |
| s.Out = s.Out[:6] |
| |
| segmentSize := (len(src) + 3) / 4 |
| var wg sync.WaitGroup |
| var errs [4]error |
| wg.Add(4) |
| for i := 0; i < 4; i++ { |
| toDo := src |
| if len(toDo) > segmentSize { |
| toDo = toDo[:segmentSize] |
| } |
| src = src[len(toDo):] |
| |
| // Separate goroutine per block; each writes only its own tmpOut slot, |
| // so no synchronization beyond the WaitGroup is needed. |
| go func(i int) { |
| s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) |
| wg.Done() |
| }(i) |
| } |
| wg.Wait() |
| for i := 0; i < 4; i++ { |
| if errs[i] != nil { |
| return nil, errs[i] |
| } |
| o := s.tmpOut[i] |
| // Write the compressed length into the jump table, little endian. |
| if i < 3 { |
| // The last stream's length is implicit and not stored. |
| s.Out[i*2] = byte(len(o)) |
| s.Out[i*2+1] = byte(len(o) >> 8) |
| } |
| |
| // Write output. |
| s.Out = append(s.Out, o...) |
| } |
| return s.Out, nil |
| } |
| |
| // countSimple will create a simple histogram in s.count. |
| // Returns the biggest count. |
| // Does not update s.clearCount. |
| func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { |
| reuse = true |
| for _, v := range in { |
| s.count[v]++ |
| } |
| m := uint32(0) |
| if len(s.prevTable) > 0 { |
| for i, v := range s.count[:] { |
| if v > m { |
| m = v |
| } |
| if v > 0 { |
| s.symbolLen = uint16(i) + 1 |
| if i >= len(s.prevTable) { |
| reuse = false |
| } else { |
| if s.prevTable[i].nBits == 0 { |
| reuse = false |
| } |
| } |
| } |
| } |
| return int(m), reuse |
| } |
| for i, v := range s.count[:] { |
| if v > m { |
| m = v |
| } |
| if v > 0 { |
| s.symbolLen = uint16(i) + 1 |
| } |
| } |
| return int(m), false |
| } |
| |
| func (s *Scratch) canUseTable(c cTable) bool { |
| if len(c) < int(s.symbolLen) { |
| return false |
| } |
| for i, v := range s.count[:s.symbolLen] { |
| if v != 0 && c[i].nBits == 0 { |
| return false |
| } |
| } |
| return true |
| } |
| |
| func (s *Scratch) validateTable(c cTable) bool { |
| if len(c) < int(s.symbolLen) { |
| return false |
| } |
| for i, v := range s.count[:s.symbolLen] { |
| if v != 0 { |
| if c[i].nBits == 0 { |
| return false |
| } |
| if c[i].nBits > s.actualTableLog { |
| return false |
| } |
| } |
| } |
| return true |
| } |
| |
| // minTableLog provides the minimum logSize to safely represent a distribution. |
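| // For example (illustrative numbers): with 1000 bytes remaining and 40 |
| // symbols, minBitsSrc = highBit32(1000)+1 = 10 and |
| // minBitsSymbols = highBit32(39)+2 = 7, so 7 is returned. |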
| func (s *Scratch) minTableLog() uint8 { |
| minBitsSrc := highBit32(uint32(s.br.remain())) + 1 |
| minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 |
| if minBitsSrc < minBitsSymbols { |
| return uint8(minBitsSrc) |
| } |
| return uint8(minBitsSymbols) |
| } |
| |
| // optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog |
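| // E.g. (illustrative, continuing the numbers above): with s.TableLog == 11 |
| // but only 300 bytes remaining, maxBitsSrc = highBit32(299)-1 = 7 caps the |
| // result, so s.actualTableLog becomes 7 after clamping. |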
| func (s *Scratch) optimalTableLog() { |
| tableLog := s.TableLog |
| minBits := s.minTableLog() |
| maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 |
| if maxBitsSrc < tableLog { |
| // Accuracy can be reduced |
| tableLog = maxBitsSrc |
| } |
| if minBits > tableLog { |
| tableLog = minBits |
| } |
| // Need a minimum to safely represent all symbol values |
| if tableLog < minTablelog { |
| tableLog = minTablelog |
| } |
| if tableLog > tableLogMax { |
| tableLog = tableLogMax |
| } |
| s.actualTableLog = tableLog |
| } |
| |
| type cTableEntry struct { |
| val uint16 |
| nBits uint8 |
| // We have 8 bits extra |
| } |
| |
| const huffNodesMask = huffNodesLen - 1 |
| |
| func (s *Scratch) buildCTable() error { |
| s.optimalTableLog() |
| s.huffSort() |
| if cap(s.cTable) < maxSymbolValue+1 { |
| s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1) |
| } else { |
| s.cTable = s.cTable[:s.symbolLen] |
| for i := range s.cTable { |
| s.cTable[i] = cTableEntry{} |
| } |
| } |
| |
| startNode := int16(s.symbolLen) |
| nonNullRank := s.symbolLen - 1 |
| |
| nodeNb := startNode |
| huffNode := s.nodes[1 : huffNodesLen+1] |
| |
| // This overlays the slice above, but allows "-1" index lookups. |
| // Different from reference implementation. |
| huffNode0 := s.nodes[0 : huffNodesLen+1] |
| |
| for huffNode[nonNullRank].count == 0 { |
| nonNullRank-- |
| } |
| |
| lowS := int16(nonNullRank) |
| nodeRoot := nodeNb + lowS - 1 |
| lowN := nodeNb |
| huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count |
| huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb) |
| nodeNb++ |
| lowS -= 2 |
| for n := nodeNb; n <= nodeRoot; n++ { |
| huffNode[n].count = 1 << 30 |
| } |
| // Fake entry acting as a sentinel: its huge count ensures the merge loop |
| // below never selects the -1 index. |
| huffNode0[0].count = 1 << 31 |
| |
| // Create parents: repeatedly merge the two smallest remaining nodes |
| // (leaf or internal), as in classic Huffman construction. |
| for nodeNb <= nodeRoot { |
| var n1, n2 int16 |
| if huffNode0[lowS+1].count < huffNode0[lowN+1].count { |
| n1 = lowS |
| lowS-- |
| } else { |
| n1 = lowN |
| lowN++ |
| } |
| if huffNode0[lowS+1].count < huffNode0[lowN+1].count { |
| n2 = lowS |
| lowS-- |
| } else { |
| n2 = lowN |
| lowN++ |
| } |
| |
| huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count |
| huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb) |
| nodeNb++ |
| } |
| |
| // distribute weights (unlimited tree height) |
| huffNode[nodeRoot].nbBits = 0 |
| for n := nodeRoot - 1; n >= startNode; n-- { |
| huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 |
| } |
| for n := uint16(0); n <= nonNullRank; n++ { |
| huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 |
| } |
| s.actualTableLog = s.setMaxHeight(int(nonNullRank)) |
| maxNbBits := s.actualTableLog |
| |
| // fill result into cTable (val, nbBits) |
| if maxNbBits > tableLogMax { |
| return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax) |
| } |
| var nbPerRank [tableLogMax + 1]uint16 |
| var valPerRank [16]uint16 |
| for _, v := range huffNode[:nonNullRank+1] { |
| nbPerRank[v.nbBits]++ |
| } |
| // determine starting value per rank |
| { |
| min := uint16(0) |
| for n := maxNbBits; n > 0; n-- { |
| // get starting value within each rank |
| valPerRank[n] = min |
| min += nbPerRank[n] |
| min >>= 1 |
| } |
| } |
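| // E.g. (illustrative) with maxNbBits == 3 and code lengths {1, 2, 3, 3}: |
| // nbPerRank[1..3] = {1, 1, 2}, giving valPerRank[3] = 0, valPerRank[2] = 1 |
| // and valPerRank[1] = 1; the two 3-bit symbols then get values 0 and 1. |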
| |
| // push nbBits per symbol, symbol order |
| for _, v := range huffNode[:nonNullRank+1] { |
| s.cTable[v.symbol].nBits = v.nbBits |
| } |
| |
| // assign value within rank, symbol order |
| t := s.cTable[:s.symbolLen] |
| for n, val := range t { |
| nbits := val.nBits & 15 |
| v := valPerRank[nbits] |
| t[n].val = v |
| valPerRank[nbits] = v + 1 |
| } |
| |
| return nil |
| } |
| |
| // huffSort will sort symbols by count into s.nodes, in decreasing order. |
| func (s *Scratch) huffSort() { |
| type rankPos struct { |
| base uint32 |
| current uint32 |
| } |
| |
| // Reset nodes to full length; entries are overwritten below. |
| nodes := s.nodes[:huffNodesLen+1] |
| s.nodes = nodes |
| nodes = nodes[1 : huffNodesLen+1] |
| |
| // Bucket symbols by the bit length of their count. |
| var rank [32]rankPos |
| for _, v := range s.count[:s.symbolLen] { |
| r := highBit32(v+1) & 31 |
| rank[r].base++ |
| } |
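| // E.g. (illustrative) counts 1, 3 and 200 land in buckets |
| // highBit32(2) = 1, highBit32(4) = 2 and highBit32(201) = 7 respectively. |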
| // maxBitLength is log2(BlockSizeMax) + 1 |
| const maxBitLength = 18 + 1 |
| for n := maxBitLength; n > 0; n-- { |
| rank[n-1].base += rank[n].base |
| } |
| for n := range rank[:maxBitLength] { |
| rank[n].current = rank[n].base |
| } |
| for n, c := range s.count[:s.symbolLen] { |
| r := (highBit32(c+1) + 1) & 31 |
| pos := rank[r].current |
| rank[r].current++ |
| prev := nodes[(pos-1)&huffNodesMask] |
| for pos > rank[r].base && c > prev.count { |
| nodes[pos&huffNodesMask] = prev |
| pos-- |
| prev = nodes[(pos-1)&huffNodesMask] |
| } |
| nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} |
| } |
| } |
| |
| func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { |
| maxNbBits := s.actualTableLog |
| huffNode := s.nodes[1 : huffNodesLen+1] |
| |
| largestBits := huffNode[lastNonNull].nbBits |
| |
| // early exit: no element uses more than maxNbBits |
| if largestBits <= maxNbBits { |
| return largestBits |
| } |
| totalCost := 0 |
| baseCost := 1 << (largestBits - maxNbBits) |
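| // Shortening a code from length L to maxNbBits grows its share of the |
| // code space from 1<<(largestBits-L) to baseCost units (on a scale where |
| // the whole space is 1<<largestBits); totalCost accumulates that debt, |
| // which is repaid below by lengthening other codes. |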
| n := uint32(lastNonNull) |
| |
| for huffNode[n].nbBits > maxNbBits { |
| totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)) |
| huffNode[n].nbBits = maxNbBits |
| n-- |
| } |
| // n stops at huffNode[n].nbBits <= maxNbBits |
| |
| for huffNode[n].nbBits == maxNbBits { |
| n-- |
| } |
| // n ends at the index of the smallest symbol using fewer than maxNbBits |
| |
| // renormalize totalCost to units of baseCost |
| // (totalCost is necessarily a multiple of baseCost) |
| totalCost >>= largestBits - maxNbBits |
| |
| // repay normalized cost |
| { |
| const noSymbol = 0xF0F0F0F0 |
| var rankLast [tableLogMax + 2]uint32 |
| |
| for i := range rankLast[:] { |
| rankLast[i] = noSymbol |
| } |
| |
| // Get pos of last (smallest) symbol per rank |
| { |
| currentNbBits := maxNbBits |
| for pos := int(n); pos >= 0; pos-- { |
| if huffNode[pos].nbBits >= currentNbBits { |
| continue |
| } |
| currentNbBits = huffNode[pos].nbBits // < maxNbBits |
| rankLast[maxNbBits-currentNbBits] = uint32(pos) |
| } |
| } |
| |
| for totalCost > 0 { |
| nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1 |
| |
| for ; nBitsToDecrease > 1; nBitsToDecrease-- { |
| highPos := rankLast[nBitsToDecrease] |
| lowPos := rankLast[nBitsToDecrease-1] |
| if highPos == noSymbol { |
| continue |
| } |
| if lowPos == noSymbol { |
| break |
| } |
| highTotal := huffNode[highPos].count |
| lowTotal := 2 * huffNode[lowPos].count |
| if highTotal <= lowTotal { |
| break |
| } |
| } |
| // Only triggered when no rank-1 symbol is left: find the closest |
| // non-empty rank (there is necessarily at least one). |
| // The tableLogMax test is only there to please gcc 5+ in the reference |
| // implementation; it should not be necessary. |
| // FIXME: try to remove |
| for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) { |
| nBitsToDecrease++ |
| } |
| totalCost -= 1 << (nBitsToDecrease - 1) |
| if rankLast[nBitsToDecrease-1] == noSymbol { |
| // this rank is no longer empty |
| rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] |
| } |
| huffNode[rankLast[nBitsToDecrease]].nbBits++ |
| if rankLast[nBitsToDecrease] == 0 { |
| // special case: reached the largest symbol |
| rankLast[nBitsToDecrease] = noSymbol |
| } else { |
| rankLast[nBitsToDecrease]-- |
| if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease { |
| rankLast[nBitsToDecrease] = noSymbol // this rank is now empty |
| } |
| } |
| } |
| |
| for totalCost < 0 { // Sometimes the cost correction overshoots. |
| if rankLast[1] == noSymbol { |
| // special case: no rank-1 symbol (using maxNbBits-1); create one |
| // from the largest rank-0 symbol (using maxNbBits) |
| for huffNode[n].nbBits == maxNbBits { |
| n-- |
| } |
| huffNode[n+1].nbBits-- |
| rankLast[1] = n + 1 |
| totalCost++ |
| continue |
| } |
| huffNode[rankLast[1]+1].nbBits-- |
| rankLast[1]++ |
| totalCost++ |
| } |
| } |
| return maxNbBits |
| } |
| |
| type nodeElt struct { |
| count uint32 |
| parent uint16 |
| symbol byte |
| nbBits uint8 |
| } |