Blame - vendor/github.com/klauspost/compress/fse/compress.go - bbsim

blob: b69237c9b8f56e86f5c2e49111d86f907300ecb1 [file] [log] [blame]

Pragya Arya	324337e	2020-02-20 14:35:08 +0530	[diff] [blame]	1	// Copyright 2018 Klaus Post. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4	// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
				5
				6	package fse
				7
				8	import (
				9	"errors"
				10	"fmt"
				11	)
				12
				13	// Compress the input bytes. Input must be < 2GB.
				14	// Provide a Scratch buffer to avoid memory allocations.
				15	// Note that the output is also kept in the scratch buffer.
				16	// If input is too hard to compress, ErrIncompressible is returned.
				17	// If input is a single byte value repeated ErrUseRLE is returned.
				18	func Compress(in []byte, s *Scratch) ([]byte, error) {
				19	if len(in) <= 1 {
				20	return nil, ErrIncompressible
				21	}
				22	if len(in) > (2<<30)-1 {
				23	return nil, errors.New("input too big, must be < 2GB")
				24	}
				25	s, err := s.prepare(in)
				26	if err != nil {
				27	return nil, err
				28	}
				29
				30	// Create histogram, if none was provided.
				31	maxCount := s.maxCount
				32	if maxCount == 0 {
				33	maxCount = s.countSimple(in)
				34	}
				35	// Reset for next run.
				36	s.clearCount = true
				37	s.maxCount = 0
				38	if maxCount == len(in) {
				39	// One symbol, use RLE
				40	return nil, ErrUseRLE
				41	}
				42	if maxCount == 1 \|\| maxCount < (len(in)>>7) {
				43	// Each symbol present maximum once or too well distributed.
				44	return nil, ErrIncompressible
				45	}
				46	s.optimalTableLog()
				47	err = s.normalizeCount()
				48	if err != nil {
				49	return nil, err
				50	}
				51	err = s.writeCount()
				52	if err != nil {
				53	return nil, err
				54	}
				55
				56	if false {
				57	err = s.validateNorm()
				58	if err != nil {
				59	return nil, err
				60	}
				61	}
				62
				63	err = s.buildCTable()
				64	if err != nil {
				65	return nil, err
				66	}
				67	err = s.compress(in)
				68	if err != nil {
				69	return nil, err
				70	}
				71	s.Out = s.bw.out
				72	// Check if we compressed.
				73	if len(s.Out) >= len(in) {
				74	return nil, ErrIncompressible
				75	}
				76	return s.Out, nil
				77	}
				78
				79	// cState contains the compression state of a stream.
				80	type cState struct {
				81	bw *bitWriter
				82	stateTable []uint16
				83	state uint16
				84	}
				85
				86	// init will initialize the compression state to the first symbol of the stream.
				87	func (c cState) init(bw bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
				88	c.bw = bw
				89	c.stateTable = ct.stateTable
				90
				91	nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
				92	im := int32((nbBitsOut << 16) - first.deltaNbBits)
				93	lu := (im >> nbBitsOut) + first.deltaFindState
				94	c.state = c.stateTable[lu]
				95	return
				96	}
				97
				98	// encode the output symbol provided and write it to the bitstream.
				99	func (c *cState) encode(symbolTT symbolTransform) {
				100	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
				101	dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
				102	c.bw.addBits16NC(c.state, uint8(nbBitsOut))
				103	c.state = c.stateTable[dstState]
				104	}
				105
				106	// encode the output symbol provided and write it to the bitstream.
				107	func (c *cState) encodeZero(symbolTT symbolTransform) {
				108	nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
				109	dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
				110	c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut))
				111	c.state = c.stateTable[dstState]
				112	}
				113
				114	// flush will write the tablelog to the output and flush the remaining full bytes.
				115	func (c *cState) flush(tableLog uint8) {
				116	c.bw.flush32()
				117	c.bw.addBits16NC(c.state, tableLog)
				118	c.bw.flush()
				119	}
				120
				121	// compress is the main compression loop that will encode the input from the last byte to the first.
				122	func (s *Scratch) compress(src []byte) error {
				123	if len(src) <= 2 {
				124	return errors.New("compress: src too small")
				125	}
				126	tt := s.ct.symbolTT[:256]
				127	s.bw.reset(s.Out)
				128
				129	// Our two states each encodes every second byte.
				130	// Last byte encoded (first byte decoded) will always be encoded by c1.
				131	var c1, c2 cState
				132
				133	// Encode so remaining size is divisible by 4.
				134	ip := len(src)
				135	if ip&1 == 1 {
				136	c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
				137	c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
				138	c1.encodeZero(tt[src[ip-3]])
				139	ip -= 3
				140	} else {
				141	c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
				142	c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
				143	ip -= 2
				144	}
				145	if ip&2 != 0 {
				146	c2.encodeZero(tt[src[ip-1]])
				147	c1.encodeZero(tt[src[ip-2]])
				148	ip -= 2
				149	}
				150
				151	// Main compression loop.
				152	switch {
				153	case !s.zeroBits && s.actualTableLog <= 8:
				154	// We can encode 4 symbols without requiring a flush.
				155	// We do not need to check if any output is 0 bits.
				156	for ip >= 4 {
				157	s.bw.flush32()
				158	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
				159	c2.encode(tt[v0])
				160	c1.encode(tt[v1])
				161	c2.encode(tt[v2])
				162	c1.encode(tt[v3])
				163	ip -= 4
				164	}
				165	case !s.zeroBits:
				166	// We do not need to check if any output is 0 bits.
				167	for ip >= 4 {
				168	s.bw.flush32()
				169	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
				170	c2.encode(tt[v0])
				171	c1.encode(tt[v1])
				172	s.bw.flush32()
				173	c2.encode(tt[v2])
				174	c1.encode(tt[v3])
				175	ip -= 4
				176	}
				177	case s.actualTableLog <= 8:
				178	// We can encode 4 symbols without requiring a flush
				179	for ip >= 4 {
				180	s.bw.flush32()
				181	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
				182	c2.encodeZero(tt[v0])
				183	c1.encodeZero(tt[v1])
				184	c2.encodeZero(tt[v2])
				185	c1.encodeZero(tt[v3])
				186	ip -= 4
				187	}
				188	default:
				189	for ip >= 4 {
				190	s.bw.flush32()
				191	v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
				192	c2.encodeZero(tt[v0])
				193	c1.encodeZero(tt[v1])
				194	s.bw.flush32()
				195	c2.encodeZero(tt[v2])
				196	c1.encodeZero(tt[v3])
				197	ip -= 4
				198	}
				199	}
				200
				201	// Flush final state.
				202	// Used to initialize state when decoding.
				203	c2.flush(s.actualTableLog)
				204	c1.flush(s.actualTableLog)
				205
				206	return s.bw.close()
				207	}
				208
				209	// writeCount will write the normalized histogram count to header.
				210	// This is read back by readNCount.
				211	func (s *Scratch) writeCount() error {
				212	var (
				213	tableLog = s.actualTableLog
				214	tableSize = 1 << tableLog
				215	previous0 bool
				216	charnum uint16
				217
				218	maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
				219
				220	// Write Table Size
				221	bitStream = uint32(tableLog - minTablelog)
				222	bitCount = uint(4)
				223	remaining = int16(tableSize + 1) /* +1 for extra accuracy */
				224	threshold = int16(tableSize)
				225	nbBits = uint(tableLog + 1)
				226	)
				227	if cap(s.Out) < maxHeaderSize {
				228	s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
				229	}
				230	outP := uint(0)
				231	out := s.Out[:maxHeaderSize]
				232
				233	// stops at 1
				234	for remaining > 1 {
				235	if previous0 {
				236	start := charnum
				237	for s.norm[charnum] == 0 {
				238	charnum++
				239	}
				240	for charnum >= start+24 {
				241	start += 24
				242	bitStream += uint32(0xFFFF) << bitCount
				243	out[outP] = byte(bitStream)
				244	out[outP+1] = byte(bitStream >> 8)
				245	outP += 2
				246	bitStream >>= 16
				247	}
				248	for charnum >= start+3 {
				249	start += 3
				250	bitStream += 3 << bitCount
				251	bitCount += 2
				252	}
				253	bitStream += uint32(charnum-start) << bitCount
				254	bitCount += 2
				255	if bitCount > 16 {
				256	out[outP] = byte(bitStream)
				257	out[outP+1] = byte(bitStream >> 8)
				258	outP += 2
				259	bitStream >>= 16
				260	bitCount -= 16
				261	}
				262	}
				263
				264	count := s.norm[charnum]
				265	charnum++
				266	max := (2*threshold - 1) - remaining
				267	if count < 0 {
				268	remaining += count
				269	} else {
				270	remaining -= count
				271	}
				272	count++ // +1 for extra accuracy
				273	if count >= threshold {
				274	count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
				275	}
				276	bitStream += uint32(count) << bitCount
				277	bitCount += nbBits
				278	if count < max {
				279	bitCount--
				280	}
				281
				282	previous0 = count == 1
				283	if remaining < 1 {
				284	return errors.New("internal error: remaining<1")
				285	}
				286	for remaining < threshold {
				287	nbBits--
				288	threshold >>= 1
				289	}
				290
				291	if bitCount > 16 {
				292	out[outP] = byte(bitStream)
				293	out[outP+1] = byte(bitStream >> 8)
				294	outP += 2
				295	bitStream >>= 16
				296	bitCount -= 16
				297	}
				298	}
				299
				300	out[outP] = byte(bitStream)
				301	out[outP+1] = byte(bitStream >> 8)
				302	outP += (bitCount + 7) / 8
				303
				304	if uint16(charnum) > s.symbolLen {
				305	return errors.New("internal error: charnum > s.symbolLen")
				306	}
				307	s.Out = out[:outP]
				308	return nil
				309	}
				310
				311	// symbolTransform contains the state transform for a symbol.
				312	type symbolTransform struct {
				313	deltaFindState int32
				314	deltaNbBits uint32
				315	}
				316
				317	// String prints values as a human readable string.
				318	func (s symbolTransform) String() string {
				319	return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState)
				320	}
				321
				322	// cTable contains tables used for compression.
				323	type cTable struct {
				324	tableSymbol []byte
				325	stateTable []uint16
				326	symbolTT []symbolTransform
				327	}
				328
				329	// allocCtable will allocate tables needed for compression.
				330	// If existing tables a re big enough, they are simply re-used.
				331	func (s *Scratch) allocCtable() {
				332	tableSize := 1 << s.actualTableLog
				333	// get tableSymbol that is big enough.
				334	if cap(s.ct.tableSymbol) < int(tableSize) {
				335	s.ct.tableSymbol = make([]byte, tableSize)
				336	}
				337	s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
				338
				339	ctSize := tableSize
				340	if cap(s.ct.stateTable) < ctSize {
				341	s.ct.stateTable = make([]uint16, ctSize)
				342	}
				343	s.ct.stateTable = s.ct.stateTable[:ctSize]
				344
				345	if cap(s.ct.symbolTT) < 256 {
				346	s.ct.symbolTT = make([]symbolTransform, 256)
				347	}
				348	s.ct.symbolTT = s.ct.symbolTT[:256]
				349	}
				350
				351	// buildCTable will populate the compression table so it is ready to be used.
				352	func (s *Scratch) buildCTable() error {
				353	tableSize := uint32(1 << s.actualTableLog)
				354	highThreshold := tableSize - 1
				355	var cumul [maxSymbolValue + 2]int16
				356
				357	s.allocCtable()
				358	tableSymbol := s.ct.tableSymbol[:tableSize]
				359	// symbol start positions
				360	{
				361	cumul[0] = 0
				362	for ui, v := range s.norm[:s.symbolLen-1] {
				363	u := byte(ui) // one less than reference
				364	if v == -1 {
				365	// Low proba symbol
				366	cumul[u+1] = cumul[u] + 1
				367	tableSymbol[highThreshold] = u
				368	highThreshold--
				369	} else {
				370	cumul[u+1] = cumul[u] + v
				371	}
				372	}
				373	// Encode last symbol separately to avoid overflowing u
				374	u := int(s.symbolLen - 1)
				375	v := s.norm[s.symbolLen-1]
				376	if v == -1 {
				377	// Low proba symbol
				378	cumul[u+1] = cumul[u] + 1
				379	tableSymbol[highThreshold] = byte(u)
				380	highThreshold--
				381	} else {
				382	cumul[u+1] = cumul[u] + v
				383	}
				384	if uint32(cumul[s.symbolLen]) != tableSize {
				385	return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
				386	}
				387	cumul[s.symbolLen] = int16(tableSize) + 1
				388	}
				389	// Spread symbols
				390	s.zeroBits = false
				391	{
				392	step := tableStep(tableSize)
				393	tableMask := tableSize - 1
				394	var position uint32
				395	// if any symbol > largeLimit, we may have 0 bits output.
				396	largeLimit := int16(1 << (s.actualTableLog - 1))
				397	for ui, v := range s.norm[:s.symbolLen] {
				398	symbol := byte(ui)
				399	if v > largeLimit {
				400	s.zeroBits = true
				401	}
				402	for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
				403	tableSymbol[position] = symbol
				404	position = (position + step) & tableMask
				405	for position > highThreshold {
				406	position = (position + step) & tableMask
				407	} /* Low proba area */
				408	}
				409	}
				410
				411	// Check if we have gone through all positions
				412	if position != 0 {
				413	return errors.New("position!=0")
				414	}
				415	}
				416
				417	// Build table
				418	table := s.ct.stateTable
				419	{
				420	tsi := int(tableSize)
				421	for u, v := range tableSymbol {
				422	// TableU16 : sorted by symbol order; gives next state value
				423	table[cumul[v]] = uint16(tsi + u)
				424	cumul[v]++
				425	}
				426	}
				427
				428	// Build Symbol Transformation Table
				429	{
				430	total := int16(0)
				431	symbolTT := s.ct.symbolTT[:s.symbolLen]
				432	tableLog := s.actualTableLog
				433	tl := (uint32(tableLog) << 16) - (1 << tableLog)
				434	for i, v := range s.norm[:s.symbolLen] {
				435	switch v {
				436	case 0:
				437	case -1, 1:
				438	symbolTT[i].deltaNbBits = tl
				439	symbolTT[i].deltaFindState = int32(total - 1)
				440	total++
				441	default:
				442	maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
				443	minStatePlus := uint32(v) << maxBitsOut
				444	symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
				445	symbolTT[i].deltaFindState = int32(total - v)
				446	total += v
				447	}
				448	}
				449	if total != int16(tableSize) {
				450	return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
				451	}
				452	}
				453	return nil
				454	}
				455
				456	// countSimple will create a simple histogram in s.count.
				457	// Returns the biggest count.
				458	// Does not update s.clearCount.
				459	func (s *Scratch) countSimple(in []byte) (max int) {
				460	for _, v := range in {
				461	s.count[v]++
				462	}
				463	m := uint32(0)
				464	for i, v := range s.count[:] {
				465	if v > m {
				466	m = v
				467	}
				468	if v > 0 {
				469	s.symbolLen = uint16(i) + 1
				470	}
				471	}
				472	return int(m)
				473	}
				474
				475	// minTableLog provides the minimum logSize to safely represent a distribution.
				476	func (s *Scratch) minTableLog() uint8 {
				477	minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1
				478	minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2
				479	if minBitsSrc < minBitsSymbols {
				480	return uint8(minBitsSrc)
				481	}
				482	return uint8(minBitsSymbols)
				483	}
				484
				485	// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
				486	func (s *Scratch) optimalTableLog() {
				487	tableLog := s.TableLog
				488	minBits := s.minTableLog()
				489	maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2
				490	if maxBitsSrc < tableLog {
				491	// Accuracy can be reduced
				492	tableLog = maxBitsSrc
				493	}
				494	if minBits > tableLog {
				495	tableLog = minBits
				496	}
				497	// Need a minimum to safely represent all symbol values
				498	if tableLog < minTablelog {
				499	tableLog = minTablelog
				500	}
				501	if tableLog > maxTableLog {
				502	tableLog = maxTableLog
				503	}
				504	s.actualTableLog = tableLog
				505	}
				506
				507	var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
				508
				509	// normalizeCount will normalize the count of the symbols so
				510	// the total is equal to the table size.
				511	func (s *Scratch) normalizeCount() error {
				512	var (
				513	tableLog = s.actualTableLog
				514	scale = 62 - uint64(tableLog)
				515	step = (1 << 62) / uint64(s.br.remain())
				516	vStep = uint64(1) << (scale - 20)
				517	stillToDistribute = int16(1 << tableLog)
				518	largest int
				519	largestP int16
				520	lowThreshold = (uint32)(s.br.remain() >> tableLog)
				521	)
				522
				523	for i, cnt := range s.count[:s.symbolLen] {
				524	// already handled
				525	// if (count[s] == s.length) return 0; /* rle special case */
				526
				527	if cnt == 0 {
				528	s.norm[i] = 0
				529	continue
				530	}
				531	if cnt <= lowThreshold {
				532	s.norm[i] = -1
				533	stillToDistribute--
				534	} else {
				535	proba := (int16)((uint64(cnt) * step) >> scale)
				536	if proba < 8 {
				537	restToBeat := vStep * uint64(rtbTable[proba])
				538	v := uint64(cnt)*step - (uint64(proba) << scale)
				539	if v > restToBeat {
				540	proba++
				541	}
				542	}
				543	if proba > largestP {
				544	largestP = proba
				545	largest = i
				546	}
				547	s.norm[i] = proba
				548	stillToDistribute -= proba
				549	}
				550	}
				551
				552	if -stillToDistribute >= (s.norm[largest] >> 1) {
				553	// corner case, need another normalization method
				554	return s.normalizeCount2()
				555	}
				556	s.norm[largest] += stillToDistribute
				557	return nil
				558	}
				559
				560	// Secondary normalization method.
				561	// To be used when primary method fails.
				562	func (s *Scratch) normalizeCount2() error {
				563	const notYetAssigned = -2
				564	var (
				565	distributed uint32
				566	total = uint32(s.br.remain())
				567	tableLog = s.actualTableLog
				568	lowThreshold = uint32(total >> tableLog)
				569	lowOne = uint32((total * 3) >> (tableLog + 1))
				570	)
				571	for i, cnt := range s.count[:s.symbolLen] {
				572	if cnt == 0 {
				573	s.norm[i] = 0
				574	continue
				575	}
				576	if cnt <= lowThreshold {
				577	s.norm[i] = -1
				578	distributed++
				579	total -= cnt
				580	continue
				581	}
				582	if cnt <= lowOne {
				583	s.norm[i] = 1
				584	distributed++
				585	total -= cnt
				586	continue
				587	}
				588	s.norm[i] = notYetAssigned
				589	}
				590	toDistribute := (1 << tableLog) - distributed
				591
				592	if (total / toDistribute) > lowOne {
				593	// risk of rounding to zero
				594	lowOne = uint32((total * 3) / (toDistribute * 2))
				595	for i, cnt := range s.count[:s.symbolLen] {
				596	if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
				597	s.norm[i] = 1
				598	distributed++
				599	total -= cnt
				600	continue
				601	}
				602	}
				603	toDistribute = (1 << tableLog) - distributed
				604	}
				605	if distributed == uint32(s.symbolLen)+1 {
				606	// all values are pretty poor;
				607	// probably incompressible data (should have already been detected);
				608	// find max, then give all remaining points to max
				609	var maxV int
				610	var maxC uint32
				611	for i, cnt := range s.count[:s.symbolLen] {
				612	if cnt > maxC {
				613	maxV = i
				614	maxC = cnt
				615	}
				616	}
				617	s.norm[maxV] += int16(toDistribute)
				618	return nil
				619	}
				620
				621	if total == 0 {
				622	// all of the symbols were low enough for the lowOne or lowThreshold
				623	for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
				624	if s.norm[i] > 0 {
				625	toDistribute--
				626	s.norm[i]++
				627	}
				628	}
				629	return nil
				630	}
				631
				632	var (
				633	vStepLog = 62 - uint64(tableLog)
				634	mid = uint64((1 << (vStepLog - 1)) - 1)
				635	rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
				636	tmpTotal = mid
				637	)
				638	for i, cnt := range s.count[:s.symbolLen] {
				639	if s.norm[i] == notYetAssigned {
				640	var (
				641	end = tmpTotal + uint64(cnt)*rStep
				642	sStart = uint32(tmpTotal >> vStepLog)
				643	sEnd = uint32(end >> vStepLog)
				644	weight = sEnd - sStart
				645	)
				646	if weight < 1 {
				647	return errors.New("weight < 1")
				648	}
				649	s.norm[i] = int16(weight)
				650	tmpTotal = end
				651	}
				652	}
				653	return nil
				654	}
				655
				656	// validateNorm validates the normalized histogram table.
				657	func (s *Scratch) validateNorm() (err error) {
				658	var total int
				659	for _, v := range s.norm[:s.symbolLen] {
				660	if v >= 0 {
				661	total += int(v)
				662	} else {
				663	total -= int(v)
				664	}
				665	}
				666	defer func() {
				667	if err == nil {
				668	return
				669	}
				670	fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
				671	for i, v := range s.norm[:s.symbolLen] {
				672	fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
				673	}
				674	}()
				675	if total != (1 << s.actualTableLog) {
				676	return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
				677	}
				678	for i, v := range s.count[s.symbolLen:] {
				679	if v != 0 {
				680	return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
				681	}
				682	}
				683	return nil
				684	}