Blame - vendor/github.com/klauspost/compress/huff0/decompress.go - voltha-openonu-adapter-go

blob: 41703bba4d65bcd80a68efb9c4c0b8657cc1cce2 [file] [log] [blame]

khenaidoo	7d3c558	2021-08-11 18:09:44 -0400	[diff] [blame]	1	package huff0
				2
				3	import (
				4	"errors"
				5	"fmt"
				6	"io"
				7
				8	"github.com/klauspost/compress/fse"
				9	)
				10
				// Decoding table types.
				type (
					// dTable bundles the lookup tables a decoder works from.
					dTable struct {
						single []dEntrySingle // entries for single-symbol decoding
						double []dEntryDouble // entries for double-symbol decoding
					}

					// dEntrySingle is one entry of the single-symbol decoding table.
					// ReadTable stores the number of bits to consume in the low byte of
					// entry and the decoded symbol in the high byte.
					dEntrySingle struct {
						entry uint16
					}

					// dEntryDouble is one entry of the double-symbol decoding table.
					dEntryDouble struct {
						seq   uint16
						nBits uint8
						len   uint8
					}
				)
				27
				// use8BitTables switches the decoders to the specialised byte-oriented
				// code paths for tables whose table log is at most 8 bits.
				const (
					use8BitTables = true
				)
				30
				31	// ReadTable will read a table from the input.
				32	// The size of the input may be larger than the table definition.
				33	// Any content remaining after the table definition will be returned.
				34	// If no Scratch is provided a new one is allocated.
				35	// The returned Scratch can be used for encoding or decoding input using this table.
				36	func ReadTable(in []byte, s Scratch) (s2 Scratch, remain []byte, err error) {
				37	s, err = s.prepare(in)
				38	if err != nil {
				39	return s, nil, err
				40	}
				41	if len(in) <= 1 {
				42	return s, nil, errors.New("input too small for table")
				43	}
				44	iSize := in[0]
				45	in = in[1:]
				46	if iSize >= 128 {
				47	// Uncompressed
				48	oSize := iSize - 127
				49	iSize = (oSize + 1) / 2
				50	if int(iSize) > len(in) {
				51	return s, nil, errors.New("input too small for table")
				52	}
				53	for n := uint8(0); n < oSize; n += 2 {
				54	v := in[n/2]
				55	s.huffWeight[n] = v >> 4
				56	s.huffWeight[n+1] = v & 15
				57	}
				58	s.symbolLen = uint16(oSize)
				59	in = in[iSize:]
				60	} else {
				61	if len(in) < int(iSize) {
				62	return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
				63	}
				64	// FSE compressed weights
				65	s.fse.DecompressLimit = 255
				66	hw := s.huffWeight[:]
				67	s.fse.Out = hw
				68	b, err := fse.Decompress(in[:iSize], s.fse)
				69	s.fse.Out = nil
				70	if err != nil {
				71	return s, nil, err
				72	}
				73	if len(b) > 255 {
				74	return s, nil, errors.New("corrupt input: output table too large")
				75	}
				76	s.symbolLen = uint16(len(b))
				77	in = in[iSize:]
				78	}
				79
				80	// collect weight stats
				81	var rankStats [16]uint32
				82	weightTotal := uint32(0)
				83	for _, v := range s.huffWeight[:s.symbolLen] {
				84	if v > tableLogMax {
				85	return s, nil, errors.New("corrupt input: weight too large")
				86	}
				87	v2 := v & 15
				88	rankStats[v2]++
				89	// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
				90	weightTotal += (1 << v2) >> 1
				91	}
				92	if weightTotal == 0 {
				93	return s, nil, errors.New("corrupt input: weights zero")
				94	}
				95
				96	// get last non-null symbol weight (implied, total must be 2^n)
				97	{
				98	tableLog := highBit32(weightTotal) + 1
				99	if tableLog > tableLogMax {
				100	return s, nil, errors.New("corrupt input: tableLog too big")
				101	}
				102	s.actualTableLog = uint8(tableLog)
				103	// determine last weight
				104	{
				105	total := uint32(1) << tableLog
				106	rest := total - weightTotal
				107	verif := uint32(1) << highBit32(rest)
				108	lastWeight := highBit32(rest) + 1
				109	if verif != rest {
				110	// last value must be a clean power of 2
				111	return s, nil, errors.New("corrupt input: last value not power of two")
				112	}
				113	s.huffWeight[s.symbolLen] = uint8(lastWeight)
				114	s.symbolLen++
				115	rankStats[lastWeight]++
				116	}
				117	}
				118
				119	if (rankStats[1] < 2) \|\| (rankStats[1]&1 != 0) {
				120	// by construction : at least 2 elts of rank 1, must be even
				121	return s, nil, errors.New("corrupt input: min elt size, even check failed ")
				122	}
				123
				124	// TODO: Choose between single/double symbol decoding
				125
				126	// Calculate starting value for each rank
				127	{
				128	var nextRankStart uint32
				129	for n := uint8(1); n < s.actualTableLog+1; n++ {
				130	current := nextRankStart
				131	nextRankStart += rankStats[n] << (n - 1)
				132	rankStats[n] = current
				133	}
				134	}
				135
				136	// fill DTable (always full size)
				137	tSize := 1 << tableLogMax
				138	if len(s.dt.single) != tSize {
				139	s.dt.single = make([]dEntrySingle, tSize)
				140	}
				141	cTable := s.prevTable
				142	if cap(cTable) < maxSymbolValue+1 {
				143	cTable = make([]cTableEntry, 0, maxSymbolValue+1)
				144	}
				145	cTable = cTable[:maxSymbolValue+1]
				146	s.prevTable = cTable[:s.symbolLen]
				147	s.prevTableLog = s.actualTableLog
				148
				149	for n, w := range s.huffWeight[:s.symbolLen] {
				150	if w == 0 {
				151	cTable[n] = cTableEntry{
				152	val: 0,
				153	nBits: 0,
				154	}
				155	continue
				156	}
				157	length := (uint32(1) << w) >> 1
				158	d := dEntrySingle{
				159	entry: uint16(s.actualTableLog+1-w) \| (uint16(n) << 8),
				160	}
				161
				162	rank := &rankStats[w]
				163	cTable[n] = cTableEntry{
				164	val: uint16(*rank >> (w - 1)),
				165	nBits: uint8(d.entry),
				166	}
				167
				168	single := s.dt.single[rank : rank+length]
				169	for i := range single {
				170	single[i] = d
				171	}
				172	*rank += length
				173	}
				174
				175	return s, in, nil
				176	}
				177
				178	// Decompress1X will decompress a 1X encoded stream.
				179	// The length of the supplied input must match the end of a block exactly.
				180	// Before this is called, the table must be initialized with ReadTable unless
				181	// the encoder re-used the table.
				182	// deprecated: Use the stateless Decoder() to get a concurrent version.
				183	func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
				184	if cap(s.Out) < s.MaxDecodedSize {
				185	s.Out = make([]byte, s.MaxDecodedSize)
				186	}
				187	s.Out = s.Out[:0:s.MaxDecodedSize]
				188	s.Out, err = s.Decoder().Decompress1X(s.Out, in)
				189	return s.Out, err
				190	}
				191
				192	// Decompress4X will decompress a 4X encoded stream.
				193	// Before this is called, the table must be initialized with ReadTable unless
				194	// the encoder re-used the table.
				195	// The length of the supplied input must match the end of a block exactly.
				196	// The destination size of the uncompressed data must be known and provided.
				197	// deprecated: Use the stateless Decoder() to get a concurrent version.
				198	func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
				199	if dstSize > s.MaxDecodedSize {
				200	return nil, ErrMaxDecodedSizeExceeded
				201	}
				202	if cap(s.Out) < dstSize {
				203	s.Out = make([]byte, s.MaxDecodedSize)
				204	}
				205	s.Out = s.Out[:0:dstSize]
				206	s.Out, err = s.Decoder().Decompress4X(s.Out, in)
				207	return s.Out, err
				208	}
				209
				210	// Decoder will return a stateless decoder that can be used by multiple
				211	// decompressors concurrently.
				212	// Before this is called, the table must be initialized with ReadTable.
				213	// The Decoder is still linked to the scratch buffer so that cannot be reused.
				214	// However, it is safe to discard the scratch.
				215	func (s Scratch) Decoder() Decoder {
				216	return &Decoder{
				217	dt: s.dt,
				218	actualTableLog: s.actualTableLog,
				219	}
				220	}
				221
				222	// Decoder provides stateless decoding.
				223	type Decoder struct {
				224	dt dTable
				225	actualTableLog uint8
				226	}
				227
				228	// Decompress1X will decompress a 1X encoded stream.
				229	// The cap of the output buffer will be the maximum decompressed size.
				230	// The length of the supplied input must match the end of a block exactly.
				231	func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
				232	if len(d.dt.single) == 0 {
				233	return nil, errors.New("no table loaded")
				234	}
				235	if use8BitTables && d.actualTableLog <= 8 {
				236	return d.decompress1X8Bit(dst, src)
				237	}
				238	var br bitReaderShifted
				239	err := br.init(src)
				240	if err != nil {
				241	return dst, err
				242	}
				243	maxDecodedSize := cap(dst)
				244	dst = dst[:0]
				245
				246	// Avoid bounds check by always having full sized table.
				247	const tlSize = 1 << tableLogMax
				248	const tlMask = tlSize - 1
				249	dt := d.dt.single[:tlSize]
				250
				251	// Use temp table to avoid bound checks/append penalty.
				252	var buf [256]byte
				253	var off uint8
				254
				255	for br.off >= 8 {
				256	br.fillFast()
				257	v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				258	br.advance(uint8(v.entry))
				259	buf[off+0] = uint8(v.entry >> 8)
				260
				261	v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				262	br.advance(uint8(v.entry))
				263	buf[off+1] = uint8(v.entry >> 8)
				264
				265	// Refill
				266	br.fillFast()
				267
				268	v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				269	br.advance(uint8(v.entry))
				270	buf[off+2] = uint8(v.entry >> 8)
				271
				272	v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				273	br.advance(uint8(v.entry))
				274	buf[off+3] = uint8(v.entry >> 8)
				275
				276	off += 4
				277	if off == 0 {
				278	if len(dst)+256 > maxDecodedSize {
				279	br.close()
				280	return nil, ErrMaxDecodedSizeExceeded
				281	}
				282	dst = append(dst, buf[:]...)
				283	}
				284	}
				285
				286	if len(dst)+int(off) > maxDecodedSize {
				287	br.close()
				288	return nil, ErrMaxDecodedSizeExceeded
				289	}
				290	dst = append(dst, buf[:off]...)
				291
				292	// br < 8, so uint8 is fine
				293	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
				294	for bitsLeft > 0 {
				295	br.fill()
				296	if false && br.bitsRead >= 32 {
				297	if br.off >= 4 {
				298	v := br.in[br.off-4:]
				299	v = v[:4]
				300	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				301	br.value = (br.value << 32) \| uint64(low)
				302	br.bitsRead -= 32
				303	br.off -= 4
				304	} else {
				305	for br.off > 0 {
				306	br.value = (br.value << 8) \| uint64(br.in[br.off-1])
				307	br.bitsRead -= 8
				308	br.off--
				309	}
				310	}
				311	}
				312	if len(dst) >= maxDecodedSize {
				313	br.close()
				314	return nil, ErrMaxDecodedSizeExceeded
				315	}
				316	v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
				317	nBits := uint8(v.entry)
				318	br.advance(nBits)
				319	bitsLeft -= nBits
				320	dst = append(dst, uint8(v.entry>>8))
				321	}
				322	return dst, br.close()
				323	}
				324
				325	// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
				326	// The cap of the output buffer will be the maximum decompressed size.
				327	// The length of the supplied input must match the end of a block exactly.
				328	func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
				329	if d.actualTableLog == 8 {
				330	return d.decompress1X8BitExactly(dst, src)
				331	}
				332	var br bitReaderBytes
				333	err := br.init(src)
				334	if err != nil {
				335	return dst, err
				336	}
				337	maxDecodedSize := cap(dst)
				338	dst = dst[:0]
				339
				340	// Avoid bounds check by always having full sized table.
				341	dt := d.dt.single[:256]
				342
				343	// Use temp table to avoid bound checks/append penalty.
				344	var buf [256]byte
				345	var off uint8
				346
				347	shift := (8 - d.actualTableLog) & 7
				348
				349	//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
				350	for br.off >= 4 {
				351	br.fillFast()
				352	v := dt[br.peekByteFast()>>shift]
				353	br.advance(uint8(v.entry))
				354	buf[off+0] = uint8(v.entry >> 8)
				355
				356	v = dt[br.peekByteFast()>>shift]
				357	br.advance(uint8(v.entry))
				358	buf[off+1] = uint8(v.entry >> 8)
				359
				360	v = dt[br.peekByteFast()>>shift]
				361	br.advance(uint8(v.entry))
				362	buf[off+2] = uint8(v.entry >> 8)
				363
				364	v = dt[br.peekByteFast()>>shift]
				365	br.advance(uint8(v.entry))
				366	buf[off+3] = uint8(v.entry >> 8)
				367
				368	off += 4
				369	if off == 0 {
				370	if len(dst)+256 > maxDecodedSize {
				371	br.close()
				372	return nil, ErrMaxDecodedSizeExceeded
				373	}
				374	dst = append(dst, buf[:]...)
				375	}
				376	}
				377
				378	if len(dst)+int(off) > maxDecodedSize {
				379	br.close()
				380	return nil, ErrMaxDecodedSizeExceeded
				381	}
				382	dst = append(dst, buf[:off]...)
				383
				384	// br < 4, so uint8 is fine
				385	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
				386	for bitsLeft > 0 {
				387	if br.bitsRead >= 64-8 {
				388	for br.off > 0 {
				389	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				390	br.bitsRead -= 8
				391	br.off--
				392	}
				393	}
				394	if len(dst) >= maxDecodedSize {
				395	br.close()
				396	return nil, ErrMaxDecodedSizeExceeded
				397	}
				398	v := dt[br.peekByteFast()>>shift]
				399	nBits := uint8(v.entry)
				400	br.advance(nBits)
				401	bitsLeft -= int8(nBits)
				402	dst = append(dst, uint8(v.entry>>8))
				403	}
				404	return dst, br.close()
				405	}
				406
				407	// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
				408	// The cap of the output buffer will be the maximum decompressed size.
				409	// The length of the supplied input must match the end of a block exactly.
				410	func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
				411	var br bitReaderBytes
				412	err := br.init(src)
				413	if err != nil {
				414	return dst, err
				415	}
				416	maxDecodedSize := cap(dst)
				417	dst = dst[:0]
				418
				419	// Avoid bounds check by always having full sized table.
				420	dt := d.dt.single[:256]
				421
				422	// Use temp table to avoid bound checks/append penalty.
				423	var buf [256]byte
				424	var off uint8
				425
				426	const shift = 0
				427
				428	//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
				429	for br.off >= 4 {
				430	br.fillFast()
				431	v := dt[br.peekByteFast()>>shift]
				432	br.advance(uint8(v.entry))
				433	buf[off+0] = uint8(v.entry >> 8)
				434
				435	v = dt[br.peekByteFast()>>shift]
				436	br.advance(uint8(v.entry))
				437	buf[off+1] = uint8(v.entry >> 8)
				438
				439	v = dt[br.peekByteFast()>>shift]
				440	br.advance(uint8(v.entry))
				441	buf[off+2] = uint8(v.entry >> 8)
				442
				443	v = dt[br.peekByteFast()>>shift]
				444	br.advance(uint8(v.entry))
				445	buf[off+3] = uint8(v.entry >> 8)
				446
				447	off += 4
				448	if off == 0 {
				449	if len(dst)+256 > maxDecodedSize {
				450	br.close()
				451	return nil, ErrMaxDecodedSizeExceeded
				452	}
				453	dst = append(dst, buf[:]...)
				454	}
				455	}
				456
				457	if len(dst)+int(off) > maxDecodedSize {
				458	br.close()
				459	return nil, ErrMaxDecodedSizeExceeded
				460	}
				461	dst = append(dst, buf[:off]...)
				462
				463	// br < 4, so uint8 is fine
				464	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
				465	for bitsLeft > 0 {
				466	if br.bitsRead >= 64-8 {
				467	for br.off > 0 {
				468	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				469	br.bitsRead -= 8
				470	br.off--
				471	}
				472	}
				473	if len(dst) >= maxDecodedSize {
				474	br.close()
				475	return nil, ErrMaxDecodedSizeExceeded
				476	}
				477	v := dt[br.peekByteFast()>>shift]
				478	nBits := uint8(v.entry)
				479	br.advance(nBits)
				480	bitsLeft -= int8(nBits)
				481	dst = append(dst, uint8(v.entry>>8))
				482	}
				483	return dst, br.close()
				484	}
				485
				486	// Decompress4X will decompress a 4X encoded stream.
				487	// The length of the supplied input must match the end of a block exactly.
				488	// The capacity of the dst slice must match the destination size of
				489	// the uncompressed data exactly.
				490	func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
				491	if len(d.dt.single) == 0 {
				492	return nil, errors.New("no table loaded")
				493	}
				494	if len(src) < 6+(4*1) {
				495	return nil, errors.New("input too small")
				496	}
				497	if use8BitTables && d.actualTableLog <= 8 {
				498	return d.decompress4X8bit(dst, src)
				499	}
				500
				501	var br [4]bitReaderShifted
				502	start := 6
				503	for i := 0; i < 3; i++ {
				504	length := int(src[i2]) \| (int(src[i2+1]) << 8)
				505	if start+length >= len(src) {
				506	return nil, errors.New("truncated input (or invalid offset)")
				507	}
				508	err := br[i].init(src[start : start+length])
				509	if err != nil {
				510	return nil, err
				511	}
				512	start += length
				513	}
				514	err := br[3].init(src[start:])
				515	if err != nil {
				516	return nil, err
				517	}
				518
				519	// destination, offset to match first output
				520	dstSize := cap(dst)
				521	dst = dst[:dstSize]
				522	out := dst
				523	dstEvery := (dstSize + 3) / 4
				524
				525	const tlSize = 1 << tableLogMax
				526	const tlMask = tlSize - 1
				527	single := d.dt.single[:tlSize]
				528
				529	// Use temp table to avoid bound checks/append penalty.
				530	var buf [256]byte
				531	var off uint8
				532	var decoded int
				533
				534	// Decode 2 values from each decoder/loop.
				535	const bufoff = 256 / 4
				536	for {
				537	if br[0].off < 4 \|\| br[1].off < 4 \|\| br[2].off < 4 \|\| br[3].off < 4 {
				538	break
				539	}
				540
				541	{
				542	const stream = 0
				543	const stream2 = 1
				544	br[stream].fillFast()
				545	br[stream2].fillFast()
				546
				547	val := br[stream].peekBitsFast(d.actualTableLog)
				548	v := single[val&tlMask]
				549	br[stream].advance(uint8(v.entry))
				550	buf[off+bufoff*stream] = uint8(v.entry >> 8)
				551
				552	val2 := br[stream2].peekBitsFast(d.actualTableLog)
				553	v2 := single[val2&tlMask]
				554	br[stream2].advance(uint8(v2.entry))
				555	buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
				556
				557	val = br[stream].peekBitsFast(d.actualTableLog)
				558	v = single[val&tlMask]
				559	br[stream].advance(uint8(v.entry))
				560	buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
				561
				562	val2 = br[stream2].peekBitsFast(d.actualTableLog)
				563	v2 = single[val2&tlMask]
				564	br[stream2].advance(uint8(v2.entry))
				565	buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
				566	}
				567
				568	{
				569	const stream = 2
				570	const stream2 = 3
				571	br[stream].fillFast()
				572	br[stream2].fillFast()
				573
				574	val := br[stream].peekBitsFast(d.actualTableLog)
				575	v := single[val&tlMask]
				576	br[stream].advance(uint8(v.entry))
				577	buf[off+bufoff*stream] = uint8(v.entry >> 8)
				578
				579	val2 := br[stream2].peekBitsFast(d.actualTableLog)
				580	v2 := single[val2&tlMask]
				581	br[stream2].advance(uint8(v2.entry))
				582	buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
				583
				584	val = br[stream].peekBitsFast(d.actualTableLog)
				585	v = single[val&tlMask]
				586	br[stream].advance(uint8(v.entry))
				587	buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
				588
				589	val2 = br[stream2].peekBitsFast(d.actualTableLog)
				590	v2 = single[val2&tlMask]
				591	br[stream2].advance(uint8(v2.entry))
				592	buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
				593	}
				594
				595	off += 2
				596
				597	if off == bufoff {
				598	if bufoff > dstEvery {
				599	return nil, errors.New("corruption detected: stream overrun 1")
				600	}
				601	copy(out, buf[:bufoff])
				602	copy(out[dstEvery:], buf[bufoff:bufoff*2])
				603	copy(out[dstEvery2:], buf[bufoff2:bufoff*3])
				604	copy(out[dstEvery3:], buf[bufoff3:bufoff*4])
				605	off = 0
				606	out = out[bufoff:]
				607	decoded += 256
				608	// There must at least be 3 buffers left.
				609	if len(out) < dstEvery*3 {
				610	return nil, errors.New("corruption detected: stream overrun 2")
				611	}
				612	}
				613	}
				614	if off > 0 {
				615	ioff := int(off)
				616	if len(out) < dstEvery*3+ioff {
				617	return nil, errors.New("corruption detected: stream overrun 3")
				618	}
				619	copy(out, buf[:off])
				620	copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
				621	copy(out[dstEvery2:dstEvery2+ioff], buf[bufoff2:bufoff3])
				622	copy(out[dstEvery3:dstEvery3+ioff], buf[bufoff3:bufoff4])
				623	decoded += int(off) * 4
				624	out = out[off:]
				625	}
				626
				627	// Decode remaining.
				628	for i := range br {
				629	offset := dstEvery * i
				630	br := &br[i]
				631	bitsLeft := br.off*8 + uint(64-br.bitsRead)
				632	for bitsLeft > 0 {
				633	br.fill()
				634	if false && br.bitsRead >= 32 {
				635	if br.off >= 4 {
				636	v := br.in[br.off-4:]
				637	v = v[:4]
				638	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				639	br.value = (br.value << 32) \| uint64(low)
				640	br.bitsRead -= 32
				641	br.off -= 4
				642	} else {
				643	for br.off > 0 {
				644	br.value = (br.value << 8) \| uint64(br.in[br.off-1])
				645	br.bitsRead -= 8
				646	br.off--
				647	}
				648	}
				649	}
				650	// end inline...
				651	if offset >= len(out) {
				652	return nil, errors.New("corruption detected: stream overrun 4")
				653	}
				654
				655	// Read value and increment offset.
				656	val := br.peekBitsFast(d.actualTableLog)
				657	v := single[val&tlMask].entry
				658	nBits := uint8(v)
				659	br.advance(nBits)
				660	bitsLeft -= uint(nBits)
				661	out[offset] = uint8(v >> 8)
				662	offset++
				663	}
				664	decoded += offset - dstEvery*i
				665	err = br.close()
				666	if err != nil {
				667	return nil, err
				668	}
				669	}
				670	if dstSize != decoded {
				671	return nil, errors.New("corruption detected: short output block")
				672	}
				673	return dst, nil
				674	}
				675
				676	// Decompress4X will decompress a 4X encoded stream.
				677	// The length of the supplied input must match the end of a block exactly.
				678	// The capacity of the dst slice must match the destination size of
				679	// the uncompressed data exactly.
				680	func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
				681	if d.actualTableLog == 8 {
				682	return d.decompress4X8bitExactly(dst, src)
				683	}
				684
				685	var br [4]bitReaderBytes
				686	start := 6
				687	for i := 0; i < 3; i++ {
				688	length := int(src[i2]) \| (int(src[i2+1]) << 8)
				689	if start+length >= len(src) {
				690	return nil, errors.New("truncated input (or invalid offset)")
				691	}
				692	err := br[i].init(src[start : start+length])
				693	if err != nil {
				694	return nil, err
				695	}
				696	start += length
				697	}
				698	err := br[3].init(src[start:])
				699	if err != nil {
				700	return nil, err
				701	}
				702
				703	// destination, offset to match first output
				704	dstSize := cap(dst)
				705	dst = dst[:dstSize]
				706	out := dst
				707	dstEvery := (dstSize + 3) / 4
				708
				709	shift := (8 - d.actualTableLog) & 7
				710
				711	const tlSize = 1 << 8
				712	const tlMask = tlSize - 1
				713	single := d.dt.single[:tlSize]
				714
				715	// Use temp table to avoid bound checks/append penalty.
				716	var buf [256]byte
				717	var off uint8
				718	var decoded int
				719
				720	// Decode 4 values from each decoder/loop.
				721	const bufoff = 256 / 4
				722	for {
				723	if br[0].off < 4 \|\| br[1].off < 4 \|\| br[2].off < 4 \|\| br[3].off < 4 {
				724	break
				725	}
				726
				727	{
				728	// Interleave 2 decodes.
				729	const stream = 0
				730	const stream2 = 1
				731	br[stream].fillFast()
				732	br[stream2].fillFast()
				733
				734	v := single[br[stream].peekByteFast()>>shift].entry
				735	buf[off+bufoff*stream] = uint8(v >> 8)
				736	br[stream].advance(uint8(v))
				737
				738	v2 := single[br[stream2].peekByteFast()>>shift].entry
				739	buf[off+bufoff*stream2] = uint8(v2 >> 8)
				740	br[stream2].advance(uint8(v2))
				741
				742	v = single[br[stream].peekByteFast()>>shift].entry
				743	buf[off+bufoff*stream+1] = uint8(v >> 8)
				744	br[stream].advance(uint8(v))
				745
				746	v2 = single[br[stream2].peekByteFast()>>shift].entry
				747	buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
				748	br[stream2].advance(uint8(v2))
				749
				750	v = single[br[stream].peekByteFast()>>shift].entry
				751	buf[off+bufoff*stream+2] = uint8(v >> 8)
				752	br[stream].advance(uint8(v))
				753
				754	v2 = single[br[stream2].peekByteFast()>>shift].entry
				755	buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
				756	br[stream2].advance(uint8(v2))
				757
				758	v = single[br[stream].peekByteFast()>>shift].entry
				759	buf[off+bufoff*stream+3] = uint8(v >> 8)
				760	br[stream].advance(uint8(v))
				761
				762	v2 = single[br[stream2].peekByteFast()>>shift].entry
				763	buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
				764	br[stream2].advance(uint8(v2))
				765	}
				766
				767	{
				768	const stream = 2
				769	const stream2 = 3
				770	br[stream].fillFast()
				771	br[stream2].fillFast()
				772
				773	v := single[br[stream].peekByteFast()>>shift].entry
				774	buf[off+bufoff*stream] = uint8(v >> 8)
				775	br[stream].advance(uint8(v))
				776
				777	v2 := single[br[stream2].peekByteFast()>>shift].entry
				778	buf[off+bufoff*stream2] = uint8(v2 >> 8)
				779	br[stream2].advance(uint8(v2))
				780
				781	v = single[br[stream].peekByteFast()>>shift].entry
				782	buf[off+bufoff*stream+1] = uint8(v >> 8)
				783	br[stream].advance(uint8(v))
				784
				785	v2 = single[br[stream2].peekByteFast()>>shift].entry
				786	buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
				787	br[stream2].advance(uint8(v2))
				788
				789	v = single[br[stream].peekByteFast()>>shift].entry
				790	buf[off+bufoff*stream+2] = uint8(v >> 8)
				791	br[stream].advance(uint8(v))
				792
				793	v2 = single[br[stream2].peekByteFast()>>shift].entry
				794	buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
				795	br[stream2].advance(uint8(v2))
				796
				797	v = single[br[stream].peekByteFast()>>shift].entry
				798	buf[off+bufoff*stream+3] = uint8(v >> 8)
				799	br[stream].advance(uint8(v))
				800
				801	v2 = single[br[stream2].peekByteFast()>>shift].entry
				802	buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
				803	br[stream2].advance(uint8(v2))
				804	}
				805
				806	off += 4
				807
				808	if off == bufoff {
				809	if bufoff > dstEvery {
				810	return nil, errors.New("corruption detected: stream overrun 1")
				811	}
				812	copy(out, buf[:bufoff])
				813	copy(out[dstEvery:], buf[bufoff:bufoff*2])
				814	copy(out[dstEvery2:], buf[bufoff2:bufoff*3])
				815	copy(out[dstEvery3:], buf[bufoff3:bufoff*4])
				816	off = 0
				817	out = out[bufoff:]
				818	decoded += 256
				819	// There must at least be 3 buffers left.
				820	if len(out) < dstEvery*3 {
				821	return nil, errors.New("corruption detected: stream overrun 2")
				822	}
				823	}
				824	}
				825	if off > 0 {
				826	ioff := int(off)
				827	if len(out) < dstEvery*3+ioff {
				828	return nil, errors.New("corruption detected: stream overrun 3")
				829	}
				830	copy(out, buf[:off])
				831	copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
				832	copy(out[dstEvery2:dstEvery2+ioff], buf[bufoff2:bufoff3])
				833	copy(out[dstEvery3:dstEvery3+ioff], buf[bufoff3:bufoff4])
				834	decoded += int(off) * 4
				835	out = out[off:]
				836	}
				837
				838	// Decode remaining.
				839	for i := range br {
				840	offset := dstEvery * i
				841	br := &br[i]
				842	bitsLeft := int(br.off*8) + int(64-br.bitsRead)
				843	for bitsLeft > 0 {
				844	if br.finished() {
				845	return nil, io.ErrUnexpectedEOF
				846	}
				847	if br.bitsRead >= 56 {
				848	if br.off >= 4 {
				849	v := br.in[br.off-4:]
				850	v = v[:4]
				851	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				852	br.value \|= uint64(low) << (br.bitsRead - 32)
				853	br.bitsRead -= 32
				854	br.off -= 4
				855	} else {
				856	for br.off > 0 {
				857	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				858	br.bitsRead -= 8
				859	br.off--
				860	}
				861	}
				862	}
				863	// end inline...
				864	if offset >= len(out) {
				865	return nil, errors.New("corruption detected: stream overrun 4")
				866	}
				867
				868	// Read value and increment offset.
				869	v := single[br.peekByteFast()>>shift].entry
				870	nBits := uint8(v)
				871	br.advance(nBits)
				872	bitsLeft -= int(nBits)
				873	out[offset] = uint8(v >> 8)
				874	offset++
				875	}
				876	decoded += offset - dstEvery*i
				877	err = br.close()
				878	if err != nil {
				879	return nil, err
				880	}
				881	}
				882	if dstSize != decoded {
				883	return nil, errors.New("corruption detected: short output block")
				884	}
				885	return dst, nil
				886	}
				887
				888	// Decompress4X will decompress a 4X encoded stream.
				889	// The length of the supplied input must match the end of a block exactly.
				890	// The capacity of the dst slice must match the destination size of
				891	// the uncompressed data exactly.
				892	func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
				893	var br [4]bitReaderBytes
				894	start := 6
				895	for i := 0; i < 3; i++ {
				896	length := int(src[i2]) \| (int(src[i2+1]) << 8)
				897	if start+length >= len(src) {
				898	return nil, errors.New("truncated input (or invalid offset)")
				899	}
				900	err := br[i].init(src[start : start+length])
				901	if err != nil {
				902	return nil, err
				903	}
				904	start += length
				905	}
				906	err := br[3].init(src[start:])
				907	if err != nil {
				908	return nil, err
				909	}
				910
				911	// destination, offset to match first output
				912	dstSize := cap(dst)
				913	dst = dst[:dstSize]
				914	out := dst
				915	dstEvery := (dstSize + 3) / 4
				916
				917	const shift = 0
				918	const tlSize = 1 << 8
				919	const tlMask = tlSize - 1
				920	single := d.dt.single[:tlSize]
				921
				922	// Use temp table to avoid bound checks/append penalty.
				923	var buf [256]byte
				924	var off uint8
				925	var decoded int
				926
				927	// Decode 4 values from each decoder/loop.
				928	const bufoff = 256 / 4
				929	for {
				930	if br[0].off < 4 \|\| br[1].off < 4 \|\| br[2].off < 4 \|\| br[3].off < 4 {
				931	break
				932	}
				933
				934	{
				935	// Interleave 2 decodes.
				936	const stream = 0
				937	const stream2 = 1
				938	br[stream].fillFast()
				939	br[stream2].fillFast()
				940
				941	v := single[br[stream].peekByteFast()>>shift].entry
				942	buf[off+bufoff*stream] = uint8(v >> 8)
				943	br[stream].advance(uint8(v))
				944
				945	v2 := single[br[stream2].peekByteFast()>>shift].entry
				946	buf[off+bufoff*stream2] = uint8(v2 >> 8)
				947	br[stream2].advance(uint8(v2))
				948
				949	v = single[br[stream].peekByteFast()>>shift].entry
				950	buf[off+bufoff*stream+1] = uint8(v >> 8)
				951	br[stream].advance(uint8(v))
				952
				953	v2 = single[br[stream2].peekByteFast()>>shift].entry
				954	buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
				955	br[stream2].advance(uint8(v2))
				956
				957	v = single[br[stream].peekByteFast()>>shift].entry
				958	buf[off+bufoff*stream+2] = uint8(v >> 8)
				959	br[stream].advance(uint8(v))
				960
				961	v2 = single[br[stream2].peekByteFast()>>shift].entry
				962	buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
				963	br[stream2].advance(uint8(v2))
				964
				965	v = single[br[stream].peekByteFast()>>shift].entry
				966	buf[off+bufoff*stream+3] = uint8(v >> 8)
				967	br[stream].advance(uint8(v))
				968
				969	v2 = single[br[stream2].peekByteFast()>>shift].entry
				970	buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
				971	br[stream2].advance(uint8(v2))
				972	}
				973
				974	{
				975	const stream = 2
				976	const stream2 = 3
				977	br[stream].fillFast()
				978	br[stream2].fillFast()
				979
				980	v := single[br[stream].peekByteFast()>>shift].entry
				981	buf[off+bufoff*stream] = uint8(v >> 8)
				982	br[stream].advance(uint8(v))
				983
				984	v2 := single[br[stream2].peekByteFast()>>shift].entry
				985	buf[off+bufoff*stream2] = uint8(v2 >> 8)
				986	br[stream2].advance(uint8(v2))
				987
				988	v = single[br[stream].peekByteFast()>>shift].entry
				989	buf[off+bufoff*stream+1] = uint8(v >> 8)
				990	br[stream].advance(uint8(v))
				991
				992	v2 = single[br[stream2].peekByteFast()>>shift].entry
				993	buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
				994	br[stream2].advance(uint8(v2))
				995
				996	v = single[br[stream].peekByteFast()>>shift].entry
				997	buf[off+bufoff*stream+2] = uint8(v >> 8)
				998	br[stream].advance(uint8(v))
				999
				1000	v2 = single[br[stream2].peekByteFast()>>shift].entry
				1001	buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
				1002	br[stream2].advance(uint8(v2))
				1003
				1004	v = single[br[stream].peekByteFast()>>shift].entry
				1005	buf[off+bufoff*stream+3] = uint8(v >> 8)
				1006	br[stream].advance(uint8(v))
				1007
				1008	v2 = single[br[stream2].peekByteFast()>>shift].entry
				1009	buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
				1010	br[stream2].advance(uint8(v2))
				1011	}
				1012
				1013	off += 4
				1014
				1015	if off == bufoff {
				1016	if bufoff > dstEvery {
				1017	return nil, errors.New("corruption detected: stream overrun 1")
				1018	}
				1019	copy(out, buf[:bufoff])
				1020	copy(out[dstEvery:], buf[bufoff:bufoff*2])
				1021	copy(out[dstEvery2:], buf[bufoff2:bufoff*3])
				1022	copy(out[dstEvery3:], buf[bufoff3:bufoff*4])
				1023	off = 0
				1024	out = out[bufoff:]
				1025	decoded += 256
				1026	// There must at least be 3 buffers left.
				1027	if len(out) < dstEvery*3 {
				1028	return nil, errors.New("corruption detected: stream overrun 2")
				1029	}
				1030	}
				1031	}
				1032	if off > 0 {
				1033	ioff := int(off)
				1034	if len(out) < dstEvery*3+ioff {
				1035	return nil, errors.New("corruption detected: stream overrun 3")
				1036	}
				1037	copy(out, buf[:off])
				1038	copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
				1039	copy(out[dstEvery2:dstEvery2+ioff], buf[bufoff2:bufoff3])
				1040	copy(out[dstEvery3:dstEvery3+ioff], buf[bufoff3:bufoff4])
				1041	decoded += int(off) * 4
				1042	out = out[off:]
				1043	}
				1044
				1045	// Decode remaining.
				1046	for i := range br {
				1047	offset := dstEvery * i
				1048	br := &br[i]
				1049	bitsLeft := int(br.off*8) + int(64-br.bitsRead)
				1050	for bitsLeft > 0 {
				1051	if br.finished() {
				1052	return nil, io.ErrUnexpectedEOF
				1053	}
				1054	if br.bitsRead >= 56 {
				1055	if br.off >= 4 {
				1056	v := br.in[br.off-4:]
				1057	v = v[:4]
				1058	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				1059	br.value \|= uint64(low) << (br.bitsRead - 32)
				1060	br.bitsRead -= 32
				1061	br.off -= 4
				1062	} else {
				1063	for br.off > 0 {
				1064	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				1065	br.bitsRead -= 8
				1066	br.off--
				1067	}
				1068	}
				1069	}
				1070	// end inline...
				1071	if offset >= len(out) {
				1072	return nil, errors.New("corruption detected: stream overrun 4")
				1073	}
				1074
				1075	// Read value and increment offset.
				1076	v := single[br.peekByteFast()>>shift].entry
				1077	nBits := uint8(v)
				1078	br.advance(nBits)
				1079	bitsLeft -= int(nBits)
				1080	out[offset] = uint8(v >> 8)
				1081	offset++
				1082	}
				1083	decoded += offset - dstEvery*i
				1084	err = br.close()
				1085	if err != nil {
				1086	return nil, err
				1087	}
				1088	}
				1089	if dstSize != decoded {
				1090	return nil, errors.New("corruption detected: short output block")
				1091	}
				1092	return dst, nil
				1093	}
				1094
				1095	// matches will compare a decoding table to a coding table.
				1096	// Errors are written to the writer.
				1097	// Nothing will be written if table is ok.
				1098	func (s *Scratch) matches(ct cTable, w io.Writer) {
				1099	if s == nil \|\| len(s.dt.single) == 0 {
				1100	return
				1101	}
				1102	dt := s.dt.single[:1<<s.actualTableLog]
				1103	tablelog := s.actualTableLog
				1104	ok := 0
				1105	broken := 0
				1106	for sym, enc := range ct {
				1107	errs := 0
				1108	broken++
				1109	if enc.nBits == 0 {
				1110	for _, dec := range dt {
				1111	if uint8(dec.entry>>8) == byte(sym) {
				1112	fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
				1113	errs++
				1114	break
				1115	}
				1116	}
				1117	if errs == 0 {
				1118	broken--
				1119	}
				1120	continue
				1121	}
				1122	// Unused bits in input
				1123	ub := tablelog - enc.nBits
				1124	top := enc.val << ub
				1125	// decoder looks at top bits.
				1126	dec := dt[top]
				1127	if uint8(dec.entry) != enc.nBits {
				1128	fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
				1129	errs++
				1130	}
				1131	if uint8(dec.entry>>8) != uint8(sym) {
				1132	fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
				1133	errs++
				1134	}
				1135	if errs > 0 {
				1136	fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
				1137	continue
				1138	}
				1139	// Ensure that all combinations are covered.
				1140	for i := uint16(0); i < (1 << ub); i++ {
				1141	vval := top \| i
				1142	dec := dt[vval]
				1143	if uint8(dec.entry) != enc.nBits {
				1144	fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
				1145	errs++
				1146	}
				1147	if uint8(dec.entry>>8) != uint8(sym) {
				1148	fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
				1149	errs++
				1150	}
				1151	if errs > 20 {
				1152	fmt.Fprintf(w, "%d errros, stopping\n", errs)
				1153	break
				1154	}
				1155	}
				1156	if errs == 0 {
				1157	ok++
				1158	broken--
				1159	}
				1160	}
				1161	if broken > 0 {
				1162	fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
				1163	}
				1164	}