Blame - vendor/github.com/klauspost/compress/huff0/decompress.go - kafka-topic-exporter

blob: 2668b64d37f0b1e609b4719012e183162d9eebf5 [file] [log] [blame]

kesavand	c71914f	2022-03-25 11:19:03 +0530	[diff] [blame^]	1	package huff0
				2
				3	import (
				4	"errors"
				5	"fmt"
				6	"io"
				7	"sync"
				8
				9	"github.com/klauspost/compress/fse"
				10	)
				11
				12	type dTable struct {
				13	single []dEntrySingle
				14	double []dEntryDouble
				15	}
				16
				17	// single-symbols decoding
				18	type dEntrySingle struct {
				19	entry uint16
				20	}
				21
				22	// double-symbols decoding
				23	type dEntryDouble struct {
				24	seq [4]byte
				25	nBits uint8
				26	len uint8
				27	}
				28
				29	// Uses special code for all tables that are < 8 bits.
				30	const use8BitTables = true
				31
				32	// ReadTable will read a table from the input.
				33	// The size of the input may be larger than the table definition.
				34	// Any content remaining after the table definition will be returned.
				35	// If no Scratch is provided a new one is allocated.
				36	// The returned Scratch can be used for encoding or decoding input using this table.
				37	func ReadTable(in []byte, s Scratch) (s2 Scratch, remain []byte, err error) {
				38	s, err = s.prepare(in)
				39	if err != nil {
				40	return s, nil, err
				41	}
				42	if len(in) <= 1 {
				43	return s, nil, errors.New("input too small for table")
				44	}
				45	iSize := in[0]
				46	in = in[1:]
				47	if iSize >= 128 {
				48	// Uncompressed
				49	oSize := iSize - 127
				50	iSize = (oSize + 1) / 2
				51	if int(iSize) > len(in) {
				52	return s, nil, errors.New("input too small for table")
				53	}
				54	for n := uint8(0); n < oSize; n += 2 {
				55	v := in[n/2]
				56	s.huffWeight[n] = v >> 4
				57	s.huffWeight[n+1] = v & 15
				58	}
				59	s.symbolLen = uint16(oSize)
				60	in = in[iSize:]
				61	} else {
				62	if len(in) < int(iSize) {
				63	return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
				64	}
				65	// FSE compressed weights
				66	s.fse.DecompressLimit = 255
				67	hw := s.huffWeight[:]
				68	s.fse.Out = hw
				69	b, err := fse.Decompress(in[:iSize], s.fse)
				70	s.fse.Out = nil
				71	if err != nil {
				72	return s, nil, err
				73	}
				74	if len(b) > 255 {
				75	return s, nil, errors.New("corrupt input: output table too large")
				76	}
				77	s.symbolLen = uint16(len(b))
				78	in = in[iSize:]
				79	}
				80
				81	// collect weight stats
				82	var rankStats [16]uint32
				83	weightTotal := uint32(0)
				84	for _, v := range s.huffWeight[:s.symbolLen] {
				85	if v > tableLogMax {
				86	return s, nil, errors.New("corrupt input: weight too large")
				87	}
				88	v2 := v & 15
				89	rankStats[v2]++
				90	// (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
				91	weightTotal += (1 << v2) >> 1
				92	}
				93	if weightTotal == 0 {
				94	return s, nil, errors.New("corrupt input: weights zero")
				95	}
				96
				97	// get last non-null symbol weight (implied, total must be 2^n)
				98	{
				99	tableLog := highBit32(weightTotal) + 1
				100	if tableLog > tableLogMax {
				101	return s, nil, errors.New("corrupt input: tableLog too big")
				102	}
				103	s.actualTableLog = uint8(tableLog)
				104	// determine last weight
				105	{
				106	total := uint32(1) << tableLog
				107	rest := total - weightTotal
				108	verif := uint32(1) << highBit32(rest)
				109	lastWeight := highBit32(rest) + 1
				110	if verif != rest {
				111	// last value must be a clean power of 2
				112	return s, nil, errors.New("corrupt input: last value not power of two")
				113	}
				114	s.huffWeight[s.symbolLen] = uint8(lastWeight)
				115	s.symbolLen++
				116	rankStats[lastWeight]++
				117	}
				118	}
				119
				120	if (rankStats[1] < 2) \|\| (rankStats[1]&1 != 0) {
				121	// by construction : at least 2 elts of rank 1, must be even
				122	return s, nil, errors.New("corrupt input: min elt size, even check failed ")
				123	}
				124
				125	// TODO: Choose between single/double symbol decoding
				126
				127	// Calculate starting value for each rank
				128	{
				129	var nextRankStart uint32
				130	for n := uint8(1); n < s.actualTableLog+1; n++ {
				131	current := nextRankStart
				132	nextRankStart += rankStats[n] << (n - 1)
				133	rankStats[n] = current
				134	}
				135	}
				136
				137	// fill DTable (always full size)
				138	tSize := 1 << tableLogMax
				139	if len(s.dt.single) != tSize {
				140	s.dt.single = make([]dEntrySingle, tSize)
				141	}
				142	cTable := s.prevTable
				143	if cap(cTable) < maxSymbolValue+1 {
				144	cTable = make([]cTableEntry, 0, maxSymbolValue+1)
				145	}
				146	cTable = cTable[:maxSymbolValue+1]
				147	s.prevTable = cTable[:s.symbolLen]
				148	s.prevTableLog = s.actualTableLog
				149
				150	for n, w := range s.huffWeight[:s.symbolLen] {
				151	if w == 0 {
				152	cTable[n] = cTableEntry{
				153	val: 0,
				154	nBits: 0,
				155	}
				156	continue
				157	}
				158	length := (uint32(1) << w) >> 1
				159	d := dEntrySingle{
				160	entry: uint16(s.actualTableLog+1-w) \| (uint16(n) << 8),
				161	}
				162
				163	rank := &rankStats[w]
				164	cTable[n] = cTableEntry{
				165	val: uint16(*rank >> (w - 1)),
				166	nBits: uint8(d.entry),
				167	}
				168
				169	single := s.dt.single[rank : rank+length]
				170	for i := range single {
				171	single[i] = d
				172	}
				173	*rank += length
				174	}
				175
				176	return s, in, nil
				177	}
				178
				179	// Decompress1X will decompress a 1X encoded stream.
				180	// The length of the supplied input must match the end of a block exactly.
				181	// Before this is called, the table must be initialized with ReadTable unless
				182	// the encoder re-used the table.
				183	// deprecated: Use the stateless Decoder() to get a concurrent version.
				184	func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
				185	if cap(s.Out) < s.MaxDecodedSize {
				186	s.Out = make([]byte, s.MaxDecodedSize)
				187	}
				188	s.Out = s.Out[:0:s.MaxDecodedSize]
				189	s.Out, err = s.Decoder().Decompress1X(s.Out, in)
				190	return s.Out, err
				191	}
				192
				193	// Decompress4X will decompress a 4X encoded stream.
				194	// Before this is called, the table must be initialized with ReadTable unless
				195	// the encoder re-used the table.
				196	// The length of the supplied input must match the end of a block exactly.
				197	// The destination size of the uncompressed data must be known and provided.
				198	// deprecated: Use the stateless Decoder() to get a concurrent version.
				199	func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
				200	if dstSize > s.MaxDecodedSize {
				201	return nil, ErrMaxDecodedSizeExceeded
				202	}
				203	if cap(s.Out) < dstSize {
				204	s.Out = make([]byte, s.MaxDecodedSize)
				205	}
				206	s.Out = s.Out[:0:dstSize]
				207	s.Out, err = s.Decoder().Decompress4X(s.Out, in)
				208	return s.Out, err
				209	}
				210
				211	// Decoder will return a stateless decoder that can be used by multiple
				212	// decompressors concurrently.
				213	// Before this is called, the table must be initialized with ReadTable.
				214	// The Decoder is still linked to the scratch buffer so that cannot be reused.
				215	// However, it is safe to discard the scratch.
				216	func (s Scratch) Decoder() Decoder {
				217	return &Decoder{
				218	dt: s.dt,
				219	actualTableLog: s.actualTableLog,
				220	bufs: &s.decPool,
				221	}
				222	}
				223
				224	// Decoder provides stateless decoding.
				225	type Decoder struct {
				226	dt dTable
				227	actualTableLog uint8
				228	bufs *sync.Pool
				229	}
				230
				231	func (d Decoder) buffer() [4][256]byte {
				232	buf, ok := d.bufs.Get().(*[4][256]byte)
				233	if ok {
				234	return buf
				235	}
				236	return &[4][256]byte{}
				237	}
				238
				239	// Decompress1X will decompress a 1X encoded stream.
				240	// The cap of the output buffer will be the maximum decompressed size.
				241	// The length of the supplied input must match the end of a block exactly.
				242	func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
				243	if len(d.dt.single) == 0 {
				244	return nil, errors.New("no table loaded")
				245	}
				246	if use8BitTables && d.actualTableLog <= 8 {
				247	return d.decompress1X8Bit(dst, src)
				248	}
				249	var br bitReaderShifted
				250	err := br.init(src)
				251	if err != nil {
				252	return dst, err
				253	}
				254	maxDecodedSize := cap(dst)
				255	dst = dst[:0]
				256
				257	// Avoid bounds check by always having full sized table.
				258	const tlSize = 1 << tableLogMax
				259	const tlMask = tlSize - 1
				260	dt := d.dt.single[:tlSize]
				261
				262	// Use temp table to avoid bound checks/append penalty.
				263	bufs := d.buffer()
				264	buf := &bufs[0]
				265	var off uint8
				266
				267	for br.off >= 8 {
				268	br.fillFast()
				269	v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				270	br.advance(uint8(v.entry))
				271	buf[off+0] = uint8(v.entry >> 8)
				272
				273	v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				274	br.advance(uint8(v.entry))
				275	buf[off+1] = uint8(v.entry >> 8)
				276
				277	// Refill
				278	br.fillFast()
				279
				280	v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				281	br.advance(uint8(v.entry))
				282	buf[off+2] = uint8(v.entry >> 8)
				283
				284	v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
				285	br.advance(uint8(v.entry))
				286	buf[off+3] = uint8(v.entry >> 8)
				287
				288	off += 4
				289	if off == 0 {
				290	if len(dst)+256 > maxDecodedSize {
				291	br.close()
				292	d.bufs.Put(bufs)
				293	return nil, ErrMaxDecodedSizeExceeded
				294	}
				295	dst = append(dst, buf[:]...)
				296	}
				297	}
				298
				299	if len(dst)+int(off) > maxDecodedSize {
				300	d.bufs.Put(bufs)
				301	br.close()
				302	return nil, ErrMaxDecodedSizeExceeded
				303	}
				304	dst = append(dst, buf[:off]...)
				305
				306	// br < 8, so uint8 is fine
				307	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
				308	for bitsLeft > 0 {
				309	br.fill()
				310	if false && br.bitsRead >= 32 {
				311	if br.off >= 4 {
				312	v := br.in[br.off-4:]
				313	v = v[:4]
				314	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				315	br.value = (br.value << 32) \| uint64(low)
				316	br.bitsRead -= 32
				317	br.off -= 4
				318	} else {
				319	for br.off > 0 {
				320	br.value = (br.value << 8) \| uint64(br.in[br.off-1])
				321	br.bitsRead -= 8
				322	br.off--
				323	}
				324	}
				325	}
				326	if len(dst) >= maxDecodedSize {
				327	d.bufs.Put(bufs)
				328	br.close()
				329	return nil, ErrMaxDecodedSizeExceeded
				330	}
				331	v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
				332	nBits := uint8(v.entry)
				333	br.advance(nBits)
				334	bitsLeft -= nBits
				335	dst = append(dst, uint8(v.entry>>8))
				336	}
				337	d.bufs.Put(bufs)
				338	return dst, br.close()
				339	}
				340
				341	// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
				342	// The cap of the output buffer will be the maximum decompressed size.
				343	// The length of the supplied input must match the end of a block exactly.
				344	func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
				345	if d.actualTableLog == 8 {
				346	return d.decompress1X8BitExactly(dst, src)
				347	}
				348	var br bitReaderBytes
				349	err := br.init(src)
				350	if err != nil {
				351	return dst, err
				352	}
				353	maxDecodedSize := cap(dst)
				354	dst = dst[:0]
				355
				356	// Avoid bounds check by always having full sized table.
				357	dt := d.dt.single[:256]
				358
				359	// Use temp table to avoid bound checks/append penalty.
				360	bufs := d.buffer()
				361	buf := &bufs[0]
				362	var off uint8
				363
				364	switch d.actualTableLog {
				365	case 8:
				366	const shift = 8 - 8
				367	for br.off >= 4 {
				368	br.fillFast()
				369	v := dt[uint8(br.value>>(56+shift))]
				370	br.advance(uint8(v.entry))
				371	buf[off+0] = uint8(v.entry >> 8)
				372
				373	v = dt[uint8(br.value>>(56+shift))]
				374	br.advance(uint8(v.entry))
				375	buf[off+1] = uint8(v.entry >> 8)
				376
				377	v = dt[uint8(br.value>>(56+shift))]
				378	br.advance(uint8(v.entry))
				379	buf[off+2] = uint8(v.entry >> 8)
				380
				381	v = dt[uint8(br.value>>(56+shift))]
				382	br.advance(uint8(v.entry))
				383	buf[off+3] = uint8(v.entry >> 8)
				384
				385	off += 4
				386	if off == 0 {
				387	if len(dst)+256 > maxDecodedSize {
				388	br.close()
				389	d.bufs.Put(bufs)
				390	return nil, ErrMaxDecodedSizeExceeded
				391	}
				392	dst = append(dst, buf[:]...)
				393	}
				394	}
				395	case 7:
				396	const shift = 8 - 7
				397	for br.off >= 4 {
				398	br.fillFast()
				399	v := dt[uint8(br.value>>(56+shift))]
				400	br.advance(uint8(v.entry))
				401	buf[off+0] = uint8(v.entry >> 8)
				402
				403	v = dt[uint8(br.value>>(56+shift))]
				404	br.advance(uint8(v.entry))
				405	buf[off+1] = uint8(v.entry >> 8)
				406
				407	v = dt[uint8(br.value>>(56+shift))]
				408	br.advance(uint8(v.entry))
				409	buf[off+2] = uint8(v.entry >> 8)
				410
				411	v = dt[uint8(br.value>>(56+shift))]
				412	br.advance(uint8(v.entry))
				413	buf[off+3] = uint8(v.entry >> 8)
				414
				415	off += 4
				416	if off == 0 {
				417	if len(dst)+256 > maxDecodedSize {
				418	br.close()
				419	d.bufs.Put(bufs)
				420	return nil, ErrMaxDecodedSizeExceeded
				421	}
				422	dst = append(dst, buf[:]...)
				423	}
				424	}
				425	case 6:
				426	const shift = 8 - 6
				427	for br.off >= 4 {
				428	br.fillFast()
				429	v := dt[uint8(br.value>>(56+shift))]
				430	br.advance(uint8(v.entry))
				431	buf[off+0] = uint8(v.entry >> 8)
				432
				433	v = dt[uint8(br.value>>(56+shift))]
				434	br.advance(uint8(v.entry))
				435	buf[off+1] = uint8(v.entry >> 8)
				436
				437	v = dt[uint8(br.value>>(56+shift))]
				438	br.advance(uint8(v.entry))
				439	buf[off+2] = uint8(v.entry >> 8)
				440
				441	v = dt[uint8(br.value>>(56+shift))]
				442	br.advance(uint8(v.entry))
				443	buf[off+3] = uint8(v.entry >> 8)
				444
				445	off += 4
				446	if off == 0 {
				447	if len(dst)+256 > maxDecodedSize {
				448	d.bufs.Put(bufs)
				449	br.close()
				450	return nil, ErrMaxDecodedSizeExceeded
				451	}
				452	dst = append(dst, buf[:]...)
				453	}
				454	}
				455	case 5:
				456	const shift = 8 - 5
				457	for br.off >= 4 {
				458	br.fillFast()
				459	v := dt[uint8(br.value>>(56+shift))]
				460	br.advance(uint8(v.entry))
				461	buf[off+0] = uint8(v.entry >> 8)
				462
				463	v = dt[uint8(br.value>>(56+shift))]
				464	br.advance(uint8(v.entry))
				465	buf[off+1] = uint8(v.entry >> 8)
				466
				467	v = dt[uint8(br.value>>(56+shift))]
				468	br.advance(uint8(v.entry))
				469	buf[off+2] = uint8(v.entry >> 8)
				470
				471	v = dt[uint8(br.value>>(56+shift))]
				472	br.advance(uint8(v.entry))
				473	buf[off+3] = uint8(v.entry >> 8)
				474
				475	off += 4
				476	if off == 0 {
				477	if len(dst)+256 > maxDecodedSize {
				478	d.bufs.Put(bufs)
				479	br.close()
				480	return nil, ErrMaxDecodedSizeExceeded
				481	}
				482	dst = append(dst, buf[:]...)
				483	}
				484	}
				485	case 4:
				486	const shift = 8 - 4
				487	for br.off >= 4 {
				488	br.fillFast()
				489	v := dt[uint8(br.value>>(56+shift))]
				490	br.advance(uint8(v.entry))
				491	buf[off+0] = uint8(v.entry >> 8)
				492
				493	v = dt[uint8(br.value>>(56+shift))]
				494	br.advance(uint8(v.entry))
				495	buf[off+1] = uint8(v.entry >> 8)
				496
				497	v = dt[uint8(br.value>>(56+shift))]
				498	br.advance(uint8(v.entry))
				499	buf[off+2] = uint8(v.entry >> 8)
				500
				501	v = dt[uint8(br.value>>(56+shift))]
				502	br.advance(uint8(v.entry))
				503	buf[off+3] = uint8(v.entry >> 8)
				504
				505	off += 4
				506	if off == 0 {
				507	if len(dst)+256 > maxDecodedSize {
				508	d.bufs.Put(bufs)
				509	br.close()
				510	return nil, ErrMaxDecodedSizeExceeded
				511	}
				512	dst = append(dst, buf[:]...)
				513	}
				514	}
				515	case 3:
				516	const shift = 8 - 3
				517	for br.off >= 4 {
				518	br.fillFast()
				519	v := dt[uint8(br.value>>(56+shift))]
				520	br.advance(uint8(v.entry))
				521	buf[off+0] = uint8(v.entry >> 8)
				522
				523	v = dt[uint8(br.value>>(56+shift))]
				524	br.advance(uint8(v.entry))
				525	buf[off+1] = uint8(v.entry >> 8)
				526
				527	v = dt[uint8(br.value>>(56+shift))]
				528	br.advance(uint8(v.entry))
				529	buf[off+2] = uint8(v.entry >> 8)
				530
				531	v = dt[uint8(br.value>>(56+shift))]
				532	br.advance(uint8(v.entry))
				533	buf[off+3] = uint8(v.entry >> 8)
				534
				535	off += 4
				536	if off == 0 {
				537	if len(dst)+256 > maxDecodedSize {
				538	d.bufs.Put(bufs)
				539	br.close()
				540	return nil, ErrMaxDecodedSizeExceeded
				541	}
				542	dst = append(dst, buf[:]...)
				543	}
				544	}
				545	case 2:
				546	const shift = 8 - 2
				547	for br.off >= 4 {
				548	br.fillFast()
				549	v := dt[uint8(br.value>>(56+shift))]
				550	br.advance(uint8(v.entry))
				551	buf[off+0] = uint8(v.entry >> 8)
				552
				553	v = dt[uint8(br.value>>(56+shift))]
				554	br.advance(uint8(v.entry))
				555	buf[off+1] = uint8(v.entry >> 8)
				556
				557	v = dt[uint8(br.value>>(56+shift))]
				558	br.advance(uint8(v.entry))
				559	buf[off+2] = uint8(v.entry >> 8)
				560
				561	v = dt[uint8(br.value>>(56+shift))]
				562	br.advance(uint8(v.entry))
				563	buf[off+3] = uint8(v.entry >> 8)
				564
				565	off += 4
				566	if off == 0 {
				567	if len(dst)+256 > maxDecodedSize {
				568	d.bufs.Put(bufs)
				569	br.close()
				570	return nil, ErrMaxDecodedSizeExceeded
				571	}
				572	dst = append(dst, buf[:]...)
				573	}
				574	}
				575	case 1:
				576	const shift = 8 - 1
				577	for br.off >= 4 {
				578	br.fillFast()
				579	v := dt[uint8(br.value>>(56+shift))]
				580	br.advance(uint8(v.entry))
				581	buf[off+0] = uint8(v.entry >> 8)
				582
				583	v = dt[uint8(br.value>>(56+shift))]
				584	br.advance(uint8(v.entry))
				585	buf[off+1] = uint8(v.entry >> 8)
				586
				587	v = dt[uint8(br.value>>(56+shift))]
				588	br.advance(uint8(v.entry))
				589	buf[off+2] = uint8(v.entry >> 8)
				590
				591	v = dt[uint8(br.value>>(56+shift))]
				592	br.advance(uint8(v.entry))
				593	buf[off+3] = uint8(v.entry >> 8)
				594
				595	off += 4
				596	if off == 0 {
				597	if len(dst)+256 > maxDecodedSize {
				598	d.bufs.Put(bufs)
				599	br.close()
				600	return nil, ErrMaxDecodedSizeExceeded
				601	}
				602	dst = append(dst, buf[:]...)
				603	}
				604	}
				605	default:
				606	d.bufs.Put(bufs)
				607	return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
				608	}
				609
				610	if len(dst)+int(off) > maxDecodedSize {
				611	d.bufs.Put(bufs)
				612	br.close()
				613	return nil, ErrMaxDecodedSizeExceeded
				614	}
				615	dst = append(dst, buf[:off]...)
				616
				617	// br < 4, so uint8 is fine
				618	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
				619	shift := (8 - d.actualTableLog) & 7
				620
				621	for bitsLeft > 0 {
				622	if br.bitsRead >= 64-8 {
				623	for br.off > 0 {
				624	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				625	br.bitsRead -= 8
				626	br.off--
				627	}
				628	}
				629	if len(dst) >= maxDecodedSize {
				630	br.close()
				631	d.bufs.Put(bufs)
				632	return nil, ErrMaxDecodedSizeExceeded
				633	}
				634	v := dt[br.peekByteFast()>>shift]
				635	nBits := uint8(v.entry)
				636	br.advance(nBits)
				637	bitsLeft -= int8(nBits)
				638	dst = append(dst, uint8(v.entry>>8))
				639	}
				640	d.bufs.Put(bufs)
				641	return dst, br.close()
				642	}
				643
				644	// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
				645	// The cap of the output buffer will be the maximum decompressed size.
				646	// The length of the supplied input must match the end of a block exactly.
				647	func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
				648	var br bitReaderBytes
				649	err := br.init(src)
				650	if err != nil {
				651	return dst, err
				652	}
				653	maxDecodedSize := cap(dst)
				654	dst = dst[:0]
				655
				656	// Avoid bounds check by always having full sized table.
				657	dt := d.dt.single[:256]
				658
				659	// Use temp table to avoid bound checks/append penalty.
				660	bufs := d.buffer()
				661	buf := &bufs[0]
				662	var off uint8
				663
				664	const shift = 56
				665
				666	//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
				667	for br.off >= 4 {
				668	br.fillFast()
				669	v := dt[uint8(br.value>>shift)]
				670	br.advance(uint8(v.entry))
				671	buf[off+0] = uint8(v.entry >> 8)
				672
				673	v = dt[uint8(br.value>>shift)]
				674	br.advance(uint8(v.entry))
				675	buf[off+1] = uint8(v.entry >> 8)
				676
				677	v = dt[uint8(br.value>>shift)]
				678	br.advance(uint8(v.entry))
				679	buf[off+2] = uint8(v.entry >> 8)
				680
				681	v = dt[uint8(br.value>>shift)]
				682	br.advance(uint8(v.entry))
				683	buf[off+3] = uint8(v.entry >> 8)
				684
				685	off += 4
				686	if off == 0 {
				687	if len(dst)+256 > maxDecodedSize {
				688	d.bufs.Put(bufs)
				689	br.close()
				690	return nil, ErrMaxDecodedSizeExceeded
				691	}
				692	dst = append(dst, buf[:]...)
				693	}
				694	}
				695
				696	if len(dst)+int(off) > maxDecodedSize {
				697	d.bufs.Put(bufs)
				698	br.close()
				699	return nil, ErrMaxDecodedSizeExceeded
				700	}
				701	dst = append(dst, buf[:off]...)
				702
				703	// br < 4, so uint8 is fine
				704	bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
				705	for bitsLeft > 0 {
				706	if br.bitsRead >= 64-8 {
				707	for br.off > 0 {
				708	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				709	br.bitsRead -= 8
				710	br.off--
				711	}
				712	}
				713	if len(dst) >= maxDecodedSize {
				714	d.bufs.Put(bufs)
				715	br.close()
				716	return nil, ErrMaxDecodedSizeExceeded
				717	}
				718	v := dt[br.peekByteFast()]
				719	nBits := uint8(v.entry)
				720	br.advance(nBits)
				721	bitsLeft -= int8(nBits)
				722	dst = append(dst, uint8(v.entry>>8))
				723	}
				724	d.bufs.Put(bufs)
				725	return dst, br.close()
				726	}
				727
				728	// Decompress4X will decompress a 4X encoded stream.
				729	// The length of the supplied input must match the end of a block exactly.
				730	// The capacity of the dst slice must match the destination size of
				731	// the uncompressed data exactly.
				732	func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
				733	if len(d.dt.single) == 0 {
				734	return nil, errors.New("no table loaded")
				735	}
				736	if len(src) < 6+(4*1) {
				737	return nil, errors.New("input too small")
				738	}
				739	if use8BitTables && d.actualTableLog <= 8 {
				740	return d.decompress4X8bit(dst, src)
				741	}
				742
				743	var br [4]bitReaderShifted
				744	start := 6
				745	for i := 0; i < 3; i++ {
				746	length := int(src[i2]) \| (int(src[i2+1]) << 8)
				747	if start+length >= len(src) {
				748	return nil, errors.New("truncated input (or invalid offset)")
				749	}
				750	err := br[i].init(src[start : start+length])
				751	if err != nil {
				752	return nil, err
				753	}
				754	start += length
				755	}
				756	err := br[3].init(src[start:])
				757	if err != nil {
				758	return nil, err
				759	}
				760
				761	// destination, offset to match first output
				762	dstSize := cap(dst)
				763	dst = dst[:dstSize]
				764	out := dst
				765	dstEvery := (dstSize + 3) / 4
				766
				767	const tlSize = 1 << tableLogMax
				768	const tlMask = tlSize - 1
				769	single := d.dt.single[:tlSize]
				770
				771	// Use temp table to avoid bound checks/append penalty.
				772	buf := d.buffer()
				773	var off uint8
				774	var decoded int
				775
				776	// Decode 2 values from each decoder/loop.
				777	const bufoff = 256
				778	for {
				779	if br[0].off < 4 \|\| br[1].off < 4 \|\| br[2].off < 4 \|\| br[3].off < 4 {
				780	break
				781	}
				782
				783	{
				784	const stream = 0
				785	const stream2 = 1
				786	br[stream].fillFast()
				787	br[stream2].fillFast()
				788
				789	val := br[stream].peekBitsFast(d.actualTableLog)
				790	val2 := br[stream2].peekBitsFast(d.actualTableLog)
				791	v := single[val&tlMask]
				792	v2 := single[val2&tlMask]
				793	br[stream].advance(uint8(v.entry))
				794	br[stream2].advance(uint8(v2.entry))
				795	buf[stream][off] = uint8(v.entry >> 8)
				796	buf[stream2][off] = uint8(v2.entry >> 8)
				797
				798	val = br[stream].peekBitsFast(d.actualTableLog)
				799	val2 = br[stream2].peekBitsFast(d.actualTableLog)
				800	v = single[val&tlMask]
				801	v2 = single[val2&tlMask]
				802	br[stream].advance(uint8(v.entry))
				803	br[stream2].advance(uint8(v2.entry))
				804	buf[stream][off+1] = uint8(v.entry >> 8)
				805	buf[stream2][off+1] = uint8(v2.entry >> 8)
				806	}
				807
				808	{
				809	const stream = 2
				810	const stream2 = 3
				811	br[stream].fillFast()
				812	br[stream2].fillFast()
				813
				814	val := br[stream].peekBitsFast(d.actualTableLog)
				815	val2 := br[stream2].peekBitsFast(d.actualTableLog)
				816	v := single[val&tlMask]
				817	v2 := single[val2&tlMask]
				818	br[stream].advance(uint8(v.entry))
				819	br[stream2].advance(uint8(v2.entry))
				820	buf[stream][off] = uint8(v.entry >> 8)
				821	buf[stream2][off] = uint8(v2.entry >> 8)
				822
				823	val = br[stream].peekBitsFast(d.actualTableLog)
				824	val2 = br[stream2].peekBitsFast(d.actualTableLog)
				825	v = single[val&tlMask]
				826	v2 = single[val2&tlMask]
				827	br[stream].advance(uint8(v.entry))
				828	br[stream2].advance(uint8(v2.entry))
				829	buf[stream][off+1] = uint8(v.entry >> 8)
				830	buf[stream2][off+1] = uint8(v2.entry >> 8)
				831	}
				832
				833	off += 2
				834
				835	if off == 0 {
				836	if bufoff > dstEvery {
				837	d.bufs.Put(buf)
				838	return nil, errors.New("corruption detected: stream overrun 1")
				839	}
				840	copy(out, buf[0][:])
				841	copy(out[dstEvery:], buf[1][:])
				842	copy(out[dstEvery*2:], buf[2][:])
				843	copy(out[dstEvery*3:], buf[3][:])
				844	out = out[bufoff:]
				845	decoded += bufoff * 4
				846	// There must at least be 3 buffers left.
				847	if len(out) < dstEvery*3 {
				848	d.bufs.Put(buf)
				849	return nil, errors.New("corruption detected: stream overrun 2")
				850	}
				851	}
				852	}
				853	if off > 0 {
				854	ioff := int(off)
				855	if len(out) < dstEvery*3+ioff {
				856	d.bufs.Put(buf)
				857	return nil, errors.New("corruption detected: stream overrun 3")
				858	}
				859	copy(out, buf[0][:off])
				860	copy(out[dstEvery:], buf[1][:off])
				861	copy(out[dstEvery*2:], buf[2][:off])
				862	copy(out[dstEvery*3:], buf[3][:off])
				863	decoded += int(off) * 4
				864	out = out[off:]
				865	}
				866
				867	// Decode remaining.
				868	for i := range br {
				869	offset := dstEvery * i
				870	br := &br[i]
				871	bitsLeft := br.off*8 + uint(64-br.bitsRead)
				872	for bitsLeft > 0 {
				873	br.fill()
				874	if false && br.bitsRead >= 32 {
				875	if br.off >= 4 {
				876	v := br.in[br.off-4:]
				877	v = v[:4]
				878	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				879	br.value = (br.value << 32) \| uint64(low)
				880	br.bitsRead -= 32
				881	br.off -= 4
				882	} else {
				883	for br.off > 0 {
				884	br.value = (br.value << 8) \| uint64(br.in[br.off-1])
				885	br.bitsRead -= 8
				886	br.off--
				887	}
				888	}
				889	}
				890	// end inline...
				891	if offset >= len(out) {
				892	d.bufs.Put(buf)
				893	return nil, errors.New("corruption detected: stream overrun 4")
				894	}
				895
				896	// Read value and increment offset.
				897	val := br.peekBitsFast(d.actualTableLog)
				898	v := single[val&tlMask].entry
				899	nBits := uint8(v)
				900	br.advance(nBits)
				901	bitsLeft -= uint(nBits)
				902	out[offset] = uint8(v >> 8)
				903	offset++
				904	}
				905	decoded += offset - dstEvery*i
				906	err = br.close()
				907	if err != nil {
				908	return nil, err
				909	}
				910	}
				911	d.bufs.Put(buf)
				912	if dstSize != decoded {
				913	return nil, errors.New("corruption detected: short output block")
				914	}
				915	return dst, nil
				916	}
				917
				918	// Decompress4X will decompress a 4X encoded stream.
				919	// The length of the supplied input must match the end of a block exactly.
				920	// The capacity of the dst slice must match the destination size of
				921	// the uncompressed data exactly.
				922	func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
				923	if d.actualTableLog == 8 {
				924	return d.decompress4X8bitExactly(dst, src)
				925	}
				926
				927	var br [4]bitReaderBytes
				928	start := 6
				929	for i := 0; i < 3; i++ {
				930	length := int(src[i2]) \| (int(src[i2+1]) << 8)
				931	if start+length >= len(src) {
				932	return nil, errors.New("truncated input (or invalid offset)")
				933	}
				934	err := br[i].init(src[start : start+length])
				935	if err != nil {
				936	return nil, err
				937	}
				938	start += length
				939	}
				940	err := br[3].init(src[start:])
				941	if err != nil {
				942	return nil, err
				943	}
				944
				945	// destination, offset to match first output
				946	dstSize := cap(dst)
				947	dst = dst[:dstSize]
				948	out := dst
				949	dstEvery := (dstSize + 3) / 4
				950
				951	shift := (56 + (8 - d.actualTableLog)) & 63
				952
				953	const tlSize = 1 << 8
				954	single := d.dt.single[:tlSize]
				955
				956	// Use temp table to avoid bound checks/append penalty.
				957	buf := d.buffer()
				958	var off uint8
				959	var decoded int
				960
				961	// Decode 4 values from each decoder/loop.
				962	const bufoff = 256
				963	for {
				964	if br[0].off < 4 \|\| br[1].off < 4 \|\| br[2].off < 4 \|\| br[3].off < 4 {
				965	break
				966	}
				967
				968	{
				969	// Interleave 2 decodes.
				970	const stream = 0
				971	const stream2 = 1
				972	br1 := &br[stream]
				973	br2 := &br[stream2]
				974	br1.fillFast()
				975	br2.fillFast()
				976
				977	v := single[uint8(br1.value>>shift)].entry
				978	v2 := single[uint8(br2.value>>shift)].entry
				979	br1.bitsRead += uint8(v)
				980	br1.value <<= v & 63
				981	br2.bitsRead += uint8(v2)
				982	br2.value <<= v2 & 63
				983	buf[stream][off] = uint8(v >> 8)
				984	buf[stream2][off] = uint8(v2 >> 8)
				985
				986	v = single[uint8(br1.value>>shift)].entry
				987	v2 = single[uint8(br2.value>>shift)].entry
				988	br1.bitsRead += uint8(v)
				989	br1.value <<= v & 63
				990	br2.bitsRead += uint8(v2)
				991	br2.value <<= v2 & 63
				992	buf[stream][off+1] = uint8(v >> 8)
				993	buf[stream2][off+1] = uint8(v2 >> 8)
				994
				995	v = single[uint8(br1.value>>shift)].entry
				996	v2 = single[uint8(br2.value>>shift)].entry
				997	br1.bitsRead += uint8(v)
				998	br1.value <<= v & 63
				999	br2.bitsRead += uint8(v2)
				1000	br2.value <<= v2 & 63
				1001	buf[stream][off+2] = uint8(v >> 8)
				1002	buf[stream2][off+2] = uint8(v2 >> 8)
				1003
				1004	v = single[uint8(br1.value>>shift)].entry
				1005	v2 = single[uint8(br2.value>>shift)].entry
				1006	br1.bitsRead += uint8(v)
				1007	br1.value <<= v & 63
				1008	br2.bitsRead += uint8(v2)
				1009	br2.value <<= v2 & 63
				1010	buf[stream][off+3] = uint8(v >> 8)
				1011	buf[stream2][off+3] = uint8(v2 >> 8)
				1012	}
				1013
				1014	{
				1015	const stream = 2
				1016	const stream2 = 3
				1017	br1 := &br[stream]
				1018	br2 := &br[stream2]
				1019	br1.fillFast()
				1020	br2.fillFast()
				1021
				1022	v := single[uint8(br1.value>>shift)].entry
				1023	v2 := single[uint8(br2.value>>shift)].entry
				1024	br1.bitsRead += uint8(v)
				1025	br1.value <<= v & 63
				1026	br2.bitsRead += uint8(v2)
				1027	br2.value <<= v2 & 63
				1028	buf[stream][off] = uint8(v >> 8)
				1029	buf[stream2][off] = uint8(v2 >> 8)
				1030
				1031	v = single[uint8(br1.value>>shift)].entry
				1032	v2 = single[uint8(br2.value>>shift)].entry
				1033	br1.bitsRead += uint8(v)
				1034	br1.value <<= v & 63
				1035	br2.bitsRead += uint8(v2)
				1036	br2.value <<= v2 & 63
				1037	buf[stream][off+1] = uint8(v >> 8)
				1038	buf[stream2][off+1] = uint8(v2 >> 8)
				1039
				1040	v = single[uint8(br1.value>>shift)].entry
				1041	v2 = single[uint8(br2.value>>shift)].entry
				1042	br1.bitsRead += uint8(v)
				1043	br1.value <<= v & 63
				1044	br2.bitsRead += uint8(v2)
				1045	br2.value <<= v2 & 63
				1046	buf[stream][off+2] = uint8(v >> 8)
				1047	buf[stream2][off+2] = uint8(v2 >> 8)
				1048
				1049	v = single[uint8(br1.value>>shift)].entry
				1050	v2 = single[uint8(br2.value>>shift)].entry
				1051	br1.bitsRead += uint8(v)
				1052	br1.value <<= v & 63
				1053	br2.bitsRead += uint8(v2)
				1054	br2.value <<= v2 & 63
				1055	buf[stream][off+3] = uint8(v >> 8)
				1056	buf[stream2][off+3] = uint8(v2 >> 8)
				1057	}
				1058
				1059	off += 4
				1060
				1061	if off == 0 {
				1062	if bufoff > dstEvery {
				1063	d.bufs.Put(buf)
				1064	return nil, errors.New("corruption detected: stream overrun 1")
				1065	}
				1066	copy(out, buf[0][:])
				1067	copy(out[dstEvery:], buf[1][:])
				1068	copy(out[dstEvery*2:], buf[2][:])
				1069	copy(out[dstEvery*3:], buf[3][:])
				1070	out = out[bufoff:]
				1071	decoded += bufoff * 4
				1072	// There must at least be 3 buffers left.
				1073	if len(out) < dstEvery*3 {
				1074	d.bufs.Put(buf)
				1075	return nil, errors.New("corruption detected: stream overrun 2")
				1076	}
				1077	}
				1078	}
				1079	if off > 0 {
				1080	ioff := int(off)
				1081	if len(out) < dstEvery*3+ioff {
				1082	d.bufs.Put(buf)
				1083	return nil, errors.New("corruption detected: stream overrun 3")
				1084	}
				1085	copy(out, buf[0][:off])
				1086	copy(out[dstEvery:], buf[1][:off])
				1087	copy(out[dstEvery*2:], buf[2][:off])
				1088	copy(out[dstEvery*3:], buf[3][:off])
				1089	decoded += int(off) * 4
				1090	out = out[off:]
				1091	}
				1092
				1093	// Decode remaining.
				1094	for i := range br {
				1095	offset := dstEvery * i
				1096	br := &br[i]
				1097	bitsLeft := int(br.off*8) + int(64-br.bitsRead)
				1098	for bitsLeft > 0 {
				1099	if br.finished() {
				1100	d.bufs.Put(buf)
				1101	return nil, io.ErrUnexpectedEOF
				1102	}
				1103	if br.bitsRead >= 56 {
				1104	if br.off >= 4 {
				1105	v := br.in[br.off-4:]
				1106	v = v[:4]
				1107	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				1108	br.value \|= uint64(low) << (br.bitsRead - 32)
				1109	br.bitsRead -= 32
				1110	br.off -= 4
				1111	} else {
				1112	for br.off > 0 {
				1113	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				1114	br.bitsRead -= 8
				1115	br.off--
				1116	}
				1117	}
				1118	}
				1119	// end inline...
				1120	if offset >= len(out) {
				1121	d.bufs.Put(buf)
				1122	return nil, errors.New("corruption detected: stream overrun 4")
				1123	}
				1124
				1125	// Read value and increment offset.
				1126	v := single[uint8(br.value>>shift)].entry
				1127	nBits := uint8(v)
				1128	br.advance(nBits)
				1129	bitsLeft -= int(nBits)
				1130	out[offset] = uint8(v >> 8)
				1131	offset++
				1132	}
				1133	decoded += offset - dstEvery*i
				1134	err = br.close()
				1135	if err != nil {
				1136	d.bufs.Put(buf)
				1137	return nil, err
				1138	}
				1139	}
				1140	d.bufs.Put(buf)
				1141	if dstSize != decoded {
				1142	return nil, errors.New("corruption detected: short output block")
				1143	}
				1144	return dst, nil
				1145	}
				1146
				1147	// Decompress4X will decompress a 4X encoded stream.
				1148	// The length of the supplied input must match the end of a block exactly.
				1149	// The capacity of the dst slice must match the destination size of
				1150	// the uncompressed data exactly.
				1151	func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
				1152	var br [4]bitReaderBytes
				1153	start := 6
				1154	for i := 0; i < 3; i++ {
				1155	length := int(src[i2]) \| (int(src[i2+1]) << 8)
				1156	if start+length >= len(src) {
				1157	return nil, errors.New("truncated input (or invalid offset)")
				1158	}
				1159	err := br[i].init(src[start : start+length])
				1160	if err != nil {
				1161	return nil, err
				1162	}
				1163	start += length
				1164	}
				1165	err := br[3].init(src[start:])
				1166	if err != nil {
				1167	return nil, err
				1168	}
				1169
				1170	// destination, offset to match first output
				1171	dstSize := cap(dst)
				1172	dst = dst[:dstSize]
				1173	out := dst
				1174	dstEvery := (dstSize + 3) / 4
				1175
				1176	const shift = 56
				1177	const tlSize = 1 << 8
				1178	const tlMask = tlSize - 1
				1179	single := d.dt.single[:tlSize]
				1180
				1181	// Use temp table to avoid bound checks/append penalty.
				1182	buf := d.buffer()
				1183	var off uint8
				1184	var decoded int
				1185
				1186	// Decode 4 values from each decoder/loop.
				1187	const bufoff = 256
				1188	for {
				1189	if br[0].off < 4 \|\| br[1].off < 4 \|\| br[2].off < 4 \|\| br[3].off < 4 {
				1190	break
				1191	}
				1192
				1193	{
				1194	// Interleave 2 decodes.
				1195	const stream = 0
				1196	const stream2 = 1
				1197	br1 := &br[stream]
				1198	br2 := &br[stream2]
				1199	br1.fillFast()
				1200	br2.fillFast()
				1201
				1202	v := single[uint8(br1.value>>shift)].entry
				1203	v2 := single[uint8(br2.value>>shift)].entry
				1204	br1.bitsRead += uint8(v)
				1205	br1.value <<= v & 63
				1206	br2.bitsRead += uint8(v2)
				1207	br2.value <<= v2 & 63
				1208	buf[stream][off] = uint8(v >> 8)
				1209	buf[stream2][off] = uint8(v2 >> 8)
				1210
				1211	v = single[uint8(br1.value>>shift)].entry
				1212	v2 = single[uint8(br2.value>>shift)].entry
				1213	br1.bitsRead += uint8(v)
				1214	br1.value <<= v & 63
				1215	br2.bitsRead += uint8(v2)
				1216	br2.value <<= v2 & 63
				1217	buf[stream][off+1] = uint8(v >> 8)
				1218	buf[stream2][off+1] = uint8(v2 >> 8)
				1219
				1220	v = single[uint8(br1.value>>shift)].entry
				1221	v2 = single[uint8(br2.value>>shift)].entry
				1222	br1.bitsRead += uint8(v)
				1223	br1.value <<= v & 63
				1224	br2.bitsRead += uint8(v2)
				1225	br2.value <<= v2 & 63
				1226	buf[stream][off+2] = uint8(v >> 8)
				1227	buf[stream2][off+2] = uint8(v2 >> 8)
				1228
				1229	v = single[uint8(br1.value>>shift)].entry
				1230	v2 = single[uint8(br2.value>>shift)].entry
				1231	br1.bitsRead += uint8(v)
				1232	br1.value <<= v & 63
				1233	br2.bitsRead += uint8(v2)
				1234	br2.value <<= v2 & 63
				1235	buf[stream][off+3] = uint8(v >> 8)
				1236	buf[stream2][off+3] = uint8(v2 >> 8)
				1237	}
				1238
				1239	{
				1240	const stream = 2
				1241	const stream2 = 3
				1242	br1 := &br[stream]
				1243	br2 := &br[stream2]
				1244	br1.fillFast()
				1245	br2.fillFast()
				1246
				1247	v := single[uint8(br1.value>>shift)].entry
				1248	v2 := single[uint8(br2.value>>shift)].entry
				1249	br1.bitsRead += uint8(v)
				1250	br1.value <<= v & 63
				1251	br2.bitsRead += uint8(v2)
				1252	br2.value <<= v2 & 63
				1253	buf[stream][off] = uint8(v >> 8)
				1254	buf[stream2][off] = uint8(v2 >> 8)
				1255
				1256	v = single[uint8(br1.value>>shift)].entry
				1257	v2 = single[uint8(br2.value>>shift)].entry
				1258	br1.bitsRead += uint8(v)
				1259	br1.value <<= v & 63
				1260	br2.bitsRead += uint8(v2)
				1261	br2.value <<= v2 & 63
				1262	buf[stream][off+1] = uint8(v >> 8)
				1263	buf[stream2][off+1] = uint8(v2 >> 8)
				1264
				1265	v = single[uint8(br1.value>>shift)].entry
				1266	v2 = single[uint8(br2.value>>shift)].entry
				1267	br1.bitsRead += uint8(v)
				1268	br1.value <<= v & 63
				1269	br2.bitsRead += uint8(v2)
				1270	br2.value <<= v2 & 63
				1271	buf[stream][off+2] = uint8(v >> 8)
				1272	buf[stream2][off+2] = uint8(v2 >> 8)
				1273
				1274	v = single[uint8(br1.value>>shift)].entry
				1275	v2 = single[uint8(br2.value>>shift)].entry
				1276	br1.bitsRead += uint8(v)
				1277	br1.value <<= v & 63
				1278	br2.bitsRead += uint8(v2)
				1279	br2.value <<= v2 & 63
				1280	buf[stream][off+3] = uint8(v >> 8)
				1281	buf[stream2][off+3] = uint8(v2 >> 8)
				1282	}
				1283
				1284	off += 4
				1285
				1286	if off == 0 {
				1287	if bufoff > dstEvery {
				1288	d.bufs.Put(buf)
				1289	return nil, errors.New("corruption detected: stream overrun 1")
				1290	}
				1291	copy(out, buf[0][:])
				1292	copy(out[dstEvery:], buf[1][:])
				1293	copy(out[dstEvery*2:], buf[2][:])
				1294	copy(out[dstEvery*3:], buf[3][:])
				1295	out = out[bufoff:]
				1296	decoded += bufoff * 4
				1297	// There must at least be 3 buffers left.
				1298	if len(out) < dstEvery*3 {
				1299	d.bufs.Put(buf)
				1300	return nil, errors.New("corruption detected: stream overrun 2")
				1301	}
				1302	}
				1303	}
				1304	if off > 0 {
				1305	ioff := int(off)
				1306	if len(out) < dstEvery*3+ioff {
				1307	return nil, errors.New("corruption detected: stream overrun 3")
				1308	}
				1309	copy(out, buf[0][:off])
				1310	copy(out[dstEvery:], buf[1][:off])
				1311	copy(out[dstEvery*2:], buf[2][:off])
				1312	copy(out[dstEvery*3:], buf[3][:off])
				1313	decoded += int(off) * 4
				1314	out = out[off:]
				1315	}
				1316
				1317	// Decode remaining.
				1318	for i := range br {
				1319	offset := dstEvery * i
				1320	br := &br[i]
				1321	bitsLeft := int(br.off*8) + int(64-br.bitsRead)
				1322	for bitsLeft > 0 {
				1323	if br.finished() {
				1324	d.bufs.Put(buf)
				1325	return nil, io.ErrUnexpectedEOF
				1326	}
				1327	if br.bitsRead >= 56 {
				1328	if br.off >= 4 {
				1329	v := br.in[br.off-4:]
				1330	v = v[:4]
				1331	low := (uint32(v[0])) \| (uint32(v[1]) << 8) \| (uint32(v[2]) << 16) \| (uint32(v[3]) << 24)
				1332	br.value \|= uint64(low) << (br.bitsRead - 32)
				1333	br.bitsRead -= 32
				1334	br.off -= 4
				1335	} else {
				1336	for br.off > 0 {
				1337	br.value \|= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
				1338	br.bitsRead -= 8
				1339	br.off--
				1340	}
				1341	}
				1342	}
				1343	// end inline...
				1344	if offset >= len(out) {
				1345	d.bufs.Put(buf)
				1346	return nil, errors.New("corruption detected: stream overrun 4")
				1347	}
				1348
				1349	// Read value and increment offset.
				1350	v := single[br.peekByteFast()].entry
				1351	nBits := uint8(v)
				1352	br.advance(nBits)
				1353	bitsLeft -= int(nBits)
				1354	out[offset] = uint8(v >> 8)
				1355	offset++
				1356	}
				1357	decoded += offset - dstEvery*i
				1358	err = br.close()
				1359	if err != nil {
				1360	d.bufs.Put(buf)
				1361	return nil, err
				1362	}
				1363	}
				1364	d.bufs.Put(buf)
				1365	if dstSize != decoded {
				1366	return nil, errors.New("corruption detected: short output block")
				1367	}
				1368	return dst, nil
				1369	}
				1370
				1371	// matches will compare a decoding table to a coding table.
				1372	// Errors are written to the writer.
				1373	// Nothing will be written if table is ok.
				1374	func (s *Scratch) matches(ct cTable, w io.Writer) {
				1375	if s == nil \|\| len(s.dt.single) == 0 {
				1376	return
				1377	}
				1378	dt := s.dt.single[:1<<s.actualTableLog]
				1379	tablelog := s.actualTableLog
				1380	ok := 0
				1381	broken := 0
				1382	for sym, enc := range ct {
				1383	errs := 0
				1384	broken++
				1385	if enc.nBits == 0 {
				1386	for _, dec := range dt {
				1387	if uint8(dec.entry>>8) == byte(sym) {
				1388	fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
				1389	errs++
				1390	break
				1391	}
				1392	}
				1393	if errs == 0 {
				1394	broken--
				1395	}
				1396	continue
				1397	}
				1398	// Unused bits in input
				1399	ub := tablelog - enc.nBits
				1400	top := enc.val << ub
				1401	// decoder looks at top bits.
				1402	dec := dt[top]
				1403	if uint8(dec.entry) != enc.nBits {
				1404	fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
				1405	errs++
				1406	}
				1407	if uint8(dec.entry>>8) != uint8(sym) {
				1408	fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
				1409	errs++
				1410	}
				1411	if errs > 0 {
				1412	fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
				1413	continue
				1414	}
				1415	// Ensure that all combinations are covered.
				1416	for i := uint16(0); i < (1 << ub); i++ {
				1417	vval := top \| i
				1418	dec := dt[vval]
				1419	if uint8(dec.entry) != enc.nBits {
				1420	fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
				1421	errs++
				1422	}
				1423	if uint8(dec.entry>>8) != uint8(sym) {
				1424	fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
				1425	errs++
				1426	}
				1427	if errs > 20 {
				1428	fmt.Fprintf(w, "%d errros, stopping\n", errs)
				1429	break
				1430	}
				1431	}
				1432	if errs == 0 {
				1433	ok++
				1434	broken--
				1435	}
				1436	}
				1437	if broken > 0 {
				1438	fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
				1439	}
				1440	}