Blame - vendor/github.com/klauspost/compress/zstd/blockenc.go - voltctl

blob: e1be092f3267cb3af5f79f5f9ef5f933cb552c59 [file] [log] [blame]

Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	1	// Copyright 2019+ Klaus Post. All rights reserved.
				2	// License information can be found in the LICENSE file.
				3	// Based on work by Yann Collet, released under BSD License.
				4
				5	package zstd
				6
				7	import (
				8	"errors"
				9	"fmt"
				10	"math"
				11	"math/bits"
				12
				13	"github.com/klauspost/compress/huff0"
				14	)
				15
				16	type blockEnc struct {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	17	size int
				18	literals []byte
				19	sequences []seq
				20	coders seqCoders
				21	litEnc *huff0.Scratch
				22	dictLitEnc *huff0.Scratch
				23	wr bitWriter
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	24
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	25	extraLits int
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	26	output []byte
				27	recentOffsets [3]uint32
				28	prevRecentOffsets [3]uint32
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	29
				30	last bool
				31	lowMem bool
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	32	}
				33
				34	// init should be used once the block has been created.
				35	// If called more than once, the effect is the same as calling reset.
				36	func (b *blockEnc) init() {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	37	if b.lowMem {
				38	// 1K literals
				39	if cap(b.literals) < 1<<10 {
				40	b.literals = make([]byte, 0, 1<<10)
				41	}
				42	const defSeqs = 20
				43	if cap(b.sequences) < defSeqs {
				44	b.sequences = make([]seq, 0, defSeqs)
				45	}
				46	// 1K
				47	if cap(b.output) < 1<<10 {
				48	b.output = make([]byte, 0, 1<<10)
				49	}
				50	} else {
				51	if cap(b.literals) < maxCompressedBlockSize {
				52	b.literals = make([]byte, 0, maxCompressedBlockSize)
				53	}
				54	const defSeqs = 200
				55	if cap(b.sequences) < defSeqs {
				56	b.sequences = make([]seq, 0, defSeqs)
				57	}
				58	if cap(b.output) < maxCompressedBlockSize {
				59	b.output = make([]byte, 0, maxCompressedBlockSize)
				60	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	61	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	62
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	63	if b.coders.mlEnc == nil {
				64	b.coders.mlEnc = &fseEncoder{}
				65	b.coders.mlPrev = &fseEncoder{}
				66	b.coders.ofEnc = &fseEncoder{}
				67	b.coders.ofPrev = &fseEncoder{}
				68	b.coders.llEnc = &fseEncoder{}
				69	b.coders.llPrev = &fseEncoder{}
				70	}
				71	b.litEnc = &huff0.Scratch{WantLogLess: 4}
				72	b.reset(nil)
				73	}
				74
				75	// initNewEncode can be used to reset offsets and encoders to the initial state.
				76	func (b *blockEnc) initNewEncode() {
				77	b.recentOffsets = [3]uint32{1, 4, 8}
				78	b.litEnc.Reuse = huff0.ReusePolicyNone
				79	b.coders.setPrev(nil, nil, nil)
				80	}
				81
				82	// reset will reset the block for a new encode, but in the same stream,
				83	// meaning that state will be carried over, but the block content is reset.
				84	// If a previous block is provided, the recent offsets are carried over.
				85	func (b blockEnc) reset(prev blockEnc) {
				86	b.extraLits = 0
				87	b.literals = b.literals[:0]
				88	b.size = 0
				89	b.sequences = b.sequences[:0]
				90	b.output = b.output[:0]
				91	b.last = false
				92	if prev != nil {
				93	b.recentOffsets = prev.prevRecentOffsets
				94	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	95	b.dictLitEnc = nil
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	96	}
				97
				98	// reset will reset the block for a new encode, but in the same stream,
				99	// meaning that state will be carried over, but the block content is reset.
				100	// If a previous block is provided, the recent offsets are carried over.
				101	func (b blockEnc) swapEncoders(prev blockEnc) {
				102	b.coders.swap(&prev.coders)
				103	b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
				104	}
				105
				106	// blockHeader contains the information for a block header.
				107	type blockHeader uint32
				108
				109	// setLast sets the 'last' indicator on a block.
				110	func (h *blockHeader) setLast(b bool) {
				111	if b {
				112	h = h \| 1
				113	} else {
				114	const mask = (1 << 24) - 2
				115	h = h & mask
				116	}
				117	}
				118
				119	// setSize will store the compressed size of a block.
				120	func (h *blockHeader) setSize(v uint32) {
				121	const mask = 7
				122	h = (h)&mask \| blockHeader(v<<3)
				123	}
				124
				125	// setType sets the block type.
				126	func (h *blockHeader) setType(t blockType) {
				127	const mask = 1 \| (((1 << 24) - 1) ^ 7)
				128	h = (h & mask) \| blockHeader(t<<1)
				129	}
				130
				131	// appendTo will append the block header to a slice.
				132	func (h blockHeader) appendTo(b []byte) []byte {
				133	return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
				134	}
				135
				136	// String returns a string representation of the block.
				137	func (h blockHeader) String() string {
				138	return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
				139	}
				140
				141	// literalsHeader contains literals header information.
				142	type literalsHeader uint64
				143
				144	// setType can be used to set the type of literal block.
				145	func (h *literalsHeader) setType(t literalsBlockType) {
				146	const mask = math.MaxUint64 - 3
				147	h = (h & mask) \| literalsHeader(t)
				148	}
				149
				150	// setSize can be used to set a single size, for uncompressed and RLE content.
				151	func (h *literalsHeader) setSize(regenLen int) {
				152	inBits := bits.Len32(uint32(regenLen))
				153	// Only retain 2 bits
				154	const mask = 3
				155	lh := uint64(*h & mask)
				156	switch {
				157	case inBits < 5:
				158	lh \|= (uint64(regenLen) << 3) \| (1 << 60)
				159	if debug {
				160	got := int(lh>>3) & 0xff
				161	if got != regenLen {
				162	panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
				163	}
				164	}
				165	case inBits < 12:
				166	lh \|= (1 << 2) \| (uint64(regenLen) << 4) \| (2 << 60)
				167	case inBits < 20:
				168	lh \|= (3 << 2) \| (uint64(regenLen) << 4) \| (3 << 60)
				169	default:
				170	panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
				171	}
				172	*h = literalsHeader(lh)
				173	}
				174
				175	// setSizes will set the size of a compressed literals section and the input length.
				176	func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
				177	compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
				178	// Only retain 2 bits
				179	const mask = 3
				180	lh := uint64(*h & mask)
				181	switch {
				182	case compBits <= 10 && inBits <= 10:
				183	if !single {
				184	lh \|= 1 << 2
				185	}
				186	lh \|= (uint64(inLen) << 4) \| (uint64(compLen) << (10 + 4)) \| (3 << 60)
				187	if debug {
				188	const mmask = (1 << 24) - 1
				189	n := (lh >> 4) & mmask
				190	if int(n&1023) != inLen {
				191	panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
				192	}
				193	if int(n>>10) != compLen {
				194	panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
				195	}
				196	}
				197	case compBits <= 14 && inBits <= 14:
				198	lh \|= (2 << 2) \| (uint64(inLen) << 4) \| (uint64(compLen) << (14 + 4)) \| (4 << 60)
				199	if single {
				200	panic("single stream used with more than 10 bits length.")
				201	}
				202	case compBits <= 18 && inBits <= 18:
				203	lh \|= (3 << 2) \| (uint64(inLen) << 4) \| (uint64(compLen) << (18 + 4)) \| (5 << 60)
				204	if single {
				205	panic("single stream used with more than 10 bits length.")
				206	}
				207	default:
				208	panic("internal error: block too big")
				209	}
				210	*h = literalsHeader(lh)
				211	}
				212
				213	// appendTo will append the literals header to a byte slice.
				214	func (h literalsHeader) appendTo(b []byte) []byte {
				215	size := uint8(h >> 60)
				216	switch size {
				217	case 1:
				218	b = append(b, uint8(h))
				219	case 2:
				220	b = append(b, uint8(h), uint8(h>>8))
				221	case 3:
				222	b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
				223	case 4:
				224	b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
				225	case 5:
				226	b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
				227	default:
				228	panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
				229	}
				230	return b
				231	}
				232
				233	// size returns the output size with currently set values.
				234	func (h literalsHeader) size() int {
				235	return int(h >> 60)
				236	}
				237
				238	func (h literalsHeader) String() string {
				239	return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
				240	}
				241
				242	// pushOffsets will push the recent offsets to the backup store.
				243	func (b *blockEnc) pushOffsets() {
				244	b.prevRecentOffsets = b.recentOffsets
				245	}
				246
				247	// pushOffsets will push the recent offsets to the backup store.
				248	func (b *blockEnc) popOffsets() {
				249	b.recentOffsets = b.prevRecentOffsets
				250	}
				251
				252	// matchOffset will adjust recent offsets and return the adjusted one,
				253	// if it matches a previous offset.
				254	func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
				255	// Check if offset is one of the recent offsets.
				256	// Adjusts the output offset accordingly.
				257	// Gives a tiny bit of compression, typically around 1%.
				258	if true {
				259	if lits > 0 {
				260	switch offset {
				261	case b.recentOffsets[0]:
				262	offset = 1
				263	case b.recentOffsets[1]:
				264	b.recentOffsets[1] = b.recentOffsets[0]
				265	b.recentOffsets[0] = offset
				266	offset = 2
				267	case b.recentOffsets[2]:
				268	b.recentOffsets[2] = b.recentOffsets[1]
				269	b.recentOffsets[1] = b.recentOffsets[0]
				270	b.recentOffsets[0] = offset
				271	offset = 3
				272	default:
				273	b.recentOffsets[2] = b.recentOffsets[1]
				274	b.recentOffsets[1] = b.recentOffsets[0]
				275	b.recentOffsets[0] = offset
				276	offset += 3
				277	}
				278	} else {
				279	switch offset {
				280	case b.recentOffsets[1]:
				281	b.recentOffsets[1] = b.recentOffsets[0]
				282	b.recentOffsets[0] = offset
				283	offset = 1
				284	case b.recentOffsets[2]:
				285	b.recentOffsets[2] = b.recentOffsets[1]
				286	b.recentOffsets[1] = b.recentOffsets[0]
				287	b.recentOffsets[0] = offset
				288	offset = 2
				289	case b.recentOffsets[0] - 1:
				290	b.recentOffsets[2] = b.recentOffsets[1]
				291	b.recentOffsets[1] = b.recentOffsets[0]
				292	b.recentOffsets[0] = offset
				293	offset = 3
				294	default:
				295	b.recentOffsets[2] = b.recentOffsets[1]
				296	b.recentOffsets[1] = b.recentOffsets[0]
				297	b.recentOffsets[0] = offset
				298	offset += 3
				299	}
				300	}
				301	} else {
				302	offset += 3
				303	}
				304	return offset
				305	}
				306
				307	// encodeRaw can be used to set the output to a raw representation of supplied bytes.
				308	func (b *blockEnc) encodeRaw(a []byte) {
				309	var bh blockHeader
				310	bh.setLast(b.last)
				311	bh.setSize(uint32(len(a)))
				312	bh.setType(blockTypeRaw)
				313	b.output = bh.appendTo(b.output[:0])
				314	b.output = append(b.output, a...)
				315	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	316	println("Adding RAW block, length", len(a), "last:", b.last)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	317	}
				318	}
				319
				320	// encodeRaw can be used to set the output to a raw representation of supplied bytes.
				321	func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
				322	var bh blockHeader
				323	bh.setLast(b.last)
				324	bh.setSize(uint32(len(src)))
				325	bh.setType(blockTypeRaw)
				326	dst = bh.appendTo(dst)
				327	dst = append(dst, src...)
				328	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	329	println("Adding RAW block, length", len(src), "last:", b.last)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	330	}
				331	return dst
				332	}
				333
				334	// encodeLits can be used if the block is only litLen.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	335	func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	336	var bh blockHeader
				337	bh.setLast(b.last)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	338	bh.setSize(uint32(len(lits)))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	339
				340	// Don't compress extremely small blocks
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	341	if len(lits) < 8 \|\| (len(lits) < 32 && b.dictLitEnc == nil) \|\| raw {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	342	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	343	println("Adding RAW block, length", len(lits), "last:", b.last)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	344	}
				345	bh.setType(blockTypeRaw)
				346	b.output = bh.appendTo(b.output)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	347	b.output = append(b.output, lits...)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	348	return nil
				349	}
				350
				351	var (
				352	out []byte
				353	reUsed, single bool
				354	err error
				355	)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	356	if b.dictLitEnc != nil {
				357	b.litEnc.TransferCTable(b.dictLitEnc)
				358	b.litEnc.Reuse = huff0.ReusePolicyAllow
				359	b.dictLitEnc = nil
				360	}
				361	if len(lits) >= 1024 {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	362	// Use 4 Streams.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	363	out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
				364	} else if len(lits) > 32 {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	365	// Use 1 stream
				366	single = true
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	367	out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	368	} else {
				369	err = huff0.ErrIncompressible
				370	}
				371
				372	switch err {
				373	case huff0.ErrIncompressible:
				374	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	375	println("Adding RAW block, length", len(lits), "last:", b.last)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	376	}
				377	bh.setType(blockTypeRaw)
				378	b.output = bh.appendTo(b.output)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	379	b.output = append(b.output, lits...)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	380	return nil
				381	case huff0.ErrUseRLE:
				382	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	383	println("Adding RLE block, length", len(lits))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	384	}
				385	bh.setType(blockTypeRLE)
				386	b.output = bh.appendTo(b.output)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	387	b.output = append(b.output, lits[0])
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	388	return nil
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	389	case nil:
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	390	default:
				391	return err
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	392	}
				393	// Compressed...
				394	// Now, allow reuse
				395	b.litEnc.Reuse = huff0.ReusePolicyAllow
				396	bh.setType(blockTypeCompressed)
				397	var lh literalsHeader
				398	if reUsed {
				399	if debug {
				400	println("Reused tree, compressed to", len(out))
				401	}
				402	lh.setType(literalsBlockTreeless)
				403	} else {
				404	if debug {
				405	println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
				406	}
				407	lh.setType(literalsBlockCompressed)
				408	}
				409	// Set sizes
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	410	lh.setSizes(len(out), len(lits), single)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	411	bh.setSize(uint32(len(out) + lh.size() + 1))
				412
				413	// Write block headers.
				414	b.output = bh.appendTo(b.output)
				415	b.output = lh.appendTo(b.output)
				416	// Add compressed data.
				417	b.output = append(b.output, out...)
				418	// No sequences.
				419	b.output = append(b.output, 0)
				420	return nil
				421	}
				422
				423	// fuzzFseEncoder can be used to fuzz the FSE encoder.
				424	func fuzzFseEncoder(data []byte) int {
				425	if len(data) > maxSequences \|\| len(data) < 2 {
				426	return 0
				427	}
				428	enc := fseEncoder{}
				429	hist := enc.Histogram()[:256]
				430	maxSym := uint8(0)
				431	for i, v := range data {
				432	v = v & 63
				433	data[i] = v
				434	hist[v]++
				435	if v > maxSym {
				436	maxSym = v
				437	}
				438	}
				439	if maxSym == 0 {
				440	// All 0
				441	return 0
				442	}
				443	maxCount := func(a []uint32) int {
				444	var max uint32
				445	for _, v := range a {
				446	if v > max {
				447	max = v
				448	}
				449	}
				450	return int(max)
				451	}
				452	cnt := maxCount(hist[:maxSym])
				453	if cnt == len(data) {
				454	// RLE
				455	return 0
				456	}
				457	enc.HistogramFinished(maxSym, cnt)
				458	err := enc.normalizeCount(len(data))
				459	if err != nil {
				460	return 0
				461	}
				462	_, err = enc.writeCount(nil)
				463	if err != nil {
				464	panic(err)
				465	}
				466	return 1
				467	}
				468
				469	// encode will encode the block and append the output in b.output.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	470	// Previous offset codes must be pushed if more blocks are expected.
				471	func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	472	if len(b.sequences) == 0 {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	473	return b.encodeLits(b.literals, rawAllLits)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	474	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	475	// We want some difference to at least account for the headers.
				476	saved := b.size - len(b.literals) - (b.size >> 5)
				477	if saved < 16 {
				478	if org == nil {
				479	return errIncompressible
				480	}
				481	b.popOffsets()
				482	return b.encodeLits(org, rawAllLits)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	483	}
				484
				485	var bh blockHeader
				486	var lh literalsHeader
				487	bh.setLast(b.last)
				488	bh.setType(blockTypeCompressed)
				489	// Store offset of the block header. Needed when we know the size.
				490	bhOffset := len(b.output)
				491	b.output = bh.appendTo(b.output)
				492
				493	var (
				494	out []byte
				495	reUsed, single bool
				496	err error
				497	)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	498	if b.dictLitEnc != nil {
				499	b.litEnc.TransferCTable(b.dictLitEnc)
				500	b.litEnc.Reuse = huff0.ReusePolicyAllow
				501	b.dictLitEnc = nil
				502	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	503	if len(b.literals) >= 1024 && !raw {
				504	// Use 4 Streams.
				505	out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
				506	} else if len(b.literals) > 32 && !raw {
				507	// Use 1 stream
				508	single = true
				509	out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
				510	} else {
				511	err = huff0.ErrIncompressible
				512	}
				513
				514	switch err {
				515	case huff0.ErrIncompressible:
				516	lh.setType(literalsBlockRaw)
				517	lh.setSize(len(b.literals))
				518	b.output = lh.appendTo(b.output)
				519	b.output = append(b.output, b.literals...)
				520	if debug {
				521	println("Adding literals RAW, length", len(b.literals))
				522	}
				523	case huff0.ErrUseRLE:
				524	lh.setType(literalsBlockRLE)
				525	lh.setSize(len(b.literals))
				526	b.output = lh.appendTo(b.output)
				527	b.output = append(b.output, b.literals[0])
				528	if debug {
				529	println("Adding literals RLE")
				530	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	531	case nil:
				532	// Compressed litLen...
				533	if reUsed {
				534	if debug {
				535	println("reused tree")
				536	}
				537	lh.setType(literalsBlockTreeless)
				538	} else {
				539	if debug {
				540	println("new tree, size:", len(b.litEnc.OutTable))
				541	}
				542	lh.setType(literalsBlockCompressed)
				543	if debug {
				544	_, _, err := huff0.ReadTable(out, nil)
				545	if err != nil {
				546	panic(err)
				547	}
				548	}
				549	}
				550	lh.setSizes(len(out), len(b.literals), single)
				551	if debug {
				552	printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
				553	println("Adding literal header:", lh)
				554	}
				555	b.output = lh.appendTo(b.output)
				556	b.output = append(b.output, out...)
				557	b.litEnc.Reuse = huff0.ReusePolicyAllow
				558	if debug {
				559	println("Adding literals compressed")
				560	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	561	default:
				562	if debug {
				563	println("Adding literals ERROR:", err)
				564	}
				565	return err
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	566	}
				567	// Sequence compression
				568
				569	// Write the number of sequences
				570	switch {
				571	case len(b.sequences) < 128:
				572	b.output = append(b.output, uint8(len(b.sequences)))
				573	case len(b.sequences) < 0x7f00: // TODO: this could be wrong
				574	n := len(b.sequences)
				575	b.output = append(b.output, 128+uint8(n>>8), uint8(n))
				576	default:
				577	n := len(b.sequences) - 0x7f00
				578	b.output = append(b.output, 255, uint8(n), uint8(n>>8))
				579	}
				580	if debug {
				581	println("Encoding", len(b.sequences), "sequences")
				582	}
				583	b.genCodes()
				584	llEnc := b.coders.llEnc
				585	ofEnc := b.coders.ofEnc
				586	mlEnc := b.coders.mlEnc
				587	err = llEnc.normalizeCount(len(b.sequences))
				588	if err != nil {
				589	return err
				590	}
				591	err = ofEnc.normalizeCount(len(b.sequences))
				592	if err != nil {
				593	return err
				594	}
				595	err = mlEnc.normalizeCount(len(b.sequences))
				596	if err != nil {
				597	return err
				598	}
				599
				600	// Choose the best compression mode for each type.
				601	// Will evaluate the new vs predefined and previous.
				602	chooseComp := func(cur, prev, preDef fseEncoder) (fseEncoder, seqCompMode) {
				603	// See if predefined/previous is better
				604	hist := cur.count[:cur.symbolLen]
				605	nSize := cur.approxSize(hist) + cur.maxHeaderSize()
				606	predefSize := preDef.approxSize(hist)
				607	prevSize := prev.approxSize(hist)
				608
				609	// Add a small penalty for new encoders.
				610	// Don't bother with extremely small (<2 byte gains).
				611	nSize = nSize + (nSize+2816)>>4
				612	switch {
				613	case predefSize <= prevSize && predefSize <= nSize \|\| forcePreDef:
				614	if debug {
				615	println("Using predefined", predefSize>>3, "<=", nSize>>3)
				616	}
				617	return preDef, compModePredefined
				618	case prevSize <= nSize:
				619	if debug {
				620	println("Using previous", prevSize>>3, "<=", nSize>>3)
				621	}
				622	return prev, compModeRepeat
				623	default:
				624	if debug {
				625	println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
				626	println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
				627	}
				628	return cur, compModeFSE
				629	}
				630	}
				631
				632	// Write compression mode
				633	var mode uint8
				634	if llEnc.useRLE {
				635	mode \|= uint8(compModeRLE) << 6
				636	llEnc.setRLE(b.sequences[0].llCode)
				637	if debug {
				638	println("llEnc.useRLE")
				639	}
				640	} else {
				641	var m seqCompMode
				642	llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
				643	mode \|= uint8(m) << 6
				644	}
				645	if ofEnc.useRLE {
				646	mode \|= uint8(compModeRLE) << 4
				647	ofEnc.setRLE(b.sequences[0].ofCode)
				648	if debug {
				649	println("ofEnc.useRLE")
				650	}
				651	} else {
				652	var m seqCompMode
				653	ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
				654	mode \|= uint8(m) << 4
				655	}
				656
				657	if mlEnc.useRLE {
				658	mode \|= uint8(compModeRLE) << 2
				659	mlEnc.setRLE(b.sequences[0].mlCode)
				660	if debug {
				661	println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
				662	}
				663	} else {
				664	var m seqCompMode
				665	mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
				666	mode \|= uint8(m) << 2
				667	}
				668	b.output = append(b.output, mode)
				669	if debug {
				670	printf("Compression modes: 0b%b", mode)
				671	}
				672	b.output, err = llEnc.writeCount(b.output)
				673	if err != nil {
				674	return err
				675	}
				676	start := len(b.output)
				677	b.output, err = ofEnc.writeCount(b.output)
				678	if err != nil {
				679	return err
				680	}
				681	if false {
				682	println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
				683	fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
				684	for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
				685	fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
				686	}
				687	}
				688	b.output, err = mlEnc.writeCount(b.output)
				689	if err != nil {
				690	return err
				691	}
				692
				693	// Maybe in block?
				694	wr := &b.wr
				695	wr.reset(b.output)
				696
				697	var ll, of, ml cState
				698
				699	// Current sequence
				700	seq := len(b.sequences) - 1
				701	s := b.sequences[seq]
				702	llEnc.setBits(llBitsTable[:])
				703	mlEnc.setBits(mlBitsTable[:])
				704	ofEnc.setBits(nil)
				705
				706	llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]
				707
				708	// We have 3 bounds checks here (and in the loop).
				709	// Since we are iterating backwards it is kinda hard to avoid.
				710	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
				711	ll.init(wr, &llEnc.ct, llB)
				712	of.init(wr, &ofEnc.ct, ofB)
				713	wr.flush32()
				714	ml.init(wr, &mlEnc.ct, mlB)
				715
				716	// Each of these lookups also generates a bounds check.
				717	wr.addBits32NC(s.litLen, llB.outBits)
				718	wr.addBits32NC(s.matchLen, mlB.outBits)
				719	wr.flush32()
				720	wr.addBits32NC(s.offset, ofB.outBits)
				721	if debugSequences {
				722	println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
				723	}
				724	seq--
				725	if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
				726	// No need to flush (common)
				727	for seq >= 0 {
				728	s = b.sequences[seq]
				729	wr.flush32()
				730	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
				731	// tabelog max is 8 for all.
				732	of.encode(ofB)
				733	ml.encode(mlB)
				734	ll.encode(llB)
				735	wr.flush32()
				736
				737	// We checked that all can stay within 32 bits
				738	wr.addBits32NC(s.litLen, llB.outBits)
				739	wr.addBits32NC(s.matchLen, mlB.outBits)
				740	wr.addBits32NC(s.offset, ofB.outBits)
				741
				742	if debugSequences {
				743	println("Encoded seq", seq, s)
				744	}
				745
				746	seq--
				747	}
				748	} else {
				749	for seq >= 0 {
				750	s = b.sequences[seq]
				751	wr.flush32()
				752	llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
				753	// tabelog max is below 8 for each.
				754	of.encode(ofB)
				755	ml.encode(mlB)
				756	ll.encode(llB)
				757	wr.flush32()
				758
				759	// ml+ll = max 32 bits total
				760	wr.addBits32NC(s.litLen, llB.outBits)
				761	wr.addBits32NC(s.matchLen, mlB.outBits)
				762	wr.flush32()
				763	wr.addBits32NC(s.offset, ofB.outBits)
				764
				765	if debugSequences {
				766	println("Encoded seq", seq, s)
				767	}
				768
				769	seq--
				770	}
				771	}
				772	ml.flush(mlEnc.actualTableLog)
				773	of.flush(ofEnc.actualTableLog)
				774	ll.flush(llEnc.actualTableLog)
				775	err = wr.close()
				776	if err != nil {
				777	return err
				778	}
				779	b.output = wr.out
				780
				781	if len(b.output)-3-bhOffset >= b.size {
				782	// Maybe even add a bigger margin.
				783	b.litEnc.Reuse = huff0.ReusePolicyNone
				784	return errIncompressible
				785	}
				786
				787	// Size is output minus block header.
				788	bh.setSize(uint32(len(b.output)-bhOffset) - 3)
				789	if debug {
				790	println("Rewriting block header", bh)
				791	}
				792	_ = bh.appendTo(b.output[bhOffset:bhOffset])
				793	b.coders.setPrev(llEnc, mlEnc, ofEnc)
				794	return nil
				795	}
				796
				797	var errIncompressible = errors.New("incompressible")
				798
				799	func (b *blockEnc) genCodes() {
				800	if len(b.sequences) == 0 {
				801	// nothing to do
				802	return
				803	}
				804
				805	if len(b.sequences) > math.MaxUint16 {
				806	panic("can only encode up to 64K sequences")
				807	}
				808	// No bounds checks after here:
				809	llH := b.coders.llEnc.Histogram()[:256]
				810	ofH := b.coders.ofEnc.Histogram()[:256]
				811	mlH := b.coders.mlEnc.Histogram()[:256]
				812	for i := range llH {
				813	llH[i] = 0
				814	}
				815	for i := range ofH {
				816	ofH[i] = 0
				817	}
				818	for i := range mlH {
				819	mlH[i] = 0
				820	}
				821
				822	var llMax, ofMax, mlMax uint8
				823	for i, seq := range b.sequences {
				824	v := llCode(seq.litLen)
				825	seq.llCode = v
				826	llH[v]++
				827	if v > llMax {
				828	llMax = v
				829	}
				830
				831	v = ofCode(seq.offset)
				832	seq.ofCode = v
				833	ofH[v]++
				834	if v > ofMax {
				835	ofMax = v
				836	}
				837
				838	v = mlCode(seq.matchLen)
				839	seq.mlCode = v
				840	mlH[v]++
				841	if v > mlMax {
				842	mlMax = v
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	843	if debugAsserts && mlMax > maxMatchLengthSymbol {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	844	panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
				845	}
				846	}
				847	b.sequences[i] = seq
				848	}
				849	maxCount := func(a []uint32) int {
				850	var max uint32
				851	for _, v := range a {
				852	if v > max {
				853	max = v
				854	}
				855	}
				856	return int(max)
				857	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	858	if debugAsserts && mlMax > maxMatchLengthSymbol {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	859	panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
				860	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	861	if debugAsserts && ofMax > maxOffsetBits {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	862	panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
				863	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame]	864	if debugAsserts && llMax > maxLiteralLengthSymbol {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	865	panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
				866	}
				867
				868	b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
				869	b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
				870	b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
				871	}