Blame - vendor/github.com/klauspost/compress/zstd/enc_best.go - voltctl

blob: dc1eed5f00a0b043978970e46e0b062b217aaffa [file] [log] [blame]

David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	1	// Copyright 2019+ Klaus Post. All rights reserved.
				2	// License information can be found in the LICENSE file.
				3	// Based on work by Yann Collet, released under BSD License.
				4
				5	package zstd
				6
				7	import (
				8	"fmt"
				9	"math/bits"
				10	)
				11
				12	const (
				13	bestLongTableBits = 20 // Bits used in the long match table
				14	bestLongTableSize = 1 << bestLongTableBits // Size of the table
				15
				16	// Note: Increasing the short table bits or making the hash shorter
				17	// can actually lead to compression degradation since it will 'steal' more from the
				18	// long match table and match offsets are quite big.
				19	// This greatly depends on the type of input.
				20	bestShortTableBits = 16 // Bits used in the short match table
				21	bestShortTableSize = 1 << bestShortTableBits // Size of the table
				22	)
				23
				24	// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
				25	// The long match table contains the previous entry with the same hash,
				26	// effectively making it a "chain" of length 2.
				27	// When we find a long match we choose between the two values and select the longest.
				28	// When we find a short match, after checking the long, we check if we can find a long at n+1
				29	// and that it is longer (lazy matching).
				30	type bestFastEncoder struct {
				31	fastBase
				32	table [bestShortTableSize]prevEntry
				33	longTable [bestLongTableSize]prevEntry
				34	dictTable []prevEntry
				35	dictLongTable []prevEntry
				36	}
				37
				38	// Encode improves compression...
				39	func (e bestFastEncoder) Encode(blk blockEnc, src []byte) {
				40	const (
				41	// Input margin is the number of bytes we read (8)
				42	// and the maximum we will read ahead (2)
				43	inputMargin = 8 + 4
				44	minNonLiteralBlockSize = 16
				45	)
				46
				47	// Protect against e.cur wraparound.
				48	for e.cur >= bufferReset {
				49	if len(e.hist) == 0 {
				50	for i := range e.table[:] {
				51	e.table[i] = prevEntry{}
				52	}
				53	for i := range e.longTable[:] {
				54	e.longTable[i] = prevEntry{}
				55	}
				56	e.cur = e.maxMatchOff
				57	break
				58	}
				59	// Shift down everything in the table that isn't already too far away.
				60	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				61	for i := range e.table[:] {
				62	v := e.table[i].offset
				63	v2 := e.table[i].prev
				64	if v < minOff {
				65	v = 0
				66	v2 = 0
				67	} else {
				68	v = v - e.cur + e.maxMatchOff
				69	if v2 < minOff {
				70	v2 = 0
				71	} else {
				72	v2 = v2 - e.cur + e.maxMatchOff
				73	}
				74	}
				75	e.table[i] = prevEntry{
				76	offset: v,
				77	prev: v2,
				78	}
				79	}
				80	for i := range e.longTable[:] {
				81	v := e.longTable[i].offset
				82	v2 := e.longTable[i].prev
				83	if v < minOff {
				84	v = 0
				85	v2 = 0
				86	} else {
				87	v = v - e.cur + e.maxMatchOff
				88	if v2 < minOff {
				89	v2 = 0
				90	} else {
				91	v2 = v2 - e.cur + e.maxMatchOff
				92	}
				93	}
				94	e.longTable[i] = prevEntry{
				95	offset: v,
				96	prev: v2,
				97	}
				98	}
				99	e.cur = e.maxMatchOff
				100	break
				101	}
				102
				103	s := e.addBlock(src)
				104	blk.size = len(src)
				105	if len(src) < minNonLiteralBlockSize {
				106	blk.extraLits = len(src)
				107	blk.literals = blk.literals[:len(src)]
				108	copy(blk.literals, src)
				109	return
				110	}
				111
				112	// Override src
				113	src = e.hist
				114	sLimit := int32(len(src)) - inputMargin
				115	const kSearchStrength = 10
				116
				117	// nextEmit is where in src the next emitLiteral should start from.
				118	nextEmit := s
				119	cv := load6432(src, s)
				120
				121	// Relative offsets
				122	offset1 := int32(blk.recentOffsets[0])
				123	offset2 := int32(blk.recentOffsets[1])
				124	offset3 := int32(blk.recentOffsets[2])
				125
				126	addLiterals := func(s *seq, until int32) {
				127	if until == nextEmit {
				128	return
				129	}
				130	blk.literals = append(blk.literals, src[nextEmit:until]...)
				131	s.litLen = uint32(until - nextEmit)
				132	}
				133	_ = addLiterals
				134
				135	if debug {
				136	println("recent offsets:", blk.recentOffsets)
				137	}
				138
				139	encodeLoop:
				140	for {
				141	// We allow the encoder to optionally turn off repeat offsets across blocks
				142	canRepeat := len(blk.sequences) > 2
				143
				144	if debugAsserts && canRepeat && offset1 == 0 {
				145	panic("offset0 was 0")
				146	}
				147
				148	type match struct {
				149	offset int32
				150	s int32
				151	length int32
				152	rep int32
				153	}
				154	matchAt := func(offset int32, s int32, first uint32, rep int32) match {
				155	if s-offset >= e.maxMatchOff \|\| load3232(src, offset) != first {
				156	return match{offset: offset, s: s}
				157	}
				158	return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
				159	}
				160
				161	bestOf := func(a, b match) match {
				162	aScore := b.s - a.s + a.length
				163	bScore := a.s - b.s + b.length
				164	if a.rep < 0 {
				165	aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
				166	}
				167	if b.rep < 0 {
				168	bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
				169	}
				170	if aScore >= bScore {
				171	return a
				172	}
				173	return b
				174	}
				175	const goodEnough = 100
				176
				177	nextHashL := hash8(cv, bestLongTableBits)
				178	nextHashS := hash4x64(cv, bestShortTableBits)
				179	candidateL := e.longTable[nextHashL]
				180	candidateS := e.table[nextHashS]
				181
				182	best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
				183	best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
				184	best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
				185	if canRepeat && best.length < goodEnough {
				186	best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
				187	best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
				188	best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
				189	if best.length > 0 {
				190	best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
				191	best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
				192	best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
				193	}
				194	}
				195	// Load next and check...
				196	e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
				197	e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
				198
				199	// Look far ahead, unless we have a really long match already...
				200	if best.length < goodEnough {
				201	// No match found, move forward on input, no need to check forward...
				202	if best.length < 4 {
				203	s += 1 + (s-nextEmit)>>(kSearchStrength-1)
				204	if s >= sLimit {
				205	break encodeLoop
				206	}
				207	cv = load6432(src, s)
				208	continue
				209	}
				210
				211	s++
				212	candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
				213	cv = load6432(src, s)
				214	cv2 := load6432(src, s+1)
				215	candidateL = e.longTable[hash8(cv, bestLongTableBits)]
				216	candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
				217
				218	best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
				219	best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
				220	best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
				221	best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
				222	best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
				223
				224	// See if we can find a better match by checking where the current best ends.
				225	// Use that offset to see if we can find a better full match.
				226	if sAt := best.s + best.length; sAt < sLimit {
				227	nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
				228	candidateEnd := e.longTable[nextHashL]
				229	if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
				230	bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
				231	if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
				232	bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
				233	}
				234	best = bestEnd
				235	}
				236	}
				237	}
				238
				239	// We have a match, we can store the forward value
				240	if best.rep > 0 {
				241	s = best.s
				242	var seq seq
				243	seq.matchLen = uint32(best.length - zstdMinMatch)
				244
				245	// We might be able to match backwards.
				246	// Extend as long as we can.
				247	start := best.s
				248	// We end the search early, so we don't risk 0 literals
				249	// and have to do special offset treatment.
				250	startLimit := nextEmit + 1
				251
				252	tMin := s - e.maxMatchOff
				253	if tMin < 0 {
				254	tMin = 0
				255	}
				256	repIndex := best.offset
				257	for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
				258	repIndex--
				259	start--
				260	seq.matchLen++
				261	}
				262	addLiterals(&seq, start)
				263
				264	// rep 0
				265	seq.offset = uint32(best.rep)
				266	if debugSequences {
				267	println("repeat sequence", seq, "next s:", s)
				268	}
				269	blk.sequences = append(blk.sequences, seq)
				270
				271	// Index match start+1 (long) -> s - 1
				272	index0 := s
				273	s = best.s + best.length
				274
				275	nextEmit = s
				276	if s >= sLimit {
				277	if debug {
				278	println("repeat ended", s, best.length)
				279
				280	}
				281	break encodeLoop
				282	}
				283	// Index skipped...
				284	off := index0 + e.cur
				285	for index0 < s-1 {
				286	cv0 := load6432(src, index0)
				287	h0 := hash8(cv0, bestLongTableBits)
				288	h1 := hash4x64(cv0, bestShortTableBits)
				289	e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
				290	e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
				291	off++
				292	index0++
				293	}
				294	switch best.rep {
				295	case 2:
				296	offset1, offset2 = offset2, offset1
				297	case 3:
				298	offset1, offset2, offset3 = offset3, offset1, offset2
				299	}
				300	cv = load6432(src, s)
				301	continue
				302	}
				303
				304	// A 4-byte match has been found. Update recent offsets.
				305	// We'll later see if more than 4 bytes.
				306	s = best.s
				307	t := best.offset
				308	offset1, offset2, offset3 = s-t, offset1, offset2
				309
				310	if debugAsserts && s <= t {
				311	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				312	}
				313
				314	if debugAsserts && canRepeat && int(offset1) > len(src) {
				315	panic("invalid offset")
				316	}
				317
				318	// Extend the n-byte match as long as possible.
				319	l := best.length
				320
				321	// Extend backwards
				322	tMin := s - e.maxMatchOff
				323	if tMin < 0 {
				324	tMin = 0
				325	}
				326	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				327	s--
				328	t--
				329	l++
				330	}
				331
				332	// Write our sequence
				333	var seq seq
				334	seq.litLen = uint32(s - nextEmit)
				335	seq.matchLen = uint32(l - zstdMinMatch)
				336	if seq.litLen > 0 {
				337	blk.literals = append(blk.literals, src[nextEmit:s]...)
				338	}
				339	seq.offset = uint32(s-t) + 3
				340	s += l
				341	if debugSequences {
				342	println("sequence", seq, "next s:", s)
				343	}
				344	blk.sequences = append(blk.sequences, seq)
				345	nextEmit = s
				346	if s >= sLimit {
				347	break encodeLoop
				348	}
				349
				350	// Index match start+1 (long) -> s - 1
				351	index0 := s - l + 1
				352	// every entry
				353	for index0 < s-1 {
				354	cv0 := load6432(src, index0)
				355	h0 := hash8(cv0, bestLongTableBits)
				356	h1 := hash4x64(cv0, bestShortTableBits)
				357	off := index0 + e.cur
				358	e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
				359	e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
				360	index0++
				361	}
				362
				363	cv = load6432(src, s)
				364	if !canRepeat {
				365	continue
				366	}
				367
				368	// Check offset 2
				369	for {
				370	o2 := s - offset2
				371	if load3232(src, o2) != uint32(cv) {
				372	// Do regular search
				373	break
				374	}
				375
				376	// Store this, since we have it.
				377	nextHashS := hash4x64(cv, bestShortTableBits)
				378	nextHashL := hash8(cv, bestLongTableBits)
				379
				380	// We have at least 4 byte match.
				381	// No need to check backwards. We come straight from a match
				382	l := 4 + e.matchlen(s+4, o2+4, src)
				383
				384	e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
				385	e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
				386	seq.matchLen = uint32(l) - zstdMinMatch
				387	seq.litLen = 0
				388
				389	// Since litlen is always 0, this is offset 1.
				390	seq.offset = 1
				391	s += l
				392	nextEmit = s
				393	if debugSequences {
				394	println("sequence", seq, "next s:", s)
				395	}
				396	blk.sequences = append(blk.sequences, seq)
				397
				398	// Swap offset 1 and 2.
				399	offset1, offset2 = offset2, offset1
				400	if s >= sLimit {
				401	// Finished
				402	break encodeLoop
				403	}
				404	cv = load6432(src, s)
				405	}
				406	}
				407
				408	if int(nextEmit) < len(src) {
				409	blk.literals = append(blk.literals, src[nextEmit:]...)
				410	blk.extraLits = len(src) - int(nextEmit)
				411	}
				412	blk.recentOffsets[0] = uint32(offset1)
				413	blk.recentOffsets[1] = uint32(offset2)
				414	blk.recentOffsets[2] = uint32(offset3)
				415	if debug {
				416	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				417	}
				418	}
				419
				420	// EncodeNoHist will encode a block with no history and no following blocks.
				421	// Most notable difference is that src will not be copied for history and
				422	// we do not need to check for max match length.
				423	func (e bestFastEncoder) EncodeNoHist(blk blockEnc, src []byte) {
				424	e.ensureHist(len(src))
				425	e.Encode(blk, src)
				426	}
				427
				428	// ResetDict will reset and set a dictionary if not nil
				429	func (e bestFastEncoder) Reset(d dict, singleBlock bool) {
				430	e.resetBase(d, singleBlock)
				431	if d == nil {
				432	return
				433	}
				434	// Init or copy dict table
				435	if len(e.dictTable) != len(e.table) \|\| d.id != e.lastDictID {
				436	if len(e.dictTable) != len(e.table) {
				437	e.dictTable = make([]prevEntry, len(e.table))
				438	}
				439	end := int32(len(d.content)) - 8 + e.maxMatchOff
				440	for i := e.maxMatchOff; i < end; i += 4 {
				441	const hashLog = bestShortTableBits
				442
				443	cv := load6432(d.content, i-e.maxMatchOff)
				444	nextHash := hash4x64(cv, hashLog) // 0 -> 4
				445	nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
				446	nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
				447	nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
				448	e.dictTable[nextHash] = prevEntry{
				449	prev: e.dictTable[nextHash].offset,
				450	offset: i,
				451	}
				452	e.dictTable[nextHash1] = prevEntry{
				453	prev: e.dictTable[nextHash1].offset,
				454	offset: i + 1,
				455	}
				456	e.dictTable[nextHash2] = prevEntry{
				457	prev: e.dictTable[nextHash2].offset,
				458	offset: i + 2,
				459	}
				460	e.dictTable[nextHash3] = prevEntry{
				461	prev: e.dictTable[nextHash3].offset,
				462	offset: i + 3,
				463	}
				464	}
				465	e.lastDictID = d.id
				466	}
				467
				468	// Init or copy dict table
				469	if len(e.dictLongTable) != len(e.longTable) \|\| d.id != e.lastDictID {
				470	if len(e.dictLongTable) != len(e.longTable) {
				471	e.dictLongTable = make([]prevEntry, len(e.longTable))
				472	}
				473	if len(d.content) >= 8 {
				474	cv := load6432(d.content, 0)
				475	h := hash8(cv, bestLongTableBits)
				476	e.dictLongTable[h] = prevEntry{
				477	offset: e.maxMatchOff,
				478	prev: e.dictLongTable[h].offset,
				479	}
				480
				481	end := int32(len(d.content)) - 8 + e.maxMatchOff
				482	off := 8 // First to read
				483	for i := e.maxMatchOff + 1; i < end; i++ {
				484	cv = cv>>8 \| (uint64(d.content[off]) << 56)
				485	h := hash8(cv, bestLongTableBits)
				486	e.dictLongTable[h] = prevEntry{
				487	offset: i,
				488	prev: e.dictLongTable[h].offset,
				489	}
				490	off++
				491	}
				492	}
				493	e.lastDictID = d.id
				494	}
				495	// Reset table to initial state
				496	copy(e.longTable[:], e.dictLongTable)
				497
				498	e.cur = e.maxMatchOff
				499	// Reset table to initial state
				500	copy(e.table[:], e.dictTable)
				501	}