// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table
	betterLongLen       = 8                        // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
	betterShortLen       = 5                         // Bytes used for table hash

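	// The tables are split into shards so that the dictionary-backed encoder
	// (betterFastEncoderDict) only has to restore the shards it actually
	// overwrote when a dictionary is re-applied on Reset.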
	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode encodes src into blk. It improves compression over the fast encoder
// by combining the short match table with the chained long match table and
// the lazy matching described on the type above.
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
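	// Table entries store absolute positions (position in block + e.cur). When
	// e.cur reaches bufferReset the tables are either cleared (no history) or
	// shifted down so e.cur can be rewound to e.maxMatchOff; entries that are
	// too far back to ever match again are zeroed.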
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

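	// kSearchStrength controls how aggressively the search skips ahead when no
	// match is found: the step grows with the distance covered since nextEmit
	// (see the "No match found" branch below).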
	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
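			// repIndex is where a match using the last offset (offset1) would
			// start, checked one byte ahead (repOff) of the current position.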
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += lenght + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += lenght + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
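		// candidateL.offset points at the data whose long hash matched at s+matched,
		// so subtracting matched gives the position where the extended match would
		// have to start for the tails to line up.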
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
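		// Sequence offsets 1-3 are reserved for repeat codes, so a literal match
		// distance is stored as the distance plus 3.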
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
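		// Index every second position inside the match we just emitted: the long
		// table gets the hash at index0, the short table the hash at index0+1.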
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode encodes src into blk like betterFastEncoder.Encode, while also
// marking which table shards it overwrites so a loaded dictionary can be
// restored cheaply on the next Reset.
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

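	// See the kSearchStrength comment in betterFastEncoder.Encode: it controls
	// how quickly the search skips ahead after repeated failures to find a match.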
	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += lenght + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += lenght + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
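		// As above: offsets 1-3 are repeat codes, so the literal distance is stored with +3.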
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder state. This encoder does not support
// dictionaries; d must be nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
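		// Fill the short dict table: the loop advances four bytes at a time and
		// hashes the four positions covered by each 8-byte load (the "0 -> 4"
		// style comments note which bytes of cv each hash covers). Offsets start
		// at e.maxMatchOff.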
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, betterLongTableBits, betterLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
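			// Roll the 8-byte window forward one byte at a time; each position's
			// long hash is chained to the previous occupant via prev, mirroring
			// how Encode indexes the long table.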
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, betterLongTableBits, betterLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
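	// If everything (or most shards) changed, bulk-copy the whole table from the
	// dictionary copy; otherwise copy back only the dirty shards. The long table
	// below is handled the same way.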
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}