// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
    betterLongTableBits = 19                       // Bits used in the long match table
    betterLongTableSize = 1 << betterLongTableBits // Size of the table

    // Note: Increasing the short table bits or making the hash shorter
    // can actually lead to compression degradation since it will 'steal' more from the
    // long match table and match offsets are quite big.
    // This greatly depends on the type of input.
    betterShortTableBits = 13                        // Bits used in the short match table
    betterShortTableSize = 1 << betterShortTableBits // Size of the table

    betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
    betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

    betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
    betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

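// prevEntry holds the current entry for a long-table slot (offset) together
// with the entry that previously occupied the same slot (prev), giving each
// bucket an effective chain length of two.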
type prevEntry struct {
    offset int32
    prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
    fastBase
    table     [betterShortTableSize]tableEntry
    longTable [betterLongTableSize]prevEntry
}

type betterFastEncoderDict struct {
    betterFastEncoder
    dictTable            []tableEntry
    dictLongTable        []prevEntry
    shortTableShardDirty [betterShortTableShardCnt]bool
    longTableShardDirty  [betterLongTableShardCnt]bool
    allDirty             bool
}

// Encode improves compression...
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
    const (
        // Input margin is the number of bytes we read (8)
        // and the maximum we will read ahead (2).
        inputMargin            = 8 + 2
        minNonLiteralBlockSize = 16
    )

    // Protect against e.cur wraparound.
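    // (The loop body runs at most once; a for loop is used only so that both
    // cases can break out early.)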
    for e.cur >= bufferReset {
        if len(e.hist) == 0 {
            for i := range e.table[:] {
                e.table[i] = tableEntry{}
            }
            for i := range e.longTable[:] {
                e.longTable[i] = prevEntry{}
            }
            e.cur = e.maxMatchOff
            break
        }
        // Shift down everything in the table that isn't already too far away.
        minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
        for i := range e.table[:] {
            v := e.table[i].offset
            if v < minOff {
                v = 0
            } else {
                v = v - e.cur + e.maxMatchOff
            }
            e.table[i].offset = v
        }
        for i := range e.longTable[:] {
            v := e.longTable[i].offset
            v2 := e.longTable[i].prev
            if v < minOff {
                v = 0
                v2 = 0
            } else {
                v = v - e.cur + e.maxMatchOff
                if v2 < minOff {
                    v2 = 0
                } else {
                    v2 = v2 - e.cur + e.maxMatchOff
                }
            }
            e.longTable[i] = prevEntry{
                offset: v,
                prev:   v2,
            }
        }
        e.cur = e.maxMatchOff
        break
    }

    s := e.addBlock(src)
    blk.size = len(src)
    if len(src) < minNonLiteralBlockSize {
        blk.extraLits = len(src)
        blk.literals = blk.literals[:len(src)]
        copy(blk.literals, src)
        return
    }

    // Override src
    src = e.hist
    sLimit := int32(len(src)) - inputMargin
    // stepSize is the number of bytes to skip on every main loop iteration.
    // It should be >= 1.
    const stepSize = 1

    const kSearchStrength = 9
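    // kSearchStrength controls how quickly the search skips ahead when no
    // match is found; see the step calculation at the end of the match loop.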

    // nextEmit is where in src the next emitLiteral should start from.
    nextEmit := s
    cv := load6432(src, s)

    // Relative offsets
    offset1 := int32(blk.recentOffsets[0])
    offset2 := int32(blk.recentOffsets[1])

    addLiterals := func(s *seq, until int32) {
        if until == nextEmit {
            return
        }
        blk.literals = append(blk.literals, src[nextEmit:until]...)
        s.litLen = uint32(until - nextEmit)
    }
    if debug {
        println("recent offsets:", blk.recentOffsets)
    }

encodeLoop:
    for {
        var t int32
        // We allow the encoder to optionally turn off repeat offsets across blocks.
        canRepeat := len(blk.sequences) > 2
        var matched int32

        for {
            if debugAsserts && canRepeat && offset1 == 0 {
                panic("offset0 was 0")
            }

            nextHashS := hash5(cv, betterShortTableBits)
            nextHashL := hash8(cv, betterLongTableBits)
            candidateL := e.longTable[nextHashL]
            candidateS := e.table[nextHashS]

            const repOff = 1
            repIndex := s - offset1 + repOff
            off := s + e.cur
            e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
            e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

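            // Check for a 4-byte repeat match one byte ahead of the current
            // position (repOff) against the most recent offset.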
            if canRepeat {
                if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
                    // Consider history as well.
                    var seq seq
                    length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

                    seq.matchLen = uint32(length - zstdMinMatch)

                    // We might be able to match backwards.
                    // Extend as long as we can.
                    start := s + repOff
                    // We end the search early, so we don't risk 0 literals
                    // and have to do special offset treatment.
                    startLimit := nextEmit + 1

                    tMin := s - e.maxMatchOff
                    if tMin < 0 {
                        tMin = 0
                    }
                    for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
                        repIndex--
                        start--
                        seq.matchLen++
                    }
                    addLiterals(&seq, start)

                    // rep 0
                    seq.offset = 1
                    if debugSequences {
                        println("repeat sequence", seq, "next s:", s)
                    }
                    blk.sequences = append(blk.sequences, seq)

                    // Index match start+1 (long) -> s - 1
                    index0 := s + repOff
                    s += length + repOff

                    nextEmit = s
                    if s >= sLimit {
                        if debug {
                            println("repeat ended", s, length)
                        }
                        break encodeLoop
                    }
                    // Index the positions we skipped over: the long table at
                    // index0 and the short table one byte later, stepping by 2.
                    for index0 < s-1 {
                        cv0 := load6432(src, index0)
                        cv1 := cv0 >> 8
                        h0 := hash8(cv0, betterLongTableBits)
                        off := index0 + e.cur
                        e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
                        e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
                        index0 += 2
                    }
                    cv = load6432(src, s)
                    continue
                }
                const repOff2 = 1

                // We deviate from the reference encoder and also check offset 2.
                // Still slower and not much better, so disabled.
                // repIndex = s - offset2 + repOff2
                if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
                    // Consider history as well.
                    var seq seq
                    length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

                    seq.matchLen = uint32(length - zstdMinMatch)

                    // We might be able to match backwards.
                    // Extend as long as we can.
                    start := s + repOff2
                    // We end the search early, so we don't risk 0 literals
                    // and have to do special offset treatment.
                    startLimit := nextEmit + 1

                    tMin := s - e.maxMatchOff
                    if tMin < 0 {
                        tMin = 0
                    }
                    for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
                        repIndex--
                        start--
                        seq.matchLen++
                    }
                    addLiterals(&seq, start)

                    // rep 2
                    seq.offset = 2
                    if debugSequences {
                        println("repeat sequence 2", seq, "next s:", s)
                    }
                    blk.sequences = append(blk.sequences, seq)

                    index0 := s + repOff2
                    s += length + repOff2
                    nextEmit = s
                    if s >= sLimit {
                        if debug {
                            println("repeat ended", s, length)
                        }
                        break encodeLoop
                    }

                    // Index the positions we skipped over (long and short tables).
                    for index0 < s-1 {
                        cv0 := load6432(src, index0)
                        cv1 := cv0 >> 8
                        h0 := hash8(cv0, betterLongTableBits)
                        off := index0 + e.cur
                        e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
                        e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
                        index0 += 2
                    }
                    cv = load6432(src, s)
                    // Swap offsets
                    offset1, offset2 = offset2, offset1
                    continue
                }
            }
            // Find the offsets of our two matches.
            coffsetL := candidateL.offset - e.cur
            coffsetLP := candidateL.prev - e.cur

            // Check if we have a long match.
            if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
                // Found a long match, at least 8 bytes.
                matched = e.matchlen(s+8, coffsetL+8, src) + 8
                t = coffsetL
                if debugAsserts && s <= t {
                    panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                }
                if debugAsserts && s-t > e.maxMatchOff {
                    panic("s - t > e.maxMatchOff")
                }
                if debugMatches {
                    println("long match")
                }

                // Also check the previous entry in the chain and keep the longer match.
                if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
                    // Found a long match, at least 8 bytes.
                    prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
                    if prevMatch > matched {
                        matched = prevMatch
                        t = coffsetLP
                    }
                    if debugAsserts && s <= t {
                        panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                    }
                    if debugAsserts && s-t > e.maxMatchOff {
                        panic("s - t > e.maxMatchOff")
                    }
                    if debugMatches {
                        println("long match")
                    }
                }
                break
            }

            // Check if we have a long match on prev.
            if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
                // Found a long match, at least 8 bytes.
                matched = e.matchlen(s+8, coffsetLP+8, src) + 8
                t = coffsetLP
                if debugAsserts && s <= t {
                    panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                }
                if debugAsserts && s-t > e.maxMatchOff {
                    panic("s - t > e.maxMatchOff")
                }
                if debugMatches {
                    println("long match")
                }
                break
            }

            coffsetS := candidateS.offset - e.cur

            // Check if we have a short match.
            if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
                // Found a regular match.
                matched = e.matchlen(s+4, coffsetS+4, src) + 4

                // See if we can find a long match at s+1.
                const checkAt = 1
                cv := load6432(src, s+checkAt)
                nextHashL = hash8(cv, betterLongTableBits)
                candidateL = e.longTable[nextHashL]
                coffsetL = candidateL.offset - e.cur

                // We can store it, since we have at least a 4 byte match.
                e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
                if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
                    // Found a long match, at least 8 bytes.
                    matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
                    if matchedNext > matched {
                        t = coffsetL
                        s += checkAt
                        matched = matchedNext
                        if debugMatches {
                            println("long match (after short)")
                        }
                        break
                    }
                }

                // Check prev long...
                coffsetL = candidateL.prev - e.cur
                if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
                    // Found a long match, at least 8 bytes.
                    matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
                    if matchedNext > matched {
                        t = coffsetL
                        s += checkAt
                        matched = matchedNext
                        if debugMatches {
                            println("prev long match (after short)")
                        }
                        break
                    }
                }
                t = coffsetS
                if debugAsserts && s <= t {
                    panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                }
                if debugAsserts && s-t > e.maxMatchOff {
                    panic("s - t > e.maxMatchOff")
                }
                if debugAsserts && t < 0 {
                    panic("t<0")
                }
                if debugMatches {
                    println("short match")
                }
                break
            }

            // No match found, move forward in input.
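            // The step grows by one for every 1<<(kSearchStrength-1) bytes
            // scanned since the last emit, so incompressible regions are
            // skipped progressively faster.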
            s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
            if s >= sLimit {
                break encodeLoop
            }
            cv = load6432(src, s)
        }

        // Try to find a better match by searching for a long match at the end of the current best match.
        if s+matched < sLimit {
            nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
            cv := load3232(src, s)
            candidateL := e.longTable[nextHashL]
            coffsetL := candidateL.offset - e.cur - matched
            if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
                // Found a long match, at least 4 bytes.
                matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
                if matchedNext > matched {
                    t = coffsetL
                    matched = matchedNext
                    if debugMatches {
                        println("long match at end-of-match")
                    }
                }
            }

            // Check prev long...
            coffsetL = candidateL.prev - e.cur - matched
            if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
                // Found a long match, at least 4 bytes.
                matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
                if matchedNext > matched {
                    t = coffsetL
                    matched = matchedNext
                    if debugMatches {
                        println("prev long match at end-of-match")
                    }
                }
            }
        }
        // A match has been found. Update recent offsets.
        offset2 = offset1
        offset1 = s - t

        if debugAsserts && s <= t {
            panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
        }

        if debugAsserts && canRepeat && int(offset1) > len(src) {
            panic("invalid offset")
        }

        // Extend the n-byte match as long as possible.
        l := matched

        // Extend backwards
        tMin := s - e.maxMatchOff
        if tMin < 0 {
            tMin = 0
        }
        for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
            s--
            t--
            l++
        }

        // Write our sequence
        var seq seq
        seq.litLen = uint32(s - nextEmit)
        seq.matchLen = uint32(l - zstdMinMatch)
        if seq.litLen > 0 {
            blk.literals = append(blk.literals, src[nextEmit:s]...)
        }
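        // seq.offset values 1-3 are reserved for the repeat codes used above,
        // so a real match offset is stored shifted up by 3.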
        seq.offset = uint32(s-t) + 3
        s += l
        if debugSequences {
            println("sequence", seq, "next s:", s)
        }
        blk.sequences = append(blk.sequences, seq)
        nextEmit = s
        if s >= sLimit {
            break encodeLoop
        }

        // Index match start+1 (long) -> s - 1
        index0 := s - l + 1
        for index0 < s-1 {
            cv0 := load6432(src, index0)
            cv1 := cv0 >> 8
            h0 := hash8(cv0, betterLongTableBits)
            off := index0 + e.cur
            e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
            e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
            index0 += 2
        }

        cv = load6432(src, s)
        if !canRepeat {
            continue
        }

        // Check offset 2
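        // After a match, greedily emit zero-literal repeat matches for as long
        // as offset 2 keeps matching (it becomes offset 1 once litLen is 0).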
        for {
            o2 := s - offset2
            if load3232(src, o2) != uint32(cv) {
                // Do regular search.
                break
            }

            // Store this, since we have it.
            nextHashS := hash5(cv, betterShortTableBits)
            nextHashL := hash8(cv, betterLongTableBits)

            // We have at least a 4 byte match.
            // No need to check backwards. We come straight from a match.
            l := 4 + e.matchlen(s+4, o2+4, src)

            e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
            e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
            seq.matchLen = uint32(l) - zstdMinMatch
            seq.litLen = 0

            // Since litlen is always 0, this is offset 1.
            seq.offset = 1
            s += l
            nextEmit = s
            if debugSequences {
                println("sequence", seq, "next s:", s)
            }
            blk.sequences = append(blk.sequences, seq)

            // Swap offset 1 and 2.
            offset1, offset2 = offset2, offset1
            if s >= sLimit {
                // Finished
                break encodeLoop
            }
            cv = load6432(src, s)
        }
    }

    if int(nextEmit) < len(src) {
        blk.literals = append(blk.literals, src[nextEmit:]...)
        blk.extraLits = len(src) - int(nextEmit)
    }
    blk.recentOffsets[0] = uint32(offset1)
    blk.recentOffsets[1] = uint32(offset2)
    if debug {
        println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
    }
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
    e.ensureHist(len(src))
    e.Encode(blk, src)
}

// Encode improves compression...
// It mirrors betterFastEncoder.Encode but additionally marks table shards
// dirty, so Reset can restore only the modified parts of the dictionary tables.
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
    const (
        // Input margin is the number of bytes we read (8)
        // and the maximum we will read ahead (2).
        inputMargin            = 8 + 2
        minNonLiteralBlockSize = 16
    )

    // Protect against e.cur wraparound.
    for e.cur >= bufferReset {
        if len(e.hist) == 0 {
            for i := range e.table[:] {
                e.table[i] = tableEntry{}
            }
            for i := range e.longTable[:] {
                e.longTable[i] = prevEntry{}
            }
            e.cur = e.maxMatchOff
            e.allDirty = true
            break
        }
        // Shift down everything in the table that isn't already too far away.
        minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
        for i := range e.table[:] {
            v := e.table[i].offset
            if v < minOff {
                v = 0
            } else {
                v = v - e.cur + e.maxMatchOff
            }
            e.table[i].offset = v
        }
        for i := range e.longTable[:] {
            v := e.longTable[i].offset
            v2 := e.longTable[i].prev
            if v < minOff {
                v = 0
                v2 = 0
            } else {
                v = v - e.cur + e.maxMatchOff
                if v2 < minOff {
                    v2 = 0
                } else {
                    v2 = v2 - e.cur + e.maxMatchOff
                }
            }
            e.longTable[i] = prevEntry{
                offset: v,
                prev:   v2,
            }
        }
        e.allDirty = true
        e.cur = e.maxMatchOff
        break
    }

    s := e.addBlock(src)
    blk.size = len(src)
    if len(src) < minNonLiteralBlockSize {
        blk.extraLits = len(src)
        blk.literals = blk.literals[:len(src)]
        copy(blk.literals, src)
        return
    }

    // Override src
    src = e.hist
    sLimit := int32(len(src)) - inputMargin
    // stepSize is the number of bytes to skip on every main loop iteration.
    // It should be >= 1.
    const stepSize = 1

    const kSearchStrength = 9

    // nextEmit is where in src the next emitLiteral should start from.
    nextEmit := s
    cv := load6432(src, s)

    // Relative offsets
    offset1 := int32(blk.recentOffsets[0])
    offset2 := int32(blk.recentOffsets[1])

    addLiterals := func(s *seq, until int32) {
        if until == nextEmit {
            return
        }
        blk.literals = append(blk.literals, src[nextEmit:until]...)
        s.litLen = uint32(until - nextEmit)
    }
    if debug {
        println("recent offsets:", blk.recentOffsets)
    }

encodeLoop:
    for {
        var t int32
        // We allow the encoder to optionally turn off repeat offsets across blocks.
        canRepeat := len(blk.sequences) > 2
        var matched int32

        for {
            if debugAsserts && canRepeat && offset1 == 0 {
                panic("offset0 was 0")
            }

            nextHashS := hash5(cv, betterShortTableBits)
            nextHashL := hash8(cv, betterLongTableBits)
            candidateL := e.longTable[nextHashL]
            candidateS := e.table[nextHashS]

            const repOff = 1
            repIndex := s - offset1 + repOff
            off := s + e.cur
            e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
            e.markLongShardDirty(nextHashL)
            e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
            e.markShortShardDirty(nextHashS)

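            // Check for a 4-byte repeat match one byte ahead of the current
            // position (repOff) against the most recent offset.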
            if canRepeat {
                if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
                    // Consider history as well.
                    var seq seq
                    length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

                    seq.matchLen = uint32(length - zstdMinMatch)

                    // We might be able to match backwards.
                    // Extend as long as we can.
                    start := s + repOff
                    // We end the search early, so we don't risk 0 literals
                    // and have to do special offset treatment.
                    startLimit := nextEmit + 1

                    tMin := s - e.maxMatchOff
                    if tMin < 0 {
                        tMin = 0
                    }
                    for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
                        repIndex--
                        start--
                        seq.matchLen++
                    }
                    addLiterals(&seq, start)

                    // rep 0
                    seq.offset = 1
                    if debugSequences {
                        println("repeat sequence", seq, "next s:", s)
                    }
                    blk.sequences = append(blk.sequences, seq)

                    // Index match start+1 (long) -> s - 1
                    index0 := s + repOff
                    s += length + repOff

                    nextEmit = s
                    if s >= sLimit {
                        if debug {
                            println("repeat ended", s, length)
                        }
                        break encodeLoop
                    }
                    // Index the positions we skipped over (long and short tables).
                    for index0 < s-1 {
                        cv0 := load6432(src, index0)
                        cv1 := cv0 >> 8
                        h0 := hash8(cv0, betterLongTableBits)
                        off := index0 + e.cur
                        e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
                        e.markLongShardDirty(h0)
                        h1 := hash5(cv1, betterShortTableBits)
                        e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
                        e.markShortShardDirty(h1)
                        index0 += 2
                    }
                    cv = load6432(src, s)
                    continue
                }
                const repOff2 = 1

                // We deviate from the reference encoder and also check offset 2.
                // Still slower and not much better, so disabled.
                // repIndex = s - offset2 + repOff2
                if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
                    // Consider history as well.
                    var seq seq
                    length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

                    seq.matchLen = uint32(length - zstdMinMatch)

                    // We might be able to match backwards.
                    // Extend as long as we can.
                    start := s + repOff2
                    // We end the search early, so we don't risk 0 literals
                    // and have to do special offset treatment.
                    startLimit := nextEmit + 1

                    tMin := s - e.maxMatchOff
                    if tMin < 0 {
                        tMin = 0
                    }
                    for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
                        repIndex--
                        start--
                        seq.matchLen++
                    }
                    addLiterals(&seq, start)

                    // rep 2
                    seq.offset = 2
                    if debugSequences {
                        println("repeat sequence 2", seq, "next s:", s)
                    }
                    blk.sequences = append(blk.sequences, seq)

                    index0 := s + repOff2
                    s += length + repOff2
                    nextEmit = s
                    if s >= sLimit {
                        if debug {
                            println("repeat ended", s, length)
                        }
                        break encodeLoop
                    }

                    // Index the positions we skipped over (long and short tables).
                    for index0 < s-1 {
                        cv0 := load6432(src, index0)
                        cv1 := cv0 >> 8
                        h0 := hash8(cv0, betterLongTableBits)
                        off := index0 + e.cur
                        e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
                        e.markLongShardDirty(h0)
                        h1 := hash5(cv1, betterShortTableBits)
                        e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
                        e.markShortShardDirty(h1)
                        index0 += 2
                    }
                    cv = load6432(src, s)
                    // Swap offsets
                    offset1, offset2 = offset2, offset1
                    continue
                }
            }
            // Find the offsets of our two matches.
            coffsetL := candidateL.offset - e.cur
            coffsetLP := candidateL.prev - e.cur

            // Check if we have a long match.
            if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
                // Found a long match, at least 8 bytes.
                matched = e.matchlen(s+8, coffsetL+8, src) + 8
                t = coffsetL
                if debugAsserts && s <= t {
                    panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                }
                if debugAsserts && s-t > e.maxMatchOff {
                    panic("s - t > e.maxMatchOff")
                }
                if debugMatches {
                    println("long match")
                }

                // Also check the previous entry in the chain and keep the longer match.
                if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
                    // Found a long match, at least 8 bytes.
                    prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
                    if prevMatch > matched {
                        matched = prevMatch
                        t = coffsetLP
                    }
                    if debugAsserts && s <= t {
                        panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                    }
                    if debugAsserts && s-t > e.maxMatchOff {
                        panic("s - t > e.maxMatchOff")
                    }
                    if debugMatches {
                        println("long match")
                    }
                }
                break
            }

            // Check if we have a long match on prev.
            if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
                // Found a long match, at least 8 bytes.
                matched = e.matchlen(s+8, coffsetLP+8, src) + 8
                t = coffsetLP
                if debugAsserts && s <= t {
                    panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                }
                if debugAsserts && s-t > e.maxMatchOff {
                    panic("s - t > e.maxMatchOff")
                }
                if debugMatches {
                    println("long match")
                }
                break
            }

            coffsetS := candidateS.offset - e.cur

            // Check if we have a short match.
            if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
                // Found a regular match.
                matched = e.matchlen(s+4, coffsetS+4, src) + 4

                // See if we can find a long match at s+1.
                const checkAt = 1
                cv := load6432(src, s+checkAt)
                nextHashL = hash8(cv, betterLongTableBits)
                candidateL = e.longTable[nextHashL]
                coffsetL = candidateL.offset - e.cur

                // We can store it, since we have at least a 4 byte match.
                e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
                e.markLongShardDirty(nextHashL)
                if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
                    // Found a long match, at least 8 bytes.
                    matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
                    if matchedNext > matched {
                        t = coffsetL
                        s += checkAt
                        matched = matchedNext
                        if debugMatches {
                            println("long match (after short)")
                        }
                        break
                    }
                }

                // Check prev long...
                coffsetL = candidateL.prev - e.cur
                if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
                    // Found a long match, at least 8 bytes.
                    matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
                    if matchedNext > matched {
                        t = coffsetL
                        s += checkAt
                        matched = matchedNext
                        if debugMatches {
                            println("prev long match (after short)")
                        }
                        break
                    }
                }
                t = coffsetS
                if debugAsserts && s <= t {
                    panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
                }
                if debugAsserts && s-t > e.maxMatchOff {
                    panic("s - t > e.maxMatchOff")
                }
                if debugAsserts && t < 0 {
                    panic("t<0")
                }
                if debugMatches {
                    println("short match")
                }
                break
            }

            // No match found, move forward in input.
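            // The step grows with the distance since the last emit; see the
            // identical heuristic in betterFastEncoder.Encode.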
            s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
            if s >= sLimit {
                break encodeLoop
            }
            cv = load6432(src, s)
        }
        // Try to find a better match by searching for a long match at the end of the current best match.
        if s+matched < sLimit {
            nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
            cv := load3232(src, s)
            candidateL := e.longTable[nextHashL]
            coffsetL := candidateL.offset - e.cur - matched
            if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
                // Found a long match, at least 4 bytes.
                matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
                if matchedNext > matched {
                    t = coffsetL
                    matched = matchedNext
                    if debugMatches {
                        println("long match at end-of-match")
                    }
                }
            }

            // Check prev long...
            coffsetL = candidateL.prev - e.cur - matched
            if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
                // Found a long match, at least 4 bytes.
                matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
                if matchedNext > matched {
                    t = coffsetL
                    matched = matchedNext
                    if debugMatches {
                        println("prev long match at end-of-match")
                    }
                }
            }
        }
        // A match has been found. Update recent offsets.
        offset2 = offset1
        offset1 = s - t

        if debugAsserts && s <= t {
            panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
        }

        if debugAsserts && canRepeat && int(offset1) > len(src) {
            panic("invalid offset")
        }

        // Extend the n-byte match as long as possible.
        l := matched

        // Extend backwards
        tMin := s - e.maxMatchOff
        if tMin < 0 {
            tMin = 0
        }
        for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
            s--
            t--
            l++
        }

        // Write our sequence
        var seq seq
        seq.litLen = uint32(s - nextEmit)
        seq.matchLen = uint32(l - zstdMinMatch)
        if seq.litLen > 0 {
            blk.literals = append(blk.literals, src[nextEmit:s]...)
        }
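        // seq.offset values 1-3 are reserved for the repeat codes used above,
        // so a real match offset is stored shifted up by 3.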
        seq.offset = uint32(s-t) + 3
        s += l
        if debugSequences {
            println("sequence", seq, "next s:", s)
        }
        blk.sequences = append(blk.sequences, seq)
        nextEmit = s
        if s >= sLimit {
            break encodeLoop
        }

        // Index match start+1 (long) -> s - 1
        index0 := s - l + 1
        for index0 < s-1 {
            cv0 := load6432(src, index0)
            cv1 := cv0 >> 8
            h0 := hash8(cv0, betterLongTableBits)
            off := index0 + e.cur
            e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
            e.markLongShardDirty(h0)
            h1 := hash5(cv1, betterShortTableBits)
            e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
            e.markShortShardDirty(h1)
            index0 += 2
        }

        cv = load6432(src, s)
        if !canRepeat {
            continue
        }

        // Check offset 2
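        // After a match, greedily emit zero-literal repeat matches for as long
        // as offset 2 keeps matching (it becomes offset 1 once litLen is 0).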
        for {
            o2 := s - offset2
            if load3232(src, o2) != uint32(cv) {
                // Do regular search.
                break
            }

            // Store this, since we have it.
            nextHashS := hash5(cv, betterShortTableBits)
            nextHashL := hash8(cv, betterLongTableBits)

            // We have at least a 4 byte match.
            // No need to check backwards. We come straight from a match.
            l := 4 + e.matchlen(s+4, o2+4, src)

            e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
            e.markLongShardDirty(nextHashL)
            e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
            e.markShortShardDirty(nextHashS)
            seq.matchLen = uint32(l) - zstdMinMatch
            seq.litLen = 0

            // Since litlen is always 0, this is offset 1.
            seq.offset = 1
            s += l
            nextEmit = s
            if debugSequences {
                println("sequence", seq, "next s:", s)
            }
            blk.sequences = append(blk.sequences, seq)

            // Swap offset 1 and 2.
            offset1, offset2 = offset2, offset1
            if s >= sLimit {
                // Finished
                break encodeLoop
            }
            cv = load6432(src, s)
        }
    }

    if int(nextEmit) < len(src) {
        blk.literals = append(blk.literals, src[nextEmit:]...)
        blk.extraLits = len(src) - int(nextEmit)
    }
    blk.recentOffsets[0] = uint32(offset1)
    blk.recentOffsets[1] = uint32(offset2)
    if debug {
        println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
    }
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
    e.resetBase(d, singleBlock)
    if d != nil {
        panic("betterFastEncoder: Reset with dict")
    }
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
    e.resetBase(d, singleBlock)
    if d == nil {
        return
    }
    // Init or copy dict table
    if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
        if len(e.dictTable) != len(e.table) {
            e.dictTable = make([]tableEntry, len(e.table))
        }
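        // Fill the short dict table: each 8-byte load covers 4 input
        // positions, each hashed with the 5-byte hash of the short table.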
        end := int32(len(d.content)) - 8 + e.maxMatchOff
        for i := e.maxMatchOff; i < end; i += 4 {
            const hashLog = betterShortTableBits

            cv := load6432(d.content, i-e.maxMatchOff)
            nextHash := hash5(cv, hashLog)      // 0 -> 4
            nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
            nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
            nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
            e.dictTable[nextHash] = tableEntry{
                val:    uint32(cv),
                offset: i,
            }
            e.dictTable[nextHash1] = tableEntry{
                val:    uint32(cv >> 8),
                offset: i + 1,
            }
            e.dictTable[nextHash2] = tableEntry{
                val:    uint32(cv >> 16),
                offset: i + 2,
            }
            e.dictTable[nextHash3] = tableEntry{
                val:    uint32(cv >> 24),
                offset: i + 3,
            }
        }
        e.lastDictID = d.id
        e.allDirty = true
    }

    // Init or copy dict long table
    if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
        if len(e.dictLongTable) != len(e.longTable) {
            e.dictLongTable = make([]prevEntry, len(e.longTable))
        }
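        // Fill the long dict table with a rolling 8-byte hash, chaining each
        // new entry to the previous occupant of its slot.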
        if len(d.content) >= 8 {
            cv := load6432(d.content, 0)
            h := hash8(cv, betterLongTableBits)
            e.dictLongTable[h] = prevEntry{
                offset: e.maxMatchOff,
                prev:   e.dictLongTable[h].offset,
            }

            end := int32(len(d.content)) - 8 + e.maxMatchOff
            off := 8 // First to read
            for i := e.maxMatchOff + 1; i < end; i++ {
                cv = cv>>8 | (uint64(d.content[off]) << 56)
                h := hash8(cv, betterLongTableBits)
                e.dictLongTable[h] = prevEntry{
                    offset: i,
                    prev:   e.dictLongTable[h].offset,
                }
                off++
            }
        }
        e.lastDictID = d.id
        e.allDirty = true
    }

    // Reset table to initial state
    {
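        // Restore the short table from the dictionary copy: if most shards are
        // dirty (or everything is), copy the whole table; otherwise copy only
        // the dirty shards.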
        dirtyShardCnt := 0
        if !e.allDirty {
            for i := range e.shortTableShardDirty {
                if e.shortTableShardDirty[i] {
                    dirtyShardCnt++
                }
            }
        }
        const shardCnt = betterShortTableShardCnt
        const shardSize = betterShortTableShardSize
        if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
            copy(e.table[:], e.dictTable)
            for i := range e.shortTableShardDirty {
                e.shortTableShardDirty[i] = false
            }
        } else {
            for i := range e.shortTableShardDirty {
                if !e.shortTableShardDirty[i] {
                    continue
                }

                copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
                e.shortTableShardDirty[i] = false
            }
        }
    }
    {
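        // Restore the long table the same way, counting dirty shards via the
        // long-table dirty flags.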
        dirtyShardCnt := 0
        if !e.allDirty {
            for i := range e.longTableShardDirty {
                if e.longTableShardDirty[i] {
                    dirtyShardCnt++
                }
            }
        }
        const shardCnt = betterLongTableShardCnt
        const shardSize = betterLongTableShardSize
        if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
            copy(e.longTable[:], e.dictLongTable)
            for i := range e.longTableShardDirty {
                e.longTableShardDirty[i] = false
            }
        } else {
            for i := range e.longTableShardDirty {
                if !e.longTableShardDirty[i] {
                    continue
                }

                copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
                e.longTableShardDirty[i] = false
            }
        }
    }
    e.cur = e.maxMatchOff
    e.allDirty = false
}

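// markLongShardDirty records that the long-table shard containing entryNum
// must be restored from dictLongTable on the next Reset.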
func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
    e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

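// markShortShardDirty records that the short-table shard containing entryNum
// must be restored from dictTable on the next Reset.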
func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
    e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}