// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	dFastLongTableBits = 17                      // Bits used in the long match table
	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.

	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
	dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt   // Size of an individual shard

	dFastShortTableBits = tableBits                // Bits used in the short match table
	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
)
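
// The double-fast encoder keeps two hash tables: a short table indexed by a
// 5-byte hash (hash5, dFastShortTableBits = tableBits, shared with the fast
// encoder) and a long table indexed by an 8-byte hash (hash8, 17 bits, so
// 1<<17 = 131072 entries). Both store a tableEntry holding the absolute
// offset plus the first 4 bytes, so a probe can be verified cheaply before
// the match is extended.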

type doubleFastEncoder struct {
	fastEncoder
	longTable [dFastLongTableSize]tableEntry
}

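// doubleFastEncoderDict adds dictionary support: dictLongTable keeps a
// pristine copy of the long table as built from the dictionary, and
// longTableShardDirty tracks which shards of longTable have been overwritten
// since the last Reset, so only those shards need to be restored.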
type doubleFastEncoderDict struct {
	fastEncoderDict
	longTable           [dFastLongTableSize]tableEntry
	dictLongTable       []tableEntry
	longTableShardDirty [dLongTableShardCnt]bool
}

// Encode mimics functionality in zstd_dfast.c
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}
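
	// After the shift above, offsets are rebased so the window starts at
	// e.maxMatchOff: a live entry at absolute offset v becomes
	// v-e.cur+e.maxMatchOff, and entries older than minOff are zeroed. A
	// zeroed offset always yields coffset >= e.maxMatchOff below, so it can
	// never be mistaken for a valid match.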

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

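	// Each encodeLoop iteration finds one match: a repeat-offset probe
	// against offset1 first, then the long table, then the short table
	// (optionally upgraded to a long match at s+1). The match is extended
	// both ways, emitted as a sequence, the tables are refreshed around the
	// match ends, and an immediate offset2 repeat is tried before searching
	// again.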
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

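			// The stride below implements zstd's "search strength": the
			// longer we go without a match (s - nextEmit), the bigger the
			// skip, so incompressible stretches are scanned in growing steps
			// while matching regions keep an effective step of stepSize.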
			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hash5(cv0, dFastShortTableBits)] = te0
		e.table[hash5(cv1, dFastShortTableBits)] = te1
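		// The stores above refresh only four positions: match start+1 and
		// end-2 in the long table, start+2 and end-1 in the short table (the
		// >>= 8 shift advances each sampled value one byte). Indexing every
		// position of a long match would cost speed for what is typically
		// little extra ratio.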

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
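
// A minimal usage sketch (assuming only the public API of this package:
// NewWriter, WithEncoderLevel and SpeedDefault, the level that selects this
// double-fast encoder):
//
//	enc, err := zstd.NewWriter(dst, zstd.WithEncoderLevel(zstd.SpeedDefault))
//	if err != nil {
//		return err
//	}
//	if _, err = enc.Write(src); err != nil {
//		enc.Close()
//		return err
//	}
//	return enc.Close()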

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		for i := range e.longTable[:] {
			e.longTable[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		for {
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if len(blk.sequences) > 2 {
				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hash5(cv0, dFastShortTableBits)] = te0
		e.table[hash5(cv1, dFastShortTableBits)] = te1

		cv = load6432(src, s)

		if len(blk.sequences) <= 2 {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv1>>8, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlen(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}

	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}
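
// EncodeNoHist is used on the single-segment path (typically EncodeAll with
// nothing carried between blocks). Since src is not retained, bumping e.cur
// above guarantees stale table entries cannot alias into whatever the next
// caller encodes.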

// Encode will encode the content, with a dictionary if initialized for it.
func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.markAllShardsDirty()
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.markAllShardsDirty()
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				e.markLongShardDirty(nextHashL)
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		longHash1 := hash8(cv0, dFastLongTableBits)
		longHash2 := hash8(cv1, dFastLongTableBits)
		e.longTable[longHash1] = te0
		e.longTable[longHash2] = te1
		e.markLongShardDirty(longHash1)
		e.markLongShardDirty(longHash2)
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		hashVal1 := hash5(cv0, dFastShortTableBits)
		hashVal2 := hash5(cv1, dFastShortTableBits)
		e.table[hashVal1] = te0
		e.markShardDirty(hashVal1)
		e.table[hashVal2] = te1
		e.markShardDirty(hashVal2)

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// If we encoded more than 64K mark all dirty.
	if len(src) > 64<<10 {
		e.markAllShardsDirty()
	}
}

// Reset will reset and set a dictionary if not nil
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
	e.fastEncoder.Reset(d, singleBlock)
	if d != nil {
		panic("doubleFastEncoder: Reset with dict not supported")
	}
}
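
// The plain doubleFastEncoder is only selected when no dictionary is in
// play; dictionary support at this level lives in doubleFastEncoderDict
// below, so a non-nil dict reaching this Reset indicates a wiring bug.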

// Reset will reset and set a dictionary if not nil
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
	allDirty := e.allDirty
	e.fastEncoderDict.Reset(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]tableEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
				val:    uint32(cv),
				offset: e.maxMatchOff,
			}
			end := int32(len(d.content)) - 8 + e.maxMatchOff
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
				e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}
	// Reset table to initial state
	e.cur = e.maxMatchOff

	dirtyShardCnt := 0
	if !allDirty {
		for i := range e.longTableShardDirty {
			if e.longTableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
		copy(e.longTable[:], e.dictLongTable)
		for i := range e.longTableShardDirty {
			e.longTableShardDirty[i] = false
		}
		return
	}
	for i := range e.longTableShardDirty {
		if !e.longTableShardDirty[i] {
			continue
		}

		copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
		e.longTableShardDirty[i] = false
	}
}
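
// markLongShardDirty records that the shard containing the given long-table
// entry no longer matches dictLongTable and must be restored on the next
// Reset with the same dictionary.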
func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}
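
// Illustrative shard arithmetic (assuming dictShardBits = 6, as used for the
// fast-encoder tables): dLongTableShardCnt = 1<<(17-6) = 2048 shards and
// dLongTableShardSize = (1<<17)/2048 = 64 entries per shard, so updating
// long-table entry n marks shard n/64 dirty, and Reset recopies only those
// 64-entry windows from dictLongTable.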