Blame - vendor/github.com/klauspost/compress/zstd/enc_fast.go - voltctl

blob: ba4a17e10646b373227786f82a0f5b0504139370 [file] [log] [blame]

Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	1	// Copyright 2019+ Klaus Post. All rights reserved.
				2	// License information can be found in the LICENSE file.
				3	// Based on work by Yann Collet, released under BSD License.
				4
				5	package zstd
				6
				7	import (
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	8	"fmt"
				9	"math"
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	10	"math/bits"
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	11	)
				12
				13	const (
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	14	tableBits = 15 // Bits used in the table
				15	tableSize = 1 << tableBits // Size of the table
				16	tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
				17	tableShardSize = tableSize / tableShardCnt // Size of an individual shard
				18	tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	19	maxMatchLength = 131074
				20	)
				21
				22	type tableEntry struct {
				23	val uint32
				24	offset int32
				25	}
				26
				27	type fastEncoder struct {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	28	fastBase
				29	table [tableSize]tableEntry
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	30	}
				31
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	32	type fastEncoderDict struct {
				33	fastEncoder
				34	dictTable []tableEntry
				35	tableShardDirty [tableShardCnt]bool
				36	allDirty bool
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	37	}
				38
				39	// Encode mimmics functionality in zstd_fast.c
				40	func (e fastEncoder) Encode(blk blockEnc, src []byte) {
				41	const (
				42	inputMargin = 8
				43	minNonLiteralBlockSize = 1 + 1 + inputMargin
				44	)
				45
				46	// Protect against e.cur wraparound.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	47	for e.cur >= bufferReset {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	48	if len(e.hist) == 0 {
				49	for i := range e.table[:] {
				50	e.table[i] = tableEntry{}
				51	}
				52	e.cur = e.maxMatchOff
				53	break
				54	}
				55	// Shift down everything in the table that isn't already too far away.
				56	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				57	for i := range e.table[:] {
				58	v := e.table[i].offset
				59	if v < minOff {
				60	v = 0
				61	} else {
				62	v = v - e.cur + e.maxMatchOff
				63	}
				64	e.table[i].offset = v
				65	}
				66	e.cur = e.maxMatchOff
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	67	break
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	68	}
				69
				70	s := e.addBlock(src)
				71	blk.size = len(src)
				72	if len(src) < minNonLiteralBlockSize {
				73	blk.extraLits = len(src)
				74	blk.literals = blk.literals[:len(src)]
				75	copy(blk.literals, src)
				76	return
				77	}
				78
				79	// Override src
				80	src = e.hist
				81	sLimit := int32(len(src)) - inputMargin
				82	// stepSize is the number of bytes to skip on every main loop iteration.
				83	// It should be >= 2.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	84	const stepSize = 2
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	85
				86	// TEMPLATE
				87	const hashLog = tableBits
				88	// seems global, but would be nice to tweak.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	89	const kSearchStrength = 7
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	90
				91	// nextEmit is where in src the next emitLiteral should start from.
				92	nextEmit := s
				93	cv := load6432(src, s)
				94
				95	// Relative offsets
				96	offset1 := int32(blk.recentOffsets[0])
				97	offset2 := int32(blk.recentOffsets[1])
				98
				99	addLiterals := func(s *seq, until int32) {
				100	if until == nextEmit {
				101	return
				102	}
				103	blk.literals = append(blk.literals, src[nextEmit:until]...)
				104	s.litLen = uint32(until - nextEmit)
				105	}
				106	if debug {
				107	println("recent offsets:", blk.recentOffsets)
				108	}
				109
				110	encodeLoop:
				111	for {
				112	// t will contain the match offset when we find one.
				113	// When existing the search loop, we have already checked 4 bytes.
				114	var t int32
				115
				116	// We will not use repeat offsets across blocks.
				117	// By not using them for the first 3 matches
				118	canRepeat := len(blk.sequences) > 2
				119
				120	for {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	121	if debugAsserts && canRepeat && offset1 == 0 {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	122	panic("offset0 was 0")
				123	}
				124
				125	nextHash := hash6(cv, hashLog)
				126	nextHash2 := hash6(cv>>8, hashLog)
				127	candidate := e.table[nextHash]
				128	candidate2 := e.table[nextHash2]
				129	repIndex := s - offset1 + 2
				130
				131	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				132	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				133
				134	if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				135	// Consider history as well.
				136	var seq seq
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	137	var length int32
				138	// length = 4 + e.matchlen(s+6, repIndex+4, src)
				139	{
				140	a := src[s+6:]
				141	b := src[repIndex+4:]
				142	endI := len(a) & (math.MaxInt32 - 7)
				143	length = int32(endI) + 4
				144	for i := 0; i < endI; i += 8 {
				145	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				146	length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				147	break
				148	}
				149	}
				150	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	151
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	152	seq.matchLen = uint32(length - zstdMinMatch)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	153
				154	// We might be able to match backwards.
				155	// Extend as long as we can.
				156	start := s + 2
				157	// We end the search early, so we don't risk 0 literals
				158	// and have to do special offset treatment.
				159	startLimit := nextEmit + 1
				160
				161	sMin := s - e.maxMatchOff
				162	if sMin < 0 {
				163	sMin = 0
				164	}
				165	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
				166	repIndex--
				167	start--
				168	seq.matchLen++
				169	}
				170	addLiterals(&seq, start)
				171
				172	// rep 0
				173	seq.offset = 1
				174	if debugSequences {
				175	println("repeat sequence", seq, "next s:", s)
				176	}
				177	blk.sequences = append(blk.sequences, seq)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	178	s += length + 2
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	179	nextEmit = s
				180	if s >= sLimit {
				181	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	182	println("repeat ended", s, length)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	183
				184	}
				185	break encodeLoop
				186	}
				187	cv = load6432(src, s)
				188	continue
				189	}
				190	coffset0 := s - (candidate.offset - e.cur)
				191	coffset1 := s - (candidate2.offset - e.cur) + 1
				192	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				193	// found a regular match
				194	t = candidate.offset - e.cur
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	195	if debugAsserts && s <= t {
				196	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	197	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	198	if debugAsserts && s-t > e.maxMatchOff {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	199	panic("s - t >e.maxMatchOff")
				200	}
				201	break
				202	}
				203
				204	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				205	// found a regular match
				206	t = candidate2.offset - e.cur
				207	s++
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	208	if debugAsserts && s <= t {
				209	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	210	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	211	if debugAsserts && s-t > e.maxMatchOff {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	212	panic("s - t >e.maxMatchOff")
				213	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	214	if debugAsserts && t < 0 {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	215	panic("t<0")
				216	}
				217	break
				218	}
				219	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				220	if s >= sLimit {
				221	break encodeLoop
				222	}
				223	cv = load6432(src, s)
				224	}
				225	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				226	offset2 = offset1
				227	offset1 = s - t
				228
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	229	if debugAsserts && s <= t {
				230	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	231	}
				232
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	233	if debugAsserts && canRepeat && int(offset1) > len(src) {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	234	panic("invalid offset")
				235	}
				236
				237	// Extend the 4-byte match as long as possible.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	238	//l := e.matchlen(s+4, t+4, src) + 4
				239	var l int32
				240	{
				241	a := src[s+4:]
				242	b := src[t+4:]
				243	endI := len(a) & (math.MaxInt32 - 7)
				244	l = int32(endI) + 4
				245	for i := 0; i < endI; i += 8 {
				246	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				247	l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				248	break
				249	}
				250	}
				251	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	252
				253	// Extend backwards
				254	tMin := s - e.maxMatchOff
				255	if tMin < 0 {
				256	tMin = 0
				257	}
				258	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				259	s--
				260	t--
				261	l++
				262	}
				263
				264	// Write our sequence.
				265	var seq seq
				266	seq.litLen = uint32(s - nextEmit)
				267	seq.matchLen = uint32(l - zstdMinMatch)
				268	if seq.litLen > 0 {
				269	blk.literals = append(blk.literals, src[nextEmit:s]...)
				270	}
				271	// Don't use repeat offsets
				272	seq.offset = uint32(s-t) + 3
				273	s += l
				274	if debugSequences {
				275	println("sequence", seq, "next s:", s)
				276	}
				277	blk.sequences = append(blk.sequences, seq)
				278	nextEmit = s
				279	if s >= sLimit {
				280	break encodeLoop
				281	}
				282	cv = load6432(src, s)
				283
				284	// Check offset 2
				285	if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
				286	// We have at least 4 byte match.
				287	// No need to check backwards. We come straight from a match
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	288	//l := 4 + e.matchlen(s+4, o2+4, src)
				289	var l int32
				290	{
				291	a := src[s+4:]
				292	b := src[o2+4:]
				293	endI := len(a) & (math.MaxInt32 - 7)
				294	l = int32(endI) + 4
				295	for i := 0; i < endI; i += 8 {
				296	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				297	l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				298	break
				299	}
				300	}
				301	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	302
				303	// Store this, since we have it.
				304	nextHash := hash6(cv, hashLog)
				305	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				306	seq.matchLen = uint32(l) - zstdMinMatch
				307	seq.litLen = 0
				308	// Since litlen is always 0, this is offset 1.
				309	seq.offset = 1
				310	s += l
				311	nextEmit = s
				312	if debugSequences {
				313	println("sequence", seq, "next s:", s)
				314	}
				315	blk.sequences = append(blk.sequences, seq)
				316
				317	// Swap offset 1 and 2.
				318	offset1, offset2 = offset2, offset1
				319	if s >= sLimit {
				320	break encodeLoop
				321	}
				322	// Prepare next loop.
				323	cv = load6432(src, s)
				324	}
				325	}
				326
				327	if int(nextEmit) < len(src) {
				328	blk.literals = append(blk.literals, src[nextEmit:]...)
				329	blk.extraLits = len(src) - int(nextEmit)
				330	}
				331	blk.recentOffsets[0] = uint32(offset1)
				332	blk.recentOffsets[1] = uint32(offset2)
				333	if debug {
				334	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				335	}
				336	}
				337
				338	// EncodeNoHist will encode a block with no history and no following blocks.
				339	// Most notable difference is that src will not be copied for history and
				340	// we do not need to check for max match length.
				341	func (e fastEncoder) EncodeNoHist(blk blockEnc, src []byte) {
				342	const (
				343	inputMargin = 8
				344	minNonLiteralBlockSize = 1 + 1 + inputMargin
				345	)
				346	if debug {
				347	if len(src) > maxBlockSize {
				348	panic("src too big")
				349	}
				350	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	351
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	352	// Protect against e.cur wraparound.
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	353	if e.cur >= bufferReset {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	354	for i := range e.table[:] {
				355	e.table[i] = tableEntry{}
				356	}
				357	e.cur = e.maxMatchOff
				358	}
				359
				360	s := int32(0)
				361	blk.size = len(src)
				362	if len(src) < minNonLiteralBlockSize {
				363	blk.extraLits = len(src)
				364	blk.literals = blk.literals[:len(src)]
				365	copy(blk.literals, src)
				366	return
				367	}
				368
				369	sLimit := int32(len(src)) - inputMargin
				370	// stepSize is the number of bytes to skip on every main loop iteration.
				371	// It should be >= 2.
				372	const stepSize = 2
				373
				374	// TEMPLATE
				375	const hashLog = tableBits
				376	// seems global, but would be nice to tweak.
				377	const kSearchStrength = 8
				378
				379	// nextEmit is where in src the next emitLiteral should start from.
				380	nextEmit := s
				381	cv := load6432(src, s)
				382
				383	// Relative offsets
				384	offset1 := int32(blk.recentOffsets[0])
				385	offset2 := int32(blk.recentOffsets[1])
				386
				387	addLiterals := func(s *seq, until int32) {
				388	if until == nextEmit {
				389	return
				390	}
				391	blk.literals = append(blk.literals, src[nextEmit:until]...)
				392	s.litLen = uint32(until - nextEmit)
				393	}
				394	if debug {
				395	println("recent offsets:", blk.recentOffsets)
				396	}
				397
				398	encodeLoop:
				399	for {
				400	// t will contain the match offset when we find one.
				401	// When existing the search loop, we have already checked 4 bytes.
				402	var t int32
				403
				404	// We will not use repeat offsets across blocks.
				405	// By not using them for the first 3 matches
				406
				407	for {
				408	nextHash := hash6(cv, hashLog)
				409	nextHash2 := hash6(cv>>8, hashLog)
				410	candidate := e.table[nextHash]
				411	candidate2 := e.table[nextHash2]
				412	repIndex := s - offset1 + 2
				413
				414	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				415	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				416
				417	if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				418	// Consider history as well.
				419	var seq seq
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	420	// length := 4 + e.matchlen(s+6, repIndex+4, src)
				421	// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				422	var length int32
				423	{
				424	a := src[s+6:]
				425	b := src[repIndex+4:]
				426	endI := len(a) & (math.MaxInt32 - 7)
				427	length = int32(endI) + 4
				428	for i := 0; i < endI; i += 8 {
				429	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				430	length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				431	break
				432	}
				433	}
				434	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	435
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	436	seq.matchLen = uint32(length - zstdMinMatch)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	437
				438	// We might be able to match backwards.
				439	// Extend as long as we can.
				440	start := s + 2
				441	// We end the search early, so we don't risk 0 literals
				442	// and have to do special offset treatment.
				443	startLimit := nextEmit + 1
				444
				445	sMin := s - e.maxMatchOff
				446	if sMin < 0 {
				447	sMin = 0
				448	}
				449	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
				450	repIndex--
				451	start--
				452	seq.matchLen++
				453	}
				454	addLiterals(&seq, start)
				455
				456	// rep 0
				457	seq.offset = 1
				458	if debugSequences {
				459	println("repeat sequence", seq, "next s:", s)
				460	}
				461	blk.sequences = append(blk.sequences, seq)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	462	s += length + 2
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	463	nextEmit = s
				464	if s >= sLimit {
				465	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	466	println("repeat ended", s, length)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	467
				468	}
				469	break encodeLoop
				470	}
				471	cv = load6432(src, s)
				472	continue
				473	}
				474	coffset0 := s - (candidate.offset - e.cur)
				475	coffset1 := s - (candidate2.offset - e.cur) + 1
				476	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				477	// found a regular match
				478	t = candidate.offset - e.cur
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	479	if debugAsserts && s <= t {
				480	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	481	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	482	if debugAsserts && s-t > e.maxMatchOff {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	483	panic("s - t >e.maxMatchOff")
				484	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	485	if debugAsserts && t < 0 {
				486	panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				487	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	488	break
				489	}
				490
				491	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				492	// found a regular match
				493	t = candidate2.offset - e.cur
				494	s++
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	495	if debugAsserts && s <= t {
				496	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	497	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	498	if debugAsserts && s-t > e.maxMatchOff {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	499	panic("s - t >e.maxMatchOff")
				500	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	501	if debugAsserts && t < 0 {
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	502	panic("t<0")
				503	}
				504	break
				505	}
				506	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				507	if s >= sLimit {
				508	break encodeLoop
				509	}
				510	cv = load6432(src, s)
				511	}
				512	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				513	offset2 = offset1
				514	offset1 = s - t
				515
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	516	if debugAsserts && s <= t {
				517	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	518	}
				519
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	520	if debugAsserts && t < 0 {
				521	panic(fmt.Sprintf("t (%d) < 0 ", t))
				522	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	523	// Extend the 4-byte match as long as possible.
				524	//l := e.matchlenNoHist(s+4, t+4, src) + 4
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	525	// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
				526	var l int32
				527	{
				528	a := src[s+4:]
				529	b := src[t+4:]
				530	endI := len(a) & (math.MaxInt32 - 7)
				531	l = int32(endI) + 4
				532	for i := 0; i < endI; i += 8 {
				533	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				534	l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				535	break
				536	}
				537	}
				538	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	539
				540	// Extend backwards
				541	tMin := s - e.maxMatchOff
				542	if tMin < 0 {
				543	tMin = 0
				544	}
				545	for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
				546	s--
				547	t--
				548	l++
				549	}
				550
				551	// Write our sequence.
				552	var seq seq
				553	seq.litLen = uint32(s - nextEmit)
				554	seq.matchLen = uint32(l - zstdMinMatch)
				555	if seq.litLen > 0 {
				556	blk.literals = append(blk.literals, src[nextEmit:s]...)
				557	}
				558	// Don't use repeat offsets
				559	seq.offset = uint32(s-t) + 3
				560	s += l
				561	if debugSequences {
				562	println("sequence", seq, "next s:", s)
				563	}
				564	blk.sequences = append(blk.sequences, seq)
				565	nextEmit = s
				566	if s >= sLimit {
				567	break encodeLoop
				568	}
				569	cv = load6432(src, s)
				570
				571	// Check offset 2
				572	if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
				573	// We have at least 4 byte match.
				574	// No need to check backwards. We come straight from a match
				575	//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	576	// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
				577	var l int32
				578	{
				579	a := src[s+4:]
				580	b := src[o2+4:]
				581	endI := len(a) & (math.MaxInt32 - 7)
				582	l = int32(endI) + 4
				583	for i := 0; i < endI; i += 8 {
				584	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				585	l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				586	break
				587	}
				588	}
				589	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	590
				591	// Store this, since we have it.
				592	nextHash := hash6(cv, hashLog)
				593	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				594	seq.matchLen = uint32(l) - zstdMinMatch
				595	seq.litLen = 0
				596	// Since litlen is always 0, this is offset 1.
				597	seq.offset = 1
				598	s += l
				599	nextEmit = s
				600	if debugSequences {
				601	println("sequence", seq, "next s:", s)
				602	}
				603	blk.sequences = append(blk.sequences, seq)
				604
				605	// Swap offset 1 and 2.
				606	offset1, offset2 = offset2, offset1
				607	if s >= sLimit {
				608	break encodeLoop
				609	}
				610	// Prepare next loop.
				611	cv = load6432(src, s)
				612	}
				613	}
				614
				615	if int(nextEmit) < len(src) {
				616	blk.literals = append(blk.literals, src[nextEmit:]...)
				617	blk.extraLits = len(src) - int(nextEmit)
				618	}
				619	if debug {
				620	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				621	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	622	// We do not store history, so we must offset e.cur to avoid false matches for next user.
				623	if e.cur < bufferReset {
				624	e.cur += int32(len(src))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	625	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	626	}
				627
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	628	// Encode will encode the content, with a dictionary if initialized for it.
				629	func (e fastEncoderDict) Encode(blk blockEnc, src []byte) {
				630	const (
				631	inputMargin = 8
				632	minNonLiteralBlockSize = 1 + 1 + inputMargin
				633	)
				634	if e.allDirty \|\| len(src) > 32<<10 {
				635	e.fastEncoder.Encode(blk, src)
				636	e.allDirty = true
				637	return
				638	}
				639	// Protect against e.cur wraparound.
				640	for e.cur >= bufferReset {
				641	if len(e.hist) == 0 {
				642	for i := range e.table[:] {
				643	e.table[i] = tableEntry{}
				644	}
				645	e.cur = e.maxMatchOff
				646	break
				647	}
				648	// Shift down everything in the table that isn't already too far away.
				649	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				650	for i := range e.table[:] {
				651	v := e.table[i].offset
				652	if v < minOff {
				653	v = 0
				654	} else {
				655	v = v - e.cur + e.maxMatchOff
				656	}
				657	e.table[i].offset = v
				658	}
				659	e.cur = e.maxMatchOff
				660	break
				661	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	662
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	663	s := e.addBlock(src)
				664	blk.size = len(src)
				665	if len(src) < minNonLiteralBlockSize {
				666	blk.extraLits = len(src)
				667	blk.literals = blk.literals[:len(src)]
				668	copy(blk.literals, src)
				669	return
				670	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	671
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	672	// Override src
				673	src = e.hist
				674	sLimit := int32(len(src)) - inputMargin
				675	// stepSize is the number of bytes to skip on every main loop iteration.
				676	// It should be >= 2.
				677	const stepSize = 2
				678
				679	// TEMPLATE
				680	const hashLog = tableBits
				681	// seems global, but would be nice to tweak.
				682	const kSearchStrength = 7
				683
				684	// nextEmit is where in src the next emitLiteral should start from.
				685	nextEmit := s
				686	cv := load6432(src, s)
				687
				688	// Relative offsets
				689	offset1 := int32(blk.recentOffsets[0])
				690	offset2 := int32(blk.recentOffsets[1])
				691
				692	addLiterals := func(s *seq, until int32) {
				693	if until == nextEmit {
				694	return
				695	}
				696	blk.literals = append(blk.literals, src[nextEmit:until]...)
				697	s.litLen = uint32(until - nextEmit)
				698	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	699	if debug {
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	700	println("recent offsets:", blk.recentOffsets)
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	701	}
				702
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	703	encodeLoop:
				704	for {
				705	// t will contain the match offset when we find one.
				706	// When existing the search loop, we have already checked 4 bytes.
				707	var t int32
				708
				709	// We will not use repeat offsets across blocks.
				710	// By not using them for the first 3 matches
				711	canRepeat := len(blk.sequences) > 2
				712
				713	for {
				714	if debugAsserts && canRepeat && offset1 == 0 {
				715	panic("offset0 was 0")
				716	}
				717
				718	nextHash := hash6(cv, hashLog)
				719	nextHash2 := hash6(cv>>8, hashLog)
				720	candidate := e.table[nextHash]
				721	candidate2 := e.table[nextHash2]
				722	repIndex := s - offset1 + 2
				723
				724	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				725	e.markShardDirty(nextHash)
				726	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				727	e.markShardDirty(nextHash2)
				728
				729	if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				730	// Consider history as well.
				731	var seq seq
				732	var length int32
				733	// length = 4 + e.matchlen(s+6, repIndex+4, src)
				734	{
				735	a := src[s+6:]
				736	b := src[repIndex+4:]
				737	endI := len(a) & (math.MaxInt32 - 7)
				738	length = int32(endI) + 4
				739	for i := 0; i < endI; i += 8 {
				740	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				741	length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				742	break
				743	}
				744	}
				745	}
				746
				747	seq.matchLen = uint32(length - zstdMinMatch)
				748
				749	// We might be able to match backwards.
				750	// Extend as long as we can.
				751	start := s + 2
				752	// We end the search early, so we don't risk 0 literals
				753	// and have to do special offset treatment.
				754	startLimit := nextEmit + 1
				755
				756	sMin := s - e.maxMatchOff
				757	if sMin < 0 {
				758	sMin = 0
				759	}
				760	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
				761	repIndex--
				762	start--
				763	seq.matchLen++
				764	}
				765	addLiterals(&seq, start)
				766
				767	// rep 0
				768	seq.offset = 1
				769	if debugSequences {
				770	println("repeat sequence", seq, "next s:", s)
				771	}
				772	blk.sequences = append(blk.sequences, seq)
				773	s += length + 2
				774	nextEmit = s
				775	if s >= sLimit {
				776	if debug {
				777	println("repeat ended", s, length)
				778
				779	}
				780	break encodeLoop
				781	}
				782	cv = load6432(src, s)
				783	continue
				784	}
				785	coffset0 := s - (candidate.offset - e.cur)
				786	coffset1 := s - (candidate2.offset - e.cur) + 1
				787	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				788	// found a regular match
				789	t = candidate.offset - e.cur
				790	if debugAsserts && s <= t {
				791	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				792	}
				793	if debugAsserts && s-t > e.maxMatchOff {
				794	panic("s - t >e.maxMatchOff")
				795	}
				796	break
				797	}
				798
				799	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				800	// found a regular match
				801	t = candidate2.offset - e.cur
				802	s++
				803	if debugAsserts && s <= t {
				804	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				805	}
				806	if debugAsserts && s-t > e.maxMatchOff {
				807	panic("s - t >e.maxMatchOff")
				808	}
				809	if debugAsserts && t < 0 {
				810	panic("t<0")
				811	}
				812	break
				813	}
				814	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				815	if s >= sLimit {
				816	break encodeLoop
				817	}
				818	cv = load6432(src, s)
				819	}
				820	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				821	offset2 = offset1
				822	offset1 = s - t
				823
				824	if debugAsserts && s <= t {
				825	panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				826	}
				827
				828	if debugAsserts && canRepeat && int(offset1) > len(src) {
				829	panic("invalid offset")
				830	}
				831
				832	// Extend the 4-byte match as long as possible.
				833	//l := e.matchlen(s+4, t+4, src) + 4
				834	var l int32
				835	{
				836	a := src[s+4:]
				837	b := src[t+4:]
				838	endI := len(a) & (math.MaxInt32 - 7)
				839	l = int32(endI) + 4
				840	for i := 0; i < endI; i += 8 {
				841	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				842	l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				843	break
				844	}
				845	}
				846	}
				847
				848	// Extend backwards
				849	tMin := s - e.maxMatchOff
				850	if tMin < 0 {
				851	tMin = 0
				852	}
				853	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				854	s--
				855	t--
				856	l++
				857	}
				858
				859	// Write our sequence.
				860	var seq seq
				861	seq.litLen = uint32(s - nextEmit)
				862	seq.matchLen = uint32(l - zstdMinMatch)
				863	if seq.litLen > 0 {
				864	blk.literals = append(blk.literals, src[nextEmit:s]...)
				865	}
				866	// Don't use repeat offsets
				867	seq.offset = uint32(s-t) + 3
				868	s += l
				869	if debugSequences {
				870	println("sequence", seq, "next s:", s)
				871	}
				872	blk.sequences = append(blk.sequences, seq)
				873	nextEmit = s
				874	if s >= sLimit {
				875	break encodeLoop
				876	}
				877	cv = load6432(src, s)
				878
				879	// Check offset 2
				880	if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
				881	// We have at least 4 byte match.
				882	// No need to check backwards. We come straight from a match
				883	//l := 4 + e.matchlen(s+4, o2+4, src)
				884	var l int32
				885	{
				886	a := src[s+4:]
				887	b := src[o2+4:]
				888	endI := len(a) & (math.MaxInt32 - 7)
				889	l = int32(endI) + 4
				890	for i := 0; i < endI; i += 8 {
				891	if diff := load64(a, i) ^ load64(b, i); diff != 0 {
				892	l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
				893	break
				894	}
				895	}
				896	}
				897
				898	// Store this, since we have it.
				899	nextHash := hash6(cv, hashLog)
				900	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				901	e.markShardDirty(nextHash)
				902	seq.matchLen = uint32(l) - zstdMinMatch
				903	seq.litLen = 0
				904	// Since litlen is always 0, this is offset 1.
				905	seq.offset = 1
				906	s += l
				907	nextEmit = s
				908	if debugSequences {
				909	println("sequence", seq, "next s:", s)
				910	}
				911	blk.sequences = append(blk.sequences, seq)
				912
				913	// Swap offset 1 and 2.
				914	offset1, offset2 = offset2, offset1
				915	if s >= sLimit {
				916	break encodeLoop
				917	}
				918	// Prepare next loop.
				919	cv = load6432(src, s)
				920	}
				921	}
				922
				923	if int(nextEmit) < len(src) {
				924	blk.literals = append(blk.literals, src[nextEmit:]...)
				925	blk.extraLits = len(src) - int(nextEmit)
				926	}
				927	blk.recentOffsets[0] = uint32(offset1)
				928	blk.recentOffsets[1] = uint32(offset2)
				929	if debug {
				930	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				931	}
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	932	}
				933
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	934	// ResetDict will reset and set a dictionary if not nil
				935	func (e fastEncoder) Reset(d dict, singleBlock bool) {
				936	e.resetBase(d, singleBlock)
				937	if d != nil {
				938	panic("fastEncoder: Reset with dict")
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	939	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	940	}
				941
				942	// ResetDict will reset and set a dictionary if not nil
				943	func (e fastEncoderDict) Reset(d dict, singleBlock bool) {
				944	e.resetBase(d, singleBlock)
				945	if d == nil {
				946	return
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	947	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	948
				949	// Init or copy dict table
				950	if len(e.dictTable) != len(e.table) \|\| d.id != e.lastDictID {
				951	if len(e.dictTable) != len(e.table) {
				952	e.dictTable = make([]tableEntry, len(e.table))
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	953	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	954	if true {
				955	end := e.maxMatchOff + int32(len(d.content)) - 8
				956	for i := e.maxMatchOff; i < end; i += 3 {
				957	const hashLog = tableBits
				958
				959	cv := load6432(d.content, i-e.maxMatchOff)
				960	nextHash := hash6(cv, hashLog) // 0 -> 5
				961	nextHash1 := hash6(cv>>8, hashLog) // 1 -> 6
				962	nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
				963	e.dictTable[nextHash] = tableEntry{
				964	val: uint32(cv),
				965	offset: i,
				966	}
				967	e.dictTable[nextHash1] = tableEntry{
				968	val: uint32(cv >> 8),
				969	offset: i + 1,
				970	}
				971	e.dictTable[nextHash2] = tableEntry{
				972	val: uint32(cv >> 16),
				973	offset: i + 2,
				974	}
				975	}
				976	}
				977	e.lastDictID = d.id
				978	e.allDirty = true
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	979	}
David K. Bainbridge	bd6b288	2021-08-26 13:31:02 +0000	[diff] [blame^]	980
				981	e.cur = e.maxMatchOff
				982	dirtyShardCnt := 0
				983	if !e.allDirty {
				984	for i := range e.tableShardDirty {
				985	if e.tableShardDirty[i] {
				986	dirtyShardCnt++
				987	}
				988	}
				989	}
				990
				991	const shardCnt = tableShardCnt
				992	const shardSize = tableShardSize
				993	if e.allDirty \|\| dirtyShardCnt > shardCnt*4/6 {
				994	copy(e.table[:], e.dictTable)
				995	for i := range e.tableShardDirty {
				996	e.tableShardDirty[i] = false
				997	}
				998	e.allDirty = false
				999	return
				1000	}
				1001	for i := range e.tableShardDirty {
				1002	if !e.tableShardDirty[i] {
				1003	continue
				1004	}
				1005
				1006	copy(e.table[ishardSize:(i+1)shardSize], e.dictTable[ishardSize:(i+1)shardSize])
				1007	e.tableShardDirty[i] = false
				1008	}
				1009	e.allDirty = false
				1010	}
				1011
				1012	func (e *fastEncoderDict) markAllShardsDirty() {
				1013	e.allDirty = true
				1014	}
				1015
				1016	func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
				1017	e.tableShardDirty[entryNum/tableShardSize] = true
Scott Baker	ed4efab	2020-01-13 19:12:25 -0800	[diff] [blame]	1018	}