Blame - vendor/github.com/klauspost/compress/zstd/enc_fast.go - bbsim

blob: 0bdddac5b4df33ec1ab56c373798a73a7595468f [file] [log] [blame]

Pragya Arya	324337e	2020-02-20 14:35:08 +0530	[diff] [blame]	1	// Copyright 2019+ Klaus Post. All rights reserved.
				2	// License information can be found in the LICENSE file.
				3	// Based on work by Yann Collet, released under BSD License.
				4
				5	package zstd
				6
				7	import (
				8	"math/bits"
				9
				10	"github.com/klauspost/compress/zstd/internal/xxhash"
				11	)
				12
				// Sizing of the match-candidate hash table and the match length cap.
				const (
					// tableBits is the number of hash bits used to index the table.
					tableBits = 15
					// tableSize is the number of entries in the table.
					tableSize = 1 << tableBits
					// tableMask reduces a value to a valid table index. It is redundant
					// with tableSize, but masking can eliminate bounds checks.
					tableMask = tableSize - 1
					// maxMatchLength caps the length of a single match.
					maxMatchLength = 131074
				)
				19
				// tableEntry is a single slot of the match-candidate hash table.
				type tableEntry struct {
					// val holds the low 32 bits of the input value that was hashed
					// into this slot; it is compared before a candidate is accepted.
					val uint32
					// offset is the position of that value, stored as the index into
					// the history buffer plus the encoder's cur at the time of the write.
					offset int32
				}
				24
				25	type fastEncoder struct {
				26	o encParams
				27	// cur is the offset at the start of hist
				28	cur int32
				29	// maximum offset. Should be at least 2x block size.
				30	maxMatchOff int32
				31	hist []byte
				32	crc *xxhash.Digest
				33	table [tableSize]tableEntry
				34	tmp [8]byte
				35	blk *blockEnc
				36	}
				37
				38	// CRC returns the underlying CRC writer.
				39	func (e fastEncoder) CRC() xxhash.Digest {
				40	return e.crc
				41	}
				42
				43	// AppendCRC will append the CRC to the destination slice and return it.
				44	func (e *fastEncoder) AppendCRC(dst []byte) []byte {
				45	crc := e.crc.Sum(e.tmp[:0])
				46	dst = append(dst, crc[7], crc[6], crc[5], crc[4])
				47	return dst
				48	}
				49
				50	// WindowSize returns the window size of the encoder,
				51	// or a window size small enough to contain the input size, if > 0.
				52	func (e *fastEncoder) WindowSize(size int) int32 {
				53	if size > 0 && size < int(e.maxMatchOff) {
				54	b := int32(1) << uint(bits.Len(uint(size)))
				55	// Keep minimum window.
				56	if b < 1024 {
				57	b = 1024
				58	}
				59	return b
				60	}
				61	return e.maxMatchOff
				62	}
				63
				64	// Block returns the current block.
				65	func (e fastEncoder) Block() blockEnc {
				66	return e.blk
				67	}
				68
				69	// Encode mimmics functionality in zstd_fast.c
				70	func (e fastEncoder) Encode(blk blockEnc, src []byte) {
				71	const (
				72	inputMargin = 8
				73	minNonLiteralBlockSize = 1 + 1 + inputMargin
				74	)
				75
				76	// Protect against e.cur wraparound.
				77	for e.cur > (1<<30)+e.maxMatchOff {
				78	if len(e.hist) == 0 {
				79	for i := range e.table[:] {
				80	e.table[i] = tableEntry{}
				81	}
				82	e.cur = e.maxMatchOff
				83	break
				84	}
				85	// Shift down everything in the table that isn't already too far away.
				86	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				87	for i := range e.table[:] {
				88	v := e.table[i].offset
				89	if v < minOff {
				90	v = 0
				91	} else {
				92	v = v - e.cur + e.maxMatchOff
				93	}
				94	e.table[i].offset = v
				95	}
				96	e.cur = e.maxMatchOff
				97	}
				98
				99	s := e.addBlock(src)
				100	blk.size = len(src)
				101	if len(src) < minNonLiteralBlockSize {
				102	blk.extraLits = len(src)
				103	blk.literals = blk.literals[:len(src)]
				104	copy(blk.literals, src)
				105	return
				106	}
				107
				108	// Override src
				109	src = e.hist
				110	sLimit := int32(len(src)) - inputMargin
				111	// stepSize is the number of bytes to skip on every main loop iteration.
				112	// It should be >= 2.
				113	stepSize := int32(e.o.targetLength)
				114	if stepSize == 0 {
				115	stepSize++
				116	}
				117	stepSize++
				118
				119	// TEMPLATE
				120	const hashLog = tableBits
				121	// seems global, but would be nice to tweak.
				122	const kSearchStrength = 8
				123
				124	// nextEmit is where in src the next emitLiteral should start from.
				125	nextEmit := s
				126	cv := load6432(src, s)
				127
				128	// Relative offsets
				129	offset1 := int32(blk.recentOffsets[0])
				130	offset2 := int32(blk.recentOffsets[1])
				131
				132	addLiterals := func(s *seq, until int32) {
				133	if until == nextEmit {
				134	return
				135	}
				136	blk.literals = append(blk.literals, src[nextEmit:until]...)
				137	s.litLen = uint32(until - nextEmit)
				138	}
				139	if debug {
				140	println("recent offsets:", blk.recentOffsets)
				141	}
				142
				143	encodeLoop:
				144	for {
				145	// t will contain the match offset when we find one.
				146	// When existing the search loop, we have already checked 4 bytes.
				147	var t int32
				148
				149	// We will not use repeat offsets across blocks.
				150	// By not using them for the first 3 matches
				151	canRepeat := len(blk.sequences) > 2
				152
				153	for {
				154	if debug && canRepeat && offset1 == 0 {
				155	panic("offset0 was 0")
				156	}
				157
				158	nextHash := hash6(cv, hashLog)
				159	nextHash2 := hash6(cv>>8, hashLog)
				160	candidate := e.table[nextHash]
				161	candidate2 := e.table[nextHash2]
				162	repIndex := s - offset1 + 2
				163
				164	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				165	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				166
				167	if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				168	// Consider history as well.
				169	var seq seq
				170	lenght := 4 + e.matchlen(s+6, repIndex+4, src)
				171
				172	seq.matchLen = uint32(lenght - zstdMinMatch)
				173
				174	// We might be able to match backwards.
				175	// Extend as long as we can.
				176	start := s + 2
				177	// We end the search early, so we don't risk 0 literals
				178	// and have to do special offset treatment.
				179	startLimit := nextEmit + 1
				180
				181	sMin := s - e.maxMatchOff
				182	if sMin < 0 {
				183	sMin = 0
				184	}
				185	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
				186	repIndex--
				187	start--
				188	seq.matchLen++
				189	}
				190	addLiterals(&seq, start)
				191
				192	// rep 0
				193	seq.offset = 1
				194	if debugSequences {
				195	println("repeat sequence", seq, "next s:", s)
				196	}
				197	blk.sequences = append(blk.sequences, seq)
				198	s += lenght + 2
				199	nextEmit = s
				200	if s >= sLimit {
				201	if debug {
				202	println("repeat ended", s, lenght)
				203
				204	}
				205	break encodeLoop
				206	}
				207	cv = load6432(src, s)
				208	continue
				209	}
				210	coffset0 := s - (candidate.offset - e.cur)
				211	coffset1 := s - (candidate2.offset - e.cur) + 1
				212	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				213	// found a regular match
				214	t = candidate.offset - e.cur
				215	if debug && s <= t {
				216	panic("s <= t")
				217	}
				218	if debug && s-t > e.maxMatchOff {
				219	panic("s - t >e.maxMatchOff")
				220	}
				221	break
				222	}
				223
				224	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				225	// found a regular match
				226	t = candidate2.offset - e.cur
				227	s++
				228	if debug && s <= t {
				229	panic("s <= t")
				230	}
				231	if debug && s-t > e.maxMatchOff {
				232	panic("s - t >e.maxMatchOff")
				233	}
				234	if debug && t < 0 {
				235	panic("t<0")
				236	}
				237	break
				238	}
				239	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				240	if s >= sLimit {
				241	break encodeLoop
				242	}
				243	cv = load6432(src, s)
				244	}
				245	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				246	offset2 = offset1
				247	offset1 = s - t
				248
				249	if debug && s <= t {
				250	panic("s <= t")
				251	}
				252
				253	if debug && canRepeat && int(offset1) > len(src) {
				254	panic("invalid offset")
				255	}
				256
				257	// Extend the 4-byte match as long as possible.
				258	l := e.matchlen(s+4, t+4, src) + 4
				259
				260	// Extend backwards
				261	tMin := s - e.maxMatchOff
				262	if tMin < 0 {
				263	tMin = 0
				264	}
				265	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				266	s--
				267	t--
				268	l++
				269	}
				270
				271	// Write our sequence.
				272	var seq seq
				273	seq.litLen = uint32(s - nextEmit)
				274	seq.matchLen = uint32(l - zstdMinMatch)
				275	if seq.litLen > 0 {
				276	blk.literals = append(blk.literals, src[nextEmit:s]...)
				277	}
				278	// Don't use repeat offsets
				279	seq.offset = uint32(s-t) + 3
				280	s += l
				281	if debugSequences {
				282	println("sequence", seq, "next s:", s)
				283	}
				284	blk.sequences = append(blk.sequences, seq)
				285	nextEmit = s
				286	if s >= sLimit {
				287	break encodeLoop
				288	}
				289	cv = load6432(src, s)
				290
				291	// Check offset 2
				292	if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
				293	// We have at least 4 byte match.
				294	// No need to check backwards. We come straight from a match
				295	l := 4 + e.matchlen(s+4, o2+4, src)
				296
				297	// Store this, since we have it.
				298	nextHash := hash6(cv, hashLog)
				299	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				300	seq.matchLen = uint32(l) - zstdMinMatch
				301	seq.litLen = 0
				302	// Since litlen is always 0, this is offset 1.
				303	seq.offset = 1
				304	s += l
				305	nextEmit = s
				306	if debugSequences {
				307	println("sequence", seq, "next s:", s)
				308	}
				309	blk.sequences = append(blk.sequences, seq)
				310
				311	// Swap offset 1 and 2.
				312	offset1, offset2 = offset2, offset1
				313	if s >= sLimit {
				314	break encodeLoop
				315	}
				316	// Prepare next loop.
				317	cv = load6432(src, s)
				318	}
				319	}
				320
				321	if int(nextEmit) < len(src) {
				322	blk.literals = append(blk.literals, src[nextEmit:]...)
				323	blk.extraLits = len(src) - int(nextEmit)
				324	}
				325	blk.recentOffsets[0] = uint32(offset1)
				326	blk.recentOffsets[1] = uint32(offset2)
				327	if debug {
				328	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				329	}
				330	}
				331
				332	// EncodeNoHist will encode a block with no history and no following blocks.
				333	// Most notable difference is that src will not be copied for history and
				334	// we do not need to check for max match length.
				335	func (e fastEncoder) EncodeNoHist(blk blockEnc, src []byte) {
				336	const (
				337	inputMargin = 8
				338	minNonLiteralBlockSize = 1 + 1 + inputMargin
				339	)
				340	if debug {
				341	if len(src) > maxBlockSize {
				342	panic("src too big")
				343	}
				344	}
				345	// Protect against e.cur wraparound.
				346	if e.cur > (1<<30)+e.maxMatchOff {
				347	for i := range e.table[:] {
				348	e.table[i] = tableEntry{}
				349	}
				350	e.cur = e.maxMatchOff
				351	}
				352
				353	s := int32(0)
				354	blk.size = len(src)
				355	if len(src) < minNonLiteralBlockSize {
				356	blk.extraLits = len(src)
				357	blk.literals = blk.literals[:len(src)]
				358	copy(blk.literals, src)
				359	return
				360	}
				361
				362	sLimit := int32(len(src)) - inputMargin
				363	// stepSize is the number of bytes to skip on every main loop iteration.
				364	// It should be >= 2.
				365	const stepSize = 2
				366
				367	// TEMPLATE
				368	const hashLog = tableBits
				369	// seems global, but would be nice to tweak.
				370	const kSearchStrength = 8
				371
				372	// nextEmit is where in src the next emitLiteral should start from.
				373	nextEmit := s
				374	cv := load6432(src, s)
				375
				376	// Relative offsets
				377	offset1 := int32(blk.recentOffsets[0])
				378	offset2 := int32(blk.recentOffsets[1])
				379
				380	addLiterals := func(s *seq, until int32) {
				381	if until == nextEmit {
				382	return
				383	}
				384	blk.literals = append(blk.literals, src[nextEmit:until]...)
				385	s.litLen = uint32(until - nextEmit)
				386	}
				387	if debug {
				388	println("recent offsets:", blk.recentOffsets)
				389	}
				390
				391	encodeLoop:
				392	for {
				393	// t will contain the match offset when we find one.
				394	// When existing the search loop, we have already checked 4 bytes.
				395	var t int32
				396
				397	// We will not use repeat offsets across blocks.
				398	// By not using them for the first 3 matches
				399
				400	for {
				401	nextHash := hash6(cv, hashLog)
				402	nextHash2 := hash6(cv>>8, hashLog)
				403	candidate := e.table[nextHash]
				404	candidate2 := e.table[nextHash2]
				405	repIndex := s - offset1 + 2
				406
				407	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				408	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				409
				410	if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				411	// Consider history as well.
				412	var seq seq
				413	// lenght := 4 + e.matchlen(s+6, repIndex+4, src)
				414	lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				415
				416	seq.matchLen = uint32(lenght - zstdMinMatch)
				417
				418	// We might be able to match backwards.
				419	// Extend as long as we can.
				420	start := s + 2
				421	// We end the search early, so we don't risk 0 literals
				422	// and have to do special offset treatment.
				423	startLimit := nextEmit + 1
				424
				425	sMin := s - e.maxMatchOff
				426	if sMin < 0 {
				427	sMin = 0
				428	}
				429	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
				430	repIndex--
				431	start--
				432	seq.matchLen++
				433	}
				434	addLiterals(&seq, start)
				435
				436	// rep 0
				437	seq.offset = 1
				438	if debugSequences {
				439	println("repeat sequence", seq, "next s:", s)
				440	}
				441	blk.sequences = append(blk.sequences, seq)
				442	s += lenght + 2
				443	nextEmit = s
				444	if s >= sLimit {
				445	if debug {
				446	println("repeat ended", s, lenght)
				447
				448	}
				449	break encodeLoop
				450	}
				451	cv = load6432(src, s)
				452	continue
				453	}
				454	coffset0 := s - (candidate.offset - e.cur)
				455	coffset1 := s - (candidate2.offset - e.cur) + 1
				456	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				457	// found a regular match
				458	t = candidate.offset - e.cur
				459	if debug && s <= t {
				460	panic("s <= t")
				461	}
				462	if debug && s-t > e.maxMatchOff {
				463	panic("s - t >e.maxMatchOff")
				464	}
				465	break
				466	}
				467
				468	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				469	// found a regular match
				470	t = candidate2.offset - e.cur
				471	s++
				472	if debug && s <= t {
				473	panic("s <= t")
				474	}
				475	if debug && s-t > e.maxMatchOff {
				476	panic("s - t >e.maxMatchOff")
				477	}
				478	if debug && t < 0 {
				479	panic("t<0")
				480	}
				481	break
				482	}
				483	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				484	if s >= sLimit {
				485	break encodeLoop
				486	}
				487	cv = load6432(src, s)
				488	}
				489	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				490	offset2 = offset1
				491	offset1 = s - t
				492
				493	if debug && s <= t {
				494	panic("s <= t")
				495	}
				496
				497	// Extend the 4-byte match as long as possible.
				498	//l := e.matchlenNoHist(s+4, t+4, src) + 4
				499	l := int32(matchLen(src[s+4:], src[t+4:])) + 4
				500
				501	// Extend backwards
				502	tMin := s - e.maxMatchOff
				503	if tMin < 0 {
				504	tMin = 0
				505	}
				506	for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
				507	s--
				508	t--
				509	l++
				510	}
				511
				512	// Write our sequence.
				513	var seq seq
				514	seq.litLen = uint32(s - nextEmit)
				515	seq.matchLen = uint32(l - zstdMinMatch)
				516	if seq.litLen > 0 {
				517	blk.literals = append(blk.literals, src[nextEmit:s]...)
				518	}
				519	// Don't use repeat offsets
				520	seq.offset = uint32(s-t) + 3
				521	s += l
				522	if debugSequences {
				523	println("sequence", seq, "next s:", s)
				524	}
				525	blk.sequences = append(blk.sequences, seq)
				526	nextEmit = s
				527	if s >= sLimit {
				528	break encodeLoop
				529	}
				530	cv = load6432(src, s)
				531
				532	// Check offset 2
				533	if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
				534	// We have at least 4 byte match.
				535	// No need to check backwards. We come straight from a match
				536	//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
				537	l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
				538
				539	// Store this, since we have it.
				540	nextHash := hash6(cv, hashLog)
				541	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				542	seq.matchLen = uint32(l) - zstdMinMatch
				543	seq.litLen = 0
				544	// Since litlen is always 0, this is offset 1.
				545	seq.offset = 1
				546	s += l
				547	nextEmit = s
				548	if debugSequences {
				549	println("sequence", seq, "next s:", s)
				550	}
				551	blk.sequences = append(blk.sequences, seq)
				552
				553	// Swap offset 1 and 2.
				554	offset1, offset2 = offset2, offset1
				555	if s >= sLimit {
				556	break encodeLoop
				557	}
				558	// Prepare next loop.
				559	cv = load6432(src, s)
				560	}
				561	}
				562
				563	if int(nextEmit) < len(src) {
				564	blk.literals = append(blk.literals, src[nextEmit:]...)
				565	blk.extraLits = len(src) - int(nextEmit)
				566	}
				567	if debug {
				568	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				569	}
				570	}
				571
				572	func (e *fastEncoder) addBlock(src []byte) int32 {
				573	// check if we have space already
				574	if len(e.hist)+len(src) > cap(e.hist) {
				575	if cap(e.hist) == 0 {
				576	l := e.maxMatchOff * 2
				577	// Make it at least 1MB.
				578	if l < 1<<20 {
				579	l = 1 << 20
				580	}
				581	e.hist = make([]byte, 0, l)
				582	} else {
				583	if cap(e.hist) < int(e.maxMatchOff*2) {
				584	panic("unexpected buffer size")
				585	}
				586	// Move down
				587	offset := int32(len(e.hist)) - e.maxMatchOff
				588	copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
				589	e.cur += offset
				590	e.hist = e.hist[:e.maxMatchOff]
				591	}
				592	}
				593	s := int32(len(e.hist))
				594	e.hist = append(e.hist, src...)
				595	return s
				596	}
				597
				598	// useBlock will replace the block with the provided one,
				599	// but transfer recent offsets from the previous.
				600	func (e fastEncoder) UseBlock(enc blockEnc) {
				601	enc.reset(e.blk)
				602	e.blk = enc
				603	}
				604
				605	func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 {
				606	// Extend the match to be as long as possible.
				607	return int32(matchLen(src[s:], src[t:]))
				608	}
				609
				610	func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
				611	if debug {
				612	if s < 0 {
				613	panic("s<0")
				614	}
				615	if t < 0 {
				616	panic("t<0")
				617	}
				618	if s-t > e.maxMatchOff {
				619	panic(s - t)
				620	}
				621	}
				622	s1 := int(s) + maxMatchLength - 4
				623	if s1 > len(src) {
				624	s1 = len(src)
				625	}
				626
				627	// Extend the match to be as long as possible.
				628	return int32(matchLen(src[s:s1], src[t:]))
				629	}
				630
				631	// Reset the encoding table.
				632	func (e *fastEncoder) Reset() {
				633	if e.blk == nil {
				634	e.blk = &blockEnc{}
				635	e.blk.init()
				636	} else {
				637	e.blk.reset(nil)
				638	}
				639	e.blk.initNewEncode()
				640	if e.crc == nil {
				641	e.crc = xxhash.New()
				642	} else {
				643	e.crc.Reset()
				644	}
				645	if cap(e.hist) < int(e.maxMatchOff*2) {
				646	l := e.maxMatchOff * 2
				647	// Make it at least 1MB.
				648	if l < 1<<20 {
				649	l = 1 << 20
				650	}
				651	e.hist = make([]byte, 0, l)
				652	}
				653	// We offset current position so everything will be out of reach
				654	e.cur += e.maxMatchOff + int32(len(e.hist))
				655	e.hist = e.hist[:0]
				656	}