Blame - vendor/github.com/klauspost/compress/zstd/enc_fast.go - device-management

blob: 6f388de041f18592b9b6a594cebc46cf1136bd0e [file] [log] [blame]

Dinesh Belwalkar	e63f7f9	2019-11-22 23:11:16 +0000	[diff] [blame]	1	// Copyright 2019+ Klaus Post. All rights reserved.
				2	// License information can be found in the LICENSE file.
				3	// Based on work by Yann Collet, released under BSD License.
				4
				5	package zstd
				6
				7	import (
				8	"math/bits"
				9
				10	"github.com/klauspost/compress/zstd/internal/xxhash"
				11	)
				12
				13	const (
				14	tableBits = 15 // Bits used in the table
				15	tableSize = 1 << tableBits // Size of the table
				16	tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
				17	maxMatchLength = 131074
				18	)
				19
				20	type tableEntry struct {
				21	val uint32
				22	offset int32
				23	}
				24
				25	type fastEncoder struct {
				26	o encParams
				27	// cur is the offset at the start of hist
				28	cur int32
				29	// maximum offset. Should be at least 2x block size.
				30	maxMatchOff int32
				31	hist []byte
				32	crc *xxhash.Digest
				33	table [tableSize]tableEntry
				34	tmp [8]byte
				35	blk *blockEnc
				36	}
				37
				38	// CRC returns the underlying CRC writer.
				39	func (e fastEncoder) CRC() xxhash.Digest {
				40	return e.crc
				41	}
				42
				43	// AppendCRC will append the CRC to the destination slice and return it.
				44	func (e *fastEncoder) AppendCRC(dst []byte) []byte {
				45	crc := e.crc.Sum(e.tmp[:0])
				46	dst = append(dst, crc[7], crc[6], crc[5], crc[4])
				47	return dst
				48	}
				49
				50	// WindowSize returns the window size of the encoder,
				51	// or a window size small enough to contain the input size, if > 0.
				52	func (e *fastEncoder) WindowSize(size int) int32 {
				53	if size > 0 && size < int(e.maxMatchOff) {
				54	b := int32(1) << uint(bits.Len(uint(size)))
				55	// Keep minimum window.
				56	if b < 1024 {
				57	b = 1024
				58	}
				59	return b
				60	}
				61	return e.maxMatchOff
				62	}
				63
				64	// Block returns the current block.
				65	func (e fastEncoder) Block() blockEnc {
				66	return e.blk
				67	}
				68
				69	// Encode mimmics functionality in zstd_fast.c
				70	func (e fastEncoder) Encode(blk blockEnc, src []byte) {
				71	const (
				72	inputMargin = 8
				73	minNonLiteralBlockSize = 1 + 1 + inputMargin
				74	)
				75
				76	// Protect against e.cur wraparound.
				77	for e.cur > (1<<30)+e.maxMatchOff {
				78	if len(e.hist) == 0 {
				79	for i := range e.table[:] {
				80	e.table[i] = tableEntry{}
				81	}
				82	e.cur = e.maxMatchOff
				83	break
				84	}
				85	// Shift down everything in the table that isn't already too far away.
				86	minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
				87	for i := range e.table[:] {
				88	v := e.table[i].offset
				89	if v < minOff {
				90	v = 0
				91	} else {
				92	v = v - e.cur + e.maxMatchOff
				93	}
				94	e.table[i].offset = v
				95	}
				96	e.cur = e.maxMatchOff
				97	}
				98
				99	s := e.addBlock(src)
				100	blk.size = len(src)
				101	if len(src) < minNonLiteralBlockSize {
				102	blk.extraLits = len(src)
				103	blk.literals = blk.literals[:len(src)]
				104	copy(blk.literals, src)
				105	return
				106	}
				107
				108	// Override src
				109	src = e.hist
				110	sLimit := int32(len(src)) - inputMargin
				111	// stepSize is the number of bytes to skip on every main loop iteration.
				112	// It should be >= 2.
				113	stepSize := int32(e.o.targetLength)
				114	if stepSize == 0 {
				115	stepSize++
				116	}
				117	stepSize++
				118
				119	// TEMPLATE
				120	const hashLog = tableBits
				121	// seems global, but would be nice to tweak.
				122	const kSearchStrength = 8
				123
				124	// nextEmit is where in src the next emitLiteral should start from.
				125	nextEmit := s
				126	cv := load6432(src, s)
				127
				128	// Relative offsets
				129	offset1 := int32(blk.recentOffsets[0])
				130	offset2 := int32(blk.recentOffsets[1])
				131
				132	addLiterals := func(s *seq, until int32) {
				133	if until == nextEmit {
				134	return
				135	}
				136	blk.literals = append(blk.literals, src[nextEmit:until]...)
				137	s.litLen = uint32(until - nextEmit)
				138	}
				139	if debug {
				140	println("recent offsets:", blk.recentOffsets)
				141	}
				142
				143	encodeLoop:
				144	for {
				145	// t will contain the match offset when we find one.
				146	// When existing the search loop, we have already checked 4 bytes.
				147	var t int32
				148
				149	// We will not use repeat offsets across blocks.
				150	// By not using them for the first 3 matches
				151	canRepeat := len(blk.sequences) > 2
				152
				153	for {
				154	if debug && canRepeat && offset1 == 0 {
				155	panic("offset0 was 0")
				156	}
				157
				158	nextHash := hash6(cv, hashLog)
				159	nextHash2 := hash6(cv>>8, hashLog)
				160	candidate := e.table[nextHash]
				161	candidate2 := e.table[nextHash2]
				162	repIndex := s - offset1 + 2
				163
				164	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				165	e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
				166
				167	if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				168	// Consider history as well.
				169	var seq seq
				170	lenght := 4 + e.matchlen(s+6, repIndex+4, src)
				171
				172	seq.matchLen = uint32(lenght - zstdMinMatch)
				173
				174	// We might be able to match backwards.
				175	// Extend as long as we can.
				176	start := s + 2
				177	// We end the search early, so we don't risk 0 literals
				178	// and have to do special offset treatment.
				179	startLimit := nextEmit + 1
				180
				181	sMin := s - e.maxMatchOff
				182	if sMin < 0 {
				183	sMin = 0
				184	}
				185	for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
				186	repIndex--
				187	start--
				188	seq.matchLen++
				189	}
				190	addLiterals(&seq, start)
				191
				192	// rep 0
				193	seq.offset = 1
				194	if debugSequences {
				195	println("repeat sequence", seq, "next s:", s)
				196	}
				197	blk.sequences = append(blk.sequences, seq)
				198	s += lenght + 2
				199	nextEmit = s
				200	if s >= sLimit {
				201	if debug {
				202	println("repeat ended", s, lenght)
				203
				204	}
				205	break encodeLoop
				206	}
				207	cv = load6432(src, s)
				208	continue
				209	}
				210	coffset0 := s - (candidate.offset - e.cur)
				211	coffset1 := s - (candidate2.offset - e.cur) + 1
				212	if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				213	// found a regular match
				214	t = candidate.offset - e.cur
				215	if debug && s <= t {
				216	panic("s <= t")
				217	}
				218	if debug && s-t > e.maxMatchOff {
				219	panic("s - t >e.maxMatchOff")
				220	}
				221	break
				222	}
				223
				224	if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				225	// found a regular match
				226	t = candidate2.offset - e.cur
				227	s++
				228	if debug && s <= t {
				229	panic("s <= t")
				230	}
				231	if debug && s-t > e.maxMatchOff {
				232	panic("s - t >e.maxMatchOff")
				233	}
				234	if debug && t < 0 {
				235	panic("t<0")
				236	}
				237	break
				238	}
				239	s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
				240	if s >= sLimit {
				241	break encodeLoop
				242	}
				243	cv = load6432(src, s)
				244	}
				245	// A 4-byte match has been found. We'll later see if more than 4 bytes.
				246	offset2 = offset1
				247	offset1 = s - t
				248
				249	if debug && s <= t {
				250	panic("s <= t")
				251	}
				252
				253	if debug && canRepeat && int(offset1) > len(src) {
				254	panic("invalid offset")
				255	}
				256
				257	// Extend the 4-byte match as long as possible.
				258	l := e.matchlen(s+4, t+4, src) + 4
				259
				260	// Extend backwards
				261	tMin := s - e.maxMatchOff
				262	if tMin < 0 {
				263	tMin = 0
				264	}
				265	for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
				266	s--
				267	t--
				268	l++
				269	}
				270
				271	// Write our sequence.
				272	var seq seq
				273	seq.litLen = uint32(s - nextEmit)
				274	seq.matchLen = uint32(l - zstdMinMatch)
				275	if seq.litLen > 0 {
				276	blk.literals = append(blk.literals, src[nextEmit:s]...)
				277	}
				278	// Don't use repeat offsets
				279	seq.offset = uint32(s-t) + 3
				280	s += l
				281	if debugSequences {
				282	println("sequence", seq, "next s:", s)
				283	}
				284	blk.sequences = append(blk.sequences, seq)
				285	nextEmit = s
				286	if s >= sLimit {
				287	break encodeLoop
				288	}
				289	cv = load6432(src, s)
				290
				291	// Check offset 2
				292	if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
				293	// We have at least 4 byte match.
				294	// No need to check backwards. We come straight from a match
				295	l := 4 + e.matchlen(s+4, o2+4, src)
				296
				297	// Store this, since we have it.
				298	nextHash := hash6(cv, hashLog)
				299	e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
				300	seq.matchLen = uint32(l) - zstdMinMatch
				301	seq.litLen = 0
				302	// Since litlen is always 0, this is offset 1.
				303	seq.offset = 1
				304	s += l
				305	nextEmit = s
				306	if debugSequences {
				307	println("sequence", seq, "next s:", s)
				308	}
				309	blk.sequences = append(blk.sequences, seq)
				310
				311	// Swap offset 1 and 2.
				312	offset1, offset2 = offset2, offset1
				313	if s >= sLimit {
				314	break encodeLoop
				315	}
				316	// Prepare next loop.
				317	cv = load6432(src, s)
				318	}
				319	}
				320
				321	if int(nextEmit) < len(src) {
				322	blk.literals = append(blk.literals, src[nextEmit:]...)
				323	blk.extraLits = len(src) - int(nextEmit)
				324	}
				325	blk.recentOffsets[0] = uint32(offset1)
				326	blk.recentOffsets[1] = uint32(offset2)
				327	if debug {
				328	println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
				329	}
				330	}
				331
				332	func (e *fastEncoder) addBlock(src []byte) int32 {
				333	// check if we have space already
				334	if len(e.hist)+len(src) > cap(e.hist) {
				335	if cap(e.hist) == 0 {
				336	l := e.maxMatchOff * 2
				337	// Make it at least 1MB.
				338	if l < 1<<20 {
				339	l = 1 << 20
				340	}
				341	e.hist = make([]byte, 0, l)
				342	} else {
				343	if cap(e.hist) < int(e.maxMatchOff*2) {
				344	panic("unexpected buffer size")
				345	}
				346	// Move down
				347	offset := int32(len(e.hist)) - e.maxMatchOff
				348	copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
				349	e.cur += offset
				350	e.hist = e.hist[:e.maxMatchOff]
				351	}
				352	}
				353	s := int32(len(e.hist))
				354	e.hist = append(e.hist, src...)
				355	return s
				356	}
				357
				358	// useBlock will replace the block with the provided one,
				359	// but transfer recent offsets from the previous.
				360	func (e fastEncoder) UseBlock(enc blockEnc) {
				361	enc.reset(e.blk)
				362	e.blk = enc
				363	}
				364
				365	func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
				366	if debug {
				367	if s < 0 {
				368	panic("s<0")
				369	}
				370	if t < 0 {
				371	panic("t<0")
				372	}
				373	if s-t > e.maxMatchOff {
				374	panic(s - t)
				375	}
				376	}
				377	s1 := int(s) + maxMatchLength - 4
				378	if s1 > len(src) {
				379	s1 = len(src)
				380	}
				381
				382	// Extend the match to be as long as possible.
				383	return int32(matchLen(src[s:s1], src[t:]))
				384	}
				385
				386	// Reset the encoding table.
				387	func (e *fastEncoder) Reset() {
				388	if e.blk == nil {
				389	e.blk = &blockEnc{}
				390	e.blk.init()
				391	} else {
				392	e.blk.reset(nil)
				393	}
				394	e.blk.initNewEncode()
				395	if e.crc == nil {
				396	e.crc = xxhash.New()
				397	} else {
				398	e.crc.Reset()
				399	}
				400	if cap(e.hist) < int(e.maxMatchOff*2) {
				401	l := e.maxMatchOff * 2
				402	// Make it at least 1MB.
				403	if l < 1<<20 {
				404	l = 1 << 20
				405	}
				406	e.hist = make([]byte, 0, l)
				407	}
				408	// We offset current position so everything will be out of reach
				409	e.cur += e.maxMatchOff + int32(len(e.hist))
				410	e.hist = e.hist[:0]
				411	}