Blame - vendor/github.com/pierrec/lz4/block.go - sadis-server

blob: 00b1111b92b8efa86041747920a75937ee0a4607 [file] [log] [blame]

Matteo Scandolo	9a2772a	2018-11-19 14:56:26 -0800	[diff] [blame]	1	package lz4
				2
				3	import (
				4	"encoding/binary"
				5	"errors"
				6	)
				7
				8	var (
				9	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed
				10	// block is corrupted or the destination buffer is not large enough for the uncompressed data.
				11	ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short")
				12	// ErrInvalid is returned when reading an invalid LZ4 archive.
				13	ErrInvalid = errors.New("lz4: bad magic number")
				14	)
				15
				16	// blockHash hashes 4 bytes into a value < winSize.
				17	func blockHash(x uint32) uint32 {
				18	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
				19	return x * hasher >> hashShift
				20	}
				21
				22	// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
				23	func CompressBlockBound(n int) int {
				24	return n + n/255 + 16
				25	}
				26
				27	// UncompressBlock uncompresses the source buffer into the destination one,
				28	// and returns the uncompressed size.
				29	//
				30	// The destination buffer must be sized appropriately.
				31	//
				32	// An error is returned if the source data is invalid or the destination buffer is too small.
				33	func UncompressBlock(src, dst []byte) (si int, err error) {
				34	defer func() {
				35	// It is now faster to let the runtime panic and recover on out of bound slice access
				36	// than checking indices as we go along.
				37	if recover() != nil {
				38	err = ErrInvalidSourceShortBuffer
				39	}
				40	}()
				41	sn := len(src)
				42	if sn == 0 {
				43	return 0, nil
				44	}
				45	var di int
				46
				47	for {
				48	// Literals and match lengths (token).
				49	b := int(src[si])
				50	si++
				51
				52	// Literals.
				53	if lLen := b >> 4; lLen > 0 {
				54	if lLen == 0xF {
				55	for src[si] == 0xFF {
				56	lLen += 0xFF
				57	si++
				58	}
				59	lLen += int(src[si])
				60	si++
				61	}
				62	i := si
				63	si += lLen
				64	di += copy(dst[di:], src[i:si])
				65
				66	if si >= sn {
				67	return di, nil
				68	}
				69	}
				70
				71	si++
				72	_ = src[si] // Bound check elimination.
				73	offset := int(src[si-1]) \| int(src[si])<<8
				74	si++
				75
				76	// Match.
				77	mLen := b & 0xF
				78	if mLen == 0xF {
				79	for src[si] == 0xFF {
				80	mLen += 0xFF
				81	si++
				82	}
				83	mLen += int(src[si])
				84	si++
				85	}
				86	mLen += minMatch
				87
				88	// Copy the match.
				89	i := di - offset
				90	if offset > 0 && mLen >= offset {
				91	// Efficiently copy the match dst[di-offset:di] into the dst slice.
				92	bytesToCopy := offset * (mLen / offset)
				93	expanded := dst[i:]
				94	for n := offset; n <= bytesToCopy+offset; n *= 2 {
				95	copy(expanded[n:], expanded[:n])
				96	}
				97	di += bytesToCopy
				98	mLen -= bytesToCopy
				99	}
				100	di += copy(dst[di:], dst[i:i+mLen])
				101	}
				102	}
				103
				104	// CompressBlock compresses the source buffer into the destination one.
				105	// This is the fast version of LZ4 compression and also the default one.
				106	// The size of hashTable must be at least 64Kb.
				107	//
				108	// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
				109	//
				110	// An error is returned if the destination buffer is too small.
				111	func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
				112	defer func() {
				113	if recover() != nil {
				114	err = ErrInvalidSourceShortBuffer
				115	}
				116	}()
				117
				118	sn, dn := len(src)-mfLimit, len(dst)
				119	if sn <= 0 \|\| dn == 0 {
				120	return 0, nil
				121	}
				122	var si int
				123
				124	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
				125	// const accInit = 1 << skipStrength
				126
				127	anchor := si // Position of the current literals.
				128	// acc := accInit // Variable step: improves performance on non-compressible data.
				129
				130	for si < sn {
				131	// Hash the next 4 bytes (sequence)...
				132	match := binary.LittleEndian.Uint32(src[si:])
				133	h := blockHash(match)
				134
				135	ref := hashTable[h]
				136	hashTable[h] = si
				137	if ref >= sn { // Invalid reference (dirty hashtable).
				138	si++
				139	continue
				140	}
				141	offset := si - ref
				142	if offset <= 0 \|\| offset >= winSize \|\| // Out of window.
				143	match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
				144	// si += acc >> skipStrength
				145	// acc++
				146	si++
				147	continue
				148	}
				149
				150	// Match found.
				151	// acc = accInit
				152	lLen := si - anchor // Literal length.
				153
				154	// Encode match length part 1.
				155	si += minMatch
				156	mLen := si // Match length has minMatch already.
				157	// Find the longest match, first looking by batches of 8 bytes.
				158	for si < sn && binary.LittleEndian.Uint64(src[si:]) == binary.LittleEndian.Uint64(src[si-offset:]) {
				159	si += 8
				160	}
				161	// Then byte by byte.
				162	for si < sn && src[si] == src[si-offset] {
				163	si++
				164	}
				165
				166	mLen = si - mLen
				167	if mLen < 0xF {
				168	dst[di] = byte(mLen)
				169	} else {
				170	dst[di] = 0xF
				171	}
				172
				173	// Encode literals length.
				174	if lLen < 0xF {
				175	dst[di] \|= byte(lLen << 4)
				176	} else {
				177	dst[di] \|= 0xF0
				178	di++
				179	l := lLen - 0xF
				180	for ; l >= 0xFF; l -= 0xFF {
				181	dst[di] = 0xFF
				182	di++
				183	}
				184	dst[di] = byte(l)
				185	}
				186	di++
				187
				188	// Literals.
				189	copy(dst[di:], src[anchor:anchor+lLen])
				190	di += lLen + 2
				191	anchor = si
				192
				193	// Encode offset.
				194	_ = dst[di] // Bound check elimination.
				195	dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
				196
				197	// Encode match length part 2.
				198	if mLen >= 0xF {
				199	for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				200	dst[di] = 0xFF
				201	di++
				202	}
				203	dst[di] = byte(mLen)
				204	di++
				205	}
				206	}
				207
				208	if anchor == 0 {
				209	// Incompressible.
				210	return 0, nil
				211	}
				212
				213	// Last literals.
				214	lLen := len(src) - anchor
				215	if lLen < 0xF {
				216	dst[di] = byte(lLen << 4)
				217	} else {
				218	dst[di] = 0xF0
				219	di++
				220	for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF {
				221	dst[di] = 0xFF
				222	di++
				223	}
				224	dst[di] = byte(lLen)
				225	}
				226	di++
				227
				228	// Write the last literals.
				229	if di >= anchor {
				230	// Incompressible.
				231	return 0, nil
				232	}
				233	di += copy(dst[di:], src[anchor:])
				234	return di, nil
				235	}
				236
				237	// CompressBlockHC compresses the source buffer src into the destination dst
				238	// with max search depth (use 0 or negative value for no max).
				239	//
				240	// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
				241	//
				242	// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
				243	//
				244	// An error is returned if the destination buffer is too small.
				245	func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
				246	defer func() {
				247	if recover() != nil {
				248	err = ErrInvalidSourceShortBuffer
				249	}
				250	}()
				251
				252	sn, dn := len(src)-mfLimit, len(dst)
				253	if sn <= 0 \|\| dn == 0 {
				254	return 0, nil
				255	}
				256	var si int
				257
				258	// hashTable: stores the last position found for a given hash
				259	// chaingTable: stores previous positions for a given hash
				260	var hashTable, chainTable [winSize]int
				261
				262	if depth <= 0 {
				263	depth = winSize
				264	}
				265
				266	anchor := si
				267	for si < sn {
				268	// Hash the next 4 bytes (sequence).
				269	match := binary.LittleEndian.Uint32(src[si:])
				270	h := blockHash(match)
				271
				272	// Follow the chain until out of window and give the longest match.
				273	mLen := 0
				274	offset := 0
				275	for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] {
				276	// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
				277	// must match to improve on the match length.
				278	if src[next+mLen] != src[si+mLen] {
				279	continue
				280	}
				281	ml := 0
				282	// Compare the current position with a previous with the same hash.
				283	for ml < sn-si && binary.LittleEndian.Uint64(src[next+ml:]) == binary.LittleEndian.Uint64(src[si+ml:]) {
				284	ml += 8
				285	}
				286	for ml < sn-si && src[next+ml] == src[si+ml] {
				287	ml++
				288	}
				289	if ml < minMatch \|\| ml <= mLen {
				290	// Match too small (<minMath) or smaller than the current match.
				291	continue
				292	}
				293	// Found a longer match, keep its position and length.
				294	mLen = ml
				295	offset = si - next
				296	// Try another previous position with the same hash.
				297	try--
				298	}
				299	chainTable[si&winMask] = hashTable[h]
				300	hashTable[h] = si
				301
				302	// No match found.
				303	if mLen == 0 {
				304	si++
				305	continue
				306	}
				307
				308	// Match found.
				309	// Update hash/chain tables with overlapping bytes:
				310	// si already hashed, add everything from si+1 up to the match length.
				311	winStart := si + 1
				312	if ws := si + mLen - winSize; ws > winStart {
				313	winStart = ws
				314	}
				315	for si, ml := winStart, si+mLen; si < ml; {
				316	match >>= 8
				317	match \|= uint32(src[si+3]) << 24
				318	h := blockHash(match)
				319	chainTable[si&winMask] = hashTable[h]
				320	hashTable[h] = si
				321	si++
				322	}
				323
				324	lLen := si - anchor
				325	si += mLen
				326	mLen -= minMatch // Match length does not include minMatch.
				327
				328	if mLen < 0xF {
				329	dst[di] = byte(mLen)
				330	} else {
				331	dst[di] = 0xF
				332	}
				333
				334	// Encode literals length.
				335	if lLen < 0xF {
				336	dst[di] \|= byte(lLen << 4)
				337	} else {
				338	dst[di] \|= 0xF0
				339	di++
				340	l := lLen - 0xF
				341	for ; l >= 0xFF; l -= 0xFF {
				342	dst[di] = 0xFF
				343	di++
				344	}
				345	dst[di] = byte(l)
				346	}
				347	di++
				348
				349	// Literals.
				350	copy(dst[di:], src[anchor:anchor+lLen])
				351	di += lLen
				352	anchor = si
				353
				354	// Encode offset.
				355	di += 2
				356	dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
				357
				358	// Encode match length part 2.
				359	if mLen >= 0xF {
				360	for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				361	dst[di] = 0xFF
				362	di++
				363	}
				364	dst[di] = byte(mLen)
				365	di++
				366	}
				367	}
				368
				369	if anchor == 0 {
				370	// Incompressible.
				371	return 0, nil
				372	}
				373
				374	// Last literals.
				375	lLen := len(src) - anchor
				376	if lLen < 0xF {
				377	dst[di] = byte(lLen << 4)
				378	} else {
				379	dst[di] = 0xF0
				380	di++
				381	lLen -= 0xF
				382	for ; lLen >= 0xFF; lLen -= 0xFF {
				383	dst[di] = 0xFF
				384	di++
				385	}
				386	dst[di] = byte(lLen)
				387	}
				388	di++
				389
				390	// Write the last literals.
				391	if di >= anchor {
				392	// Incompressible.
				393	return 0, nil
				394	}
				395	di += copy(dst[di:], src[anchor:])
				396	return di, nil
				397	}