blob: d96e0e76f1a855efd7c064669098b771b7bd4660 [file] [log] [blame]
William Kurkianea869482019-04-09 15:16:11 -04001package lz4
2
3import (
4 "encoding/binary"
5 "errors"
6)
7
var (
	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBlock when a compressed
	// block is corrupted or the destination buffer is not large enough for the uncompressed data.
	ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short")
	// ErrInvalid is returned when reading an invalid LZ4 archive.
	ErrInvalid = errors.New("lz4: bad magic number")
)
15
16// blockHash hashes 4 bytes into a value < winSize.
17func blockHash(x uint32) uint32 {
18 const hasher uint32 = 2654435761 // Knuth multiplicative hash.
19 return x * hasher >> hashShift
20}
21
// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
func CompressBlockBound(n int) int {
	// Worst case for incompressible data: one extra byte per 255 input
	// bytes of literal-length continuation, plus a constant headroom.
	const headroom = 16
	overhead := n / 255
	return n + overhead + headroom
}
26
27// UncompressBlock uncompresses the source buffer into the destination one,
28// and returns the uncompressed size.
29//
30// The destination buffer must be sized appropriately.
31//
32// An error is returned if the source data is invalid or the destination buffer is too small.
33func UncompressBlock(src, dst []byte) (di int, err error) {
34 sn := len(src)
35 if sn == 0 {
36 return 0, nil
37 }
38
39 di = decodeBlock(dst, src)
40 if di < 0 {
41 return 0, ErrInvalidSourceShortBuffer
42 }
43 return di, nil
44}
45
// CompressBlock compresses the source buffer into the destination one.
// This is the fast version of LZ4 compression and also the default one.
// The size of hashTable must be at least 64Kb.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) {
	// Writes past the end of dst panic; recover converts that panic into
	// the "destination too short" error instead of crashing the caller.
	defer func() {
		if recover() != nil {
			err = ErrInvalidSourceShortBuffer
		}
	}()

	// sn is the last position a match may start at: no matches are
	// attempted within mfLimit bytes of the end of src.
	sn, dn := len(src)-mfLimit, len(dst)
	if sn <= 0 || dn == 0 {
		return 0, nil
	}
	var si int // Current read position in src.

	// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
	// const accInit = 1 << skipStrength

	anchor := si // Position of the current literals.
	// acc := accInit // Variable step: improves performance on non-compressible data.

	for si < sn {
		// Hash the next 4 bytes (sequence)...
		match := binary.LittleEndian.Uint32(src[si:])
		h := blockHash(match)

		// Look up the previous position with the same hash and record the
		// current position for later iterations.
		ref := hashTable[h]
		hashTable[h] = si
		if ref >= sn { // Invalid reference (dirty hashtable).
			si++
			continue
		}
		offset := si - ref
		if offset <= 0 || offset >= winSize || // Out of window.
			match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches.
			// si += acc >> skipStrength
			// acc++
			si++
			continue
		}

		// Match found.
		// acc = accInit
		lLen := si - anchor // Literal length.

		// Encode match length part 1.
		si += minMatch
		mLen := si // Match length has minMatch already.
		// Find the longest match, first looking by batches of 8 bytes.
		for si < sn && binary.LittleEndian.Uint64(src[si:]) == binary.LittleEndian.Uint64(src[si-offset:]) {
			si += 8
		}
		// Then byte by byte.
		for si < sn && src[si] == src[si-offset] {
			si++
		}

		mLen = si - mLen
		// Low 4 bits of the token hold the match length; 0xF means
		// additional length bytes follow (written in part 2 below).
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// Encode literals length in the high 4 bits of the same token,
		// with 0xFF continuation bytes for lengths >= 0xF.
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			di++
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			dst[di] = byte(l)
		}
		di++

		// Literals.
		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
		// Advance past the literals AND the 2-byte offset, which is
		// back-filled just below at dst[di-2], dst[di-1].
		di += lLen + 2
		anchor = si

		// Encode offset (2 bytes, little endian).
		_ = dst[di] // Bound check elimination.
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// Encode match length part 2: continuation bytes when the token's
		// low nibble saturated at 0xF.
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			dst[di] = byte(mLen)
			di++
		}
	}

	if anchor == 0 {
		// Incompressible: no match was ever emitted.
		return 0, nil
	}

	// Last literals.
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		di++
		for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			di++
		}
		dst[di] = byte(lLen)
	}
	di++

	// Write the last literals.
	if di >= anchor {
		// Compressed output would be no smaller than the input consumed:
		// report incompressible.
		return 0, nil
	}
	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
	return di, nil
}
178
// CompressBlockHC compresses the source buffer src into the destination dst
// with max search depth (use 0 or negative value for no max).
//
// CompressBlockHC compression ratio is better than CompressBlock but it is also slower.
//
// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible.
//
// An error is returned if the destination buffer is too small.
func CompressBlockHC(src, dst []byte, depth int) (di int, err error) {
	// Writes past the end of dst panic; recover converts that panic into
	// the "destination too short" error instead of crashing the caller.
	defer func() {
		if recover() != nil {
			err = ErrInvalidSourceShortBuffer
		}
	}()

	// sn is the last position a match may start at: no matches are
	// attempted within mfLimit bytes of the end of src.
	sn, dn := len(src)-mfLimit, len(dst)
	if sn <= 0 || dn == 0 {
		return 0, nil
	}
	var si int // Current read position in src.

	// hashTable: stores the last position found for a given hash.
	// chainTable: stores previous positions for a given hash.
	var hashTable, chainTable [winSize]int

	if depth <= 0 {
		depth = winSize
	}

	anchor := si // Start of the pending literals.
	for si < sn {
		// Hash the next 4 bytes (sequence).
		match := binary.LittleEndian.Uint32(src[si:])
		h := blockHash(match)

		// Follow the chain until out of window and give the longest match.
		mLen := 0
		offset := 0
		for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] {
			// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
			// must match to improve on the match length.
			if src[next+mLen] != src[si+mLen] {
				continue
			}
			ml := 0
			// Compare the current position with a previous one having the
			// same hash: first in 8-byte batches, then byte by byte.
			for ml < sn-si && binary.LittleEndian.Uint64(src[next+ml:]) == binary.LittleEndian.Uint64(src[si+ml:]) {
				ml += 8
			}
			for ml < sn-si && src[next+ml] == src[si+ml] {
				ml++
			}
			if ml < minMatch || ml <= mLen {
				// Match too small (<minMatch) or smaller than the current match.
				continue
			}
			// Found a longer match, keep its position and length.
			mLen = ml
			offset = si - next
			// Try another previous position with the same hash.
			// NOTE(review): try is only decremented when a longer match is
			// found, so depth bounds improvements rather than chain steps —
			// confirm this is intended.
			try--
		}
		chainTable[si&winMask] = hashTable[h]
		hashTable[h] = si

		// No match found.
		if mLen == 0 {
			si++
			continue
		}

		// Match found.
		// Update hash/chain tables with overlapping bytes:
		// si already hashed, add everything from si+1 up to the match length.
		winStart := si + 1
		if ws := si + mLen - winSize; ws > winStart {
			winStart = ws
		}
		for si, ml := winStart, si+mLen; si < ml; {
			// Roll the 4-byte sequence forward one byte at a time.
			match >>= 8
			match |= uint32(src[si+3]) << 24
			h := blockHash(match)
			chainTable[si&winMask] = hashTable[h]
			hashTable[h] = si
			si++
		}

		lLen := si - anchor // Literal length.
		si += mLen
		mLen -= minMatch // Match length does not include minMatch.

		// Low 4 bits of the token hold the match length; 0xF means
		// additional length bytes follow (written in part 2 below).
		if mLen < 0xF {
			dst[di] = byte(mLen)
		} else {
			dst[di] = 0xF
		}

		// Encode literals length in the high 4 bits of the same token,
		// with 0xFF continuation bytes for lengths >= 0xF.
		if lLen < 0xF {
			dst[di] |= byte(lLen << 4)
		} else {
			dst[di] |= 0xF0
			di++
			l := lLen - 0xF
			for ; l >= 0xFF; l -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			dst[di] = byte(l)
		}
		di++

		// Literals.
		copy(dst[di:di+lLen], src[anchor:anchor+lLen])
		di += lLen
		anchor = si

		// Encode offset (2 bytes, little endian).
		di += 2
		dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)

		// Encode match length part 2: continuation bytes when the token's
		// low nibble saturated at 0xF.
		if mLen >= 0xF {
			for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
				dst[di] = 0xFF
				di++
			}
			dst[di] = byte(mLen)
			di++
		}
	}

	if anchor == 0 {
		// Incompressible: no match was ever emitted.
		return 0, nil
	}

	// Last literals.
	lLen := len(src) - anchor
	if lLen < 0xF {
		dst[di] = byte(lLen << 4)
	} else {
		dst[di] = 0xF0
		di++
		lLen -= 0xF
		for ; lLen >= 0xFF; lLen -= 0xFF {
			dst[di] = 0xFF
			di++
		}
		dst[di] = byte(lLen)
	}
	di++

	// Write the last literals.
	if di >= anchor {
		// Compressed output would be no smaller than the input consumed:
		// report incompressible.
		return 0, nil
	}
	di += copy(dst[di:di+len(src)-anchor], src[anchor:])
	return di, nil
}
339}