Dinesh Belwalkar | e63f7f9 | 2019-11-22 23:11:16 +0000 | [diff] [blame] | 1 | // Copyright 2019+ Klaus Post. All rights reserved. |
| 2 | // License information can be found in the LICENSE file. |
| 3 | // Based on work by Yann Collet, released under BSD License. |
| 4 | |
| 5 | package zstd |
| 6 | |
| 7 | import ( |
| 8 | "math/bits" |
| 9 | |
| 10 | "github.com/klauspost/compress/zstd/internal/xxhash" |
| 11 | ) |
| 12 | |
| 13 | const ( |
| 14 | tableBits = 15 // Bits used in the table |
| 15 | tableSize = 1 << tableBits // Size of the table |
| 16 | tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. |
| 17 | maxMatchLength = 131074 |
| 18 | ) |
| 19 | |
| 20 | type tableEntry struct { |
| 21 | val uint32 |
| 22 | offset int32 |
| 23 | } |
| 24 | |
| 25 | type fastEncoder struct { |
| 26 | o encParams |
| 27 | // cur is the offset at the start of hist |
| 28 | cur int32 |
| 29 | // maximum offset. Should be at least 2x block size. |
| 30 | maxMatchOff int32 |
| 31 | hist []byte |
| 32 | crc *xxhash.Digest |
| 33 | table [tableSize]tableEntry |
| 34 | tmp [8]byte |
| 35 | blk *blockEnc |
| 36 | } |
| 37 | |
| 38 | // CRC returns the underlying CRC writer. |
| 39 | func (e *fastEncoder) CRC() *xxhash.Digest { |
| 40 | return e.crc |
| 41 | } |
| 42 | |
| 43 | // AppendCRC will append the CRC to the destination slice and return it. |
| 44 | func (e *fastEncoder) AppendCRC(dst []byte) []byte { |
| 45 | crc := e.crc.Sum(e.tmp[:0]) |
| 46 | dst = append(dst, crc[7], crc[6], crc[5], crc[4]) |
| 47 | return dst |
| 48 | } |
| 49 | |
| 50 | // WindowSize returns the window size of the encoder, |
| 51 | // or a window size small enough to contain the input size, if > 0. |
| 52 | func (e *fastEncoder) WindowSize(size int) int32 { |
| 53 | if size > 0 && size < int(e.maxMatchOff) { |
| 54 | b := int32(1) << uint(bits.Len(uint(size))) |
| 55 | // Keep minimum window. |
| 56 | if b < 1024 { |
| 57 | b = 1024 |
| 58 | } |
| 59 | return b |
| 60 | } |
| 61 | return e.maxMatchOff |
| 62 | } |
| 63 | |
| 64 | // Block returns the current block. |
| 65 | func (e *fastEncoder) Block() *blockEnc { |
| 66 | return e.blk |
| 67 | } |
| 68 | |
| 69 | // Encode mimmics functionality in zstd_fast.c |
| 70 | func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { |
| 71 | const ( |
| 72 | inputMargin = 8 |
| 73 | minNonLiteralBlockSize = 1 + 1 + inputMargin |
| 74 | ) |
| 75 | |
| 76 | // Protect against e.cur wraparound. |
| 77 | for e.cur > (1<<30)+e.maxMatchOff { |
| 78 | if len(e.hist) == 0 { |
| 79 | for i := range e.table[:] { |
| 80 | e.table[i] = tableEntry{} |
| 81 | } |
| 82 | e.cur = e.maxMatchOff |
| 83 | break |
| 84 | } |
| 85 | // Shift down everything in the table that isn't already too far away. |
| 86 | minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff |
| 87 | for i := range e.table[:] { |
| 88 | v := e.table[i].offset |
| 89 | if v < minOff { |
| 90 | v = 0 |
| 91 | } else { |
| 92 | v = v - e.cur + e.maxMatchOff |
| 93 | } |
| 94 | e.table[i].offset = v |
| 95 | } |
| 96 | e.cur = e.maxMatchOff |
| 97 | } |
| 98 | |
| 99 | s := e.addBlock(src) |
| 100 | blk.size = len(src) |
| 101 | if len(src) < minNonLiteralBlockSize { |
| 102 | blk.extraLits = len(src) |
| 103 | blk.literals = blk.literals[:len(src)] |
| 104 | copy(blk.literals, src) |
| 105 | return |
| 106 | } |
| 107 | |
| 108 | // Override src |
| 109 | src = e.hist |
| 110 | sLimit := int32(len(src)) - inputMargin |
| 111 | // stepSize is the number of bytes to skip on every main loop iteration. |
| 112 | // It should be >= 2. |
| 113 | stepSize := int32(e.o.targetLength) |
| 114 | if stepSize == 0 { |
| 115 | stepSize++ |
| 116 | } |
| 117 | stepSize++ |
| 118 | |
| 119 | // TEMPLATE |
| 120 | const hashLog = tableBits |
| 121 | // seems global, but would be nice to tweak. |
| 122 | const kSearchStrength = 8 |
| 123 | |
| 124 | // nextEmit is where in src the next emitLiteral should start from. |
| 125 | nextEmit := s |
| 126 | cv := load6432(src, s) |
| 127 | |
| 128 | // Relative offsets |
| 129 | offset1 := int32(blk.recentOffsets[0]) |
| 130 | offset2 := int32(blk.recentOffsets[1]) |
| 131 | |
| 132 | addLiterals := func(s *seq, until int32) { |
| 133 | if until == nextEmit { |
| 134 | return |
| 135 | } |
| 136 | blk.literals = append(blk.literals, src[nextEmit:until]...) |
| 137 | s.litLen = uint32(until - nextEmit) |
| 138 | } |
| 139 | if debug { |
| 140 | println("recent offsets:", blk.recentOffsets) |
| 141 | } |
| 142 | |
| 143 | encodeLoop: |
| 144 | for { |
| 145 | // t will contain the match offset when we find one. |
| 146 | // When existing the search loop, we have already checked 4 bytes. |
| 147 | var t int32 |
| 148 | |
| 149 | // We will not use repeat offsets across blocks. |
| 150 | // By not using them for the first 3 matches |
| 151 | canRepeat := len(blk.sequences) > 2 |
| 152 | |
| 153 | for { |
| 154 | if debug && canRepeat && offset1 == 0 { |
| 155 | panic("offset0 was 0") |
| 156 | } |
| 157 | |
| 158 | nextHash := hash6(cv, hashLog) |
| 159 | nextHash2 := hash6(cv>>8, hashLog) |
| 160 | candidate := e.table[nextHash] |
| 161 | candidate2 := e.table[nextHash2] |
| 162 | repIndex := s - offset1 + 2 |
| 163 | |
| 164 | e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} |
| 165 | e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} |
| 166 | |
| 167 | if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { |
| 168 | // Consider history as well. |
| 169 | var seq seq |
| 170 | lenght := 4 + e.matchlen(s+6, repIndex+4, src) |
| 171 | |
| 172 | seq.matchLen = uint32(lenght - zstdMinMatch) |
| 173 | |
| 174 | // We might be able to match backwards. |
| 175 | // Extend as long as we can. |
| 176 | start := s + 2 |
| 177 | // We end the search early, so we don't risk 0 literals |
| 178 | // and have to do special offset treatment. |
| 179 | startLimit := nextEmit + 1 |
| 180 | |
| 181 | sMin := s - e.maxMatchOff |
| 182 | if sMin < 0 { |
| 183 | sMin = 0 |
| 184 | } |
| 185 | for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { |
| 186 | repIndex-- |
| 187 | start-- |
| 188 | seq.matchLen++ |
| 189 | } |
| 190 | addLiterals(&seq, start) |
| 191 | |
| 192 | // rep 0 |
| 193 | seq.offset = 1 |
| 194 | if debugSequences { |
| 195 | println("repeat sequence", seq, "next s:", s) |
| 196 | } |
| 197 | blk.sequences = append(blk.sequences, seq) |
| 198 | s += lenght + 2 |
| 199 | nextEmit = s |
| 200 | if s >= sLimit { |
| 201 | if debug { |
| 202 | println("repeat ended", s, lenght) |
| 203 | |
| 204 | } |
| 205 | break encodeLoop |
| 206 | } |
| 207 | cv = load6432(src, s) |
| 208 | continue |
| 209 | } |
| 210 | coffset0 := s - (candidate.offset - e.cur) |
| 211 | coffset1 := s - (candidate2.offset - e.cur) + 1 |
| 212 | if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { |
| 213 | // found a regular match |
| 214 | t = candidate.offset - e.cur |
| 215 | if debug && s <= t { |
| 216 | panic("s <= t") |
| 217 | } |
| 218 | if debug && s-t > e.maxMatchOff { |
| 219 | panic("s - t >e.maxMatchOff") |
| 220 | } |
| 221 | break |
| 222 | } |
| 223 | |
| 224 | if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { |
| 225 | // found a regular match |
| 226 | t = candidate2.offset - e.cur |
| 227 | s++ |
| 228 | if debug && s <= t { |
| 229 | panic("s <= t") |
| 230 | } |
| 231 | if debug && s-t > e.maxMatchOff { |
| 232 | panic("s - t >e.maxMatchOff") |
| 233 | } |
| 234 | if debug && t < 0 { |
| 235 | panic("t<0") |
| 236 | } |
| 237 | break |
| 238 | } |
| 239 | s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) |
| 240 | if s >= sLimit { |
| 241 | break encodeLoop |
| 242 | } |
| 243 | cv = load6432(src, s) |
| 244 | } |
| 245 | // A 4-byte match has been found. We'll later see if more than 4 bytes. |
| 246 | offset2 = offset1 |
| 247 | offset1 = s - t |
| 248 | |
| 249 | if debug && s <= t { |
| 250 | panic("s <= t") |
| 251 | } |
| 252 | |
| 253 | if debug && canRepeat && int(offset1) > len(src) { |
| 254 | panic("invalid offset") |
| 255 | } |
| 256 | |
| 257 | // Extend the 4-byte match as long as possible. |
| 258 | l := e.matchlen(s+4, t+4, src) + 4 |
| 259 | |
| 260 | // Extend backwards |
| 261 | tMin := s - e.maxMatchOff |
| 262 | if tMin < 0 { |
| 263 | tMin = 0 |
| 264 | } |
| 265 | for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { |
| 266 | s-- |
| 267 | t-- |
| 268 | l++ |
| 269 | } |
| 270 | |
| 271 | // Write our sequence. |
| 272 | var seq seq |
| 273 | seq.litLen = uint32(s - nextEmit) |
| 274 | seq.matchLen = uint32(l - zstdMinMatch) |
| 275 | if seq.litLen > 0 { |
| 276 | blk.literals = append(blk.literals, src[nextEmit:s]...) |
| 277 | } |
| 278 | // Don't use repeat offsets |
| 279 | seq.offset = uint32(s-t) + 3 |
| 280 | s += l |
| 281 | if debugSequences { |
| 282 | println("sequence", seq, "next s:", s) |
| 283 | } |
| 284 | blk.sequences = append(blk.sequences, seq) |
| 285 | nextEmit = s |
| 286 | if s >= sLimit { |
| 287 | break encodeLoop |
| 288 | } |
| 289 | cv = load6432(src, s) |
| 290 | |
| 291 | // Check offset 2 |
| 292 | if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { |
| 293 | // We have at least 4 byte match. |
| 294 | // No need to check backwards. We come straight from a match |
| 295 | l := 4 + e.matchlen(s+4, o2+4, src) |
| 296 | |
| 297 | // Store this, since we have it. |
| 298 | nextHash := hash6(cv, hashLog) |
| 299 | e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} |
| 300 | seq.matchLen = uint32(l) - zstdMinMatch |
| 301 | seq.litLen = 0 |
| 302 | // Since litlen is always 0, this is offset 1. |
| 303 | seq.offset = 1 |
| 304 | s += l |
| 305 | nextEmit = s |
| 306 | if debugSequences { |
| 307 | println("sequence", seq, "next s:", s) |
| 308 | } |
| 309 | blk.sequences = append(blk.sequences, seq) |
| 310 | |
| 311 | // Swap offset 1 and 2. |
| 312 | offset1, offset2 = offset2, offset1 |
| 313 | if s >= sLimit { |
| 314 | break encodeLoop |
| 315 | } |
| 316 | // Prepare next loop. |
| 317 | cv = load6432(src, s) |
| 318 | } |
| 319 | } |
| 320 | |
| 321 | if int(nextEmit) < len(src) { |
| 322 | blk.literals = append(blk.literals, src[nextEmit:]...) |
| 323 | blk.extraLits = len(src) - int(nextEmit) |
| 324 | } |
| 325 | blk.recentOffsets[0] = uint32(offset1) |
| 326 | blk.recentOffsets[1] = uint32(offset2) |
| 327 | if debug { |
| 328 | println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) |
| 329 | } |
| 330 | } |
| 331 | |
| 332 | func (e *fastEncoder) addBlock(src []byte) int32 { |
| 333 | // check if we have space already |
| 334 | if len(e.hist)+len(src) > cap(e.hist) { |
| 335 | if cap(e.hist) == 0 { |
| 336 | l := e.maxMatchOff * 2 |
| 337 | // Make it at least 1MB. |
| 338 | if l < 1<<20 { |
| 339 | l = 1 << 20 |
| 340 | } |
| 341 | e.hist = make([]byte, 0, l) |
| 342 | } else { |
| 343 | if cap(e.hist) < int(e.maxMatchOff*2) { |
| 344 | panic("unexpected buffer size") |
| 345 | } |
| 346 | // Move down |
| 347 | offset := int32(len(e.hist)) - e.maxMatchOff |
| 348 | copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) |
| 349 | e.cur += offset |
| 350 | e.hist = e.hist[:e.maxMatchOff] |
| 351 | } |
| 352 | } |
| 353 | s := int32(len(e.hist)) |
| 354 | e.hist = append(e.hist, src...) |
| 355 | return s |
| 356 | } |
| 357 | |
| 358 | // useBlock will replace the block with the provided one, |
| 359 | // but transfer recent offsets from the previous. |
| 360 | func (e *fastEncoder) UseBlock(enc *blockEnc) { |
| 361 | enc.reset(e.blk) |
| 362 | e.blk = enc |
| 363 | } |
| 364 | |
| 365 | func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 { |
| 366 | if debug { |
| 367 | if s < 0 { |
| 368 | panic("s<0") |
| 369 | } |
| 370 | if t < 0 { |
| 371 | panic("t<0") |
| 372 | } |
| 373 | if s-t > e.maxMatchOff { |
| 374 | panic(s - t) |
| 375 | } |
| 376 | } |
| 377 | s1 := int(s) + maxMatchLength - 4 |
| 378 | if s1 > len(src) { |
| 379 | s1 = len(src) |
| 380 | } |
| 381 | |
| 382 | // Extend the match to be as long as possible. |
| 383 | return int32(matchLen(src[s:s1], src[t:])) |
| 384 | } |
| 385 | |
| 386 | // Reset the encoding table. |
| 387 | func (e *fastEncoder) Reset() { |
| 388 | if e.blk == nil { |
| 389 | e.blk = &blockEnc{} |
| 390 | e.blk.init() |
| 391 | } else { |
| 392 | e.blk.reset(nil) |
| 393 | } |
| 394 | e.blk.initNewEncode() |
| 395 | if e.crc == nil { |
| 396 | e.crc = xxhash.New() |
| 397 | } else { |
| 398 | e.crc.Reset() |
| 399 | } |
| 400 | if cap(e.hist) < int(e.maxMatchOff*2) { |
| 401 | l := e.maxMatchOff * 2 |
| 402 | // Make it at least 1MB. |
| 403 | if l < 1<<20 { |
| 404 | l = 1 << 20 |
| 405 | } |
| 406 | e.hist = make([]byte, 0, l) |
| 407 | } |
| 408 | // We offset current position so everything will be out of reach |
| 409 | e.cur += e.maxMatchOff + int32(len(e.hist)) |
| 410 | e.hist = e.hist[:0] |
| 411 | } |