// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

const (
	dFastLongTableBits = 17                      // Bits used in the long match table
	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.

	dFastShortTableBits = tableBits                // Bits used in the short match table
	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
)

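// doubleFastEncoder extends fastEncoder with a second table that hashes
// 8 bytes instead of 5. Long-table matches are preferred; on a short-table
// hit, the long table is re-checked one byte ahead before settling.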
type doubleFastEncoder struct {
	fastEncoder
	longTable [dFastLongTableSize]tableEntry
}

// Encode mimics functionality in zstd_dfast.c
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
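	// Table entries store absolute positions (position + e.cur), so if
	// e.cur gets close to the int32 limit we either reset the tables
	// (no history) or rebase every stored offset so that e.cur can
	// restart at e.maxMatchOff.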
	for e.cur > (1<<30)+e.maxMatchOff {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.cur = e.maxMatchOff
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src with the history, which now ends with the current block.
	src = e.hist
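	// sLimit is the last position at which a match may start, leaving
	// room for the 8-byte loads and the 2-byte look-ahead.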
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	stepSize := int32(e.o.targetLength)
	if stepSize == 0 {
		stepSize++
	}

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debug && canRepeat && offset1 == 0 {
				panic("offset1 was 0")
			}

			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

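			// The repeat offset is checked one byte ahead (repOff), so on
			// a hit the byte at s is emitted as part of the literals.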
			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1
				// We deviate from the reference encoder and also check offset 2.
				// Slower and not consistently better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff2, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes; we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// Found a regular match.
				// See if we can find a long match at s+1.
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4-byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes; we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debug && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
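			// The skip grows with the distance from the last emit: every
			// 1<<(kSearchStrength-1) bytes without a match add one byte to
			// the step, so incompressible input is skipped over faster.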
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debug && s <= t {
			panic("s <= t")
		}

		if debug && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
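		// Encoded offset values 1-3 are reserved for the repeat codes,
		// so a fresh match offset is stored with a +3 bias.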
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
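		// Reuse the loaded values for the following positions: bump the
		// offsets and drop the low byte so index0+1 and index1+1 go into
		// the short table.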
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hash5(cv0, dFastShortTableBits)] = te0
		e.table[hash5(cv1, dFastShortTableBits)] = te1

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
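		// While the bytes at the previous offset keep matching, emit
		// repeat sequences directly instead of going through the search.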
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least a 4-byte match.
			// No need to check backwards; we come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable differences are that src will not be copied for history,
// and we do not need to check for max match length.
func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
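	// With no history kept, there are no offsets to rebase; simply
	// clear both tables.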
	if e.cur > (1<<30)+e.maxMatchOff {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		for i := range e.longTable[:] {
			e.longTable[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	stepSize := int32(e.o.targetLength)
	if stepSize == 0 {
		stepSize++
	}

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		for {
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if len(blk.sequences) > 2 {
				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					var seq seq
					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes; we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// Found a regular match.
				// See if we can find a long match at s+1.
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4-byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes; we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debug && s <= t {
					panic("s <= t")
				}
				if debug && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debug && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debug && s <= t {
			panic("s <= t")
		}

		// Extend the 4-byte match as long as possible.
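		// matchLen is used directly; per the function comment, matches do
		// not need to be capped at maxMatchLength here.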
		//l := e.matchlen(s+4, t+4, src) + 4
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hash5(cv0, dFastShortTableBits)] = te0
		e.table[hash5(cv1, dFastShortTableBits)] = te1

		cv = load6432(src, s)

		if len(blk.sequences) <= 2 {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least a 4-byte match.
			// No need to check backwards; we come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}