// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table

	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)
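
// For reference, assuming dictShardBits = 6 as used by the other encoders in
// this package, the shard arithmetic above works out to:
//
//	long:  1<<19 = 524288 entries, split into 8192 shards of 64 entries
//	short: 1<<13 =   8192 entries, split into  128 shards of 64 entries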

// prevEntry holds the two most recent table positions that hashed to the same
// long-table bucket. Both are stored with e.cur added, like all table offsets.
type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}
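
// Note on size: tableEntry and prevEntry are 8 bytes each, so the short table
// is 8192*8 B = 64 KiB and the long table is 524288*8 B = 4 MiB per encoder,
// large enough that Encode rebases entries on wraparound below rather than
// clearing them whenever history has to be kept.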

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode improves compression over the fast encoders at the cost of some speed.
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
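	// Table offsets are stored as position + e.cur, so e.cur must stay well
	// below the int32 limit. Once it reaches bufferReset the tables are either
	// cleared (no history to keep) or every stored offset is rebased so that
	// the oldest reachable position (e.maxMatchOff back) becomes offset
	// e.maxMatchOff, after which e.cur restarts at e.maxMatchOff.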
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9
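	// kSearchStrength controls how aggressively the search skips ahead when no
	// match is found: the step in the main loop is 1 + (bytes since last
	// emit)>>8, so for example after 1 KiB of unmatched input the encoder
	// advances 5 bytes per probe instead of 1.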

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}
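
	// Each iteration of encodeLoop finds one match and emits one sequence:
	// first try the repeat offset at s+1, then the two long-table candidates,
	// then the short-table candidate (with a lazy long check at s+1), then try
	// to extend the winner at its end, extend it backwards, emit it, index the
	// skipped positions, and finally greedily emit any offset-2 repeats.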

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset1 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
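			// The repeat is checked at s+1 (repOff) rather than s: this finds
			// repeats even when the byte at s differs, at the cost of emitting
			// that byte as a literal (the backwards extension below can still
			// reclaim it when it does match).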
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
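					// The positions inside the repeat match were never hashed.
					// Hash every other one (long table from cv0, short table
					// from cv1 one byte later) so later matches can still find
					// them while keeping indexing cheap.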
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
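				// Lazy matching: a short match is only guaranteed 4 bytes, so
				// before accepting it, probe the long table at s+1. Taking a
				// longer match there costs one extra literal but usually saves
				// far more on the match itself.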
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
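		// The candidate for the hash of src[s+matched:] is rewound by
		// `matched` bytes: if that earlier position also matches at s, the
		// bytes at the end of the current match line up as well, so a longer
		// match is likely and is re-verified from s.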
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
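		// In the zstd format, offset values 1-3 are repeat-offset codes, so a
		// literal distance d is stored as d+3.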
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
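			// (When a sequence has zero literals, the repeat-offset indices
			// shift by one in the zstd format, so offset code 1 here selects
			// what is normally repeat offset 2, i.e. offset2.)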
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode improves compression like (*betterFastEncoder).Encode, and
// additionally marks the table shards it writes so Reset can restore
// dictionary contents cheaply.
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset1 was 0")
			}

			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += length + repOff

					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += length + repOff2
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hash8(cv0, betterLongTableBits)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hash5(cv1, betterShortTableBits)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t > e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, betterLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hash8(cv0, betterLongTableBits)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hash5(cv1, betterShortTableBits)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, betterShortTableBits)
			nextHashL := hash8(cv, betterLongTableBits)

			// We have at least a 4 byte match.
			// No need to check backwards. We come straight from a match.
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
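		// Hash the dictionary into the short table. Each 8-byte load covers
		// four overlapping 5-byte windows (offsets i..i+3), so stepping i by 4
		// indexes every position of d.content. Offsets are stored relative to
		// e.maxMatchOff, matching how history is addressed after Reset.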
		end := int32(len(d.content)) - 8 + e.maxMatchOff
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash5(cv, hashLog)      // 0 -> 4
			nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
			nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
			nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hash8(cv, betterLongTableBits)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hash8(cv, betterLongTableBits)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
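	// Restore strategy: if most shards were written to (or allDirty is set),
	// one bulk copy of the whole table from the dict snapshot is cheaper than
	// per-shard copies; otherwise copy back only the shards marked dirty.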
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

// markLongShardDirty notes that the long-table shard containing entryNum must
// be restored from dictLongTable on the next Reset.
func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

// markShortShardDirty notes that the short-table shard containing entryNum
// must be restored from dictTable on the next Reset.
func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}