// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	betterLongTableBits = 19                       // Bits used in the long match table
	betterLongTableSize = 1 << betterLongTableBits // Size of the table
	betterLongLen       = 8                        // Bytes used for table hash

	// Note: Increasing the short table bits or making the hash shorter
	// can actually lead to compression degradation since it will 'steal' more from the
	// long match table and match offsets are quite big.
	// This greatly depends on the type of input.
	betterShortTableBits = 13                        // Bits used in the short match table
	betterShortTableSize = 1 << betterShortTableBits // Size of the table
	betterShortLen       = 5                         // Bytes used for table hash

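	// The tables are split into shards so that the dictionary-backed encoder
	// (betterFastEncoderDict) only has to restore the shards it actually
	// overwrote when a dictionary is re-applied on Reset.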
	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard

	betterShortTableShardCnt  = 1 << (betterShortTableBits - dictShardBits)     // Number of shards in the table
	betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)

type prevEntry struct {
	offset int32
	prev   int32
}

// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type betterFastEncoder struct {
	fastBase
	table     [betterShortTableSize]tableEntry
	longTable [betterLongTableSize]prevEntry
}

type betterFastEncoderDict struct {
	betterFastEncoder
	dictTable            []tableEntry
	dictLongTable        []prevEntry
	shortTableShardDirty [betterShortTableShardCnt]bool
	longTableShardDirty  [betterLongTableShardCnt]bool
	allDirty             bool
}

// Encode encodes src into blk. It improves compression over the fast encoder
// by combining the short match table with the chained long match table and
// the lazy matching described on the type above.
func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
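	// Table entries store absolute positions (position in block + e.cur). When
	// e.cur reaches bufferReset the tables are either cleared (no history) or
	// shifted down so e.cur can be rewound to e.maxMatchOff; entries that are
	// too far back to ever match again are zeroed.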
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

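	// kSearchStrength controls how aggressively the search skips ahead when no
	// match is found: the step grows with the distance covered since nextEmit
	// (see the "No match found" branch below).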
	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
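			// repIndex is where a match using the last offset (offset1) would
			// start, checked one byte ahead (repOff) of the current position.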
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += lenght + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += lenght + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// Try to find a better match by searching for a long match at the end of the current best match
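		// candidateL.offset points at the data whose long hash matched at s+matched,
		// so subtracting matched gives the position where the extended match would
		// have to start for the tails to line up.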
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
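		// Sequence offsets 1-3 are reserved for repeat codes, so a literal match
		// distance is stored as the distance plus 3.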
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
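		// Index every second position inside the match we just emitted: the long
		// table gets the hash at index0, the short table the hash at index0+1.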
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	e.ensureHist(len(src))
	e.Encode(blk, src)
}

// Encode encodes src into blk like betterFastEncoder.Encode, while also
// marking which table shards it overwrites so a loaded dictionary can be
// restored cheaply on the next Reset.
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = prevEntry{}
			}
			e.cur = e.maxMatchOff
			e.allDirty = true
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			v2 := e.longTable[i].prev
			if v < minOff {
				v = 0
				v2 = 0
			} else {
				v = v - e.cur + e.maxMatchOff
				if v2 < minOff {
					v2 = 0
				} else {
					v2 = v2 - e.cur + e.maxMatchOff
				}
			}
			e.longTable[i] = prevEntry{
				offset: v,
				prev:   v2,
			}
		}
		e.allDirty = true
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

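	// See the kSearchStrength comment in betterFastEncoder.Encode: it controls
	// how quickly the search skips ahead after repeated failures to find a match.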
	const kSearchStrength = 9

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2
		var matched int32

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			off := s + e.cur
			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					// Index match start+1 (long) -> s - 1
					index0 := s + repOff
					s += lenght + repOff

					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}
					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					continue
				}
				const repOff2 = 1

				// We deviate from the reference encoder and also check offset 2.
				// Still slower and not much better, so disabled.
				// repIndex = s - offset2 + repOff2
				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
					// Consider history as well.
					var seq seq
					lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)

					seq.matchLen = uint32(lenght - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff2
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 2
					seq.offset = 2
					if debugSequences {
						println("repeat sequence 2", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)

					index0 := s + repOff2
					s += lenght + repOff2
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, lenght)

						}
						break encodeLoop
					}

					// Index skipped...
					for index0 < s-1 {
						cv0 := load6432(src, index0)
						cv1 := cv0 >> 8
						h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
						off := index0 + e.cur
						e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
						e.markLongShardDirty(h0)
						h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
						e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
						e.markShortShardDirty(h1)
						index0 += 2
					}
					cv = load6432(src, s)
					// Swap offsets
					offset1, offset2 = offset2, offset1
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := candidateL.offset - e.cur
			coffsetLP := candidateL.prev - e.cur

			// Check if we have a long match.
			if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetL+8, src) + 8
				t = coffsetL
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}

				if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
					// Found a long match, at least 8 bytes.
					prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
					if prevMatch > matched {
						matched = prevMatch
						t = coffsetLP
					}
					if debugAsserts && s <= t {
						panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
					}
					if debugAsserts && s-t > e.maxMatchOff {
						panic("s - t >e.maxMatchOff")
					}
					if debugMatches {
						println("long match")
					}
				}
				break
			}

			// Check if we have a long match on prev.
			if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
				// Found a long match, at least 8 bytes.
				matched = e.matchlen(s+8, coffsetLP+8, src) + 8
				t = coffsetLP
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			coffsetS := candidateS.offset - e.cur

			// Check if we have a short match.
			if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				matched = e.matchlen(s+4, coffsetS+4, src) + 4

				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = candidateL.offset - e.cur

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
				e.markLongShardDirty(nextHashL)
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("long match (after short)")
						}
						break
					}
				}

				// Check prev long...
				coffsetL = candidateL.prev - e.cur
				if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
					// Found a long match, at least 8 bytes.
					matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
					if matchedNext > matched {
						t = coffsetL
						s += checkAt
						matched = matchedNext
						if debugMatches {
							println("prev long match (after short)")
						}
						break
					}
				}
				t = coffsetS
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// Try to find a better match by searching for a long match at the end of the current best match
		if s+matched < sLimit {
			nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
			cv := load3232(src, s)
			candidateL := e.longTable[nextHashL]
			coffsetL := candidateL.offset - e.cur - matched
			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
				// Found a long match, at least 4 bytes.
				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
				if matchedNext > matched {
					t = coffsetL
					matched = matchedNext
					if debugMatches {
						println("long match at end-of-match")
					}
				}
			}

			// Check prev long...
			if true {
				coffsetL = candidateL.prev - e.cur - matched
				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
					// Found a long match, at least 4 bytes.
					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
					if matchedNext > matched {
						t = coffsetL
						matched = matchedNext
						if debugMatches {
							println("prev long match at end-of-match")
						}
					}
				}
			}
		}
		// A match has been found. Update recent offsets.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the n-byte match as long as possible.
		l := matched

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
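		// As above: offsets 1-3 are repeat codes, so the literal distance is stored with +3.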
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) -> s - 1
		index0 := s - l + 1
		for index0 < s-1 {
			cv0 := load6432(src, index0)
			cv1 := cv0 >> 8
			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
			off := index0 + e.cur
			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
			e.markLongShardDirty(h0)
			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
			e.markShortShardDirty(h1)
			index0 += 2
		}

		cv = load6432(src, s)
		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
			nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShortShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder state. This encoder does not support
// dictionaries; d must be nil.
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("betterFastEncoder: Reset with dict")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}
	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := int32(len(d.content)) - 8 + e.maxMatchOff
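		// Fill the short dict table: the loop advances four bytes at a time and
		// hashes the four positions covered by each 8-byte load (the "0 -> 4"
		// style comments note which bytes of cv each hash covers). Offsets start
		// at e.maxMatchOff.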
		for i := e.maxMatchOff; i < end; i += 4 {
			const hashLog = betterShortTableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
			e.dictTable[nextHash3] = tableEntry{
				val:    uint32(cv >> 24),
				offset: i + 3,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Init or copy dict long table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]prevEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			h := hashLen(cv, betterLongTableBits, betterLongLen)
			e.dictLongTable[h] = prevEntry{
				offset: e.maxMatchOff,
				prev:   e.dictLongTable[h].offset,
			}

			end := int32(len(d.content)) - 8 + e.maxMatchOff
			off := 8 // First to read
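			// Roll the 8-byte window forward one byte at a time; each position's
			// long hash is chained to the previous occupant via prev, mirroring
			// how Encode indexes the long table.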
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[off]) << 56)
				h := hashLen(cv, betterLongTableBits, betterLongLen)
				e.dictLongTable[h] = prevEntry{
					offset: i,
					prev:   e.dictLongTable[h].offset,
				}
				off++
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	// Reset table to initial state
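	// If everything (or most shards) changed, bulk-copy the whole table from the
	// dictionary copy; otherwise copy back only the dirty shards. The long table
	// below is handled the same way.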
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.shortTableShardDirty {
				if e.shortTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterShortTableShardCnt
		const shardSize = betterShortTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.table[:], e.dictTable)
			for i := range e.shortTableShardDirty {
				e.shortTableShardDirty[i] = false
			}
		} else {
			for i := range e.shortTableShardDirty {
				if !e.shortTableShardDirty[i] {
					continue
				}

				copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
				e.shortTableShardDirty[i] = false
			}
		}
	}
	{
		dirtyShardCnt := 0
		if !e.allDirty {
			for i := range e.longTableShardDirty {
				if e.longTableShardDirty[i] {
					dirtyShardCnt++
				}
			}
		}
		const shardCnt = betterLongTableShardCnt
		const shardSize = betterLongTableShardSize
		if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
			copy(e.longTable[:], e.dictLongTable)
			for i := range e.longTableShardDirty {
				e.longTableShardDirty[i] = false
			}
		} else {
			for i := range e.longTableShardDirty {
				if !e.longTableShardDirty[i] {
					continue
				}

				copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
				e.longTableShardDirty[i] = false
			}
		}
	}
	e.cur = e.maxMatchOff
	e.allDirty = false
}

func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}

func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
	e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}