// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import (
	"fmt"
	"math"
	"math/bits"
)

const (
	tableBits      = 15                               // Bits used in the table
	tableSize      = 1 << tableBits                   // Size of the table
	tableShardCnt  = 1 << (tableBits - dictShardBits) // Number of shards in the table
	tableShardSize = tableSize / tableShardCnt        // Size of an individual shard
	tableMask      = tableSize - 1                    // Mask for table indices. Redundant, but can eliminate bounds checks.
	maxMatchLength = 131074                           // Longest match length the sequence encoding can represent.
)

type tableEntry struct {
	val    uint32
	offset int32
}

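// fastEncoder is the match finder used by the fastest compression level.
// It keeps a single hash table of recently seen positions.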
type fastEncoder struct {
	fastBase
	table [tableSize]tableEntry
}

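// fastEncoderDict extends fastEncoder with dictionary support.
// tableShardDirty tracks which shards of the table no longer mirror
// dictTable and must be reloaded on the next Reset.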
type fastEncoderDict struct {
	fastEncoder
	dictTable       []tableEntry
	tableShardDirty [tableShardCnt]bool
	allDirty        bool
}

// Encode mimics functionality in zstd_fast.c
func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	// Protect against e.cur wraparound.
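	// (The loop below runs at most once; a loop is used only so we can
	// break out early once the table has been adjusted.)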
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// Enforce this by not allowing repeats for the first 3 matches.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

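			// Probe two table entries per iteration: the hash of the 6 bytes
			// at s and at s+1. Both slots are overwritten with the current
			// positions after the previous candidates have been read.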
			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
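				// Inlined matchlen: compare 8 bytes at a time by XOR-ing the
				// two inputs. load64 is little-endian, so the index of the
				// first differing byte equals the trailing zero count of the
				// difference divided by 8. The same pattern recurs below.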
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals,
				// which would require special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
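			// No match: skip ahead. The step grows with the distance since
			// the last emit, so incompressible regions are scanned
			// progressively faster.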
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
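		// (Offset values 1-3 denote repeat offsets in this representation,
		// so a literal offset is stored with +3 added.)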
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable difference is that src will not be copied for history,
// and we do not need to check for max match length.
func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
	if debug {
		if len(src) > maxBlockSize {
			panic("src too big")
		}
	}

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// Enforce this by not allowing repeats for the first 3 matches.

		for {
			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}

			if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				// length := 4 + e.matchlen(s+6, repIndex+4, src)
				// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
				var length int32
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals,
				// which would require special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && t < 0 {
			panic(fmt.Sprintf("t (%d) < 0 ", t))
		}
		// Extend the 4-byte match as long as possible.
		//l := e.matchlenNoHist(s+4, t+4, src) + 4
		// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
			// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		inputMargin            = 8
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)
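	// For larger inputs, fall back to the plain fastEncoder: the table will
	// then no longer mirror the dictionary, so everything is marked dirty.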
	if e.allDirty || len(src) > 32<<10 {
		e.fastEncoder.Encode(blk, src)
		e.allDirty = true
		return
	}
	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 2.
	const stepSize = 2

	// TEMPLATE
	const hashLog = tableBits
	// seems global, but would be nice to tweak.
	const kSearchStrength = 7

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		// t will contain the match offset when we find one.
		// When exiting the search loop, we have already checked 4 bytes.
		var t int32

		// We will not use repeat offsets across blocks.
		// Enforce this by not allowing repeats for the first 3 matches.
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHash := hash6(cv, hashLog)
			nextHash2 := hash6(cv>>8, hashLog)
			candidate := e.table[nextHash]
			candidate2 := e.table[nextHash2]
			repIndex := s - offset1 + 2

			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
			e.markShardDirty(nextHash2)

			if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
				// Consider history as well.
				var seq seq
				var length int32
				// length = 4 + e.matchlen(s+6, repIndex+4, src)
				{
					a := src[s+6:]
					b := src[repIndex+4:]
					endI := len(a) & (math.MaxInt32 - 7)
					length = int32(endI) + 4
					for i := 0; i < endI; i += 8 {
						if diff := load64(a, i) ^ load64(b, i); diff != 0 {
							length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
							break
						}
					}
				}

				seq.matchLen = uint32(length - zstdMinMatch)

				// We might be able to match backwards.
				// Extend as long as we can.
				start := s + 2
				// We end the search early, so we don't risk 0 literals,
				// which would require special offset treatment.
				startLimit := nextEmit + 1

				sMin := s - e.maxMatchOff
				if sMin < 0 {
					sMin = 0
				}
				for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
					repIndex--
					start--
					seq.matchLen++
				}
				addLiterals(&seq, start)

				// rep 0
				seq.offset = 1
				if debugSequences {
					println("repeat sequence", seq, "next s:", s)
				}
				blk.sequences = append(blk.sequences, seq)
				s += length + 2
				nextEmit = s
				if s >= sLimit {
					if debug {
						println("repeat ended", s, length)
					}
					break encodeLoop
				}
				cv = load6432(src, s)
				continue
			}
			coffset0 := s - (candidate.offset - e.cur)
			coffset1 := s - (candidate2.offset - e.cur) + 1
			if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
				// found a regular match
				t = candidate.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				break
			}

			if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
				// found a regular match
				t = candidate2.offset - e.cur
				s++
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				break
			}
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}
		// A 4-byte match has been found. We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		var l int32
		{
			a := src[s+4:]
			b := src[t+4:]
			endI := len(a) & (math.MaxInt32 - 7)
			l = int32(endI) + 4
			for i := 0; i < endI; i += 8 {
				if diff := load64(a, i) ^ load64(b, i); diff != 0 {
					l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
					break
				}
			}
		}

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence.
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		// Don't use repeat offsets
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}
		cv = load6432(src, s)

		// Check offset 2
		if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
			// We have at least a 4-byte match.
			// No need to check backwards. We come straight from a match.
			//l := 4 + e.matchlen(s+4, o2+4, src)
			var l int32
			{
				a := src[s+4:]
				b := src[o2+4:]
				endI := len(a) & (math.MaxInt32 - 7)
				l = int32(endI) + 4
				for i := 0; i < endI; i += 8 {
					if diff := load64(a, i) ^ load64(b, i); diff != 0 {
						l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
						break
					}
				}
			}

			// Store this, since we have it.
			nextHash := hash6(cv, hashLog)
			e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.markShardDirty(nextHash)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0
			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				break encodeLoop
			}
			// Prepare next loop.
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// Reset will reset the encoder. fastEncoder does not support dictionaries,
// so d must be nil.
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d != nil {
		panic("fastEncoder: Reset with dict")
	}
}

// Reset will reset the encoder and set a dictionary, if not nil.
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
	e.resetBase(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
		if len(e.dictTable) != len(e.table) {
			e.dictTable = make([]tableEntry, len(e.table))
		}
		end := e.maxMatchOff + int32(len(d.content)) - 8
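		// Hash three consecutive positions per 3-byte step, so every
		// position of the dictionary content ends up indexed once.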
		for i := e.maxMatchOff; i < end; i += 3 {
			const hashLog = tableBits

			cv := load6432(d.content, i-e.maxMatchOff)
			nextHash := hash6(cv, hashLog)      // 0 -> 5
			nextHash1 := hash6(cv>>8, hashLog)  // 1 -> 6
			nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
			e.dictTable[nextHash] = tableEntry{
				val:    uint32(cv),
				offset: i,
			}
			e.dictTable[nextHash1] = tableEntry{
				val:    uint32(cv >> 8),
				offset: i + 1,
			}
			e.dictTable[nextHash2] = tableEntry{
				val:    uint32(cv >> 16),
				offset: i + 2,
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}

	e.cur = e.maxMatchOff
	dirtyShardCnt := 0
	if !e.allDirty {
		for i := range e.tableShardDirty {
			if e.tableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	const shardCnt = tableShardCnt
	const shardSize = tableShardSize
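	// If more than about two thirds of the shards are dirty, a single
	// full-table copy is cheaper than copying shard by shard.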
	if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
		copy(e.table[:], e.dictTable)
		for i := range e.tableShardDirty {
			e.tableShardDirty[i] = false
		}
		e.allDirty = false
		return
	}
	for i := range e.tableShardDirty {
		if !e.tableShardDirty[i] {
			continue
		}

		copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
		e.tableShardDirty[i] = false
	}
	e.allDirty = false
}

// markAllShardsDirty forces the entire table to be reloaded from the
// dictionary on the next Reset.
func (e *fastEncoderDict) markAllShardsDirty() {
	e.allDirty = true
}

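// markShardDirty records that the shard containing entryNum no longer
// matches dictTable and must be restored on the next Reset.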
func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
	e.tableShardDirty[entryNum/tableShardSize] = true
}