// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	dFastLongTableBits = 17                      // Bits used in the long match table
	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.

	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
	dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt   // Size of an individual shard

	dFastShortTableBits = tableBits                // Bits used in the short match table
	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
)
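
// The double-fast encoder keeps two hash tables: a short table indexed by a
// 5-byte hash (hash5, dFastShortTableBits = tableBits, shared with the fast
// encoder) and a long table indexed by an 8-byte hash (hash8, 17 bits, so
// 1<<17 = 131072 entries). Both store a tableEntry holding the absolute
// offset plus the first 4 bytes, so a probe can be verified cheaply before
// the match is extended.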

type doubleFastEncoder struct {
	fastEncoder
	longTable [dFastLongTableSize]tableEntry
}

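// doubleFastEncoderDict adds dictionary support: dictLongTable keeps a
// pristine copy of the long table as built from the dictionary, and
// longTableShardDirty tracks which shards of longTable have been overwritten
// since the last Reset, so only those shards need to be restored.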
type doubleFastEncoderDict struct {
	fastEncoderDict
	longTable           [dFastLongTableSize]tableEntry
	dictLongTable       []tableEntry
	longTableShardDirty [dLongTableShardCnt]bool
}

// Encode mimics functionality in zstd_dfast.c
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}
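
	// After the shift above, offsets are rebased so the window starts at
	// e.maxMatchOff: a live entry at absolute offset v becomes
	// v-e.cur+e.maxMatchOff, and entries older than minOff are zeroed. A
	// zeroed offset always yields coffset >= e.maxMatchOff below, so it can
	// never be mistaken for a valid match.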

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

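	// Each encodeLoop iteration finds one match: a repeat-offset probe
	// against offset1 first, then the long table, then the short table
	// (optionally upgraded to a long match at s+1). The match is extended
	// both ways, emitted as a sequence, the tables are refreshed around the
	// match ends, and an immediate offset2 repeat is tried before searching
	// again.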
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

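			// The stride below implements zstd's "search strength": the
			// longer we go without a match (s - nextEmit), the bigger the
			// skip, so incompressible stretches are scanned in growing steps
			// while matching regions keep an effective step of stepSize.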
			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hash5(cv0, dFastShortTableBits)] = te0
		e.table[hash5(cv1, dFastShortTableBits)] = te1
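		// The stores above refresh only four positions: match start+1 and
		// end-2 in the long table, start+2 and end-1 in the short table (the
		// >>= 8 shift advances each sampled value one byte). Indexing every
		// position of a long match would cost speed for what is typically
		// little extra ratio.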

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}
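
// A minimal usage sketch (assuming only the public API of this package:
// NewWriter, WithEncoderLevel and SpeedDefault, the level that selects this
// double-fast encoder):
//
//	enc, err := zstd.NewWriter(dst, zstd.WithEncoderLevel(zstd.SpeedDefault))
//	if err != nil {
//		return err
//	}
//	if _, err = enc.Write(src); err != nil {
//		enc.Close()
//		return err
//	}
//	return enc.Close()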

// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		for i := range e.longTable[:] {
			e.longTable[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		for {
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if len(blk.sequences) > 2 {
				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hash5(cv0, dFastShortTableBits)] = te0
		e.table[hash5(cv1, dFastShortTableBits)] = te1

		cv = load6432(src, s)

		if len(blk.sequences) <= 2 {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv1>>8, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlen(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}

	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}
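
// EncodeNoHist is used on the single-segment path (typically EncodeAll with
// nothing carried between blocks). Since src is not retained, bumping e.cur
// above guarantees stale table entries cannot alias into whatever the next
// caller encodes.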

// Encode will encode the content, with a dictionary if initialized for it.
func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.markAllShardsDirty()
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.markAllShardsDirty()
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debug {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debug {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hash8(cv, dFastLongTableBits)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				e.markLongShardDirty(nextHashL)
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t >e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		longHash1 := hash8(cv0, dFastLongTableBits)
		longHash2 := hash8(cv1, dFastLongTableBits)
		e.longTable[longHash1] = te0
		e.longTable[longHash2] = te1
		e.markLongShardDirty(longHash1)
		e.markLongShardDirty(longHash2)
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		hashVal1 := hash5(cv0, dFastShortTableBits)
		hashVal2 := hash5(cv1, dFastShortTableBits)
		e.table[hashVal1] = te0
		e.markShardDirty(hashVal1)
		e.table[hashVal2] = te1
		e.markShardDirty(hashVal2)

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hash5(cv, dFastShortTableBits)
			nextHashL := hash8(cv, dFastLongTableBits)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debug {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// If we encoded more than 64K mark all dirty.
	if len(src) > 64<<10 {
		e.markAllShardsDirty()
	}
}

// Reset will reset and set a dictionary if not nil
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
	e.fastEncoder.Reset(d, singleBlock)
	if d != nil {
		panic("doubleFastEncoder: Reset with dict not supported")
	}
}
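
// The plain doubleFastEncoder is only selected when no dictionary is in
// play; dictionary support at this level lives in doubleFastEncoderDict
// below, so a non-nil dict reaching this Reset indicates a wiring bug.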

// Reset will reset and set a dictionary if not nil
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
	allDirty := e.allDirty
	e.fastEncoderDict.Reset(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]tableEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
				val:    uint32(cv),
				offset: e.maxMatchOff,
			}
			end := int32(len(d.content)) - 8 + e.maxMatchOff
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
				e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}
	// Reset table to initial state
	e.cur = e.maxMatchOff

	dirtyShardCnt := 0
	if !allDirty {
		for i := range e.longTableShardDirty {
			if e.longTableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
		copy(e.longTable[:], e.dictLongTable)
		for i := range e.longTableShardDirty {
			e.longTableShardDirty[i] = false
		}
		return
	}
	for i := range e.longTableShardDirty {
		if !e.longTableShardDirty[i] {
			continue
		}

		copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
		e.longTableShardDirty[i] = false
	}
}
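
// markLongShardDirty records that the shard containing the given long-table
// entry no longer matches dictLongTable and must be restored on the next
// Reset with the same dictionary.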
func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}
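
// Illustrative shard arithmetic (assuming dictShardBits = 6, as used for the
// fast-encoder tables): dLongTableShardCnt = 1<<(17-6) = 2048 shards and
// dLongTableShardSize = (1<<17)/2048 = 64 entries per shard, so updating
// long-table entry n marks shard n/64 dirty, and Reset recopies only those
// 64-entry windows from dictLongTable.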