// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.

package zstd

import "fmt"

const (
	dFastLongTableBits = 17                      // Bits used in the long match table
	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	dFastLongLen       = 8                       // Bytes used for table hash

	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
	dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt   // Size of an individual shard

	dFastShortTableBits = tableBits                // Bits used in the short match table
	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	dFastShortLen       = 5                        // Bytes used for table hash

)

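// hashLen (defined elsewhere in this package) hashes the low n bytes of a
// 64-bit load into a table index of the given bit width; the long table
// hashes dFastLongLen (8) bytes, the short table dFastShortLen (5).

// doubleFastEncoder extends fastEncoder with a second, larger table indexed
// by 8-byte hashes, so longer matches can be found before falling back to
// the short table.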
type doubleFastEncoder struct {
	fastEncoder
	longTable [dFastLongTableSize]tableEntry
}

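// doubleFastEncoderDict is the dictionary-aware variant. It keeps a pristine
// long table built from the dictionary (dictLongTable) and tracks which
// shards of the live table have been overwritten, so Reset can restore only
// the dirty shards.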
type doubleFastEncoderDict struct {
	fastEncoderDict
	longTable           [dFastLongTableSize]tableEntry
	dictLongTable       []tableEntry
	longTableShardDirty [dLongTableShardCnt]bool
}

// Encode mimics functionality in zstd_dfast.c
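//
// A minimal sketch of the assumed calling pattern (illustrative only; the
// Encoder normally drives this internally, and the setup shown is an
// assumption, not a documented API):
//
//	var e doubleFastEncoder
//	e.Reset(nil, false) // no dictionary
//	blk := &blockEnc{}
//	blk.init()
//	e.Encode(blk, src) // fills blk.literals and blk.sequences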
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
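		// Entries are rebased so that, with e.cur reset to e.maxMatchOff,
		// the s-(entry.offset-e.cur) arithmetic keeps working; anything that
		// can no longer be referenced is zeroed out.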
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

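	// Main loop: each iteration finds one match (repeat offset, long, or
	// short upgraded to long at s+1), emits the pending literals plus a
	// sequence, then re-indexes both match tables around the match.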
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
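			// The skip grows with the distance since the last emit: with
			// kSearchStrength == 8 that is one extra byte per 128 unmatched
			// bytes, so incompressible data is skimmed quickly while
			// compressible regions are scanned byte by byte.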
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
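		// +3 because offset values 1-3 are reserved for repeat offsets;
		// a new absolute offset is therefore stored as distance+3.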
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
}

// EncodeNoHist will encode a block with no history and no following blocks.
// The most notable differences are that src will not be copied for history,
// and we do not need to check for max match length.
func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	if e.cur >= bufferReset {
		for i := range e.table[:] {
			e.table[i] = tableEntry{}
		}
		for i := range e.longTable[:] {
			e.longTable[i] = tableEntry{}
		}
		e.cur = e.maxMatchOff
	}

	s := int32(0)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

encodeLoop:
	for {
		var t int32
		for {

			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry

			if len(blk.sequences) > 2 {
				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		// Extend the 4-byte match as long as possible.
		//l := e.matchlen(s+4, t+4, src) + 4
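		// As the function comment notes, no max match length check is needed
		// without history, so the raw matchLen helper is used directly.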
		l := int32(matchLen(src[s+4:], src[t+4:])) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
		e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
		e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1

		cv = load6432(src, s)

		if len(blk.sequences) <= 2 {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			//l := 4 + e.matchlen(s+4, o2+4, src)
			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.table[nextHashS] = entry
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}

	// We do not store history, so we must offset e.cur to avoid false matches for next user.
	if e.cur < bufferReset {
		e.cur += int32(len(src))
	}
}

// Encode will encode the content, with a dictionary if initialized for it.
func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
	const (
		// Input margin is the number of bytes we read (8)
		// and the maximum we will read ahead (2)
		inputMargin            = 8 + 2
		minNonLiteralBlockSize = 16
	)

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			for i := range e.longTable[:] {
				e.longTable[i] = tableEntry{}
			}
			e.markAllShardsDirty()
			e.cur = e.maxMatchOff
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
		for i := range e.table[:] {
			v := e.table[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.table[i].offset = v
		}
		for i := range e.longTable[:] {
			v := e.longTable[i].offset
			if v < minOff {
				v = 0
			} else {
				v = v - e.cur + e.maxMatchOff
			}
			e.longTable[i].offset = v
		}
		e.markAllShardsDirty()
		e.cur = e.maxMatchOff
		break
	}

	s := e.addBlock(src)
	blk.size = len(src)
	if len(src) < minNonLiteralBlockSize {
		blk.extraLits = len(src)
		blk.literals = blk.literals[:len(src)]
		copy(blk.literals, src)
		return
	}

	// Override src
	src = e.hist
	sLimit := int32(len(src)) - inputMargin
	// stepSize is the number of bytes to skip on every main loop iteration.
	// It should be >= 1.
	const stepSize = 1

	const kSearchStrength = 8

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := s
	cv := load6432(src, s)

	// Relative offsets
	offset1 := int32(blk.recentOffsets[0])
	offset2 := int32(blk.recentOffsets[1])

	addLiterals := func(s *seq, until int32) {
		if until == nextEmit {
			return
		}
		blk.literals = append(blk.literals, src[nextEmit:until]...)
		s.litLen = uint32(until - nextEmit)
	}
	if debugEncoder {
		println("recent offsets:", blk.recentOffsets)
	}

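	// Every table write below is paired with a markShardDirty /
	// markLongShardDirty call, so that Reset can later restore just the
	// overwritten shards from the dictionary tables.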
encodeLoop:
	for {
		var t int32
		// We allow the encoder to optionally turn off repeat offsets across blocks
		canRepeat := len(blk.sequences) > 2

		for {
			if debugAsserts && canRepeat && offset1 == 0 {
				panic("offset0 was 0")
			}

			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
			candidateL := e.longTable[nextHashL]
			candidateS := e.table[nextHashS]

			const repOff = 1
			repIndex := s - offset1 + repOff
			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)

			if canRepeat {
				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
					// Consider history as well.
					var seq seq
					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)

					seq.matchLen = uint32(length - zstdMinMatch)

					// We might be able to match backwards.
					// Extend as long as we can.
					start := s + repOff
					// We end the search early, so we don't risk 0 literals
					// and have to do special offset treatment.
					startLimit := nextEmit + 1

					tMin := s - e.maxMatchOff
					if tMin < 0 {
						tMin = 0
					}
					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
						repIndex--
						start--
						seq.matchLen++
					}
					addLiterals(&seq, start)

					// rep 0
					seq.offset = 1
					if debugSequences {
						println("repeat sequence", seq, "next s:", s)
					}
					blk.sequences = append(blk.sequences, seq)
					s += length + repOff
					nextEmit = s
					if s >= sLimit {
						if debugEncoder {
							println("repeat ended", s, length)
						}
						break encodeLoop
					}
					cv = load6432(src, s)
					continue
				}
			}
			// Find the offsets of our two matches.
			coffsetL := s - (candidateL.offset - e.cur)
			coffsetS := s - (candidateS.offset - e.cur)

			// Check if we have a long match.
			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
				// Found a long match, likely at least 8 bytes.
				// Reference encoder checks all 8 bytes, we only check 4,
				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
				t = candidateL.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugMatches {
					println("long match")
				}
				break
			}

			// Check if we have a short match.
			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
				// found a regular match
				// See if we can find a long match at s+1
				const checkAt = 1
				cv := load6432(src, s+checkAt)
				nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
				candidateL = e.longTable[nextHashL]
				coffsetL = s - (candidateL.offset - e.cur) + checkAt

				// We can store it, since we have at least a 4 byte match.
				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
				e.markLongShardDirty(nextHashL)
				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
					// Found a long match, likely at least 8 bytes.
					// Reference encoder checks all 8 bytes, we only check 4,
					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
					t = candidateL.offset - e.cur
					s += checkAt
					if debugMatches {
						println("long match (after short)")
					}
					break
				}

				t = candidateS.offset - e.cur
				if debugAsserts && s <= t {
					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
				}
				if debugAsserts && s-t > e.maxMatchOff {
					panic("s - t > e.maxMatchOff")
				}
				if debugAsserts && t < 0 {
					panic("t<0")
				}
				if debugMatches {
					println("short match")
				}
				break
			}

			// No match found, move forward in input.
			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
			if s >= sLimit {
				break encodeLoop
			}
			cv = load6432(src, s)
		}

		// A 4-byte match has been found. Update recent offsets.
		// We'll later see if more than 4 bytes.
		offset2 = offset1
		offset1 = s - t

		if debugAsserts && s <= t {
			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
		}

		if debugAsserts && canRepeat && int(offset1) > len(src) {
			panic("invalid offset")
		}

		// Extend the 4-byte match as long as possible.
		l := e.matchlen(s+4, t+4, src) + 4

		// Extend backwards
		tMin := s - e.maxMatchOff
		if tMin < 0 {
			tMin = 0
		}
		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
			s--
			t--
			l++
		}

		// Write our sequence
		var seq seq
		seq.litLen = uint32(s - nextEmit)
		seq.matchLen = uint32(l - zstdMinMatch)
		if seq.litLen > 0 {
			blk.literals = append(blk.literals, src[nextEmit:s]...)
		}
		seq.offset = uint32(s-t) + 3
		s += l
		if debugSequences {
			println("sequence", seq, "next s:", s)
		}
		blk.sequences = append(blk.sequences, seq)
		nextEmit = s
		if s >= sLimit {
			break encodeLoop
		}

		// Index match start+1 (long) and start+2 (short)
		index0 := s - l + 1
		// Index match end-2 (long) and end-1 (short)
		index1 := s - 2

		cv0 := load6432(src, index0)
		cv1 := load6432(src, index1)
		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
		longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
		longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
		e.longTable[longHash1] = te0
		e.longTable[longHash2] = te1
		e.markLongShardDirty(longHash1)
		e.markLongShardDirty(longHash2)
		cv0 >>= 8
		cv1 >>= 8
		te0.offset++
		te1.offset++
		te0.val = uint32(cv0)
		te1.val = uint32(cv1)
		hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
		hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
		e.table[hashVal1] = te0
		e.markShardDirty(hashVal1)
		e.table[hashVal2] = te1
		e.markShardDirty(hashVal2)

		cv = load6432(src, s)

		if !canRepeat {
			continue
		}

		// Check offset 2
		for {
			o2 := s - offset2
			if load3232(src, o2) != uint32(cv) {
				// Do regular search
				break
			}

			// Store this, since we have it.
			nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
			nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)

			// We have at least 4 byte match.
			// No need to check backwards. We come straight from a match
			l := 4 + e.matchlen(s+4, o2+4, src)

			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
			e.longTable[nextHashL] = entry
			e.markLongShardDirty(nextHashL)
			e.table[nextHashS] = entry
			e.markShardDirty(nextHashS)
			seq.matchLen = uint32(l) - zstdMinMatch
			seq.litLen = 0

			// Since litlen is always 0, this is offset 1.
			seq.offset = 1
			s += l
			nextEmit = s
			if debugSequences {
				println("sequence", seq, "next s:", s)
			}
			blk.sequences = append(blk.sequences, seq)

			// Swap offset 1 and 2.
			offset1, offset2 = offset2, offset1
			if s >= sLimit {
				// Finished
				break encodeLoop
			}
			cv = load6432(src, s)
		}
	}

	if int(nextEmit) < len(src) {
		blk.literals = append(blk.literals, src[nextEmit:]...)
		blk.extraLits = len(src) - int(nextEmit)
	}
	blk.recentOffsets[0] = uint32(offset1)
	blk.recentOffsets[1] = uint32(offset2)
	if debugEncoder {
		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
	}
	// If we encoded more than 64K mark all dirty.
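	// (With that much input most shards have likely been touched anyway,
	// so a full restore on Reset beats per-shard tracking.)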
	if len(src) > 64<<10 {
		e.markAllShardsDirty()
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
	e.fastEncoder.Reset(d, singleBlock)
	if d != nil {
		panic("doubleFastEncoder: Reset with dict not supported")
	}
}

// Reset will reset and set a dictionary if not nil.
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
	allDirty := e.allDirty
	e.fastEncoderDict.Reset(d, singleBlock)
	if d == nil {
		return
	}

	// Init or copy dict table
	if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
		if len(e.dictLongTable) != len(e.longTable) {
			e.dictLongTable = make([]tableEntry, len(e.longTable))
		}
		if len(d.content) >= 8 {
			cv := load6432(d.content, 0)
			e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
				val:    uint32(cv),
				offset: e.maxMatchOff,
			}
			end := int32(len(d.content)) - 8 + e.maxMatchOff
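			// Slide the 8-byte window forward one byte at a time, indexing
			// every dictionary position in the long table.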
			for i := e.maxMatchOff + 1; i < end; i++ {
				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
				e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
					val:    uint32(cv),
					offset: i,
				}
			}
		}
		e.lastDictID = d.id
		e.allDirty = true
	}
	// Reset table to initial state
	e.cur = e.maxMatchOff

	dirtyShardCnt := 0
	if !allDirty {
		for i := range e.longTableShardDirty {
			if e.longTableShardDirty[i] {
				dirtyShardCnt++
			}
		}
	}

	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
		copy(e.longTable[:], e.dictLongTable)
		for i := range e.longTableShardDirty {
			e.longTableShardDirty[i] = false
		}
		return
	}
	for i := range e.longTableShardDirty {
		if !e.longTableShardDirty[i] {
			continue
		}

		copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
		e.longTableShardDirty[i] = false
	}
}

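// markLongShardDirty flags the long-table shard containing entryNum so that
// Reset restores it from dictLongTable before the next use.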
func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
	e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}