blob: 507757d525a1a4ad912244f97c11247a22ae873f [file] [log] [blame]
Scott Bakered4efab2020-01-13 19:12:25 -08001// Copyright 2019+ Klaus Post. All rights reserved.
2// License information can be found in the LICENSE file.
3// Based on work by Yann Collet, released under BSD License.
4
5package zstd
6
7import (
8 "errors"
9 "fmt"
10 "math"
11 "math/bits"
12
13 "github.com/klauspost/compress/huff0"
14)
15
16type blockEnc struct {
17 size int
18 literals []byte
19 sequences []seq
20 coders seqCoders
21 litEnc *huff0.Scratch
22 wr bitWriter
23
24 extraLits int
25 last bool
26
27 output []byte
28 recentOffsets [3]uint32
29 prevRecentOffsets [3]uint32
30}
31
32// init should be used once the block has been created.
33// If called more than once, the effect is the same as calling reset.
34func (b *blockEnc) init() {
35 if cap(b.literals) < maxCompressedLiteralSize {
36 b.literals = make([]byte, 0, maxCompressedLiteralSize)
37 }
38 const defSeqs = 200
39 b.literals = b.literals[:0]
40 if cap(b.sequences) < defSeqs {
41 b.sequences = make([]seq, 0, defSeqs)
42 }
43 if cap(b.output) < maxCompressedBlockSize {
44 b.output = make([]byte, 0, maxCompressedBlockSize)
45 }
46 if b.coders.mlEnc == nil {
47 b.coders.mlEnc = &fseEncoder{}
48 b.coders.mlPrev = &fseEncoder{}
49 b.coders.ofEnc = &fseEncoder{}
50 b.coders.ofPrev = &fseEncoder{}
51 b.coders.llEnc = &fseEncoder{}
52 b.coders.llPrev = &fseEncoder{}
53 }
54 b.litEnc = &huff0.Scratch{WantLogLess: 4}
55 b.reset(nil)
56}
57
58// initNewEncode can be used to reset offsets and encoders to the initial state.
59func (b *blockEnc) initNewEncode() {
60 b.recentOffsets = [3]uint32{1, 4, 8}
61 b.litEnc.Reuse = huff0.ReusePolicyNone
62 b.coders.setPrev(nil, nil, nil)
63}
64
65// reset will reset the block for a new encode, but in the same stream,
66// meaning that state will be carried over, but the block content is reset.
67// If a previous block is provided, the recent offsets are carried over.
68func (b *blockEnc) reset(prev *blockEnc) {
69 b.extraLits = 0
70 b.literals = b.literals[:0]
71 b.size = 0
72 b.sequences = b.sequences[:0]
73 b.output = b.output[:0]
74 b.last = false
75 if prev != nil {
76 b.recentOffsets = prev.prevRecentOffsets
77 }
78}
79
80// reset will reset the block for a new encode, but in the same stream,
81// meaning that state will be carried over, but the block content is reset.
82// If a previous block is provided, the recent offsets are carried over.
83func (b *blockEnc) swapEncoders(prev *blockEnc) {
84 b.coders.swap(&prev.coders)
85 b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
86}
87
88// blockHeader contains the information for a block header.
89type blockHeader uint32
90
91// setLast sets the 'last' indicator on a block.
92func (h *blockHeader) setLast(b bool) {
93 if b {
94 *h = *h | 1
95 } else {
96 const mask = (1 << 24) - 2
97 *h = *h & mask
98 }
99}
100
101// setSize will store the compressed size of a block.
102func (h *blockHeader) setSize(v uint32) {
103 const mask = 7
104 *h = (*h)&mask | blockHeader(v<<3)
105}
106
107// setType sets the block type.
108func (h *blockHeader) setType(t blockType) {
109 const mask = 1 | (((1 << 24) - 1) ^ 7)
110 *h = (*h & mask) | blockHeader(t<<1)
111}
112
113// appendTo will append the block header to a slice.
114func (h blockHeader) appendTo(b []byte) []byte {
115 return append(b, uint8(h), uint8(h>>8), uint8(h>>16))
116}
117
118// String returns a string representation of the block.
119func (h blockHeader) String() string {
120 return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1)
121}
122
123// literalsHeader contains literals header information.
124type literalsHeader uint64
125
126// setType can be used to set the type of literal block.
127func (h *literalsHeader) setType(t literalsBlockType) {
128 const mask = math.MaxUint64 - 3
129 *h = (*h & mask) | literalsHeader(t)
130}
131
132// setSize can be used to set a single size, for uncompressed and RLE content.
133func (h *literalsHeader) setSize(regenLen int) {
134 inBits := bits.Len32(uint32(regenLen))
135 // Only retain 2 bits
136 const mask = 3
137 lh := uint64(*h & mask)
138 switch {
139 case inBits < 5:
140 lh |= (uint64(regenLen) << 3) | (1 << 60)
141 if debug {
142 got := int(lh>>3) & 0xff
143 if got != regenLen {
144 panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
145 }
146 }
147 case inBits < 12:
148 lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
149 case inBits < 20:
150 lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
151 default:
152 panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
153 }
154 *h = literalsHeader(lh)
155}
156
157// setSizes will set the size of a compressed literals section and the input length.
158func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
159 compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
160 // Only retain 2 bits
161 const mask = 3
162 lh := uint64(*h & mask)
163 switch {
164 case compBits <= 10 && inBits <= 10:
165 if !single {
166 lh |= 1 << 2
167 }
168 lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
169 if debug {
170 const mmask = (1 << 24) - 1
171 n := (lh >> 4) & mmask
172 if int(n&1023) != inLen {
173 panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
174 }
175 if int(n>>10) != compLen {
176 panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
177 }
178 }
179 case compBits <= 14 && inBits <= 14:
180 lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
181 if single {
182 panic("single stream used with more than 10 bits length.")
183 }
184 case compBits <= 18 && inBits <= 18:
185 lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
186 if single {
187 panic("single stream used with more than 10 bits length.")
188 }
189 default:
190 panic("internal error: block too big")
191 }
192 *h = literalsHeader(lh)
193}
194
195// appendTo will append the literals header to a byte slice.
196func (h literalsHeader) appendTo(b []byte) []byte {
197 size := uint8(h >> 60)
198 switch size {
199 case 1:
200 b = append(b, uint8(h))
201 case 2:
202 b = append(b, uint8(h), uint8(h>>8))
203 case 3:
204 b = append(b, uint8(h), uint8(h>>8), uint8(h>>16))
205 case 4:
206 b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24))
207 case 5:
208 b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32))
209 default:
210 panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
211 }
212 return b
213}
214
215// size returns the output size with currently set values.
216func (h literalsHeader) size() int {
217 return int(h >> 60)
218}
219
220func (h literalsHeader) String() string {
221 return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60)
222}
223
224// pushOffsets will push the recent offsets to the backup store.
225func (b *blockEnc) pushOffsets() {
226 b.prevRecentOffsets = b.recentOffsets
227}
228
229// pushOffsets will push the recent offsets to the backup store.
230func (b *blockEnc) popOffsets() {
231 b.recentOffsets = b.prevRecentOffsets
232}
233
234// matchOffset will adjust recent offsets and return the adjusted one,
235// if it matches a previous offset.
236func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
237 // Check if offset is one of the recent offsets.
238 // Adjusts the output offset accordingly.
239 // Gives a tiny bit of compression, typically around 1%.
240 if true {
241 if lits > 0 {
242 switch offset {
243 case b.recentOffsets[0]:
244 offset = 1
245 case b.recentOffsets[1]:
246 b.recentOffsets[1] = b.recentOffsets[0]
247 b.recentOffsets[0] = offset
248 offset = 2
249 case b.recentOffsets[2]:
250 b.recentOffsets[2] = b.recentOffsets[1]
251 b.recentOffsets[1] = b.recentOffsets[0]
252 b.recentOffsets[0] = offset
253 offset = 3
254 default:
255 b.recentOffsets[2] = b.recentOffsets[1]
256 b.recentOffsets[1] = b.recentOffsets[0]
257 b.recentOffsets[0] = offset
258 offset += 3
259 }
260 } else {
261 switch offset {
262 case b.recentOffsets[1]:
263 b.recentOffsets[1] = b.recentOffsets[0]
264 b.recentOffsets[0] = offset
265 offset = 1
266 case b.recentOffsets[2]:
267 b.recentOffsets[2] = b.recentOffsets[1]
268 b.recentOffsets[1] = b.recentOffsets[0]
269 b.recentOffsets[0] = offset
270 offset = 2
271 case b.recentOffsets[0] - 1:
272 b.recentOffsets[2] = b.recentOffsets[1]
273 b.recentOffsets[1] = b.recentOffsets[0]
274 b.recentOffsets[0] = offset
275 offset = 3
276 default:
277 b.recentOffsets[2] = b.recentOffsets[1]
278 b.recentOffsets[1] = b.recentOffsets[0]
279 b.recentOffsets[0] = offset
280 offset += 3
281 }
282 }
283 } else {
284 offset += 3
285 }
286 return offset
287}
288
289// encodeRaw can be used to set the output to a raw representation of supplied bytes.
290func (b *blockEnc) encodeRaw(a []byte) {
291 var bh blockHeader
292 bh.setLast(b.last)
293 bh.setSize(uint32(len(a)))
294 bh.setType(blockTypeRaw)
295 b.output = bh.appendTo(b.output[:0])
296 b.output = append(b.output, a...)
297 if debug {
298 println("Adding RAW block, length", len(a))
299 }
300}
301
302// encodeRaw can be used to set the output to a raw representation of supplied bytes.
303func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
304 var bh blockHeader
305 bh.setLast(b.last)
306 bh.setSize(uint32(len(src)))
307 bh.setType(blockTypeRaw)
308 dst = bh.appendTo(dst)
309 dst = append(dst, src...)
310 if debug {
311 println("Adding RAW block, length", len(src))
312 }
313 return dst
314}
315
316// encodeLits can be used if the block is only litLen.
317func (b *blockEnc) encodeLits(raw bool) error {
318 var bh blockHeader
319 bh.setLast(b.last)
320 bh.setSize(uint32(len(b.literals)))
321
322 // Don't compress extremely small blocks
323 if len(b.literals) < 32 || raw {
324 if debug {
325 println("Adding RAW block, length", len(b.literals))
326 }
327 bh.setType(blockTypeRaw)
328 b.output = bh.appendTo(b.output)
329 b.output = append(b.output, b.literals...)
330 return nil
331 }
332
333 var (
334 out []byte
335 reUsed, single bool
336 err error
337 )
338 if len(b.literals) >= 1024 {
339 // Use 4 Streams.
340 out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
341 } else if len(b.literals) > 32 {
342 // Use 1 stream
343 single = true
344 out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
345 } else {
346 err = huff0.ErrIncompressible
347 }
348
349 switch err {
350 case huff0.ErrIncompressible:
351 if debug {
352 println("Adding RAW block, length", len(b.literals))
353 }
354 bh.setType(blockTypeRaw)
355 b.output = bh.appendTo(b.output)
356 b.output = append(b.output, b.literals...)
357 return nil
358 case huff0.ErrUseRLE:
359 if debug {
360 println("Adding RLE block, length", len(b.literals))
361 }
362 bh.setType(blockTypeRLE)
363 b.output = bh.appendTo(b.output)
364 b.output = append(b.output, b.literals[0])
365 return nil
366 default:
367 return err
368 case nil:
369 }
370 // Compressed...
371 // Now, allow reuse
372 b.litEnc.Reuse = huff0.ReusePolicyAllow
373 bh.setType(blockTypeCompressed)
374 var lh literalsHeader
375 if reUsed {
376 if debug {
377 println("Reused tree, compressed to", len(out))
378 }
379 lh.setType(literalsBlockTreeless)
380 } else {
381 if debug {
382 println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
383 }
384 lh.setType(literalsBlockCompressed)
385 }
386 // Set sizes
387 lh.setSizes(len(out), len(b.literals), single)
388 bh.setSize(uint32(len(out) + lh.size() + 1))
389
390 // Write block headers.
391 b.output = bh.appendTo(b.output)
392 b.output = lh.appendTo(b.output)
393 // Add compressed data.
394 b.output = append(b.output, out...)
395 // No sequences.
396 b.output = append(b.output, 0)
397 return nil
398}
399
400// fuzzFseEncoder can be used to fuzz the FSE encoder.
401func fuzzFseEncoder(data []byte) int {
402 if len(data) > maxSequences || len(data) < 2 {
403 return 0
404 }
405 enc := fseEncoder{}
406 hist := enc.Histogram()[:256]
407 maxSym := uint8(0)
408 for i, v := range data {
409 v = v & 63
410 data[i] = v
411 hist[v]++
412 if v > maxSym {
413 maxSym = v
414 }
415 }
416 if maxSym == 0 {
417 // All 0
418 return 0
419 }
420 maxCount := func(a []uint32) int {
421 var max uint32
422 for _, v := range a {
423 if v > max {
424 max = v
425 }
426 }
427 return int(max)
428 }
429 cnt := maxCount(hist[:maxSym])
430 if cnt == len(data) {
431 // RLE
432 return 0
433 }
434 enc.HistogramFinished(maxSym, cnt)
435 err := enc.normalizeCount(len(data))
436 if err != nil {
437 return 0
438 }
439 _, err = enc.writeCount(nil)
440 if err != nil {
441 panic(err)
442 }
443 return 1
444}
445
446// encode will encode the block and append the output in b.output.
447func (b *blockEnc) encode(raw bool) error {
448 if len(b.sequences) == 0 {
449 return b.encodeLits(raw)
450 }
451 // We want some difference
452 if len(b.literals) > (b.size - (b.size >> 5)) {
453 return errIncompressible
454 }
455
456 var bh blockHeader
457 var lh literalsHeader
458 bh.setLast(b.last)
459 bh.setType(blockTypeCompressed)
460 // Store offset of the block header. Needed when we know the size.
461 bhOffset := len(b.output)
462 b.output = bh.appendTo(b.output)
463
464 var (
465 out []byte
466 reUsed, single bool
467 err error
468 )
469 if len(b.literals) >= 1024 && !raw {
470 // Use 4 Streams.
471 out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
472 } else if len(b.literals) > 32 && !raw {
473 // Use 1 stream
474 single = true
475 out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
476 } else {
477 err = huff0.ErrIncompressible
478 }
479
480 switch err {
481 case huff0.ErrIncompressible:
482 lh.setType(literalsBlockRaw)
483 lh.setSize(len(b.literals))
484 b.output = lh.appendTo(b.output)
485 b.output = append(b.output, b.literals...)
486 if debug {
487 println("Adding literals RAW, length", len(b.literals))
488 }
489 case huff0.ErrUseRLE:
490 lh.setType(literalsBlockRLE)
491 lh.setSize(len(b.literals))
492 b.output = lh.appendTo(b.output)
493 b.output = append(b.output, b.literals[0])
494 if debug {
495 println("Adding literals RLE")
496 }
497 default:
498 if debug {
499 println("Adding literals ERROR:", err)
500 }
501 return err
502 case nil:
503 // Compressed litLen...
504 if reUsed {
505 if debug {
506 println("reused tree")
507 }
508 lh.setType(literalsBlockTreeless)
509 } else {
510 if debug {
511 println("new tree, size:", len(b.litEnc.OutTable))
512 }
513 lh.setType(literalsBlockCompressed)
514 if debug {
515 _, _, err := huff0.ReadTable(out, nil)
516 if err != nil {
517 panic(err)
518 }
519 }
520 }
521 lh.setSizes(len(out), len(b.literals), single)
522 if debug {
523 printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
524 println("Adding literal header:", lh)
525 }
526 b.output = lh.appendTo(b.output)
527 b.output = append(b.output, out...)
528 b.litEnc.Reuse = huff0.ReusePolicyAllow
529 if debug {
530 println("Adding literals compressed")
531 }
532 }
533 // Sequence compression
534
535 // Write the number of sequences
536 switch {
537 case len(b.sequences) < 128:
538 b.output = append(b.output, uint8(len(b.sequences)))
539 case len(b.sequences) < 0x7f00: // TODO: this could be wrong
540 n := len(b.sequences)
541 b.output = append(b.output, 128+uint8(n>>8), uint8(n))
542 default:
543 n := len(b.sequences) - 0x7f00
544 b.output = append(b.output, 255, uint8(n), uint8(n>>8))
545 }
546 if debug {
547 println("Encoding", len(b.sequences), "sequences")
548 }
549 b.genCodes()
550 llEnc := b.coders.llEnc
551 ofEnc := b.coders.ofEnc
552 mlEnc := b.coders.mlEnc
553 err = llEnc.normalizeCount(len(b.sequences))
554 if err != nil {
555 return err
556 }
557 err = ofEnc.normalizeCount(len(b.sequences))
558 if err != nil {
559 return err
560 }
561 err = mlEnc.normalizeCount(len(b.sequences))
562 if err != nil {
563 return err
564 }
565
566 // Choose the best compression mode for each type.
567 // Will evaluate the new vs predefined and previous.
568 chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
569 // See if predefined/previous is better
570 hist := cur.count[:cur.symbolLen]
571 nSize := cur.approxSize(hist) + cur.maxHeaderSize()
572 predefSize := preDef.approxSize(hist)
573 prevSize := prev.approxSize(hist)
574
575 // Add a small penalty for new encoders.
576 // Don't bother with extremely small (<2 byte gains).
577 nSize = nSize + (nSize+2*8*16)>>4
578 switch {
579 case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
580 if debug {
581 println("Using predefined", predefSize>>3, "<=", nSize>>3)
582 }
583 return preDef, compModePredefined
584 case prevSize <= nSize:
585 if debug {
586 println("Using previous", prevSize>>3, "<=", nSize>>3)
587 }
588 return prev, compModeRepeat
589 default:
590 if debug {
591 println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
592 println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
593 }
594 return cur, compModeFSE
595 }
596 }
597
598 // Write compression mode
599 var mode uint8
600 if llEnc.useRLE {
601 mode |= uint8(compModeRLE) << 6
602 llEnc.setRLE(b.sequences[0].llCode)
603 if debug {
604 println("llEnc.useRLE")
605 }
606 } else {
607 var m seqCompMode
608 llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
609 mode |= uint8(m) << 6
610 }
611 if ofEnc.useRLE {
612 mode |= uint8(compModeRLE) << 4
613 ofEnc.setRLE(b.sequences[0].ofCode)
614 if debug {
615 println("ofEnc.useRLE")
616 }
617 } else {
618 var m seqCompMode
619 ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
620 mode |= uint8(m) << 4
621 }
622
623 if mlEnc.useRLE {
624 mode |= uint8(compModeRLE) << 2
625 mlEnc.setRLE(b.sequences[0].mlCode)
626 if debug {
627 println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
628 }
629 } else {
630 var m seqCompMode
631 mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
632 mode |= uint8(m) << 2
633 }
634 b.output = append(b.output, mode)
635 if debug {
636 printf("Compression modes: 0b%b", mode)
637 }
638 b.output, err = llEnc.writeCount(b.output)
639 if err != nil {
640 return err
641 }
642 start := len(b.output)
643 b.output, err = ofEnc.writeCount(b.output)
644 if err != nil {
645 return err
646 }
647 if false {
648 println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
649 fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
650 for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
651 fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
652 }
653 }
654 b.output, err = mlEnc.writeCount(b.output)
655 if err != nil {
656 return err
657 }
658
659 // Maybe in block?
660 wr := &b.wr
661 wr.reset(b.output)
662
663 var ll, of, ml cState
664
665 // Current sequence
666 seq := len(b.sequences) - 1
667 s := b.sequences[seq]
668 llEnc.setBits(llBitsTable[:])
669 mlEnc.setBits(mlBitsTable[:])
670 ofEnc.setBits(nil)
671
672 llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]
673
674 // We have 3 bounds checks here (and in the loop).
675 // Since we are iterating backwards it is kinda hard to avoid.
676 llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
677 ll.init(wr, &llEnc.ct, llB)
678 of.init(wr, &ofEnc.ct, ofB)
679 wr.flush32()
680 ml.init(wr, &mlEnc.ct, mlB)
681
682 // Each of these lookups also generates a bounds check.
683 wr.addBits32NC(s.litLen, llB.outBits)
684 wr.addBits32NC(s.matchLen, mlB.outBits)
685 wr.flush32()
686 wr.addBits32NC(s.offset, ofB.outBits)
687 if debugSequences {
688 println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
689 }
690 seq--
691 if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
692 // No need to flush (common)
693 for seq >= 0 {
694 s = b.sequences[seq]
695 wr.flush32()
696 llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
697 // tabelog max is 8 for all.
698 of.encode(ofB)
699 ml.encode(mlB)
700 ll.encode(llB)
701 wr.flush32()
702
703 // We checked that all can stay within 32 bits
704 wr.addBits32NC(s.litLen, llB.outBits)
705 wr.addBits32NC(s.matchLen, mlB.outBits)
706 wr.addBits32NC(s.offset, ofB.outBits)
707
708 if debugSequences {
709 println("Encoded seq", seq, s)
710 }
711
712 seq--
713 }
714 } else {
715 for seq >= 0 {
716 s = b.sequences[seq]
717 wr.flush32()
718 llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
719 // tabelog max is below 8 for each.
720 of.encode(ofB)
721 ml.encode(mlB)
722 ll.encode(llB)
723 wr.flush32()
724
725 // ml+ll = max 32 bits total
726 wr.addBits32NC(s.litLen, llB.outBits)
727 wr.addBits32NC(s.matchLen, mlB.outBits)
728 wr.flush32()
729 wr.addBits32NC(s.offset, ofB.outBits)
730
731 if debugSequences {
732 println("Encoded seq", seq, s)
733 }
734
735 seq--
736 }
737 }
738 ml.flush(mlEnc.actualTableLog)
739 of.flush(ofEnc.actualTableLog)
740 ll.flush(llEnc.actualTableLog)
741 err = wr.close()
742 if err != nil {
743 return err
744 }
745 b.output = wr.out
746
747 if len(b.output)-3-bhOffset >= b.size {
748 // Maybe even add a bigger margin.
749 b.litEnc.Reuse = huff0.ReusePolicyNone
750 return errIncompressible
751 }
752
753 // Size is output minus block header.
754 bh.setSize(uint32(len(b.output)-bhOffset) - 3)
755 if debug {
756 println("Rewriting block header", bh)
757 }
758 _ = bh.appendTo(b.output[bhOffset:bhOffset])
759 b.coders.setPrev(llEnc, mlEnc, ofEnc)
760 return nil
761}
762
763var errIncompressible = errors.New("incompressible")
764
765func (b *blockEnc) genCodes() {
766 if len(b.sequences) == 0 {
767 // nothing to do
768 return
769 }
770
771 if len(b.sequences) > math.MaxUint16 {
772 panic("can only encode up to 64K sequences")
773 }
774 // No bounds checks after here:
775 llH := b.coders.llEnc.Histogram()[:256]
776 ofH := b.coders.ofEnc.Histogram()[:256]
777 mlH := b.coders.mlEnc.Histogram()[:256]
778 for i := range llH {
779 llH[i] = 0
780 }
781 for i := range ofH {
782 ofH[i] = 0
783 }
784 for i := range mlH {
785 mlH[i] = 0
786 }
787
788 var llMax, ofMax, mlMax uint8
789 for i, seq := range b.sequences {
790 v := llCode(seq.litLen)
791 seq.llCode = v
792 llH[v]++
793 if v > llMax {
794 llMax = v
795 }
796
797 v = ofCode(seq.offset)
798 seq.ofCode = v
799 ofH[v]++
800 if v > ofMax {
801 ofMax = v
802 }
803
804 v = mlCode(seq.matchLen)
805 seq.mlCode = v
806 mlH[v]++
807 if v > mlMax {
808 mlMax = v
809 if debug && mlMax > maxMatchLengthSymbol {
810 panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
811 }
812 }
813 b.sequences[i] = seq
814 }
815 maxCount := func(a []uint32) int {
816 var max uint32
817 for _, v := range a {
818 if v > max {
819 max = v
820 }
821 }
822 return int(max)
823 }
824 if mlMax > maxMatchLengthSymbol {
825 panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
826 }
827 if ofMax > maxOffsetBits {
828 panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
829 }
830 if llMax > maxLiteralLengthSymbol {
831 panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
832 }
833
834 b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
835 b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
836 b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
837}