[VOL-4291] Rw-core updates for gRPC migration
Change-Id: I8d5a554409115b29318089671ca4e1ab3fa98810
diff --git a/vendor/github.com/klauspost/compress/huff0/.gitignore b/vendor/github.com/klauspost/compress/huff0/.gitignore
new file mode 100644
index 0000000..b3d2629
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/.gitignore
@@ -0,0 +1 @@
+/huff0-fuzz.zip
diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md
new file mode 100644
index 0000000..8b6e5c6
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/README.md
@@ -0,0 +1,89 @@
+# Huff0 entropy compression
+
+This package provides Huff0 encoding and decoding as used in zstd.
+
+[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders),
+a Huffman codec designed for modern CPU, featuring OoO (Out of Order) operations on multiple ALU
+(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds.
+
+This can be used for compressing input with a lot of similar input values to the smallest number of bytes.
+This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders,
+but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding.
+
+* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
+
+## News
+
+This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package.
+
+This ensures that most functionality is well tested.
+
+# Usage
+
+This package provides a low level interface that allows to compress single independent blocks.
+
+Each block is separate, and there is no built in integrity checks.
+This means that the caller should keep track of block sizes and also do checksums if needed.
+
+Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and
+[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions.
+You must provide input and will receive the output and maybe an error.
+
+These error values can be returned:
+
+| Error | Description |
+|---------------------|-----------------------------------------------------------------------------|
+| `<nil>` | Everything ok, output is returned |
+| `ErrIncompressible` | Returned when input is judged to be too hard to compress |
+| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated |
+| `ErrTooBig` | Returned if the input block exceeds the maximum allowed size (128 Kib) |
+| `(error)` | An internal error occurred. |
+
+
+As can be seen above some of there are errors that will be returned even under normal operation so it is important to handle these.
+
+To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object
+that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same
+object can be used for both.
+
+Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this
+you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.
+
+The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding.
+
+## Tables and re-use
+
+Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results.
+
+The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy)
+that controls this behaviour. See the documentation for details. This can be altered between each block.
+
+Do however note that this information is *not* stored in the output block and it is up to the users of the package to
+record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called,
+based on the boolean reported back from the CompressXX call.
+
+If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the
+[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object.
+
+## Decompressing
+
+The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable).
+This will initialize the decoding tables.
+You can supply the complete block to `ReadTable` and it will return the data part of the block
+which can be given to the decompressor.
+
+Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
+or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
+
+For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size.
+
+You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
+your input was likely corrupted.
+
+It is important to note that a successful decoding does *not* mean your output matches your original input.
+There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
+
+# Contributing
+
+Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
+changes will likely not be accepted. If in doubt open an issue before writing the PR.
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
new file mode 100644
index 0000000..a4979e8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -0,0 +1,329 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+)
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReader struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReader) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.bitsRead += 8 - uint8(highBit32(uint32(v)))
+ return nil
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) peekBitsFast(n uint8) uint16 {
+ const regMask = 64 - 1
+ v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
+ return v
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReader) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+func (b *bitReader) advance(n uint8) {
+ b.bitsRead += n
+}
+
+// fillFastStart() assumes the bitreader is empty and there is at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReader) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value = (b.value << 8) | uint64(b.in[b.off-1])
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReader) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReader) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderBytes struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReaderBytes) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.advance(8 - uint8(highBit32(uint32(v))))
+ return nil
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReaderBytes) peekByteFast() uint8 {
+ got := uint8(b.value >> 56)
+ return got
+}
+
+func (b *bitReaderBytes) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderBytes) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() assumes the bitReaderBytes is empty and there is at least 8 bytes to read.
+func (b *bitReaderBytes) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReaderBytes) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReaderBytes) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReaderBytes) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+// bitReaderShifted reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderShifted struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReaderShifted) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.advance(8 - uint8(highBit32(uint32(v))))
+ return nil
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
+ return uint16(b.value >> ((64 - n) & 63))
+}
+
+func (b *bitReaderShifted) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderShifted) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() assumes the bitReaderShifted is empty and there is at least 8 bytes to read.
+func (b *bitReaderShifted) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReaderShifted) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReaderShifted) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReaderShifted) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
new file mode 100644
index 0000000..6bce4e8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
@@ -0,0 +1,210 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+ bitContainer uint64
+ nBits uint8
+ out []byte
+}
+
+// bitMask16 is bitmasks. Has extra to avoid bounds check.
+var bitMask16 = [32]uint16{
+ 0, 1, 3, 7, 0xF, 0x1F,
+ 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+ 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF} /* up to 16 bits */
+
+// addBits16NC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+ b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+ b.bitContainer |= uint64(value) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// encSymbol will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
+ enc := ct[symbol]
+ b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
+ if false {
+ if enc.nBits == 0 {
+ panic("nbits 0")
+ }
+ }
+ b.nBits += enc.nBits
+}
+
+// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
+ encA := ct[av]
+ encB := ct[bv]
+ sh := b.nBits & 63
+ combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63))
+ b.bitContainer |= combined << sh
+ if false {
+ if encA.nBits == 0 {
+ panic("nbitsA 0")
+ }
+ if encB.nBits == 0 {
+ panic("nbitsB 0")
+ }
+ }
+ b.nBits += encA.nBits + encB.nBits
+}
+
+// addBits16ZeroNC will add up to 16 bits.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+// This is fastest if bits can be zero.
+func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
+ if bits == 0 {
+ return
+ }
+ value <<= (16 - bits) & 15
+ value >>= (16 - bits) & 15
+ b.bitContainer |= uint64(value) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// flush will flush all pending full bytes.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing.
+func (b *bitWriter) flush() {
+ v := b.nBits >> 3
+ switch v {
+ case 0:
+ return
+ case 1:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ )
+ b.bitContainer >>= 1 << 3
+ case 2:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ )
+ b.bitContainer >>= 2 << 3
+ case 3:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ )
+ b.bitContainer >>= 3 << 3
+ case 4:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ )
+ b.bitContainer >>= 4 << 3
+ case 5:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ )
+ b.bitContainer >>= 5 << 3
+ case 6:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ )
+ b.bitContainer >>= 6 << 3
+ case 7:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ )
+ b.bitContainer >>= 7 << 3
+ case 8:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ byte(b.bitContainer>>56),
+ )
+ b.bitContainer = 0
+ b.nBits = 0
+ return
+ default:
+ panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+ }
+ b.nBits &= 7
+}
+
+// flush32 will flush out, so there are at least 32 bits available for writing.
+func (b *bitWriter) flush32() {
+ if b.nBits < 32 {
+ return
+ }
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24))
+ b.nBits -= 32
+ b.bitContainer >>= 32
+}
+
+// flushAlign will flush remaining full bytes and align to next byte boundary.
+func (b *bitWriter) flushAlign() {
+ nbBytes := (b.nBits + 7) >> 3
+ for i := uint8(0); i < nbBytes; i++ {
+ b.out = append(b.out, byte(b.bitContainer>>(i*8)))
+ }
+ b.nBits = 0
+ b.bitContainer = 0
+}
+
+// close will write the alignment bit and write the final byte(s)
+// to the output.
+func (b *bitWriter) close() error {
+ // End mark
+ b.addBits16Clean(1, 1)
+ // flush until next byte.
+ b.flushAlign()
+ return nil
+}
+
+// reset and continue writing by appending to out.
+func (b *bitWriter) reset(out []byte) {
+ b.bitContainer = 0
+ b.nBits = 0
+ b.out = out
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go
new file mode 100644
index 0000000..50bcdf6
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go
@@ -0,0 +1,54 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced.
+// The reader performs no bounds checks.
+type byteReader struct {
+ b []byte
+ off int
+}
+
+// init will initialize the reader and set the input.
+func (b *byteReader) init(in []byte) {
+ b.b = in
+ b.off = 0
+}
+
+// advance the stream b n bytes.
+func (b *byteReader) advance(n uint) {
+ b.off += int(n)
+}
+
+// Int32 returns a little endian int32 starting at current offset.
+func (b byteReader) Int32() int32 {
+ v3 := int32(b.b[b.off+3])
+ v2 := int32(b.b[b.off+2])
+ v1 := int32(b.b[b.off+1])
+ v0 := int32(b.b[b.off])
+ return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
+}
+
+// Uint32 returns a little endian uint32 starting at current offset.
+func (b byteReader) Uint32() uint32 {
+ v3 := uint32(b.b[b.off+3])
+ v2 := uint32(b.b[b.off+2])
+ v1 := uint32(b.b[b.off+1])
+ v0 := uint32(b.b[b.off])
+ return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
+}
+
+// unread returns the unread portion of the input.
+func (b byteReader) unread() []byte {
+ return b.b[b.off:]
+}
+
+// remain will return the number of bytes remaining.
+func (b byteReader) remain() int {
+ return len(b.b) - b.off
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
new file mode 100644
index 0000000..0823c92
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -0,0 +1,656 @@
+package huff0
+
+import (
+ "fmt"
+ "runtime"
+ "sync"
+)
+
+// Compress1X will compress the input.
+// The output can be decoded using Decompress1X.
+// Supply a Scratch object. The scratch object contains state about re-use,
+// So when sharing across independent encodes, be sure to set the re-use policy.
+func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) {
+ s, err = s.prepare(in)
+ if err != nil {
+ return nil, false, err
+ }
+ return compress(in, s, s.compress1X)
+}
+
+// Compress4X will compress the input. The input is split into 4 independent blocks
+// and compressed similar to Compress1X.
+// The output can be decoded using Decompress4X.
+// Supply a Scratch object. The scratch object contains state about re-use,
+// So when sharing across independent encodes, be sure to set the re-use policy.
+func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) {
+ s, err = s.prepare(in)
+ if err != nil {
+ return nil, false, err
+ }
+ if false {
+ // TODO: compress4Xp only slightly faster.
+ const parallelThreshold = 8 << 10
+ if len(in) < parallelThreshold || runtime.GOMAXPROCS(0) == 1 {
+ return compress(in, s, s.compress4X)
+ }
+ return compress(in, s, s.compress4Xp)
+ }
+ return compress(in, s, s.compress4X)
+}
+
+func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) {
+ // Nuke previous table if we cannot reuse anyway.
+ if s.Reuse == ReusePolicyNone {
+ s.prevTable = s.prevTable[:0]
+ }
+
+ // Create histogram, if none was provided.
+ maxCount := s.maxCount
+ var canReuse = false
+ if maxCount == 0 {
+ maxCount, canReuse = s.countSimple(in)
+ } else {
+ canReuse = s.canUseTable(s.prevTable)
+ }
+
+ // We want the output size to be less than this:
+ wantSize := len(in)
+ if s.WantLogLess > 0 {
+ wantSize -= wantSize >> s.WantLogLess
+ }
+
+ // Reset for next run.
+ s.clearCount = true
+ s.maxCount = 0
+ if maxCount >= len(in) {
+ if maxCount > len(in) {
+ return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in))
+ }
+ if len(in) == 1 {
+ return nil, false, ErrIncompressible
+ }
+ // One symbol, use RLE
+ return nil, false, ErrUseRLE
+ }
+ if maxCount == 1 || maxCount < (len(in)>>7) {
+ // Each symbol present maximum once or too well distributed.
+ return nil, false, ErrIncompressible
+ }
+ if s.Reuse == ReusePolicyMust && !canReuse {
+ // We must reuse, but we can't.
+ return nil, false, ErrIncompressible
+ }
+ if (s.Reuse == ReusePolicyPrefer || s.Reuse == ReusePolicyMust) && canReuse {
+ keepTable := s.cTable
+ keepTL := s.actualTableLog
+ s.cTable = s.prevTable
+ s.actualTableLog = s.prevTableLog
+ s.Out, err = compressor(in)
+ s.cTable = keepTable
+ s.actualTableLog = keepTL
+ if err == nil && len(s.Out) < wantSize {
+ s.OutData = s.Out
+ return s.Out, true, nil
+ }
+ if s.Reuse == ReusePolicyMust {
+ return nil, false, ErrIncompressible
+ }
+ // Do not attempt to re-use later.
+ s.prevTable = s.prevTable[:0]
+ }
+
+ // Calculate new table.
+ err = s.buildCTable()
+ if err != nil {
+ return nil, false, err
+ }
+
+ if false && !s.canUseTable(s.cTable) {
+ panic("invalid table generated")
+ }
+
+ if s.Reuse == ReusePolicyAllow && canReuse {
+ hSize := len(s.Out)
+ oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen])
+ newSize := s.cTable.estimateSize(s.count[:s.symbolLen])
+ if oldSize <= hSize+newSize || hSize+12 >= wantSize {
+ // Retain cTable even if we re-use.
+ keepTable := s.cTable
+ keepTL := s.actualTableLog
+
+ s.cTable = s.prevTable
+ s.actualTableLog = s.prevTableLog
+ s.Out, err = compressor(in)
+
+ // Restore ctable.
+ s.cTable = keepTable
+ s.actualTableLog = keepTL
+ if err != nil {
+ return nil, false, err
+ }
+ if len(s.Out) >= wantSize {
+ return nil, false, ErrIncompressible
+ }
+ s.OutData = s.Out
+ return s.Out, true, nil
+ }
+ }
+
+ // Use new table
+ err = s.cTable.write(s)
+ if err != nil {
+ s.OutTable = nil
+ return nil, false, err
+ }
+ s.OutTable = s.Out
+
+ // Compress using new table
+ s.Out, err = compressor(in)
+ if err != nil {
+ s.OutTable = nil
+ return nil, false, err
+ }
+ if len(s.Out) >= wantSize {
+ s.OutTable = nil
+ return nil, false, ErrIncompressible
+ }
+ // Move current table into previous.
+ s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0]
+ s.OutData = s.Out[len(s.OutTable):]
+ return s.Out, false, nil
+}
+
+func (s *Scratch) compress1X(src []byte) ([]byte, error) {
+ return s.compress1xDo(s.Out, src)
+}
+
+func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+ var bw = bitWriter{out: dst}
+
+ // N is length divisible by 4.
+ n := len(src)
+ n -= n & 3
+ cTable := s.cTable[:256]
+
+ // Encode last bytes.
+ for i := len(src) & 3; i > 0; i-- {
+ bw.encSymbol(cTable, src[n+i-1])
+ }
+ n -= 4
+ if s.actualTableLog <= 8 {
+ for ; n >= 0; n -= 4 {
+ tmp := src[n : n+4]
+ // tmp should be len 4
+ bw.flush32()
+ bw.encTwoSymbols(cTable, tmp[3], tmp[2])
+ bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+ }
+ } else {
+ for ; n >= 0; n -= 4 {
+ tmp := src[n : n+4]
+ // tmp should be len 4
+ bw.flush32()
+ bw.encTwoSymbols(cTable, tmp[3], tmp[2])
+ bw.flush32()
+ bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+ }
+ }
+ err := bw.close()
+ return bw.out, err
+}
+
+var sixZeros [6]byte
+
+func (s *Scratch) compress4X(src []byte) ([]byte, error) {
+ if len(src) < 12 {
+ return nil, ErrIncompressible
+ }
+ segmentSize := (len(src) + 3) / 4
+
+ // Add placeholder for output length
+ offsetIdx := len(s.Out)
+ s.Out = append(s.Out, sixZeros[:]...)
+
+ for i := 0; i < 4; i++ {
+ toDo := src
+ if len(toDo) > segmentSize {
+ toDo = toDo[:segmentSize]
+ }
+ src = src[len(toDo):]
+
+ var err error
+ idx := len(s.Out)
+ s.Out, err = s.compress1xDo(s.Out, toDo)
+ if err != nil {
+ return nil, err
+ }
+ // Write compressed length as little endian before block.
+ if i < 3 {
+ // Last length is not written.
+ length := len(s.Out) - idx
+ s.Out[i*2+offsetIdx] = byte(length)
+ s.Out[i*2+offsetIdx+1] = byte(length >> 8)
+ }
+ }
+
+ return s.Out, nil
+}
+
+// compress4Xp will compress 4 streams using separate goroutines.
+func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
+ if len(src) < 12 {
+ return nil, ErrIncompressible
+ }
+ // Add placeholder for output length
+ s.Out = s.Out[:6]
+
+ segmentSize := (len(src) + 3) / 4
+ var wg sync.WaitGroup
+ var errs [4]error
+ wg.Add(4)
+ for i := 0; i < 4; i++ {
+ toDo := src
+ if len(toDo) > segmentSize {
+ toDo = toDo[:segmentSize]
+ }
+ src = src[len(toDo):]
+
+ // Separate goroutine for each block.
+ go func(i int) {
+ s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+ wg.Done()
+ }(i)
+ }
+ wg.Wait()
+ for i := 0; i < 4; i++ {
+ if errs[i] != nil {
+ return nil, errs[i]
+ }
+ o := s.tmpOut[i]
+ // Write compressed length as little endian before block.
+ if i < 3 {
+ // Last length is not written.
+ s.Out[i*2] = byte(len(o))
+ s.Out[i*2+1] = byte(len(o) >> 8)
+ }
+
+ // Write output.
+ s.Out = append(s.Out, o...)
+ }
+ return s.Out, nil
+}
+
+// countSimple will create a simple histogram in s.count.
+// Returns the biggest count.
+// Does not update s.clearCount.
+func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
+ reuse = true
+ for _, v := range in {
+ s.count[v]++
+ }
+ m := uint32(0)
+ if len(s.prevTable) > 0 {
+ for i, v := range s.count[:] {
+ if v > m {
+ m = v
+ }
+ if v > 0 {
+ s.symbolLen = uint16(i) + 1
+ if i >= len(s.prevTable) {
+ reuse = false
+ } else {
+ if s.prevTable[i].nBits == 0 {
+ reuse = false
+ }
+ }
+ }
+ }
+ return int(m), reuse
+ }
+ for i, v := range s.count[:] {
+ if v > m {
+ m = v
+ }
+ if v > 0 {
+ s.symbolLen = uint16(i) + 1
+ }
+ }
+ return int(m), false
+}
+
+func (s *Scratch) canUseTable(c cTable) bool {
+ if len(c) < int(s.symbolLen) {
+ return false
+ }
+ for i, v := range s.count[:s.symbolLen] {
+ if v != 0 && c[i].nBits == 0 {
+ return false
+ }
+ }
+ return true
+}
+
+func (s *Scratch) validateTable(c cTable) bool {
+ if len(c) < int(s.symbolLen) {
+ return false
+ }
+ for i, v := range s.count[:s.symbolLen] {
+ if v != 0 {
+ if c[i].nBits == 0 {
+ return false
+ }
+ if c[i].nBits > s.actualTableLog {
+ return false
+ }
+ }
+ }
+ return true
+}
+
+// minTableLog provides the minimum logSize to safely represent a distribution.
+func (s *Scratch) minTableLog() uint8 {
+ minBitsSrc := highBit32(uint32(s.br.remain())) + 1
+ minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
+ if minBitsSrc < minBitsSymbols {
+ return uint8(minBitsSrc)
+ }
+ return uint8(minBitsSymbols)
+}
+
+// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
+func (s *Scratch) optimalTableLog() {
+ tableLog := s.TableLog
+ minBits := s.minTableLog()
+ maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
+ if maxBitsSrc < tableLog {
+ // Accuracy can be reduced
+ tableLog = maxBitsSrc
+ }
+ if minBits > tableLog {
+ tableLog = minBits
+ }
+ // Need a minimum to safely represent all symbol values
+ if tableLog < minTablelog {
+ tableLog = minTablelog
+ }
+ if tableLog > tableLogMax {
+ tableLog = tableLogMax
+ }
+ s.actualTableLog = tableLog
+}
+
+type cTableEntry struct {
+ val uint16
+ nBits uint8
+ // We have 8 bits extra
+}
+
+const huffNodesMask = huffNodesLen - 1
+
+func (s *Scratch) buildCTable() error {
+ s.optimalTableLog()
+ s.huffSort()
+ if cap(s.cTable) < maxSymbolValue+1 {
+ s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1)
+ } else {
+ s.cTable = s.cTable[:s.symbolLen]
+ for i := range s.cTable {
+ s.cTable[i] = cTableEntry{}
+ }
+ }
+
+ var startNode = int16(s.symbolLen)
+ nonNullRank := s.symbolLen - 1
+
+ nodeNb := startNode
+ huffNode := s.nodes[1 : huffNodesLen+1]
+
+ // This overlays the slice above, but allows "-1" index lookups.
+ // Different from reference implementation.
+ huffNode0 := s.nodes[0 : huffNodesLen+1]
+
+ for huffNode[nonNullRank].count == 0 {
+ nonNullRank--
+ }
+
+ lowS := int16(nonNullRank)
+ nodeRoot := nodeNb + lowS - 1
+ lowN := nodeNb
+ huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
+ huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+ nodeNb++
+ lowS -= 2
+ for n := nodeNb; n <= nodeRoot; n++ {
+ huffNode[n].count = 1 << 30
+ }
+ // fake entry, strong barrier
+ huffNode0[0].count = 1 << 31
+
+ // create parents
+ for nodeNb <= nodeRoot {
+ var n1, n2 int16
+ if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ n1 = lowS
+ lowS--
+ } else {
+ n1 = lowN
+ lowN++
+ }
+ if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ n2 = lowS
+ lowS--
+ } else {
+ n2 = lowN
+ lowN++
+ }
+
+ huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
+ huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+ nodeNb++
+ }
+
+ // distribute weights (unlimited tree height)
+ huffNode[nodeRoot].nbBits = 0
+ for n := nodeRoot - 1; n >= startNode; n-- {
+ huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ }
+ for n := uint16(0); n <= nonNullRank; n++ {
+ huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ }
+ s.actualTableLog = s.setMaxHeight(int(nonNullRank))
+ maxNbBits := s.actualTableLog
+
+ // fill result into tree (val, nbBits)
+ if maxNbBits > tableLogMax {
+ return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax)
+ }
+ var nbPerRank [tableLogMax + 1]uint16
+ var valPerRank [16]uint16
+ for _, v := range huffNode[:nonNullRank+1] {
+ nbPerRank[v.nbBits]++
+ }
+ // determine stating value per rank
+ {
+ min := uint16(0)
+ for n := maxNbBits; n > 0; n-- {
+ // get starting value within each rank
+ valPerRank[n] = min
+ min += nbPerRank[n]
+ min >>= 1
+ }
+ }
+
+ // push nbBits per symbol, symbol order
+ for _, v := range huffNode[:nonNullRank+1] {
+ s.cTable[v.symbol].nBits = v.nbBits
+ }
+
+ // assign value within rank, symbol order
+ t := s.cTable[:s.symbolLen]
+ for n, val := range t {
+ nbits := val.nBits & 15
+ v := valPerRank[nbits]
+ t[n].val = v
+ valPerRank[nbits] = v + 1
+ }
+
+ return nil
+}
+
+// huffSort will sort symbols, decreasing order.
+func (s *Scratch) huffSort() {
+ type rankPos struct {
+ base uint32
+ current uint32
+ }
+
+ // Clear nodes
+ nodes := s.nodes[:huffNodesLen+1]
+ s.nodes = nodes
+ nodes = nodes[1 : huffNodesLen+1]
+
+ // Sort into buckets based on length of symbol count.
+ var rank [32]rankPos
+ for _, v := range s.count[:s.symbolLen] {
+ r := highBit32(v+1) & 31
+ rank[r].base++
+ }
+ // maxBitLength is log2(BlockSizeMax) + 1
+ const maxBitLength = 18 + 1
+ for n := maxBitLength; n > 0; n-- {
+ rank[n-1].base += rank[n].base
+ }
+ for n := range rank[:maxBitLength] {
+ rank[n].current = rank[n].base
+ }
+ for n, c := range s.count[:s.symbolLen] {
+ r := (highBit32(c+1) + 1) & 31
+ pos := rank[r].current
+ rank[r].current++
+ prev := nodes[(pos-1)&huffNodesMask]
+ for pos > rank[r].base && c > prev.count {
+ nodes[pos&huffNodesMask] = prev
+ pos--
+ prev = nodes[(pos-1)&huffNodesMask]
+ }
+ nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+ }
+}
+
+func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
+ maxNbBits := s.actualTableLog
+ huffNode := s.nodes[1 : huffNodesLen+1]
+ //huffNode = huffNode[: huffNodesLen]
+
+ largestBits := huffNode[lastNonNull].nbBits
+
+ // early exit : no elt > maxNbBits
+ if largestBits <= maxNbBits {
+ return largestBits
+ }
+ totalCost := int(0)
+ baseCost := int(1) << (largestBits - maxNbBits)
+ n := uint32(lastNonNull)
+
+ for huffNode[n].nbBits > maxNbBits {
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
+ huffNode[n].nbBits = maxNbBits
+ n--
+ }
+ // n stops at huffNode[n].nbBits <= maxNbBits
+
+ for huffNode[n].nbBits == maxNbBits {
+ n--
+ }
+ // n end at index of smallest symbol using < maxNbBits
+
+ // renorm totalCost
+ totalCost >>= largestBits - maxNbBits /* note : totalCost is necessarily a multiple of baseCost */
+
+ // repay normalized cost
+ {
+ const noSymbol = 0xF0F0F0F0
+ var rankLast [tableLogMax + 2]uint32
+
+ for i := range rankLast[:] {
+ rankLast[i] = noSymbol
+ }
+
+ // Get pos of last (smallest) symbol per rank
+ {
+ currentNbBits := maxNbBits
+ for pos := int(n); pos >= 0; pos-- {
+ if huffNode[pos].nbBits >= currentNbBits {
+ continue
+ }
+ currentNbBits = huffNode[pos].nbBits // < maxNbBits
+ rankLast[maxNbBits-currentNbBits] = uint32(pos)
+ }
+ }
+
+ for totalCost > 0 {
+ nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1
+
+ for ; nBitsToDecrease > 1; nBitsToDecrease-- {
+ highPos := rankLast[nBitsToDecrease]
+ lowPos := rankLast[nBitsToDecrease-1]
+ if highPos == noSymbol {
+ continue
+ }
+ if lowPos == noSymbol {
+ break
+ }
+ highTotal := huffNode[highPos].count
+ lowTotal := 2 * huffNode[lowPos].count
+ if highTotal <= lowTotal {
+ break
+ }
+ }
+ // only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !)
+ // HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary
+ // FIXME: try to remove
+ for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) {
+ nBitsToDecrease++
+ }
+ totalCost -= 1 << (nBitsToDecrease - 1)
+ if rankLast[nBitsToDecrease-1] == noSymbol {
+ // this rank is no longer empty
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
+ }
+ huffNode[rankLast[nBitsToDecrease]].nbBits++
+ if rankLast[nBitsToDecrease] == 0 {
+ /* special case, reached largest symbol */
+ rankLast[nBitsToDecrease] = noSymbol
+ } else {
+ rankLast[nBitsToDecrease]--
+ if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+ rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
+ }
+ }
+ }
+
+ for totalCost < 0 { /* Sometimes, cost correction overshoot */
+ if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+ for huffNode[n].nbBits == maxNbBits {
+ n--
+ }
+ huffNode[n+1].nbBits--
+ rankLast[1] = n + 1
+ totalCost++
+ continue
+ }
+ huffNode[rankLast[1]+1].nbBits--
+ rankLast[1]++
+ totalCost++
+ }
+ }
+ return maxNbBits
+}
+
+type nodeElt struct {
+ count uint32
+ parent uint16
+ symbol byte
+ nbBits uint8
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
new file mode 100644
index 0000000..41703bb
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -0,0 +1,1164 @@
+package huff0
+
+import (
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/klauspost/compress/fse"
+)
+
+type dTable struct {
+ single []dEntrySingle
+ double []dEntryDouble
+}
+
+// single-symbols decoding
+type dEntrySingle struct {
+ entry uint16
+}
+
+// double-symbols decoding
+type dEntryDouble struct {
+ seq uint16
+ nBits uint8
+ len uint8
+}
+
+// Uses special code for all tables that are < 8 bits.
+const use8BitTables = true
+
+// ReadTable will read a table from the input.
+// The size of the input may be larger than the table definition.
+// Any content remaining after the table definition will be returned.
+// If no Scratch is provided a new one is allocated.
+// The returned Scratch can be used for encoding or decoding input using this table.
+func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
+ s, err = s.prepare(in)
+ if err != nil {
+ return s, nil, err
+ }
+ if len(in) <= 1 {
+ return s, nil, errors.New("input too small for table")
+ }
+ iSize := in[0]
+ in = in[1:]
+ if iSize >= 128 {
+ // Uncompressed
+ oSize := iSize - 127
+ iSize = (oSize + 1) / 2
+ if int(iSize) > len(in) {
+ return s, nil, errors.New("input too small for table")
+ }
+ for n := uint8(0); n < oSize; n += 2 {
+ v := in[n/2]
+ s.huffWeight[n] = v >> 4
+ s.huffWeight[n+1] = v & 15
+ }
+ s.symbolLen = uint16(oSize)
+ in = in[iSize:]
+ } else {
+ if len(in) < int(iSize) {
+ return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
+ }
+ // FSE compressed weights
+ s.fse.DecompressLimit = 255
+ hw := s.huffWeight[:]
+ s.fse.Out = hw
+ b, err := fse.Decompress(in[:iSize], s.fse)
+ s.fse.Out = nil
+ if err != nil {
+ return s, nil, err
+ }
+ if len(b) > 255 {
+ return s, nil, errors.New("corrupt input: output table too large")
+ }
+ s.symbolLen = uint16(len(b))
+ in = in[iSize:]
+ }
+
+ // collect weight stats
+ var rankStats [16]uint32
+ weightTotal := uint32(0)
+ for _, v := range s.huffWeight[:s.symbolLen] {
+ if v > tableLogMax {
+ return s, nil, errors.New("corrupt input: weight too large")
+ }
+ v2 := v & 15
+ rankStats[v2]++
+ // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
+ weightTotal += (1 << v2) >> 1
+ }
+ if weightTotal == 0 {
+ return s, nil, errors.New("corrupt input: weights zero")
+ }
+
+ // get last non-null symbol weight (implied, total must be 2^n)
+ {
+ tableLog := highBit32(weightTotal) + 1
+ if tableLog > tableLogMax {
+ return s, nil, errors.New("corrupt input: tableLog too big")
+ }
+ s.actualTableLog = uint8(tableLog)
+ // determine last weight
+ {
+ total := uint32(1) << tableLog
+ rest := total - weightTotal
+ verif := uint32(1) << highBit32(rest)
+ lastWeight := highBit32(rest) + 1
+ if verif != rest {
+ // last value must be a clean power of 2
+ return s, nil, errors.New("corrupt input: last value not power of two")
+ }
+ s.huffWeight[s.symbolLen] = uint8(lastWeight)
+ s.symbolLen++
+ rankStats[lastWeight]++
+ }
+ }
+
+ if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
+ // by construction : at least 2 elts of rank 1, must be even
+ return s, nil, errors.New("corrupt input: min elt size, even check failed ")
+ }
+
+ // TODO: Choose between single/double symbol decoding
+
+ // Calculate starting value for each rank
+ {
+ var nextRankStart uint32
+ for n := uint8(1); n < s.actualTableLog+1; n++ {
+ current := nextRankStart
+ nextRankStart += rankStats[n] << (n - 1)
+ rankStats[n] = current
+ }
+ }
+
+ // fill DTable (always full size)
+ tSize := 1 << tableLogMax
+ if len(s.dt.single) != tSize {
+ s.dt.single = make([]dEntrySingle, tSize)
+ }
+ cTable := s.prevTable
+ if cap(cTable) < maxSymbolValue+1 {
+ cTable = make([]cTableEntry, 0, maxSymbolValue+1)
+ }
+ cTable = cTable[:maxSymbolValue+1]
+ s.prevTable = cTable[:s.symbolLen]
+ s.prevTableLog = s.actualTableLog
+
+ for n, w := range s.huffWeight[:s.symbolLen] {
+ if w == 0 {
+ cTable[n] = cTableEntry{
+ val: 0,
+ nBits: 0,
+ }
+ continue
+ }
+ length := (uint32(1) << w) >> 1
+ d := dEntrySingle{
+ entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
+ }
+
+ rank := &rankStats[w]
+ cTable[n] = cTableEntry{
+ val: uint16(*rank >> (w - 1)),
+ nBits: uint8(d.entry),
+ }
+
+ single := s.dt.single[*rank : *rank+length]
+ for i := range single {
+ single[i] = d
+ }
+ *rank += length
+ }
+
+ return s, in, nil
+}
+
+// Decompress1X will decompress a 1X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// Before this is called, the table must be initialized with ReadTable unless
+// the encoder re-used the table.
+// deprecated: Use the stateless Decoder() to get a concurrent version.
+func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
+ if cap(s.Out) < s.MaxDecodedSize {
+ s.Out = make([]byte, s.MaxDecodedSize)
+ }
+ s.Out = s.Out[:0:s.MaxDecodedSize]
+ s.Out, err = s.Decoder().Decompress1X(s.Out, in)
+ return s.Out, err
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// Before this is called, the table must be initialized with ReadTable unless
+// the encoder re-used the table.
+// The length of the supplied input must match the end of a block exactly.
+// The destination size of the uncompressed data must be known and provided.
+// deprecated: Use the stateless Decoder() to get a concurrent version.
+func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
+ if dstSize > s.MaxDecodedSize {
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ if cap(s.Out) < dstSize {
+ s.Out = make([]byte, s.MaxDecodedSize)
+ }
+ s.Out = s.Out[:0:dstSize]
+ s.Out, err = s.Decoder().Decompress4X(s.Out, in)
+ return s.Out, err
+}
+
+// Decoder will return a stateless decoder that can be used by multiple
+// decompressors concurrently.
+// Before this is called, the table must be initialized with ReadTable.
+// The Decoder is still linked to the scratch buffer so that cannot be reused.
+// However, it is safe to discard the scratch.
+func (s *Scratch) Decoder() *Decoder {
+ return &Decoder{
+ dt: s.dt,
+ actualTableLog: s.actualTableLog,
+ }
+}
+
+// Decoder provides stateless decoding.
+type Decoder struct {
+ dt dTable
+ actualTableLog uint8
+}
+
+// Decompress1X will decompress a 1X encoded stream.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
+ if len(d.dt.single) == 0 {
+ return nil, errors.New("no table loaded")
+ }
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress1X8Bit(dst, src)
+ }
+ var br bitReaderShifted
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ const tlSize = 1 << tableLogMax
+ const tlMask = tlSize - 1
+ dt := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ for br.off >= 8 {
+ br.fillFast()
+ v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ // Refill
+ br.fillFast()
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br < 8, so uint8 is fine
+ bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
+ for bitsLeft > 0 {
+ br.fill()
+ if false && br.bitsRead >= 32 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= nBits
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress1X8BitExactly(dst, src)
+ }
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ shift := (8 - d.actualTableLog) & 7
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br < 4, so uint8 is fine
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 {
+ if br.bitsRead >= 64-8 {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ const shift = 0
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br < 4, so uint8 is fine
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 {
+ if br.bitsRead >= 64-8 {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
+ if len(d.dt.single) == 0 {
+ return nil, errors.New("no table loaded")
+ }
+ if len(src) < 6+(4*1) {
+ return nil, errors.New("input too small")
+ }
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress4X8bit(dst, src)
+ }
+
+ var br [4]bitReaderShifted
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ const tlSize = 1 << tableLogMax
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 2 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
+
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
+ v2 := single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
+
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
+
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
+ v2 := single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
+
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ }
+
+ off += 2
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := br.off*8 + uint(64-br.bitsRead)
+ for bitsLeft > 0 {
+ br.fill()
+ if false && br.bitsRead >= 32 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ val := br.peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= uint(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress4X8bitExactly(dst, src)
+ }
+
+ var br [4]bitReaderBytes
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ shift := (8 - d.actualTableLog) & 7
+
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
+ var br [4]bitReaderBytes
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ const shift = 0
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// matches will compare a decoding table to a coding table.
+// Errors are written to the writer.
+// Nothing will be written if table is ok.
+func (s *Scratch) matches(ct cTable, w io.Writer) {
+ if s == nil || len(s.dt.single) == 0 {
+ return
+ }
+ dt := s.dt.single[:1<<s.actualTableLog]
+ tablelog := s.actualTableLog
+ ok := 0
+ broken := 0
+ for sym, enc := range ct {
+ errs := 0
+ broken++
+ if enc.nBits == 0 {
+ for _, dec := range dt {
+ if uint8(dec.entry>>8) == byte(sym) {
+ fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
+ errs++
+ break
+ }
+ }
+ if errs == 0 {
+ broken--
+ }
+ continue
+ }
+ // Unused bits in input
+ ub := tablelog - enc.nBits
+ top := enc.val << ub
+ // decoder looks at top bits.
+ dec := dt[top]
+ if uint8(dec.entry) != enc.nBits {
+ fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
+ errs++
+ }
+ if uint8(dec.entry>>8) != uint8(sym) {
+ fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
+ errs++
+ }
+ if errs > 0 {
+ fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
+ continue
+ }
+ // Ensure that all combinations are covered.
+ for i := uint16(0); i < (1 << ub); i++ {
+ vval := top | i
+ dec := dt[vval]
+ if uint8(dec.entry) != enc.nBits {
+ fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
+ errs++
+ }
+ if uint8(dec.entry>>8) != uint8(sym) {
+ fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
+ errs++
+ }
+ if errs > 20 {
+ fmt.Fprintf(w, "%d errros, stopping\n", errs)
+ break
+ }
+ }
+ if errs == 0 {
+ ok++
+ broken--
+ }
+ }
+ if broken > 0 {
+ fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
new file mode 100644
index 0000000..7ec2022
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -0,0 +1,273 @@
+// Package huff0 provides fast huffman encoding as used in zstd.
+//
+// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details.
+package huff0
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "math/bits"
+
+ "github.com/klauspost/compress/fse"
+)
+
+const (
+ maxSymbolValue = 255
+
+ // zstandard limits tablelog to 11, see:
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description
+ tableLogMax = 11
+ tableLogDefault = 11
+ minTablelog = 5
+ huffNodesLen = 512
+
+ // BlockSizeMax is maximum input size for a single block uncompressed.
+ BlockSizeMax = 1<<18 - 1
+)
+
+var (
+ // ErrIncompressible is returned when input is judged to be too hard to compress.
+ ErrIncompressible = errors.New("input is not compressible")
+
+ // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
+ ErrUseRLE = errors.New("input is single value repeated")
+
+ // ErrTooBig is return if input is too large for a single block.
+ ErrTooBig = errors.New("input too big")
+
+ // ErrMaxDecodedSizeExceeded is return if input is too large for a single block.
+ ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded")
+)
+
+type ReusePolicy uint8
+
+const (
+ // ReusePolicyAllow will allow reuse if it produces smaller output.
+ ReusePolicyAllow ReusePolicy = iota
+
+ // ReusePolicyPrefer will re-use aggressively if possible.
+ // This will not check if a new table will produce smaller output,
+ // except if the current table is impossible to use or
+ // compressed output is bigger than input.
+ ReusePolicyPrefer
+
+ // ReusePolicyNone will disable re-use of tables.
+ // This is slightly faster than ReusePolicyAllow but may produce larger output.
+ ReusePolicyNone
+
+ // ReusePolicyMust must allow reuse and produce smaller output.
+ ReusePolicyMust
+)
+
+type Scratch struct {
+ count [maxSymbolValue + 1]uint32
+
+ // Per block parameters.
+ // These can be used to override compression parameters of the block.
+ // Do not touch, unless you know what you are doing.
+
+ // Out is output buffer.
+ // If the scratch is re-used before the caller is done processing the output,
+ // set this field to nil.
+ // Otherwise the output buffer will be re-used for next Compression/Decompression step
+ // and allocation will be avoided.
+ Out []byte
+
+ // OutTable will contain the table data only, if a new table has been generated.
+ // Slice of the returned data.
+ OutTable []byte
+
+ // OutData will contain the compressed data.
+ // Slice of the returned data.
+ OutData []byte
+
+ // MaxDecodedSize will set the maximum allowed output size.
+ // This value will automatically be set to BlockSizeMax if not set.
+ // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
+ MaxDecodedSize int
+
+ br byteReader
+
+ // MaxSymbolValue will override the maximum symbol value of the next block.
+ MaxSymbolValue uint8
+
+ // TableLog will attempt to override the tablelog for the next block.
+ // Must be <= 11 and >= 5.
+ TableLog uint8
+
+ // Reuse will specify the reuse policy
+ Reuse ReusePolicy
+
+ // WantLogLess allows to specify a log 2 reduction that should at least be achieved,
+ // otherwise the block will be returned as incompressible.
+ // The reduction should then at least be (input size >> WantLogLess)
+ // If WantLogLess == 0 any improvement will do.
+ WantLogLess uint8
+
+ symbolLen uint16 // Length of active part of the symbol table.
+ maxCount int // count of the most probable symbol
+ clearCount bool // clear count
+ actualTableLog uint8 // Selected tablelog.
+ prevTableLog uint8 // Tablelog for previous table
+ prevTable cTable // Table used for previous compression.
+ cTable cTable // compression table
+ dt dTable // decompression table
+ nodes []nodeElt
+ tmpOut [4][]byte
+ fse *fse.Scratch
+ huffWeight [maxSymbolValue + 1]byte
+}
+
+// TransferCTable will transfer the previously used compression table.
+func (s *Scratch) TransferCTable(src *Scratch) {
+ if cap(s.prevTable) < len(src.prevTable) {
+ s.prevTable = make(cTable, 0, maxSymbolValue+1)
+ }
+ s.prevTable = s.prevTable[:len(src.prevTable)]
+ copy(s.prevTable, src.prevTable)
+ s.prevTableLog = src.prevTableLog
+}
+
+func (s *Scratch) prepare(in []byte) (*Scratch, error) {
+ if len(in) > BlockSizeMax {
+ return nil, ErrTooBig
+ }
+ if s == nil {
+ s = &Scratch{}
+ }
+ if s.MaxSymbolValue == 0 {
+ s.MaxSymbolValue = maxSymbolValue
+ }
+ if s.TableLog == 0 {
+ s.TableLog = tableLogDefault
+ }
+ if s.TableLog > tableLogMax || s.TableLog < minTablelog {
+ return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax)
+ }
+ if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax {
+ s.MaxDecodedSize = BlockSizeMax
+ }
+ if s.clearCount && s.maxCount == 0 {
+ for i := range s.count {
+ s.count[i] = 0
+ }
+ s.clearCount = false
+ }
+ if cap(s.Out) == 0 {
+ s.Out = make([]byte, 0, len(in))
+ }
+ s.Out = s.Out[:0]
+
+ s.OutTable = nil
+ s.OutData = nil
+ if cap(s.nodes) < huffNodesLen+1 {
+ s.nodes = make([]nodeElt, 0, huffNodesLen+1)
+ }
+ s.nodes = s.nodes[:0]
+ if s.fse == nil {
+ s.fse = &fse.Scratch{}
+ }
+ s.br.init(in)
+
+ return s, nil
+}
+
+type cTable []cTableEntry
+
+func (c cTable) write(s *Scratch) error {
+ var (
+ // precomputed conversion table
+ bitsToWeight [tableLogMax + 1]byte
+ huffLog = s.actualTableLog
+ // last weight is not saved.
+ maxSymbolValue = uint8(s.symbolLen - 1)
+ huffWeight = s.huffWeight[:256]
+ )
+ const (
+ maxFSETableLog = 6
+ )
+ // convert to weight
+ bitsToWeight[0] = 0
+ for n := uint8(1); n < huffLog+1; n++ {
+ bitsToWeight[n] = huffLog + 1 - n
+ }
+
+ // Acquire histogram for FSE.
+ hist := s.fse.Histogram()
+ hist = hist[:256]
+ for i := range hist[:16] {
+ hist[i] = 0
+ }
+ for n := uint8(0); n < maxSymbolValue; n++ {
+ v := bitsToWeight[c[n].nBits] & 15
+ huffWeight[n] = v
+ hist[v]++
+ }
+
+ // FSE compress if feasible.
+ if maxSymbolValue >= 2 {
+ huffMaxCnt := uint32(0)
+ huffMax := uint8(0)
+ for i, v := range hist[:16] {
+ if v == 0 {
+ continue
+ }
+ huffMax = byte(i)
+ if v > huffMaxCnt {
+ huffMaxCnt = v
+ }
+ }
+ s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
+ s.fse.TableLog = maxFSETableLog
+ b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
+ if err == nil && len(b) < int(s.symbolLen>>1) {
+ s.Out = append(s.Out, uint8(len(b)))
+ s.Out = append(s.Out, b...)
+ return nil
+ }
+ // Unable to compress (RLE/uncompressible)
+ }
+ // write raw values as 4-bits (max : 15)
+ if maxSymbolValue > (256 - 128) {
+ // should not happen : likely means source cannot be compressed
+ return ErrIncompressible
+ }
+ op := s.Out
+ // special case, pack weights 4 bits/weight.
+ op = append(op, 128|(maxSymbolValue-1))
+ // be sure it doesn't cause msan issue in final combination
+ huffWeight[maxSymbolValue] = 0
+ for n := uint16(0); n < uint16(maxSymbolValue); n += 2 {
+ op = append(op, (huffWeight[n]<<4)|huffWeight[n+1])
+ }
+ s.Out = op
+ return nil
+}
+
+// estimateSize returns the estimated size in bytes of the input represented in the
+// histogram supplied.
+func (c cTable) estimateSize(hist []uint32) int {
+ nbBits := uint32(7)
+ for i, v := range c[:len(hist)] {
+ nbBits += uint32(v.nBits) * hist[i]
+ }
+ return int(nbBits >> 3)
+}
+
+// minSize returns the minimum possible size considering the shannon limit.
+func (s *Scratch) minSize(total int) int {
+ nbBits := float64(7)
+ fTotal := float64(total)
+ for _, v := range s.count[:s.symbolLen] {
+ n := float64(v)
+ if n > 0 {
+ nbBits += math.Log2(fTotal/n) * n
+ }
+ }
+ return int(nbBits) >> 3
+}
+
+func highBit32(val uint32) (n uint32) {
+ return uint32(bits.Len32(val) - 1)
+}