[VOL-4291] Rw-core updates for gRPC migration
Change-Id: I8d5a554409115b29318089671ca4e1ab3fa98810
diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE
new file mode 100644
index 0000000..1eb75ef
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/LICENSE
@@ -0,0 +1,28 @@
+Copyright (c) 2012 The Go Authors. All rights reserved.
+Copyright (c) 2019 Klaus Post. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/klauspost/compress/fse/README.md b/vendor/github.com/klauspost/compress/fse/README.md
new file mode 100644
index 0000000..ea7324d
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/README.md
@@ -0,0 +1,79 @@
+# Finite State Entropy
+
+This package provides Finite State Entropy encoding and decoding.
+
+Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS))
+encoding provides a fast near-optimal symbol encoding/decoding
+for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd).
+
+This can be used for compressing input with a lot of similar input values to the smallest number of bytes.
+This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders,
+but it can be used as a secondary step to compressors (like Snappy) that do not do entropy encoding.
+
+* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse)
+
+## News
+
+ * Feb 2018: First implementation released. Consider this beta software for now.
+
+# Usage
+
+This package provides a low level interface that allows compressing single independent blocks.
+
+Each block is separate, and there are no built-in integrity checks.
+This means that the caller should keep track of block sizes and also do checksums if needed.
+
+Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function.
+You must provide input and will receive the output and maybe an error.
+
+These error values can be returned:
+
+| Error | Description |
+|---------------------|-----------------------------------------------------------------------------|
+| `<nil>` | Everything ok, output is returned |
+| `ErrIncompressible` | Returned when input is judged to be too hard to compress |
+| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated |
+| `(error)` | An internal error occurred. |
+
+As can be seen above there are errors that will be returned even under normal operation so it is important to handle these.
+
+To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object
+that can be re-used for successive calls. Both compression and decompression accept a `Scratch` object, and the same
+object can be used for both.
+
+Be aware that when re-using a `Scratch` object the *output* buffer is also re-used, so if you are still using it
+you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.
+
+Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function.
+You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
+your input was likely corrupted.
+
+It is important to note that a successful decoding does *not* mean your output matches your original input.
+There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
+
+For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples).
+
+# Performance
+
+A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors.
+All compression functions are currently only running on the calling goroutine so only one core will be used per block.
+
+The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input
+is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be
+beneficial to transpose all your input values down by 64.
+
+With moderate block sizes around 64k, speeds are typically 200MB/s per core for compression and
+around 300MB/s decompression speed.
+
+The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s.
+
+# Plans
+
+At some point, more internals will be exposed to facilitate more "expert" usage of the components.
+
+A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261).
+
+# Contributing
+
+Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
+changes will likely not be accepted. If in doubt open an issue before writing the PR.
\ No newline at end of file
diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go
new file mode 100644
index 0000000..f65eb39
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/bitreader.go
@@ -0,0 +1,122 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+)
+
+// bitReader reads a bitstream in reverse, from the end of the input towards its start.
+// The highest set bit of the last input byte indicates the start of the stream
+// and is used for aligning the input.
+type bitReader struct {
+ in []byte // input being consumed; released by close
+ off uint // next byte to read is at in[off - 1]
+ value uint64 // bit container; bits are handed out from the top downwards
+ bitsRead uint8 // bits of value already consumed; 64 means the container is empty
+}
+
+// init initializes and resets the bit reader; it fails if in is empty or its last byte is zero (no end marker).
+func (b *bitReader) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64 // mark the container as empty so the fills below load data
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill() // fewer than 8 bytes: two calls are enough to load all of them
+ b.fill()
+ }
+ b.bitsRead += 8 - uint8(highBits(uint32(v))) // presumably skips the padding and the marker bit; confirm against highBits
+ return nil
+}
+
+// getBits will return n bits. n can be 0. It also returns 0 once all 64 container bits have been consumed.
+func (b *bitReader) getBits(n uint8) uint16 {
+ if n == 0 || b.bitsRead >= 64 {
+ return 0
+ }
+ return b.getBitsFast(n)
+}
+
+// getBitsFast requires that at least one bit is requested every time (n == 0 would mask the right shift to 0).
+// There are no checks if the buffer is filled.
+func (b *bitReader) getBitsFast(n uint8) uint16 {
+ const regMask = 64 - 1 // keeps both shift counts within 0..63
+ v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) // drop consumed bits, keep the next n
+ b.bitsRead += n
+ return v
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available; unlike fill, this is not checked here.
+func (b *bitReader) fillFast() {
+ if b.bitsRead < 32 { // more than 32 unread bits remain; nothing to do
+ return
+ }
+ // 2 bounds checks.
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) // little-endian load
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fill() will make sure at least 32 bits are available, as long as enough input remains.
+func (b *bitReader) fill() {
+ if b.bitsRead < 32 { // more than 32 unread bits remain; nothing to do
+ return
+ }
+ if b.off > 4 { // more than 4 bytes left: load 4 of them at once
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 { // 4 or fewer bytes left: shift them in one at a time
+ b.value = (b.value << 8) | uint64(b.in[b.off-1])
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read; it loads the last 8 unread bytes.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0 // all 64 bits of value are now unread
+ b.off -= 8
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReader) finished() bool {
+ return b.bitsRead >= 64 && b.off == 0 // container drained and no input bytes left to load
+}
+
+// close releases the input and returns io.ErrUnexpectedEOF if more bits were consumed than were loaded.
+func (b *bitReader) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 { // reads went past the end of the container
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go
new file mode 100644
index 0000000..43e4636
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go
@@ -0,0 +1,168 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+ bitContainer uint64 // pending bits not yet appended to out; new bits are OR-ed in at position nBits
+ nBits uint8 // number of pending bits held in bitContainer
+ out []byte // output buffer; flushed bytes are appended here
+}
+
+// bitMask16[n] masks the low n bits for n in 0..16. It has extra entries so indexing with bits&31 avoids a bounds check.
+var bitMask16 = [32]uint16{
+ 0, 1, 3, 7, 0xF, 0x1F,
+ 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+ 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF} /* up to 16 bits; only 26 values are listed, so entries 26..31 are zero */
+
+// addBits16NC will add up to 16 bits; bits of value above the requested count are masked off.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+ b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits. value is not masked, so it may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+ b.bitContainer |= uint64(value) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// addBits16ZeroNC will add up to 16 bits; a request for 0 bits returns immediately.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+// This is fastest if bits can be zero.
+func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
+ if bits == 0 {
+ return
+ }
+ value <<= (16 - bits) & 15 // shift out everything above the low 'bits' bits ...
+ value >>= (16 - bits) & 15 // ... and shift back, leaving only those bits set
+ b.bitContainer |= uint64(value) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// flush will flush all pending full bytes; fewer than 8 bits stay in the container.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing. flush panics if nBits is 72 or more.
+func (b *bitWriter) flush() {
+ v := b.nBits >> 3 // number of complete bytes pending
+ switch v {
+ case 0:
+ case 1:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ )
+ case 2:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ )
+ case 3:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ )
+ case 4:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ )
+ case 5:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ )
+ case 6:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ )
+ case 7:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ )
+ case 8:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ byte(b.bitContainer>>56),
+ )
+ default:
+ panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+ }
+ b.bitContainer >>= v << 3 // drop the bytes that were just written
+ b.nBits &= 7 // only the leftover partial byte remains pending
+}
+
+// flush32 will flush out, so there are at least 32 bits available for writing; it writes 4 bytes or nothing.
+func (b *bitWriter) flush32() {
+ if b.nBits < 32 { // fewer than 32 bits pending: leave them in the container
+ return
+ }
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24))
+ b.nBits -= 32
+ b.bitContainer >>= 32
+}
+
+// flushAlign writes every pending bit, including a final partial byte, and resets the container to empty.
+func (b *bitWriter) flushAlign() {
+ nbBytes := (b.nBits + 7) >> 3 // round the pending bit count up to whole bytes
+ for i := uint8(0); i < nbBytes; i++ {
+ b.out = append(b.out, byte(b.bitContainer>>(i*8)))
+ }
+ b.nBits = 0
+ b.bitContainer = 0
+}
+
+// close will write the alignment bit (a single 1 bit) and write the final byte(s)
+// to the output. It always returns nil.
+func (b *bitWriter) close() error {
+ // End mark
+ b.addBits16Clean(1, 1)
+ // flush until next byte.
+ b.flushAlign()
+ return nil
+}
+
+// reset discards any pending bits and continues writing by appending to out.
+func (b *bitWriter) reset(out []byte) {
+ b.bitContainer = 0
+ b.nBits = 0
+ b.out = out
+}
diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go
new file mode 100644
index 0000000..abade2d
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/bytereader.go
@@ -0,0 +1,47 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced (see advance).
+// The reader performs no bounds checks of its own; callers must check remain().
+type byteReader struct {
+ b []byte // the full input
+ off int // current read offset into b
+}
+
+// init will initialize the reader, set the input and rewind the offset to 0.
+func (b *byteReader) init(in []byte) {
+ b.b = in
+ b.off = 0
+}
+
+// advance the stream by n bytes. The new offset is not checked against the input length.
+func (b *byteReader) advance(n uint) {
+ b.off += int(n)
+}
+
+// Uint32 returns a little endian uint32 starting at current offset. The offset is not advanced.
+func (b byteReader) Uint32() uint32 {
+ b2 := b.b[b.off:]
+ b2 = b2[:4] // re-slicing is bounded by capacity, not length, so callers must ensure 4 bytes remain
+ v3 := uint32(b2[3])
+ v2 := uint32(b2[2])
+ v1 := uint32(b2[1])
+ v0 := uint32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// unread returns the unread portion of the input as a sub-slice sharing the input's backing array.
+func (b byteReader) unread() []byte {
+ return b.b[b.off:]
+}
+
+// remain will return the number of bytes remaining after the current offset.
+func (b byteReader) remain() int {
+ return len(b.b) - b.off
+}
diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go
new file mode 100644
index 0000000..6f34191
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@@ -0,0 +1,683 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package fse
+
+import (
+ "errors"
+ "fmt"
+)
+
+// Compress the input bytes. Input must be < 2GB; inputs of 0 or 1 bytes return ErrIncompressible.
+// Provide a Scratch buffer to avoid memory allocations.
+// Note that the output is also kept in the scratch buffer (s.Out).
+// If input is too hard to compress, or the result is not smaller than in, ErrIncompressible is returned.
+// If input is a single byte value repeated ErrUseRLE is returned.
+func Compress(in []byte, s *Scratch) ([]byte, error) {
+ if len(in) <= 1 {
+ return nil, ErrIncompressible
+ }
+ if len(in) > (2<<30)-1 {
+ return nil, errors.New("input too big, must be < 2GB")
+ }
+ s, err := s.prepare(in)
+ if err != nil {
+ return nil, err
+ }
+
+ // Create histogram, if none was provided.
+ maxCount := s.maxCount
+ if maxCount == 0 {
+ maxCount = s.countSimple(in)
+ }
+ // Reset for next run.
+ s.clearCount = true
+ s.maxCount = 0
+ if maxCount == len(in) {
+ // One symbol, use RLE
+ return nil, ErrUseRLE
+ }
+ if maxCount == 1 || maxCount < (len(in)>>7) {
+ // Each symbol present maximum once or too well distributed.
+ return nil, ErrIncompressible
+ }
+ s.optimalTableLog()
+ err = s.normalizeCount()
+ if err != nil {
+ return nil, err
+ }
+ err = s.writeCount() // writes the table header into s.Out
+ if err != nil {
+ return nil, err
+ }
+
+ if false { // validation of the normalized table is compiled in but disabled
+ err = s.validateNorm()
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ err = s.buildCTable()
+ if err != nil {
+ return nil, err
+ }
+ err = s.compress(in) // appends the encoded bitstream after the header
+ if err != nil {
+ return nil, err
+ }
+ s.Out = s.bw.out // stored before the size check, so s.Out is set even when ErrIncompressible follows
+ // Check if we compressed.
+ if len(s.Out) >= len(in) {
+ return nil, ErrIncompressible
+ }
+ return s.Out, nil
+}
+
+// cState contains the compression state of a stream.
+type cState struct {
+ bw *bitWriter // destination for the emitted bits
+ stateTable []uint16 // next-state table, taken from cTable.stateTable by init
+ state uint16 // current encoder state
+}
+
+// init will initialize the compression state to the first symbol of the stream; no bits are written and tableLog is unused.
+func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) {
+ c.bw = bw
+ c.stateTable = ct.stateTable
+
+ nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
+ im := int32((nbBitsOut << 16) - first.deltaNbBits)
+ lu := (im >> nbBitsOut) + first.deltaFindState // index of the starting state for this symbol
+ c.state = c.stateTable[lu]
+}
+
+// encode the output symbol provided and write it to the bitstream using addBits16NC, without flushing.
+func (c *cState) encode(symbolTT symbolTransform) {
+ nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 // number of low state bits to emit
+ dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
+ c.bw.addBits16NC(c.state, uint8(nbBitsOut))
+ c.state = c.stateTable[dstState]
+}
+
+// encodeZero is like encode, but writes through addBits16ZeroNC, which returns early when 0 bits are emitted.
+func (c *cState) encodeZero(symbolTT symbolTransform) {
+ nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 // number of low state bits to emit
+ dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState
+ c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut))
+ c.state = c.stateTable[dstState]
+}
+
+// flush writes the current state to the output using tableLog bits and flushes the remaining full bytes.
+func (c *cState) flush(tableLog uint8) {
+ c.bw.flush32() // make room before adding up to tableLog bits
+ c.bw.addBits16NC(c.state, tableLog)
+ c.bw.flush()
+}
+
+// compress is the main compression loop that will encode the input from the last byte to the first; src needs 3+ bytes.
+func (s *Scratch) compress(src []byte) error {
+ if len(src) <= 2 {
+ return errors.New("compress: src too small")
+ }
+ tt := s.ct.symbolTT[:256]
+ s.bw.reset(s.Out) // output is appended after whatever s.Out already holds
+
+ // Our two states each encodes every second byte.
+ // Last byte encoded (first byte decoded) will always be encoded by c1.
+ var c1, c2 cState
+
+ // Encode so remaining size is divisible by 4.
+ ip := len(src)
+ if ip&1 == 1 {
+ c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
+ c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
+ c1.encodeZero(tt[src[ip-3]])
+ ip -= 3 // odd length: three bytes consumed, ip is now even
+ } else {
+ c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]])
+ c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]])
+ ip -= 2
+ }
+ if ip&2 != 0 { // ip is even but not a multiple of 4: encode two more bytes
+ c2.encodeZero(tt[src[ip-1]])
+ c1.encodeZero(tt[src[ip-2]])
+ ip -= 2
+ }
+
+ // Main compression loop.
+ switch {
+ case !s.zeroBits && s.actualTableLog <= 8:
+ // We can encode 4 symbols without requiring a flush.
+ // We do not need to check if any output is 0 bits.
+ for ip >= 4 {
+ s.bw.flush32()
+ v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+ c2.encode(tt[v0])
+ c1.encode(tt[v1])
+ c2.encode(tt[v2])
+ c1.encode(tt[v3])
+ ip -= 4
+ }
+ case !s.zeroBits:
+ // We do not need to check if any output is 0 bits.
+ for ip >= 4 {
+ s.bw.flush32()
+ v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+ c2.encode(tt[v0])
+ c1.encode(tt[v1])
+ s.bw.flush32() // table log above 8: flush again after two symbols
+ c2.encode(tt[v2])
+ c1.encode(tt[v3])
+ ip -= 4
+ }
+ case s.actualTableLog <= 8:
+ // We can encode 4 symbols without requiring a flush
+ for ip >= 4 {
+ s.bw.flush32()
+ v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+ c2.encodeZero(tt[v0])
+ c1.encodeZero(tt[v1])
+ c2.encodeZero(tt[v2])
+ c1.encodeZero(tt[v3])
+ ip -= 4
+ }
+ default:
+ for ip >= 4 {
+ s.bw.flush32()
+ v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1]
+ c2.encodeZero(tt[v0])
+ c1.encodeZero(tt[v1])
+ s.bw.flush32() // table log above 8: flush again after two symbols
+ c2.encodeZero(tt[v2])
+ c1.encodeZero(tt[v3])
+ ip -= 4
+ }
+ }
+
+ // Flush final state.
+ // Used to initialize state when decoding.
+ c2.flush(s.actualTableLog)
+ c1.flush(s.actualTableLog)
+
+ return s.bw.close()
+}
+
+// writeCount will write the normalized histogram count (s.norm) to header, storing the result in s.Out.
+// This is read back by readNCount.
+func (s *Scratch) writeCount() error {
+ var (
+ tableLog = s.actualTableLog
+ tableSize = 1 << tableLog
+ previous0 bool // true when the previous symbol had a normalized count of 0
+ charnum uint16 // index of the next symbol to write
+
+ maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
+
+ // Write Table Size
+ bitStream = uint32(tableLog - minTablelog)
+ bitCount = uint(4)
+ remaining = int16(tableSize + 1) /* +1 for extra accuracy */
+ threshold = int16(tableSize)
+ nbBits = uint(tableLog + 1)
+ )
+ if cap(s.Out) < maxHeaderSize {
+ s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize)
+ }
+ outP := uint(0)
+ out := s.Out[:maxHeaderSize] // re-slice within capacity; trimmed to the bytes used at the end
+
+ // stops at 1
+ for remaining > 1 {
+ if previous0 {
+ start := charnum
+ for s.norm[charnum] == 0 { // skip over the run of zero-count symbols
+ charnum++
+ }
+ for charnum >= start+24 { // every 24 zeros are written as 16 set bits
+ start += 24
+ bitStream += uint32(0xFFFF) << bitCount
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += 2
+ bitStream >>= 16
+ }
+ for charnum >= start+3 { // every 3 zeros are written as the 2-bit value 3
+ start += 3
+ bitStream += 3 << bitCount
+ bitCount += 2
+ }
+ bitStream += uint32(charnum-start) << bitCount // remaining 0..2 zeros in 2 bits
+ bitCount += 2
+ if bitCount > 16 {
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += 2
+ bitStream >>= 16
+ bitCount -= 16
+ }
+ }
+
+ count := s.norm[charnum]
+ charnum++
+ max := (2*threshold - 1) - remaining
+ if count < 0 { // a negative count still uses up that many table slots
+ remaining += count
+ } else {
+ remaining -= count
+ }
+ count++ // +1 for extra accuracy
+ if count >= threshold {
+ count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
+ }
+ bitStream += uint32(count) << bitCount
+ bitCount += nbBits
+ if count < max { // small values are stored with one bit less
+ bitCount--
+ }
+
+ previous0 = count == 1 // count was incremented above, so 1 means the normalized count was 0
+ if remaining < 1 {
+ return errors.New("internal error: remaining<1")
+ }
+ for remaining < threshold {
+ nbBits--
+ threshold >>= 1
+ }
+
+ if bitCount > 16 {
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += 2
+ bitStream >>= 16
+ bitCount -= 16
+ }
+ }
+
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += (bitCount + 7) / 8 // count only the bytes that hold pending header bits
+
+ if charnum > s.symbolLen {
+ return errors.New("internal error: charnum > s.symbolLen")
+ }
+ s.Out = out[:outP]
+ return nil
+}
+
+// symbolTransform contains the state transform for a symbol, as filled in by buildCTable.
+type symbolTransform struct {
+ deltaFindState int32 // added to the shifted state to index the state table
+ deltaNbBits uint32 // added to the state; the upper 16 bits of the sum give the bits to emit
+}
+
+// String returns the values as a human readable string (deltaNbBits in hex, deltaFindState in decimal).
+func (s symbolTransform) String() string {
+ return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState)
+}
+
+// cTable contains tables used for compression; they are sized by allocCtable and filled by buildCTable.
+type cTable struct {
+ tableSymbol []byte // symbol assigned to each table position
+ stateTable []uint16 // next state values, grouped by symbol
+ symbolTT []symbolTransform // per-symbol transform, indexed by byte value
+}
+
+// allocCtable will allocate tables needed for compression.
+// If existing tables are big enough, they are simply re-used (their old contents are not cleared).
+func (s *Scratch) allocCtable() {
+ tableSize := 1 << s.actualTableLog
+ // get tableSymbol that is big enough.
+ if cap(s.ct.tableSymbol) < tableSize {
+ s.ct.tableSymbol = make([]byte, tableSize)
+ }
+ s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
+
+ ctSize := tableSize
+ if cap(s.ct.stateTable) < ctSize {
+ s.ct.stateTable = make([]uint16, ctSize)
+ }
+ s.ct.stateTable = s.ct.stateTable[:ctSize]
+
+ if cap(s.ct.symbolTT) < 256 { // always one entry per possible byte value
+ s.ct.symbolTT = make([]symbolTransform, 256)
+ }
+ s.ct.symbolTT = s.ct.symbolTT[:256]
+}
+
+// buildCTable will populate the compression table from s.norm so it is ready to be used; it also sets s.zeroBits.
+func (s *Scratch) buildCTable() error {
+ tableSize := uint32(1 << s.actualTableLog)
+ highThreshold := tableSize - 1 // low probability symbols are placed from the top of the table downwards
+ var cumul [maxSymbolValue + 2]int16
+
+ s.allocCtable()
+ tableSymbol := s.ct.tableSymbol[:tableSize]
+ // symbol start positions
+ {
+ cumul[0] = 0
+ for ui, v := range s.norm[:s.symbolLen-1] {
+ u := byte(ui) // one less than reference
+ if v == -1 {
+ // Low proba symbol
+ cumul[u+1] = cumul[u] + 1
+ tableSymbol[highThreshold] = u
+ highThreshold--
+ } else {
+ cumul[u+1] = cumul[u] + v
+ }
+ }
+ // Encode last symbol separately to avoid overflowing u
+ u := int(s.symbolLen - 1)
+ v := s.norm[s.symbolLen-1]
+ if v == -1 {
+ // Low proba symbol
+ cumul[u+1] = cumul[u] + 1
+ tableSymbol[highThreshold] = byte(u)
+ highThreshold--
+ } else {
+ cumul[u+1] = cumul[u] + v
+ }
+ if uint32(cumul[s.symbolLen]) != tableSize { // the normalized counts must fill the table exactly
+ return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
+ }
+ cumul[s.symbolLen] = int16(tableSize) + 1
+ }
+ // Spread symbols
+ s.zeroBits = false
+ {
+ step := tableStep(tableSize)
+ tableMask := tableSize - 1
+ var position uint32
+ // if any symbol > largeLimit, we may have 0 bits output.
+ largeLimit := int16(1 << (s.actualTableLog - 1))
+ for ui, v := range s.norm[:s.symbolLen] {
+ symbol := byte(ui)
+ if v > largeLimit {
+ s.zeroBits = true
+ }
+ for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { // never runs for counts of 0 or -1
+ tableSymbol[position] = symbol
+ position = (position + step) & tableMask
+ for position > highThreshold {
+ position = (position + step) & tableMask
+ } /* Low proba area */
+ }
+ }
+
+ // Check if we have gone through all positions
+ if position != 0 {
+ return errors.New("position!=0")
+ }
+ }
+
+ // Build table
+ table := s.ct.stateTable
+ {
+ tsi := int(tableSize)
+ for u, v := range tableSymbol {
+ // TableU16 : sorted by symbol order; gives next state value
+ table[cumul[v]] = uint16(tsi + u)
+ cumul[v]++ // advance to the next free slot for this symbol
+ }
+ }
+
+ // Build Symbol Transformation Table
+ {
+ total := int16(0)
+ symbolTT := s.ct.symbolTT[:s.symbolLen]
+ tableLog := s.actualTableLog
+ tl := (uint32(tableLog) << 16) - (1 << tableLog)
+ for i, v := range s.norm[:s.symbolLen] {
+ switch v {
+ case 0: // symbol not present: its transform entry is left untouched
+ case -1, 1:
+ symbolTT[i].deltaNbBits = tl
+ symbolTT[i].deltaFindState = int32(total - 1)
+ total++
+ default:
+ maxBitsOut := uint32(tableLog) - highBits(uint32(v-1))
+ minStatePlus := uint32(v) << maxBitsOut
+ symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
+ symbolTT[i].deltaFindState = int32(total - v)
+ total += v
+ }
+ }
+ if total != int16(tableSize) {
+ return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
+ }
+ }
+ return nil
+}
+
+// countSimple will create a simple histogram in s.count by adding to the existing counts, and set s.symbolLen.
+// Returns the biggest count.
+// Does not update s.clearCount.
+func (s *Scratch) countSimple(in []byte) (max int) {
+ for _, v := range in {
+ s.count[v]++
+ }
+ m := uint32(0)
+ for i, v := range s.count[:] {
+ if v > m {
+ m = v
+ }
+ if v > 0 {
+ s.symbolLen = uint16(i) + 1 // ends up as the highest used symbol value plus one
+ }
+ }
+ return int(m)
+}
+
+// minTableLog provides the minimum logSize to safely represent a distribution: the smaller of the two bounds below.
+func (s *Scratch) minTableLog() uint8 {
+ minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1 // bound derived from the input size
+ minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2 // bound derived from the number of symbols
+ if minBitsSrc < minBitsSymbols {
+ return uint8(minBitsSrc)
+ }
+ return uint8(minBitsSymbols)
+}
+
+// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog, starting from the requested s.TableLog.
+func (s *Scratch) optimalTableLog() {
+ tableLog := s.TableLog
+ minBits := s.minTableLog()
+ maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2
+ if maxBitsSrc < tableLog {
+ // Accuracy can be reduced
+ tableLog = maxBitsSrc
+ }
+ if minBits > tableLog {
+ tableLog = minBits
+ }
+ // Need a minimum to safely represent all symbol values
+ if tableLog < minTablelog {
+ tableLog = minTablelog
+ }
+ if tableLog > maxTableLog { // the package-wide maximum is applied last and therefore always wins
+ tableLog = maxTableLog
+ }
+ s.actualTableLog = tableLog
+}
+
+var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} // round-up thresholds, indexed by proba (< 8) in normalizeCount
+
+// normalizeCount will normalize the count of the symbols (s.count into s.norm) so
+// the total is equal to the table size. It falls back to normalizeCount2 in a corner case.
+func (s *Scratch) normalizeCount() error {
+ var (
+ tableLog = s.actualTableLog
+ scale = 62 - uint64(tableLog)
+ step = (1 << 62) / uint64(s.br.remain())
+ vStep = uint64(1) << (scale - 20)
+ stillToDistribute = int16(1 << tableLog) // table slots not yet handed out
+ largest int // index of the symbol with the largest normalized count
+ largestP int16 // that largest normalized count
+ lowThreshold = (uint32)(s.br.remain() >> tableLog)
+ )
+
+ for i, cnt := range s.count[:s.symbolLen] {
+ // already handled
+ // if (count[s] == s.length) return 0; /* rle special case */
+
+ if cnt == 0 {
+ s.norm[i] = 0
+ continue
+ }
+ if cnt <= lowThreshold {
+ s.norm[i] = -1 // low probability marker; it still takes one table slot
+ stillToDistribute--
+ } else {
+ proba := (int16)((uint64(cnt) * step) >> scale)
+ if proba < 8 {
+ restToBeat := vStep * uint64(rtbTable[proba])
+ v := uint64(cnt)*step - (uint64(proba) << scale) // fractional part lost by the shift above
+ if v > restToBeat {
+ proba++
+ }
+ }
+ if proba > largestP {
+ largestP = proba
+ largest = i
+ }
+ s.norm[i] = proba
+ stillToDistribute -= proba
+ }
+ }
+
+ if -stillToDistribute >= (s.norm[largest] >> 1) {
+ // corner case, need another normalization method
+ return s.normalizeCount2()
+ }
+ s.norm[largest] += stillToDistribute // the largest symbol absorbs the rounding difference
+ return nil
+}
+
+// Secondary normalization method. It rewrites s.norm from s.count.
+// To be used when primary method fails.
+func (s *Scratch) normalizeCount2() error {
+ const notYetAssigned = -2 // placeholder for symbols that get their weight in the final pass
+ var (
+ distributed uint32 // table slots handed out so far
+ total = uint32(s.br.remain()) // input count still to be covered by unassigned symbols
+ tableLog = s.actualTableLog
+ lowThreshold = total >> tableLog
+ lowOne = (total * 3) >> (tableLog + 1)
+ )
+ for i, cnt := range s.count[:s.symbolLen] {
+ if cnt == 0 {
+ s.norm[i] = 0
+ continue
+ }
+ if cnt <= lowThreshold {
+ s.norm[i] = -1
+ distributed++
+ total -= cnt
+ continue
+ }
+ if cnt <= lowOne {
+ s.norm[i] = 1
+ distributed++
+ total -= cnt
+ continue
+ }
+ s.norm[i] = notYetAssigned
+ }
+ toDistribute := (1 << tableLog) - distributed
+
+ if (total / toDistribute) > lowOne {
+ // risk of rounding to zero
+ lowOne = (total * 3) / (toDistribute * 2)
+ for i, cnt := range s.count[:s.symbolLen] {
+ if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
+ s.norm[i] = 1
+ distributed++
+ total -= cnt
+ continue
+ }
+ }
+ toDistribute = (1 << tableLog) - distributed
+ }
+ if distributed == uint32(s.symbolLen)+1 {
+ // all values are pretty poor;
+ // probably incompressible data (should have already been detected);
+ // find max, then give all remaining points to max
+ var maxV int
+ var maxC uint32
+ for i, cnt := range s.count[:s.symbolLen] {
+ if cnt > maxC {
+ maxV = i
+ maxC = cnt
+ }
+ }
+ s.norm[maxV] += int16(toDistribute)
+ return nil
+ }
+
+ if total == 0 {
+ // all of the symbols were low enough for the lowOne or lowThreshold
+ for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { // cycle over the symbols until all slots are given out
+ if s.norm[i] > 0 {
+ toDistribute--
+ s.norm[i]++
+ }
+ }
+ return nil
+ }
+
+ var (
+ vStepLog = 62 - uint64(tableLog)
+ mid = uint64((1 << (vStepLog - 1)) - 1)
+ rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
+ tmpTotal = mid
+ )
+ for i, cnt := range s.count[:s.symbolLen] {
+ if s.norm[i] == notYetAssigned {
+ var (
+ end = tmpTotal + uint64(cnt)*rStep
+ sStart = uint32(tmpTotal >> vStepLog)
+ sEnd = uint32(end >> vStepLog)
+ weight = sEnd - sStart // slots covered by this symbol's share of the running total
+ )
+ if weight < 1 {
+ return errors.New("weight < 1")
+ }
+ s.norm[i] = int16(weight)
+ tmpTotal = end
+ }
+ }
+ return nil
+}
+
+// validateNorm validates the normalized histogram table and prints the table to stdout when validation fails.
+func (s *Scratch) validateNorm() (err error) {
+ var total int
+ for _, v := range s.norm[:s.symbolLen] {
+ if v >= 0 {
+ total += int(v)
+ } else {
+ total -= int(v) // negative entries count with their absolute value
+ }
+ }
+ defer func() { // runs on return; dumps the table only if an error is being returned
+ if err == nil {
+ return
+ }
+ fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
+ for i, v := range s.norm[:s.symbolLen] {
+ fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
+ }
+ }()
+ if total != (1 << s.actualTableLog) {
+ return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
+ }
+ for i, v := range s.count[s.symbolLen:] { // i is relative to s.symbolLen, not an absolute symbol value
+ if v != 0 {
+ return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
+ }
+ }
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go
new file mode 100644
index 0000000..926f5f1
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -0,0 +1,374 @@
+package fse
+
+import (
+ "errors"
+ "fmt"
+)
+
+const (
+ tablelogAbsoluteMax = 15 // readNCount rejects headers whose table log exceeds this
+)
+
+// Decompress decodes a block of data and returns the decoded bytes (s.Out).
+// You can provide a scratch buffer to avoid allocations.
+// If nil is provided a temporary one will be allocated.
+// It is possible, but by no means guaranteed, that corrupt data will
+// return an error.
+// It is up to the caller to verify integrity of the returned data.
+// Use a predefined Scratch to set maximum acceptable output size (DecompressLimit).
+func Decompress(b []byte, s *Scratch) ([]byte, error) {
+ s, err := s.prepare(b) // allocates a Scratch when s is nil and points s.br at b
+ if err != nil {
+ return nil, err
+ }
+ s.Out = s.Out[:0] // keep the capacity of a reused output buffer
+ err = s.readNCount() // parse the normalized counts from the block header
+ if err != nil {
+ return nil, err
+ }
+ err = s.buildDtable() // turn the counts into the decoding table
+ if err != nil {
+ return nil, err
+ }
+ err = s.decompress() // decode the remaining input into s.Out
+ if err != nil {
+ return nil, err
+ }
+
+ return s.Out, nil
+}
+
+// readNCount parses the table header from s.br into s.norm, s.symbolLen and s.actualTableLog so decoding tables can be constructed.
+func (s *Scratch) readNCount() error {
+ var (
+ charnum uint16
+ previous0 bool
+ b = &s.br
+ )
+ iend := b.remain()
+ if iend < 4 { // the header is read through 32-bit loads (b.Uint32), so fewer than 4 bytes is rejected
+ return errors.New("input too small")
+ }
+ bitStream := b.Uint32()
+ nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
+ if nbBits > tablelogAbsoluteMax {
+ return errors.New("tableLog too large")
+ }
+ bitStream >>= 4 // drop the 4 tableLog bits just read
+ bitCount := uint(4)
+
+ s.actualTableLog = uint8(nbBits)
+ remaining := int32((1 << nbBits) + 1) // counts down as symbol counts are read; the loop ends at 1
+ threshold := int32(1 << nbBits)
+ gotTotal := int32(0) // running sum of the absolute counts, checked against the table size at the end
+ nbBits++
+
+ for remaining > 1 {
+ if previous0 { // the previous count was 0: a run length of further zero-count symbols follows
+ n0 := charnum
+ for (bitStream & 0xFFFF) == 0xFFFF {
+ n0 += 24 // each all-ones 16-bit group extends the run by 24 symbols
+ if b.off < iend-5 {
+ b.advance(2)
+ bitStream = b.Uint32() >> bitCount
+ } else {
+ bitStream >>= 16
+ bitCount += 16
+ }
+ }
+ for (bitStream & 3) == 3 {
+ n0 += 3 // each 2-bit value of 3 extends the run by 3 symbols
+ bitStream >>= 2
+ bitCount += 2
+ }
+ n0 += uint16(bitStream & 3) // the final 2-bit value (0-2) ends the run
+ bitCount += 2
+ if n0 > maxSymbolValue {
+ return errors.New("maxSymbolValue too small")
+ }
+ for charnum < n0 {
+ s.norm[charnum&0xff] = 0 // &0xff keeps the index inside the 256-entry norm array
+ charnum++
+ }
+
+ if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 {
+ b.advance(bitCount >> 3)
+ bitCount &= 7
+ bitStream = b.Uint32() >> bitCount
+ } else {
+ bitStream >>= 2
+ }
+ }
+
+ max := (2*(threshold) - 1) - (remaining)
+ var count int32
+
+ if (int32(bitStream) & (threshold - 1)) < max { // small values are stored in nbBits-1 bits
+ count = int32(bitStream) & (threshold - 1)
+ bitCount += nbBits - 1
+ } else { // otherwise the value uses the full nbBits bits
+ count = int32(bitStream) & (2*threshold - 1)
+ if count >= threshold {
+ count -= max
+ }
+ bitCount += nbBits
+ }
+
+ count-- // extra accuracy
+ if count < 0 {
+ // -1 means +1
+ remaining += count
+ gotTotal -= count
+ } else {
+ remaining -= count
+ gotTotal += count
+ }
+ s.norm[charnum&0xff] = int16(count)
+ charnum++
+ previous0 = count == 0
+ for remaining < threshold { // shrink the field width as the remaining total drops
+ nbBits--
+ threshold >>= 1
+ }
+ if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 {
+ b.advance(bitCount >> 3)
+ bitCount &= 7
+ } else { // near the end of input: pin the read position to the last 4 bytes and adjust bitCount to match
+ bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
+ b.off = len(b.b) - 4
+ }
+ bitStream = b.Uint32() >> (bitCount & 31)
+ }
+ s.symbolLen = charnum
+
+ if s.symbolLen <= 1 {
+ return fmt.Errorf("symbolLen (%d) too small", s.symbolLen)
+ }
+ if s.symbolLen > maxSymbolValue+1 {
+ return fmt.Errorf("symbolLen (%d) too big", s.symbolLen)
+ }
+ if remaining != 1 {
+ return fmt.Errorf("corruption detected (remaining %d != 1)", remaining)
+ }
+ if bitCount > 32 {
+ return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount)
+ }
+ if gotTotal != 1<<s.actualTableLog {
+ return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
+ }
+ b.advance((bitCount + 7) >> 3) // skip the consumed header bits, rounded up to whole bytes
+ return nil
+}
+
+// decSymbol contains information about a state entry,
+// including the state offset base, the output symbol and
+// the number of bits to read for the low part of the destination state.
+type decSymbol struct {
+ newState uint16 // base of the next state; the bits read from the stream are added to it
+ symbol uint8 // byte emitted for this state
+ nbBits uint8 // number of bits to read when leaving this state
+}
+
+// allocDtable sizes decTable to 1<<actualTableLog entries and the two ct scratch tables to 256 entries, reallocating only when capacity is too small.
+func (s *Scratch) allocDtable() {
+ tableSize := 1 << s.actualTableLog
+ if cap(s.decTable) < tableSize {
+ s.decTable = make([]decSymbol, tableSize)
+ }
+ s.decTable = s.decTable[:tableSize]
+
+ if cap(s.ct.tableSymbol) < 256 { // used by decompress as its 256-byte output staging buffer
+ s.ct.tableSymbol = make([]byte, 256)
+ }
+ s.ct.tableSymbol = s.ct.tableSymbol[:256]
+
+ if cap(s.ct.stateTable) < 256 { // used by buildDtable as its per-symbol next-state scratch
+ s.ct.stateTable = make([]uint16, 256)
+ }
+ s.ct.stateTable = s.ct.stateTable[:256]
+}
+
+// buildDtable builds s.decTable from s.norm[:s.symbolLen] and sets s.zeroBits; it returns an error when the counts do not form a valid table.
+func (s *Scratch) buildDtable() error {
+ tableSize := uint32(1 << s.actualTableLog)
+ highThreshold := tableSize - 1 // cells above this index are reserved for -1 (low probability) symbols
+ s.allocDtable()
+ symbolNext := s.ct.stateTable[:256] // per-symbol scratch borrowed from the compression table
+
+ // Init, lay down lowprob symbols
+ s.zeroBits = false
+ {
+ largeLimit := int16(1 << (s.actualTableLog - 1)) // half the table size
+ for i, v := range s.norm[:s.symbolLen] {
+ if v == -1 {
+ s.decTable[highThreshold].symbol = uint8(i) // place the symbol at the current top free cell
+ highThreshold--
+ symbolNext[i] = 1
+ } else {
+ if v >= largeLimit {
+ s.zeroBits = true // makes decompress use next() instead of nextFast()
+ }
+ symbolNext[i] = uint16(v)
+ }
+ }
+ }
+ // Spread symbols
+ {
+ tableMask := tableSize - 1
+ step := tableStep(tableSize)
+ position := uint32(0)
+ for ss, v := range s.norm[:s.symbolLen] {
+ for i := 0; i < int(v); i++ { // runs zero times for counts of 0 and -1
+ s.decTable[position].symbol = uint8(ss)
+ position = (position + step) & tableMask
+ for position > highThreshold {
+ // lowprob area
+ position = (position + step) & tableMask
+ }
+ }
+ }
+ if position != 0 {
+ // position must reach all cells once, otherwise normalizedCounter is incorrect
+ return errors.New("corrupted input (position != 0)")
+ }
+ }
+
+ // Build Decoding table
+ {
+ tableSize := uint16(1 << s.actualTableLog)
+ for u, v := range s.decTable {
+ symbol := v.symbol
+ nextState := symbolNext[symbol]
+ symbolNext[symbol] = nextState + 1 // each cell of a symbol takes the next consecutive value
+ nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
+ s.decTable[u].nbBits = nBits
+ newState := (nextState << nBits) - tableSize
+ if newState >= tableSize {
+ return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
+ }
+ if newState == uint16(u) && nBits == 0 { // rejects a state that maps to itself without reading any bits
+ // Seems weird that this is possible with nbits > 0.
+ return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
+ }
+ s.decTable[u].newState = newState
+ }
+ }
+ return nil
+}
+
+// decompress decodes the bitstream left in s.br into s.Out using two interleaved decoder states.
+// It returns an error when DecompressLimit is reached or when br.close reports an error.
+func (s *Scratch) decompress() error {
+ br := &s.bits
+ br.init(s.br.unread())
+
+ var s1, s2 decoder
+ // Initialize and decode first state and symbol.
+ s1.init(br, s.decTable, s.actualTableLog)
+ s2.init(br, s.decTable, s.actualTableLog)
+
+ // Use temp table to avoid bound checks/append penalty.
+ var tmp = s.ct.tableSymbol[:256]
+ var off uint8 // wraps to 0 after 256 bytes, which is the signal to flush tmp
+
+ // Main part
+ if !s.zeroBits { // buildDtable saw no count >= half the table, so the nextFast path is used
+ for br.off >= 8 { // NOTE(review): br.off presumably counts unread input bytes; confirm in the fse bitReader
+ br.fillFast()
+ tmp[off+0] = s1.nextFast()
+ tmp[off+1] = s2.nextFast()
+ br.fillFast()
+ tmp[off+2] = s1.nextFast()
+ tmp[off+3] = s2.nextFast()
+ off += 4
+ // When off is 0, we have overflowed and should write.
+ if off == 0 {
+ s.Out = append(s.Out, tmp...)
+ if len(s.Out) >= s.DecompressLimit { // the limit is only checked once per 256 bytes here
+ return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
+ }
+ }
+ }
+ } else {
+ for br.off >= 8 { // same loop as above, using next() instead of nextFast()
+ br.fillFast()
+ tmp[off+0] = s1.next()
+ tmp[off+1] = s2.next()
+ br.fillFast()
+ tmp[off+2] = s1.next()
+ tmp[off+3] = s2.next()
+ off += 4
+ if off == 0 {
+ s.Out = append(s.Out, tmp...)
+ // off wrapped to 0, so tmp was full and has just been written; now enforce the limit.
+ if len(s.Out) >= s.DecompressLimit {
+ return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
+ }
+ }
+ }
+ }
+ s.Out = append(s.Out, tmp[:off]...) // flush whatever is left in the staging buffer
+
+ // Final bits, a bit more expensive check
+ for {
+ if s1.finished() {
+ s.Out = append(s.Out, s1.final(), s2.final())
+ break
+ }
+ br.fill()
+ s.Out = append(s.Out, s1.next())
+ if s2.finished() {
+ s.Out = append(s.Out, s2.final(), s1.final()) // s2 ends first here, so its symbol is emitted before s1's
+ break
+ }
+ s.Out = append(s.Out, s2.next())
+ if len(s.Out) >= s.DecompressLimit {
+ return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit)
+ }
+ }
+ return br.close()
+}
+
+// decoder keeps track of the current state and updates it from the bitstream.
+type decoder struct {
+ state uint16 // index of the current entry in dt
+ br *bitReader // bit source, shared between decoders in decompress
+ dt []decSymbol // decoding table
+}
+
+// init will initialize the decoder and read the first state (tableLog bits) from the stream.
+func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) {
+ d.dt = dt
+ d.br = in
+ d.state = in.getBits(tableLog) // the initial state is stored as a raw tableLog-bit value
+}
+
+// next returns the symbol of the current state and advances to the next state.
+// At least tablelog bits must be available in the bit reader.
+func (d *decoder) next() uint8 {
+ n := &d.dt[d.state]
+ lowBits := d.br.getBits(n.nbBits)
+ d.state = n.newState + lowBits // next state = base from the table + bits from the stream
+ return n.symbol
+}
+
+// finished returns true if all bits have been read from the bitstream
+// and the current state would need to read more bits to advance.
+func (d *decoder) finished() bool {
+ return d.br.finished() && d.dt[d.state].nbBits > 0
+}
+
+// final returns the current state's symbol without reading bits or changing state.
+func (d *decoder) final() uint8 {
+ return d.dt[d.state].symbol
+}
+
+// nextFast returns the symbol of the current state and advances to the next state using getBitsFast.
+// This can only be used if no symbols are 0 bits.
+// At least tablelog bits must be available in the bit reader.
+func (d *decoder) nextFast() uint8 {
+ n := d.dt[d.state] // copies the entry by value, where next() takes a pointer
+ lowBits := d.br.getBitsFast(n.nbBits)
+ d.state = n.newState + lowBits
+ return n.symbol
+}
diff --git a/vendor/github.com/klauspost/compress/fse/fse.go b/vendor/github.com/klauspost/compress/fse/fse.go
new file mode 100644
index 0000000..535cbad
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/fse/fse.go
@@ -0,0 +1,144 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+// Package fse provides Finite State Entropy encoding and decoding.
+//
+// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
+// for byte blocks as implemented in zstd.
+//
+// See https://github.com/klauspost/compress/tree/master/fse for more information.
+package fse
+
+import (
+ "errors"
+ "fmt"
+ "math/bits"
+)
+
+const (
+ /*!MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+ maxMemoryUsage = 14
+ defaultMemoryUsage = 13
+
+ maxTableLog = maxMemoryUsage - 2 // 12; prepare rejects a larger Scratch.TableLog
+ maxTablesize = 1 << maxTableLog // 4096 table entries
+ defaultTablelog = defaultMemoryUsage - 2 // 11; used by prepare when Scratch.TableLog is 0
+ minTablelog = 5 // added to the 4-bit header field in readNCount
+ maxSymbolValue = 255 // the count and norm arrays hold maxSymbolValue+1 entries
+)
+
+var (
+ // ErrIncompressible is returned when the input is judged to be too hard to compress.
+ ErrIncompressible = errors.New("input is not compressible")
+
+ // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
+ ErrUseRLE = errors.New("input is single value repeated")
+)
+
+// Scratch provides temporary storage for compression and decompression.
+type Scratch struct {
+ // Private
+ count [maxSymbolValue + 1]uint32 // histogram; exposed through Histogram()
+ norm [maxSymbolValue + 1]int16 // normalized counts; -1 marks a low probability symbol
+ br byteReader
+ bits bitReader
+ bw bitWriter
+ ct cTable // Compression tables.
+ decTable []decSymbol // Decompression table.
+ maxCount int // count of the most probable symbol
+
+ // Per block parameters.
+ // These can be used to override compression parameters of the block.
+ // Do not touch, unless you know what you are doing.
+
+ // Out is output buffer.
+ // If the scratch is re-used before the caller is done processing the output,
+ // set this field to nil.
+ // Otherwise the output buffer will be re-used for next Compression/Decompression step
+ // and allocation will be avoided.
+ Out []byte
+
+ // DecompressLimit limits the maximum decoded size acceptable.
+ // If > 0 decompression will stop when approximately this many bytes
+ // have been decoded.
+ // If 0, prepare sets it to (2<<30)-1, i.e. just under 2GB.
+ DecompressLimit int
+
+ symbolLen uint16 // Length of active part of the symbol table.
+ actualTableLog uint8 // Selected tablelog.
+ zeroBits bool // set by buildDtable when a normalized count is >= half the table size; decompress then avoids nextFast.
+ clearCount bool // set by HistogramFinished when maxCount != 0; prepare zeroes count when this is set and maxCount is 0.
+
+ // MaxSymbolValue will override the maximum symbol value of the next block.
+ MaxSymbolValue uint8
+
+ // TableLog will attempt to override the tablelog for the next block.
+ TableLog uint8
+}
+
+// Histogram returns the internal histogram so it can be populated, skipping that step in the compression.
+// It otherwise allows inspecting the histogram when compression is done.
+// To indicate that you have populated the histogram call HistogramFinished
+// with the value of the highest populated symbol, as well as the number of entries
+// in the most populated entry. These are accepted at face value.
+// The returned slice will always be length 256 and aliases the Scratch's own storage.
+func (s *Scratch) Histogram() []uint32 {
+ return s.count[:]
+}
+
+// HistogramFinished can be called to indicate that the histogram has been populated.
+// maxSymbol is the index of the highest set symbol of the next data segment.
+// maxCount is the number of entries in the most populated entry.
+// These are accepted at face value.
+func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
+ s.maxCount = maxCount
+ s.symbolLen = uint16(maxSymbol) + 1 // symbolLen is a length, so one past the highest symbol
+ s.clearCount = maxCount != 0
+}
+
+// prepare fills in defaults, allocates s when nil, validates TableLog and points s.br at in; it is used by both compression and decompression.
+func (s *Scratch) prepare(in []byte) (*Scratch, error) {
+ if s == nil {
+ s = &Scratch{}
+ }
+ if s.MaxSymbolValue == 0 { // 0 means "not set"
+ s.MaxSymbolValue = 255
+ }
+ if s.TableLog == 0 { // 0 means "not set"
+ s.TableLog = defaultTablelog
+ }
+ if s.TableLog > maxTableLog {
+ return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
+ }
+ if cap(s.Out) == 0 {
+ s.Out = make([]byte, 0, len(in)) // initial capacity equals the input size
+ }
+ if s.clearCount && s.maxCount == 0 { // a previously supplied histogram is reset once maxCount is back to 0
+ for i := range s.count {
+ s.count[i] = 0
+ }
+ s.clearCount = false
+ }
+ s.br.init(in)
+ if s.DecompressLimit == 0 {
+ // Max size 2GB.
+ s.DecompressLimit = (2 << 30) - 1
+ }
+
+ return s, nil
+}
+
+// tableStep returns the stride used to spread symbols over a table of tableSize cells: tableSize/2 + tableSize/8 + 3.
+func tableStep(tableSize uint32) uint32 {
+ return (tableSize >> 1) + (tableSize >> 3) + 3
+}
+
+func highBits(val uint32) (n uint32) {
+ return uint32(bits.Len32(val) - 1) // zero-based index of the highest set bit of val
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/.gitignore b/vendor/github.com/klauspost/compress/huff0/.gitignore
new file mode 100644
index 0000000..b3d2629
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/.gitignore
@@ -0,0 +1 @@
+/huff0-fuzz.zip
diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md
new file mode 100644
index 0000000..8b6e5c6
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/README.md
@@ -0,0 +1,89 @@
+# Huff0 entropy compression
+
+This package provides Huff0 encoding and decoding as used in zstd.
+
+[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders),
+a Huffman codec designed for modern CPUs, featuring OoO (Out of Order) operations on multiple ALUs
+(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds.
+
+This can be used for compressing input with a lot of similar input values to the smallest number of bytes.
+This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders do,
+but it can be used as a secondary step to compressors (like Snappy) that do not do entropy encoding.
+
+* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
+
+## News
+
+This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package.
+
+This ensures that most functionality is well tested.
+
+# Usage
+
+This package provides a low level interface that allows to compress single independent blocks.
+
+Each block is separate, and there are no built-in integrity checks.
+This means that the caller should keep track of block sizes and also do checksums if needed.
+
+Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and
+[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions.
+You must provide input and will receive the output and maybe an error.
+
+These error values can be returned:
+
+| Error | Description |
+|---------------------|-----------------------------------------------------------------------------|
+| `<nil>` | Everything ok, output is returned |
+| `ErrIncompressible` | Returned when input is judged to be too hard to compress |
+| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated |
+| `ErrTooBig` | Returned if the input block exceeds the maximum allowed size (128 KiB) |
+| `(error)` | An internal error occurred. |
+
+
+As can be seen above, some of these are errors that will be returned even under normal operation, so it is important to handle them.
+
+To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object
+that can be re-used for successive calls. Both compression and decompression accept a `Scratch` object, and the same
+object can be used for both.
+
+Be aware that when re-using a `Scratch` object the *output* buffer is also re-used, so if you are still using it
+you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output.
+
+The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding.
+
+## Tables and re-use
+
+Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results.
+
+The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy)
+that controls this behaviour. See the documentation for details. This can be altered between each block.
+
+Do however note that this information is *not* stored in the output block and it is up to the users of the package to
+record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called,
+based on the boolean reported back from the CompressXX call.
+
+If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the
+[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object.
+
+## Decompressing
+
+The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable).
+This will initialize the decoding tables.
+You can supply the complete block to `ReadTable` and it will return the data part of the block
+which can be given to the decompressor.
+
+Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
+or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
+
+For concurrently decompressing content with a fixed table a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested which will remain correct as long as the scratch is unchanged. The capacity of the provided slice indicates the expected output size.
+
+You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
+your input was likely corrupted.
+
+It is important to note that a successful decoding does *not* mean your output matches your original input.
+There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid.
+
+# Contributing
+
+Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
+changes will likely not be accepted. If in doubt open an issue before writing the PR.
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
new file mode 100644
index 0000000..a4979e8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -0,0 +1,329 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+)
+
+// bitReader reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReader struct {
+ in []byte // input buffer; released by close
+ off uint // next byte to read is at in[off - 1]
+ value uint64 // bit container; refills shift it left and add new bytes at the low end
+ bitsRead uint8 // number of bits of value already consumed; above 64 means the input was over-read
+}
+
+// init initializes and resets the bit reader; it fails when in is empty or its last byte is zero.
+func (b *bitReader) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in)) // reading starts at the end of the input
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64 // the container starts empty: all 64 bits count as consumed
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill() // the second call picks up bytes left over after a 4-byte read
+ }
+ b.bitsRead += 8 - uint8(highBit32(uint32(v))) // skip the end marker; presumably highBit32 is the index of the top set bit, confirm
+ return nil
+}
+
+// peekBitsFast returns the next n unread bits without consuming them; at least one bit must be requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) peekBitsFast(n uint8) uint16 {
+ const regMask = 64 - 1
+ v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) // drop consumed bits, then keep the top n
+ return v
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReader) fillFast() {
+ if b.bitsRead < 32 { // more than 32 unread bits remain, nothing to do
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) // little-endian load
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+func (b *bitReader) advance(n uint8) {
+ b.bitsRead += n // marks n bits as consumed; value itself is left untouched
+}
+
+// fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0 // a full 64 unread bits are now in value
+ b.off -= 8
+}
+
+// fill() refills the container like fillFast but also handles fewer than 4 bytes of remaining input.
+func (b *bitReader) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 { // 4 bytes or fewer remain: consume them all, one byte at a time
+ b.value = (b.value << 8) | uint64(b.in[b.off-1])
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all input bytes have been loaded and all 64 container bits consumed.
+func (b *bitReader) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close releases the input and returns io.ErrUnexpectedEOF if more bits were consumed than were available.
+func (b *bitReader) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 { // more bits were consumed than the container ever held
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+// bitReaderBytes reads a bitstream in reverse and keeps the next unread bits at the top of value.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderBytes struct {
+ in []byte // input buffer; released by close
+ off uint // next byte to read is at in[off - 1]
+ value uint64 // bit container; advance shifts consumed bits out of the top
+ bitsRead uint8 // number of bits consumed; above 64 means the input was over-read
+}
+
+// init initializes and resets the bit reader; it fails when in is empty or its last byte is zero.
+func (b *bitReaderBytes) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in)) // reading starts at the end of the input
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64 // the container starts empty: all 64 bits count as consumed
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill() // the second call picks up bytes left over after a 4-byte read
+ }
+ b.advance(8 - uint8(highBit32(uint32(v)))) // skip the end marker; presumably highBit32 is the index of the top set bit, confirm
+ return nil
+}
+
+// peekByteFast returns the top 8 bits of the container without consuming them.
+// There are no checks if the buffer is filled.
+func (b *bitReaderBytes) peekByteFast() uint8 {
+ got := uint8(b.value >> 56)
+ return got
+}
+
+func (b *bitReaderBytes) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63 // shift consumed bits out so the next unread bit is at the top
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderBytes) fillFast() {
+ if b.bitsRead < 32 { // more than 32 unread bits remain, nothing to do
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) // little-endian load
+ b.value |= uint64(low) << (b.bitsRead - 32) // place the new bits directly below the unread ones
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() assumes the bitReaderBytes is empty and there are at least 8 bytes to read.
+func (b *bitReaderBytes) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0 // a full 64 unread bits are now in value
+ b.off -= 8
+}
+
+// fill() refills the container like fillFast but also handles fewer than 4 bytes of remaining input.
+func (b *bitReaderBytes) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 { // 4 bytes or fewer remain: consume them all, one byte at a time
+ b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all input bytes have been loaded and all 64 container bits consumed.
+func (b *bitReaderBytes) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close releases the input and returns io.ErrUnexpectedEOF if more bits were consumed than were available.
+func (b *bitReaderBytes) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 { // more bits were consumed than the container ever held
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+// bitReaderShifted reads a bitstream in reverse and keeps the next unread bits at the top of value.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderShifted struct {
+ in []byte // input buffer; released by close
+ off uint // next byte to read is at in[off - 1]
+ value uint64 // bit container; advance shifts consumed bits out of the top
+ bitsRead uint8 // number of bits consumed; above 64 means the input was over-read
+}
+
+// init initializes and resets the bit reader; it fails when in is empty or its last byte is zero.
+func (b *bitReaderShifted) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in)) // reading starts at the end of the input
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64 // the container starts empty: all 64 bits count as consumed
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill() // the second call picks up bytes left over after a 4-byte read
+ }
+ b.advance(8 - uint8(highBit32(uint32(v)))) // skip the end marker; presumably highBit32 is the index of the top set bit, confirm
+ return nil
+}
+
+// peekBitsFast returns the top n bits of the container without consuming them; at least one bit must be requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
+ return uint16(b.value >> ((64 - n) & 63))
+}
+
+func (b *bitReaderShifted) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63 // shift consumed bits out so the next unread bit is at the top
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderShifted) fillFast() {
+ if b.bitsRead < 32 { // more than 32 unread bits remain, nothing to do
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) // little-endian load
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63) // place the new bits directly below the unread ones
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() assumes the bitReaderShifted is empty and there are at least 8 bytes to read.
+func (b *bitReaderShifted) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0 // a full 64 unread bits are now in value
+ b.off -= 8
+}
+
+// fill() refills the container like fillFast but also handles fewer than 4 bytes of remaining input.
+func (b *bitReaderShifted) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 { // 4 bytes or fewer remain: consume them all, one byte at a time
+ b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all input bytes have been loaded and all 64 container bits consumed.
+func (b *bitReaderShifted) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close releases the input and returns io.ErrUnexpectedEOF if more bits were consumed than were available.
+func (b *bitReaderShifted) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 { // more bits were consumed than the container ever held
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
new file mode 100644
index 0000000..6bce4e8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
@@ -0,0 +1,210 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+ bitContainer uint64 // pending bits not yet written to out; new bits are OR-ed in above the low nBits
+ nBits uint8 // number of valid bits currently held in bitContainer
+ out []byte // destination; flushed bytes are appended here
+}
+
+// bitMask16[n] has the low n bits set for n <= 16; it is sized 32 so that an index masked with &31 needs no bounds check.
+var bitMask16 = [32]uint16{
+ 0, 1, 3, 7, 0xF, 0x1F,
+ 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+ 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF} /* up to 16 bits */
+
+// addBits16NC appends the low `bits` bits of value (up to 16) without checking
+// for space, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+	masked := value & bitMask16[bits&31]
+	b.bitContainer |= uint64(masked) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// addBits16Clean adds up to 16 bits without masking or space checks: value must have no bits set above `bits`, and the caller must have flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+	shift := b.nBits & 63
+	b.nBits += bits
+	b.bitContainer |= uint64(value) << shift
+}
+
+// encSymbol will add up to 16 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
+ enc := ct[symbol]
+ b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
+ if false {
+ if enc.nBits == 0 {
+ panic("nbits 0")
+ }
+ }
+ b.nBits += enc.nBits
+}
+
+// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) {
+ encA := ct[av]
+ encB := ct[bv]
+ sh := b.nBits & 63
+ combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63))
+ b.bitContainer |= combined << sh
+ if false {
+ if encA.nBits == 0 {
+ panic("nbitsA 0")
+ }
+ if encB.nBits == 0 {
+ panic("nbitsB 0")
+ }
+ }
+ b.nBits += encA.nBits + encB.nBits
+}
+
+// addBits16ZeroNC will add up to 16 bits.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+// This is fastest if bits can be zero.
+func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) {
+	if bits == 0 {
+		return
+	}
+	// Shifting up and back down clears everything above the low `bits` bits.
+	pad := (16 - bits) & 15
+	value = (value << pad) >> pad
+	b.bitContainer |= uint64(value) << (b.nBits & 63)
+	b.nBits += bits
+}
+
+// flush will append all pending full bytes to b.out, leaving at most 7 bits in the container.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing. Panics if nBits is 72 or more.
+func (b *bitWriter) flush() {
+ v := b.nBits >> 3 // number of complete bytes pending
+ switch v {
+ case 0:
+ return
+ case 1:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ )
+ b.bitContainer >>= 1 << 3
+ case 2:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ )
+ b.bitContainer >>= 2 << 3
+ case 3:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ )
+ b.bitContainer >>= 3 << 3
+ case 4:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ )
+ b.bitContainer >>= 4 << 3
+ case 5:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ )
+ b.bitContainer >>= 5 << 3
+ case 6:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ )
+ b.bitContainer >>= 6 << 3
+ case 7:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ )
+ b.bitContainer >>= 7 << 3
+ case 8:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ byte(b.bitContainer>>56),
+ )
+ b.bitContainer = 0
+ b.nBits = 0
+ return
+ default:
+ panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+ }
+ b.nBits &= 7 // only the bits of the trailing partial byte remain
+}
+
+// flush32 will flush out, so there are at least 32 bits available for writing.
+// It is a no-op while fewer than 32 bits are pending.
+func (b *bitWriter) flush32() {
+	if b.nBits < 32 {
+		return
+	}
+
+	// Move the low 4 bytes to the output, least significant byte first.
+	low := uint32(b.bitContainer)
+	b.out = append(b.out, byte(low), byte(low>>8), byte(low>>16), byte(low>>24))
+	b.bitContainer >>= 32
+	b.nBits -= 32
+}
+
+// flushAlign will flush remaining full bytes and align to next byte boundary.
+func (b *bitWriter) flushAlign() {
+	pending := b.bitContainer
+	for left := (b.nBits + 7) >> 3; left > 0; left-- {
+		b.out = append(b.out, byte(pending))
+		pending >>= 8
+	}
+	b.bitContainer, b.nBits = 0, 0
+}
+
+// close appends the end-of-stream marker, a single 1 bit, and then writes
+// all remaining bits to the output, padded to a byte boundary.
+func (b *bitWriter) close() error {
+	// End mark: same effect as addBits16Clean(1, 1).
+	b.bitContainer |= uint64(1) << (b.nBits & 63)
+	b.nBits++
+	b.flushAlign()
+	return nil // the error result is always nil
+}
+
+// reset and continue writing by appending to out.
+// Any bits still pending in the container are discarded.
+func (b *bitWriter) reset(out []byte) {
+	// bitContainer and nBits return to their zero values.
+	*b = bitWriter{out: out}
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go
new file mode 100644
index 0000000..50bcdf6
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go
@@ -0,0 +1,54 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package huff0
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced and no bounds checks are
+// performed: reading past the end of the input panics.
+type byteReader struct {
+	b   []byte // the complete input
+	off int    // index of the next unread byte in b
+}
+
+// init will initialize the reader and set the input.
+// Reading starts at the first byte of in.
+func (b *byteReader) init(in []byte) {
+	b.b, b.off = in, 0
+}
+
+// advance the stream by n bytes.
+func (b *byteReader) advance(n uint) {
+	b.off += int(n)
+}
+
+// Int32 returns a little endian int32 starting at current offset.
+// The offset is not moved.
+func (b byteReader) Int32() int32 {
+	// Same bit pattern as Uint32, reinterpreted as signed.
+	return int32(b.Uint32())
+}
+
+// Uint32 returns a little endian uint32 starting at current offset.
+// The offset is not moved.
+func (b byteReader) Uint32() uint32 {
+	p := b.b[b.off:]
+	_ = p[3] // single bounds check; panics if fewer than 4 bytes remain
+	return uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
+}
+
+// unread returns the unread portion of the input.
+// The result shares memory with the input.
+func (b byteReader) unread() []byte {
+	rest := b.b[b.off:]
+	return rest
+}
+
+// remain will return the number of bytes remaining.
+func (b byteReader) remain() int {
+	consumed := b.off
+	return len(b.b) - consumed
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
new file mode 100644
index 0000000..0823c92
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -0,0 +1,656 @@
+package huff0
+
+import (
+ "fmt"
+ "runtime"
+ "sync"
+)
+
+// Compress1X will compress the input.
+// The output can be decoded using Decompress1X.
+// Supply a Scratch object. The scratch object contains state about re-use,
+// So when sharing across independent encodes, be sure to set the re-use policy.
+// An error from preparing the Scratch is returned with a nil output.
+func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) {
+	if s, err = s.prepare(in); err != nil {
+		return nil, false, err
+	}
+	return compress(in, s, s.compress1X)
+}
+
+// Compress4X will compress the input. The input is split into 4 independent blocks
+// and compressed similar to Compress1X.
+// The output can be decoded using Decompress4X.
+// Supply a Scratch object. The scratch object contains state about re-use,
+// So when sharing across independent encodes, be sure to set the re-use policy.
+func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) {
+	if s, err = s.prepare(in); err != nil {
+		return nil, false, err
+	}
+
+	// TODO: compress4Xp only slightly faster, so the parallel path stays disabled.
+	const (
+		useParallel       = false
+		parallelThreshold = 8 << 10
+	)
+	if useParallel && len(in) >= parallelThreshold && runtime.GOMAXPROCS(0) != 1 {
+		return compress(in, s, s.compress4Xp)
+	}
+	return compress(in, s, s.compress4X)
+}
+
+func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) { // shared driver behind Compress1X and Compress4X
+ // Nuke previous table if we cannot reuse anyway.
+ if s.Reuse == ReusePolicyNone {
+ s.prevTable = s.prevTable[:0]
+ }
+
+ // Create histogram, if none was provided (s.maxCount == 0 is taken to mean "not provided").
+ maxCount := s.maxCount
+ var canReuse = false
+ if maxCount == 0 {
+ maxCount, canReuse = s.countSimple(in)
+ } else {
+ canReuse = s.canUseTable(s.prevTable)
+ }
+
+ // We want the output size to be less than this:
+ wantSize := len(in)
+ if s.WantLogLess > 0 {
+ wantSize -= wantSize >> s.WantLogLess
+ }
+
+ // Reset for next run.
+ s.clearCount = true
+ s.maxCount = 0
+ if maxCount >= len(in) {
+ if maxCount > len(in) {
+ return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in))
+ }
+ if len(in) == 1 {
+ return nil, false, ErrIncompressible
+ }
+ // One symbol, use RLE
+ return nil, false, ErrUseRLE
+ }
+ if maxCount == 1 || maxCount < (len(in)>>7) {
+ // Each symbol present maximum once or too well distributed.
+ return nil, false, ErrIncompressible
+ }
+ if s.Reuse == ReusePolicyMust && !canReuse {
+ // We must reuse, but we can't.
+ return nil, false, ErrIncompressible
+ }
+ if (s.Reuse == ReusePolicyPrefer || s.Reuse == ReusePolicyMust) && canReuse {
+ keepTable := s.cTable
+ keepTL := s.actualTableLog
+ s.cTable = s.prevTable
+ s.actualTableLog = s.prevTableLog
+ s.Out, err = compressor(in)
+ s.cTable = keepTable
+ s.actualTableLog = keepTL
+ if err == nil && len(s.Out) < wantSize {
+ s.OutData = s.Out
+ return s.Out, true, nil
+ }
+ if s.Reuse == ReusePolicyMust {
+ return nil, false, ErrIncompressible
+ }
+ // Do not attempt to re-use later.
+ s.prevTable = s.prevTable[:0]
+ }
+
+ // Calculate new table.
+ err = s.buildCTable()
+ if err != nil {
+ return nil, false, err
+ }
+
+ if false && !s.canUseTable(s.cTable) {
+ panic("invalid table generated")
+ }
+
+ if s.Reuse == ReusePolicyAllow && canReuse {
+ hSize := len(s.Out)
+ oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen])
+ newSize := s.cTable.estimateSize(s.count[:s.symbolLen])
+ if oldSize <= hSize+newSize || hSize+12 >= wantSize {
+ // Retain cTable even if we re-use.
+ keepTable := s.cTable
+ keepTL := s.actualTableLog
+
+ s.cTable = s.prevTable
+ s.actualTableLog = s.prevTableLog
+ s.Out, err = compressor(in)
+
+ // Restore ctable.
+ s.cTable = keepTable
+ s.actualTableLog = keepTL
+ if err != nil {
+ return nil, false, err
+ }
+ if len(s.Out) >= wantSize {
+ return nil, false, ErrIncompressible
+ }
+ s.OutData = s.Out
+ return s.Out, true, nil
+ }
+ }
+
+ // Use new table: it is written first, so s.Out starts with the table bytes.
+ err = s.cTable.write(s)
+ if err != nil {
+ s.OutTable = nil
+ return nil, false, err
+ }
+ s.OutTable = s.Out
+
+ // Compress using new table; the compressor is expected to append to s.Out.
+ s.Out, err = compressor(in)
+ if err != nil {
+ s.OutTable = nil
+ return nil, false, err
+ }
+ if len(s.Out) >= wantSize {
+ s.OutTable = nil
+ return nil, false, ErrIncompressible
+ }
+ // Move current table into previous; the old previous table's storage becomes the next cTable.
+ s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0]
+ s.OutData = s.Out[len(s.OutTable):]
+ return s.Out, false, nil
+}
+
+func (s *Scratch) compress1X(src []byte) ([]byte, error) {
+ return s.compress1xDo(s.Out, src) // encodes src as a single stream appended to s.Out
+}
+
+func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) {
+ var bw = bitWriter{out: dst}
+
+ // N is length divisible by 4.
+ n := len(src)
+ n -= n & 3
+ cTable := s.cTable[:256]
+
+ // Encode last bytes.
+ for i := len(src) & 3; i > 0; i-- {
+ bw.encSymbol(cTable, src[n+i-1])
+ }
+ n -= 4
+ if s.actualTableLog <= 8 {
+ for ; n >= 0; n -= 4 {
+ tmp := src[n : n+4]
+ // tmp should be len 4
+ bw.flush32()
+ bw.encTwoSymbols(cTable, tmp[3], tmp[2])
+ bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+ }
+ } else {
+ for ; n >= 0; n -= 4 {
+ tmp := src[n : n+4]
+ // tmp should be len 4
+ bw.flush32()
+ bw.encTwoSymbols(cTable, tmp[3], tmp[2])
+ bw.flush32()
+ bw.encTwoSymbols(cTable, tmp[1], tmp[0])
+ }
+ }
+ err := bw.close()
+ return bw.out, err
+}
+
+var sixZeros [6]byte // placeholder appended by compress4X for the three 16-bit stream lengths
+
+// compress4X splits src into 4 segments, encodes each with compress1xDo and
+// appends a 6 byte header (3 little endian uint16 lengths) plus the 4 streams
+// to s.Out. It returns ErrIncompressible if src is shorter than 12 bytes or a
+// stream is too long for its 16-bit length field.
+func (s *Scratch) compress4X(src []byte) ([]byte, error) {
+	if len(src) < 12 {
+		return nil, ErrIncompressible
+	}
+	segmentSize := (len(src) + 3) / 4
+	// Reserve the header; the lengths are patched in as the streams are written.
+	offsetIdx := len(s.Out)
+	s.Out = append(s.Out, sixZeros[:]...)
+	for i := 0; i < 4; i++ {
+		toDo := src
+		if len(toDo) > segmentSize {
+			toDo = toDo[:segmentSize]
+		}
+		src = src[len(toDo):]
+		var err error
+		idx := len(s.Out)
+		if s.Out, err = s.compress1xDo(s.Out, toDo); err != nil {
+			return nil, err
+		}
+		// The length of the last stream is implied and not written.
+		if length := len(s.Out) - idx; i < 3 {
+			if length > 0xFFFF {
+				return nil, ErrIncompressible // would be truncated in the header
+			}
+			s.Out[i*2+offsetIdx] = byte(length)
+			s.Out[i*2+offsetIdx+1] = byte(length >> 8)
+		}
+	}
+	return s.Out, nil
+}
+
+// compress4Xp will compress 4 streams using separate goroutines.
+// The header and the streams are appended to s.Out in the same layout as compress4X.
+func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
+	if len(src) < 12 {
+		return nil, ErrIncompressible
+	}
+	// Reserve the header after the existing output instead of reslicing over it.
+	offsetIdx := len(s.Out)
+	s.Out = append(s.Out, sixZeros[:]...)
+	segmentSize := (len(src) + 3) / 4
+	var wg sync.WaitGroup
+	var errs [4]error
+	wg.Add(4)
+	for i := 0; i < 4; i++ {
+		toDo := src
+		if len(toDo) > segmentSize {
+			toDo = toDo[:segmentSize]
+		}
+		src = src[len(toDo):]
+		// Separate goroutine for each block; each one writes only its own slots.
+		go func(i int) {
+			defer wg.Done()
+			s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo)
+		}(i)
+	}
+	wg.Wait()
+	for i := 0; i < 4; i++ {
+		if errs[i] != nil {
+			return nil, errs[i]
+		}
+		o := s.tmpOut[i]
+		// The length of the last stream is implied and not written.
+		if i < 3 {
+			if len(o) > 0xFFFF {
+				return nil, ErrIncompressible // would be truncated in the header
+			}
+			s.Out[i*2+offsetIdx] = byte(len(o))
+			s.Out[i*2+offsetIdx+1] = byte(len(o) >> 8)
+		}
+		s.Out = append(s.Out, o...)
+	}
+	return s.Out, nil
+}
+
+// countSimple will create a simple histogram in s.count.
+// Returns the biggest count.
+// Does not update s.clearCount.
+func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
+ reuse = true
+ for _, v := range in {
+ s.count[v]++
+ }
+ m := uint32(0)
+ if len(s.prevTable) > 0 {
+ for i, v := range s.count[:] {
+ if v > m {
+ m = v
+ }
+ if v > 0 {
+ s.symbolLen = uint16(i) + 1
+ if i >= len(s.prevTable) {
+ reuse = false
+ } else {
+ if s.prevTable[i].nBits == 0 {
+ reuse = false
+ }
+ }
+ }
+ }
+ return int(m), reuse
+ }
+ for i, v := range s.count[:] {
+ if v > m {
+ m = v
+ }
+ if v > 0 {
+ s.symbolLen = uint16(i) + 1
+ }
+ }
+ return int(m), false
+}
+
+// canUseTable reports whether c can encode the current histogram:
+// it must cover s.symbolLen symbols and have a code (nBits != 0)
+// for every symbol with a non-zero count.
+func (s *Scratch) canUseTable(c cTable) bool {
+	usable := len(c) >= int(s.symbolLen)
+	// The loop stops at the first symbol that lacks a code.
+	for sym := 0; usable && sym < int(s.symbolLen); sym++ {
+		usable = s.count[sym] == 0 || c[sym].nBits != 0
+	}
+	return usable
+}
+
+// validateTable reports whether c is usable for the current histogram
+// under the current table log: it must cover s.symbolLen symbols, and
+// every symbol with a non-zero count needs a code whose length is
+// non-zero and at most s.actualTableLog.
+func (s *Scratch) validateTable(c cTable) bool {
+	valid := len(c) >= int(s.symbolLen)
+	for sym := 0; valid && sym < int(s.symbolLen); sym++ {
+		if s.count[sym] == 0 {
+			// Unused symbols need no code.
+			continue
+		}
+		bits := c[sym].nBits
+		valid = bits != 0 && bits <= s.actualTableLog
+	}
+	return valid
+}
+
+// minTableLog provides the minimum logSize to safely represent a distribution.
+func (s *Scratch) minTableLog() uint8 {
+	bySrc := highBit32(uint32(s.br.remain())) + 1
+	bySymbols := highBit32(uint32(s.symbolLen-1)) + 2
+	if bySymbols <= bySrc {
+		return uint8(bySymbols)
+	}
+	return uint8(bySrc)
+}
+
+// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog.
+// s.TableLog is lowered for small inputs, raised to minTableLog() and clamped to [minTablelog, tableLogMax].
+func (s *Scratch) optimalTableLog() {
+	log := s.TableLog
+	lower := s.minTableLog()
+	// Accuracy can be reduced when the source is small.
+	if srcLimit := uint8(highBit32(uint32(s.br.remain()-1))) - 1; srcLimit < log {
+		log = srcLimit
+	}
+	if log < lower {
+		log = lower
+	}
+	// Need a minimum to safely represent all symbol values.
+	if log < minTablelog {
+		log = minTablelog
+	}
+	if log > tableLogMax {
+		log = tableLogMax
+	}
+	s.actualTableLog = log
+}
+
+type cTableEntry struct {
+ val uint16 // code bits written to the stream for this symbol
+ nBits uint8 // code length in bits; 0 means the symbol has no code
+ // We have 8 bits extra
+}
+
+const huffNodesMask = huffNodesLen - 1 // huffSort masks node indices with this; presumably huffNodesLen is a power of two - TODO confirm
+
+func (s *Scratch) buildCTable() error { // builds s.cTable (code value and length per symbol) from the histogram in s.count
+ s.optimalTableLog()
+ s.huffSort()
+ if cap(s.cTable) < maxSymbolValue+1 {
+ s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1)
+ } else {
+ s.cTable = s.cTable[:s.symbolLen]
+ for i := range s.cTable {
+ s.cTable[i] = cTableEntry{}
+ }
+ }
+
+ var startNode = int16(s.symbolLen)
+ nonNullRank := s.symbolLen - 1
+
+ nodeNb := startNode
+ huffNode := s.nodes[1 : huffNodesLen+1]
+
+ // This overlays the slice above, but allows "-1" index lookups:
+ // huffNode0[i+1] is the same element as huffNode[i]. Different from reference implementation.
+ huffNode0 := s.nodes[0 : huffNodesLen+1]
+
+ for huffNode[nonNullRank].count == 0 {
+ nonNullRank--
+ }
+
+ lowS := int16(nonNullRank)
+ nodeRoot := nodeNb + lowS - 1
+ lowN := nodeNb
+ huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count
+ huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb)
+ nodeNb++
+ lowS -= 2
+ for n := nodeNb; n <= nodeRoot; n++ {
+ huffNode[n].count = 1 << 30
+ }
+ // fake entry, strong barrier
+ huffNode0[0].count = 1 << 31
+
+ // create parents: repeatedly join the two smallest remaining nodes (leaf at lowS or internal node at lowN)
+ for nodeNb <= nodeRoot {
+ var n1, n2 int16
+ if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ n1 = lowS
+ lowS--
+ } else {
+ n1 = lowN
+ lowN++
+ }
+ if huffNode0[lowS+1].count < huffNode0[lowN+1].count {
+ n2 = lowS
+ lowS--
+ } else {
+ n2 = lowN
+ lowN++
+ }
+
+ huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count
+ huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb)
+ nodeNb++
+ }
+
+ // distribute weights (unlimited tree height): each node is one bit deeper than its parent
+ huffNode[nodeRoot].nbBits = 0
+ for n := nodeRoot - 1; n >= startNode; n-- {
+ huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ }
+ for n := uint16(0); n <= nonNullRank; n++ {
+ huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1
+ }
+ s.actualTableLog = s.setMaxHeight(int(nonNullRank))
+ maxNbBits := s.actualTableLog
+
+ // fill result into tree (val, nbBits)
+ if maxNbBits > tableLogMax {
+ return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax)
+ }
+ var nbPerRank [tableLogMax + 1]uint16
+ var valPerRank [16]uint16
+ for _, v := range huffNode[:nonNullRank+1] {
+ nbPerRank[v.nbBits]++
+ }
+ // determine starting value per rank
+ {
+ min := uint16(0)
+ for n := maxNbBits; n > 0; n-- {
+ // get starting value within each rank
+ valPerRank[n] = min
+ min += nbPerRank[n]
+ min >>= 1
+ }
+ }
+
+ // push nbBits per symbol, symbol order
+ for _, v := range huffNode[:nonNullRank+1] {
+ s.cTable[v.symbol].nBits = v.nbBits
+ }
+
+ // assign value within rank, symbol order
+ t := s.cTable[:s.symbolLen]
+ for n, val := range t {
+ nbits := val.nBits & 15
+ v := valPerRank[nbits]
+ t[n].val = v
+ valPerRank[nbits] = v + 1
+ }
+
+ return nil
+}
+
+// huffSort will sort symbols by count, decreasing order, into s.nodes[1:].
+func (s *Scratch) huffSort() {
+ type rankPos struct {
+ base uint32
+ current uint32
+ }
+
+ // Make s.nodes huffNodesLen+1 long; the sort fills nodes[1:], leaving index 0 untouched.
+ nodes := s.nodes[:huffNodesLen+1]
+ s.nodes = nodes
+ nodes = nodes[1 : huffNodesLen+1]
+
+ // Sort into buckets based on length of symbol count.
+ var rank [32]rankPos
+ for _, v := range s.count[:s.symbolLen] {
+ r := highBit32(v+1) & 31
+ rank[r].base++
+ }
+ // maxBitLength is log2(BlockSizeMax) + 1
+ const maxBitLength = 18 + 1
+ for n := maxBitLength; n > 0; n-- {
+ rank[n-1].base += rank[n].base
+ }
+ for n := range rank[:maxBitLength] {
+ rank[n].current = rank[n].base
+ }
+ for n, c := range s.count[:s.symbolLen] {
+ r := (highBit32(c+1) + 1) & 31
+ pos := rank[r].current
+ rank[r].current++
+ prev := nodes[(pos-1)&huffNodesMask]
+ for pos > rank[r].base && c > prev.count { // insertion sort within the bucket
+ nodes[pos&huffNodesMask] = prev
+ pos--
+ prev = nodes[(pos-1)&huffNodesMask]
+ }
+ nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
+ }
+}
+
+func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { // limits code lengths to s.actualTableLog; returns the resulting maximum length
+ maxNbBits := s.actualTableLog
+ huffNode := s.nodes[1 : huffNodesLen+1]
+ //huffNode = huffNode[: huffNodesLen]
+
+ largestBits := huffNode[lastNonNull].nbBits
+
+ // early exit : no elt > maxNbBits
+ if largestBits <= maxNbBits {
+ return largestBits
+ }
+ totalCost := int(0)
+ baseCost := int(1) << (largestBits - maxNbBits)
+ n := uint32(lastNonNull)
+
+ for huffNode[n].nbBits > maxNbBits {
+ totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits))
+ huffNode[n].nbBits = maxNbBits
+ n--
+ }
+ // n stops at huffNode[n].nbBits <= maxNbBits
+
+ for huffNode[n].nbBits == maxNbBits {
+ n--
+ }
+ // n end at index of smallest symbol using < maxNbBits
+
+ // renorm totalCost
+ totalCost >>= largestBits - maxNbBits /* note : totalCost is necessarily a multiple of baseCost */
+
+ // repay normalized cost
+ {
+ const noSymbol = 0xF0F0F0F0
+ var rankLast [tableLogMax + 2]uint32
+
+ for i := range rankLast[:] {
+ rankLast[i] = noSymbol
+ }
+
+ // Get pos of last (smallest) symbol per rank
+ {
+ currentNbBits := maxNbBits
+ for pos := int(n); pos >= 0; pos-- {
+ if huffNode[pos].nbBits >= currentNbBits {
+ continue
+ }
+ currentNbBits = huffNode[pos].nbBits // < maxNbBits
+ rankLast[maxNbBits-currentNbBits] = uint32(pos)
+ }
+ }
+
+ for totalCost > 0 {
+ nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1
+
+ for ; nBitsToDecrease > 1; nBitsToDecrease-- {
+ highPos := rankLast[nBitsToDecrease]
+ lowPos := rankLast[nBitsToDecrease-1]
+ if highPos == noSymbol {
+ continue
+ }
+ if lowPos == noSymbol {
+ break
+ }
+ highTotal := huffNode[highPos].count
+ lowTotal := 2 * huffNode[lowPos].count
+ if highTotal <= lowTotal {
+ break
+ }
+ }
+ // only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !)
+ // HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary
+ // FIXME: try to remove
+ for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) {
+ nBitsToDecrease++
+ }
+ totalCost -= 1 << (nBitsToDecrease - 1)
+ if rankLast[nBitsToDecrease-1] == noSymbol {
+ // this rank is no longer empty
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]
+ }
+ huffNode[rankLast[nBitsToDecrease]].nbBits++
+ if rankLast[nBitsToDecrease] == 0 {
+ /* special case, reached largest symbol */
+ rankLast[nBitsToDecrease] = noSymbol
+ } else {
+ rankLast[nBitsToDecrease]--
+ if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease {
+ rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */
+ }
+ }
+ }
+
+ for totalCost < 0 { /* Sometimes, cost correction overshoot */
+ if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
+ for huffNode[n].nbBits == maxNbBits {
+ n--
+ }
+ huffNode[n+1].nbBits--
+ rankLast[1] = n + 1
+ totalCost++
+ continue
+ }
+ huffNode[rankLast[1]+1].nbBits--
+ rankLast[1]++
+ totalCost++
+ }
+ }
+ return maxNbBits
+}
+
+type nodeElt struct {
+ count uint32 // symbol count, or the summed count of the children for nodes created in buildCTable
+ parent uint16 // index of the parent node, set while buildCTable joins nodes
+ symbol byte // symbol value for leaf nodes, set by huffSort
+ nbBits uint8 // code length assigned to this node
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
new file mode 100644
index 0000000..41703bb
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -0,0 +1,1164 @@
+package huff0
+
+import (
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/klauspost/compress/fse"
+)
+
+type dTable struct {
+ single []dEntrySingle // one-symbol lookup table, filled by ReadTable
+ double []dEntryDouble // not populated in the visible part of this file
+}
+
+// single-symbols decoding
+type dEntrySingle struct {
+ entry uint16 // low 8 bits: number of bits to consume; high 8 bits: the decoded symbol
+}
+
+// double-symbols decoding
+type dEntryDouble struct {
+ seq uint16
+ nBits uint8
+ len uint8
+}
+
+// Uses special code for all tables that are <= 8 bits.
+const use8BitTables = true
+
+// ReadTable will read a table from the input.
+// The size of the input may be larger than the table definition.
+// Any content remaining after the table definition will be returned.
+// If no Scratch is provided a new one is allocated.
+// The returned Scratch can be used for encoding or decoding input using this table.
+func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
+ s, err = s.prepare(in)
+ if err != nil {
+ return s, nil, err
+ }
+ if len(in) <= 1 {
+ return s, nil, errors.New("input too small for table")
+ }
+ iSize := in[0]
+ in = in[1:]
+ if iSize >= 128 {
+ // Uncompressed: iSize-127 weights follow, two 4-bit values per byte, high nibble first.
+ oSize := iSize - 127
+ iSize = (oSize + 1) / 2
+ if int(iSize) > len(in) {
+ return s, nil, errors.New("input too small for table")
+ }
+ for n := uint8(0); n < oSize; n += 2 {
+ v := in[n/2]
+ s.huffWeight[n] = v >> 4
+ s.huffWeight[n+1] = v & 15
+ }
+ s.symbolLen = uint16(oSize)
+ in = in[iSize:]
+ } else {
+ if len(in) < int(iSize) {
+ return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
+ }
+ // FSE compressed weights, iSize bytes long; decoded straight into s.huffWeight.
+ s.fse.DecompressLimit = 255
+ hw := s.huffWeight[:]
+ s.fse.Out = hw
+ b, err := fse.Decompress(in[:iSize], s.fse)
+ s.fse.Out = nil
+ if err != nil {
+ return s, nil, err
+ }
+ if len(b) > 255 {
+ return s, nil, errors.New("corrupt input: output table too large")
+ }
+ s.symbolLen = uint16(len(b))
+ in = in[iSize:]
+ }
+
+ // collect weight stats: rankStats[w] counts symbols of weight w, weightTotal sums 2^(w-1) over non-zero weights
+ var rankStats [16]uint32
+ weightTotal := uint32(0)
+ for _, v := range s.huffWeight[:s.symbolLen] {
+ if v > tableLogMax {
+ return s, nil, errors.New("corrupt input: weight too large")
+ }
+ v2 := v & 15
+ rankStats[v2]++
+ // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
+ weightTotal += (1 << v2) >> 1
+ }
+ if weightTotal == 0 {
+ return s, nil, errors.New("corrupt input: weights zero")
+ }
+
+ // get last non-null symbol weight (implied, total must be 2^n)
+ {
+ tableLog := highBit32(weightTotal) + 1
+ if tableLog > tableLogMax {
+ return s, nil, errors.New("corrupt input: tableLog too big")
+ }
+ s.actualTableLog = uint8(tableLog)
+ // determine last weight
+ {
+ total := uint32(1) << tableLog
+ rest := total - weightTotal
+ verif := uint32(1) << highBit32(rest)
+ lastWeight := highBit32(rest) + 1
+ if verif != rest {
+ // last value must be a clean power of 2
+ return s, nil, errors.New("corrupt input: last value not power of two")
+ }
+ s.huffWeight[s.symbolLen] = uint8(lastWeight)
+ s.symbolLen++
+ rankStats[lastWeight]++
+ }
+ }
+
+ if (rankStats[1] < 2) || (rankStats[1]&1 != 0) {
+ // by construction : at least 2 elts of rank 1, must be even
+ return s, nil, errors.New("corrupt input: min elt size, even check failed ")
+ }
+
+ // TODO: Choose between single/double symbol decoding
+
+ // Calculate starting value for each rank; rankStats changes from counts to start positions in the table.
+ {
+ var nextRankStart uint32
+ for n := uint8(1); n < s.actualTableLog+1; n++ {
+ current := nextRankStart
+ nextRankStart += rankStats[n] << (n - 1)
+ rankStats[n] = current
+ }
+ }
+
+ // fill DTable (always full size)
+ tSize := 1 << tableLogMax
+ if len(s.dt.single) != tSize {
+ s.dt.single = make([]dEntrySingle, tSize)
+ }
+ cTable := s.prevTable
+ if cap(cTable) < maxSymbolValue+1 {
+ cTable = make([]cTableEntry, 0, maxSymbolValue+1)
+ }
+ cTable = cTable[:maxSymbolValue+1]
+ s.prevTable = cTable[:s.symbolLen]
+ s.prevTableLog = s.actualTableLog
+
+ for n, w := range s.huffWeight[:s.symbolLen] {
+ if w == 0 {
+ cTable[n] = cTableEntry{
+ val: 0,
+ nBits: 0,
+ }
+ continue
+ }
+ length := (uint32(1) << w) >> 1 // number of table slots this symbol occupies
+ d := dEntrySingle{
+ entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
+ }
+
+ rank := &rankStats[w]
+ cTable[n] = cTableEntry{
+ val: uint16(*rank >> (w - 1)),
+ nBits: uint8(d.entry),
+ }
+
+ single := s.dt.single[*rank : *rank+length]
+ for i := range single {
+ single[i] = d
+ }
+ *rank += length
+ }
+
+ return s, in, nil
+}
+
+// Decompress1X will decompress a 1X encoded stream into s.Out.
+// The length of the supplied input must match the end of a block exactly.
+// The table must have been initialized with ReadTable unless the encoder re-used it.
+//
+// Deprecated: Use the stateless Decoder() to get a concurrent version.
+func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
+	limit := s.MaxDecodedSize
+	if cap(s.Out) < limit {
+		s.Out = make([]byte, limit)
+	}
+	s.Out, err = s.Decoder().Decompress1X(s.Out[:0:limit], in)
+	return s.Out, err
+}
+
+// Decompress4X will decompress a 4X encoded stream into s.Out.
+// Before this is called, the table must be initialized with ReadTable unless
+// the encoder re-used the table.
+// The length of the supplied input must match the end of a block exactly.
+// The destination size of the uncompressed data must be known and provided.
+//
+// Deprecated: Use the stateless Decoder() to get a concurrent version.
+func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
+	switch {
+	case dstSize > s.MaxDecodedSize:
+		return nil, ErrMaxDecodedSizeExceeded
+	case cap(s.Out) < dstSize:
+		s.Out = make([]byte, s.MaxDecodedSize)
+	}
+	s.Out, err = s.Decoder().Decompress4X(s.Out[:0:dstSize], in)
+	return s.Out, err
+}
+
+// Decoder will return a stateless decoder that can be used by multiple
+// decompressors concurrently.
+// Before this is called, the table must be initialized with ReadTable.
+// The Decoder is still linked to the scratch buffer so that cannot be reused.
+// However, it is safe to discard the scratch.
+func (s *Scratch) Decoder() *Decoder {
+	dec := new(Decoder)
+	dec.dt = s.dt // copies the slice headers; the table entries are shared
+	dec.actualTableLog = s.actualTableLog
+	return dec
+}
+
+// Decoder provides stateless decoding.
+type Decoder struct {
+ dt dTable // decoding tables taken from the Scratch in Scratch.Decoder
+ actualTableLog uint8 // table log taken from the Scratch; number of bits peeked per lookup
+}
+
+// Decompress1X will decompress a 1X encoded stream.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+// A stream whose final codes run past the end of src is rejected with the error from br.close.
+func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
+	if len(d.dt.single) == 0 {
+		return nil, errors.New("no table loaded")
+	}
+	if use8BitTables && d.actualTableLog <= 8 {
+		return d.decompress1X8Bit(dst, src)
+	}
+	var br bitReaderShifted
+	err := br.init(src)
+	if err != nil {
+		return dst, err
+	}
+	maxDecodedSize := cap(dst)
+	dst = dst[:0]
+
+	// Avoid bounds check by always having full sized table.
+	const tlSize = 1 << tableLogMax
+	const tlMask = tlSize - 1
+	dt := d.dt.single[:tlSize]
+	// Use temp table to avoid bound checks/append penalty.
+	var buf [256]byte
+	var off uint8
+
+	for br.off >= 8 {
+		br.fillFast()
+		v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+0] = uint8(v.entry >> 8)
+
+		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+1] = uint8(v.entry >> 8)
+
+		// Refill
+		br.fillFast()
+
+		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+2] = uint8(v.entry >> 8)
+
+		v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+		br.advance(uint8(v.entry))
+		buf[off+3] = uint8(v.entry >> 8)
+
+		off += 4
+		if off == 0 {
+			if len(dst)+256 > maxDecodedSize {
+				br.close()
+				return nil, ErrMaxDecodedSizeExceeded
+			}
+			dst = append(dst, buf[:]...)
+		}
+	}
+
+	if len(dst)+int(off) > maxDecodedSize {
+		br.close()
+		return nil, ErrMaxDecodedSizeExceeded
+	}
+	dst = append(dst, buf[:off]...)
+
+	// Signed on purpose: a code that overruns the input makes this negative and ends the loop instead of wrapping.
+	bitsLeft := int(br.off)*8 + 64 - int(br.bitsRead)
+	for bitsLeft > 0 {
+		br.fill()
+		if false && br.bitsRead >= 32 {
+			if br.off >= 4 {
+				v := br.in[br.off-4:]
+				v = v[:4]
+				low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+				br.value = (br.value << 32) | uint64(low)
+				br.bitsRead -= 32
+				br.off -= 4
+			} else {
+				for br.off > 0 {
+					br.value = (br.value << 8) | uint64(br.in[br.off-1])
+					br.bitsRead -= 8
+					br.off--
+				}
+			}
+		}
+		if len(dst) >= maxDecodedSize {
+			br.close()
+			return nil, ErrMaxDecodedSizeExceeded
+		}
+		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
+		nBits := uint8(v.entry)
+		br.advance(nBits)
+		bitsLeft -= int(nBits)
+		dst = append(dst, uint8(v.entry>>8))
+	}
+	return dst, br.close()
+}
+
+// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8 (a tablelog of exactly 8 is handed to decompress1X8BitExactly).
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress1X8BitExactly(dst, src)
+ }
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst) // the output limit is communicated through cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ shift := (8 - d.actualTableLog) & 7 // the peeked byte is shifted right by this much to form the table index
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 { // bulk path: four symbols per iteration
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry)) // low byte of entry: number of bits to consume
+ buf[off+0] = uint8(v.entry >> 8) // next byte of entry: the decoded symbol
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 { // uint8 wrapped around: all 256 bytes of buf are filled
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...) // flush the partially filled buffer
+
+ // br.off < 4 here (the loop above has exited), so the uint8 arithmetic is fine
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 { // tail: one symbol per iteration
+ if br.bitsRead >= 64-8 { // at most 8 unread bits left in br.value: pull in all remaining input bytes
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8BitExactly will decompress a 1X encoded stream with tablelog == 8, so the peeked byte indexes the table without shifting.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst) // the output limit is communicated through cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ const shift = 0 // constant zero shift; kept so the body mirrors decompress1X8Bit
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 { // bulk path: four symbols per iteration
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry)) // low byte of entry: number of bits to consume
+ buf[off+0] = uint8(v.entry >> 8) // next byte of entry: the decoded symbol
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 { // uint8 wrapped around: all 256 bytes of buf are filled
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...) // flush the partially filled buffer
+
+ // br.off < 4 here (the loop above has exited), so the uint8 arithmetic is fine
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 { // tail: one symbol per iteration
+ if br.bitsRead >= 64-8 { // at most 8 unread bits left in br.value: pull in all remaining input bytes
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// Decompress4X will decompress a 4X encoded stream: a 6-byte header of three 16-bit lengths followed by four bit streams.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
+ if len(d.dt.single) == 0 {
+ return nil, errors.New("no table loaded")
+ }
+ if len(src) < 6+(4*1) { // 6-byte length header plus, presumably, at least one byte for each of the 4 streams
+ return nil, errors.New("input too small")
+ }
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress4X8bit(dst, src) // byte-indexed variant for table logs of at most 8
+ }
+
+ var br [4]bitReaderShifted
+ start := 6 // stream data begins after the three 2-byte length fields
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8) // little-endian 16-bit length of stream i
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:]) // the fourth stream takes whatever input remains
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4 // output bytes per stream, rounded up; stream i writes from offset dstEvery*i
+
+ const tlSize = 1 << tableLogMax
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 2 values from each decoder/loop.
+ const bufoff = 256 / 4 // each stream owns a 64-byte region of buf
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask]
+ br[stream].advance(uint8(v.entry)) // low byte of entry: number of bits to consume
+ buf[off+bufoff*stream] = uint8(v.entry >> 8) // next byte of entry: the decoded symbol
+
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
+ v2 := single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
+
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
+
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
+ v2 := single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
+
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
+
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
+ }
+
+ off += 2 // two symbols were stored per stream in this iteration
+
+ if off == bufoff { // all four regions are full: flush each to its quarter of the output
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 { // flush the partially filled regions; the destination slice lengths cap each copy at off bytes
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i] // shadows the array with a pointer to the current stream's reader
+ bitsLeft := br.off*8 + uint(64-br.bitsRead)
+ for bitsLeft > 0 {
+ br.fill()
+ if false && br.bitsRead >= 32 { // constant-false guard: this inlined refill is disabled, br.fill() above is what runs
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ val := br.peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= uint(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i // symbols this stream produced in the tail loop
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded { // the total symbol count must equal cap(dst) exactly
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// decompress4X8bit will decompress a 4X encoded stream with tablelog <= 8 (a tablelog of exactly 8 is handed to decompress4X8bitExactly).
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress4X8bitExactly(dst, src)
+ }
+
+ var br [4]bitReaderBytes
+ start := 6 // stream data begins after the three 2-byte length fields; len(src) is not re-checked here (Decompress4X checks it before calling)
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8) // little-endian 16-bit length of stream i
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:]) // the fourth stream takes whatever input remains
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4 // output bytes per stream, rounded up; stream i writes from offset dstEvery*i
+
+ shift := (8 - d.actualTableLog) & 7 // the peeked byte is shifted right by this much to form the table index
+
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4 // each stream owns a 64-byte region of buf
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8) // second byte of entry: the decoded symbol
+ br[stream].advance(uint8(v)) // low byte of entry: number of bits to consume
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4 // four symbols were stored per stream in this iteration
+
+ if off == bufoff { // all four regions are full: flush each to its quarter of the output
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 { // flush the partially filled regions; the destination slice lengths cap each copy at off bytes
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i] // shadows the array with a pointer to the current stream's reader
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 { // at most 8 unread bits left in br.value: refill from the remaining input
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i // symbols this stream produced in the tail loop
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded { // the total symbol count must equal cap(dst) exactly
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// decompress4X8bitExactly will decompress a 4X encoded stream with tablelog == 8, so the peeked byte indexes the table without shifting.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
+ var br [4]bitReaderBytes
+ start := 6 // stream data begins after the three 2-byte length fields; len(src) is not re-checked here (Decompress4X checks it up front)
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8) // little-endian 16-bit length of stream i
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:]) // the fourth stream takes whatever input remains
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4 // output bytes per stream, rounded up; stream i writes from offset dstEvery*i
+
+ const shift = 0 // constant zero shift; kept so the body mirrors decompress4X8bit
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4 // each stream owns a 64-byte region of buf
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8) // second byte of entry: the decoded symbol
+ br[stream].advance(uint8(v)) // low byte of entry: number of bits to consume
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4 // four symbols were stored per stream in this iteration
+
+ if off == bufoff { // all four regions are full: flush each to its quarter of the output
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 { // flush the partially filled regions; the destination slice lengths cap each copy at off bytes
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i] // shadows the array with a pointer to the current stream's reader
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 { // at most 8 unread bits left in br.value: refill from the remaining input
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i // symbols this stream produced in the tail loop
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded { // the total symbol count must equal cap(dst) exactly
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// matches will compare the decoding table held by s to the coding table ct, symbol by symbol.
+// Errors are written to the writer, followed by a "broken/ok" summary line if any symbol failed.
+// Nothing will be written if table is ok, or if s is nil or has no decoding table.
+func (s *Scratch) matches(ct cTable, w io.Writer) {
+ if s == nil || len(s.dt.single) == 0 {
+ return
+ }
+ dt := s.dt.single[:1<<s.actualTableLog] // only the first 2^actualTableLog entries are examined
+ tablelog := s.actualTableLog
+ ok := 0
+ broken := 0
+ for sym, enc := range ct {
+ errs := 0
+ broken++ // counted as broken up front; decremented again once the symbol checks out
+ if enc.nBits == 0 { // the encoder has no code for this symbol, so no decoder entry may output it
+ for _, dec := range dt {
+ if uint8(dec.entry>>8) == byte(sym) {
+ fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym)
+ errs++
+ break
+ }
+ }
+ if errs == 0 {
+ broken--
+ }
+ continue
+ }
+ // Unused bits in input
+ ub := tablelog - enc.nBits
+ top := enc.val << ub // the code placed in the top bits of a tablelog-wide index
+ // decoder looks at top bits.
+ dec := dt[top]
+ if uint8(dec.entry) != enc.nBits {
+ fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry))
+ errs++
+ }
+ if uint8(dec.entry>>8) != uint8(sym) {
+ fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8))
+ errs++
+ }
+ if errs > 0 { // base entry already wrong: skip the exhaustive check, the symbol stays counted as broken
+ fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
+ continue
+ }
+ // Ensure that all combinations are covered.
+ for i := uint16(0); i < (1 << ub); i++ {
+ vval := top | i // every index that shares this code's top bits
+ dec := dt[vval]
+ if uint8(dec.entry) != enc.nBits {
+ fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry))
+ errs++
+ }
+ if uint8(dec.entry>>8) != uint8(sym) {
+ fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8))
+ errs++
+ }
+ if errs > 20 { // cap the amount of output per symbol
+ fmt.Fprintf(w, "%d errros, stopping\n", errs)
+ break
+ }
+ }
+ if errs == 0 {
+ ok++
+ broken--
+ }
+ }
+ if broken > 0 {
+ fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok)
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
new file mode 100644
index 0000000..7ec2022
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -0,0 +1,273 @@
+// Package huff0 provides fast huffman encoding as used in zstd.
+//
+// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details.
+package huff0
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "math/bits"
+
+ "github.com/klauspost/compress/fse"
+)
+
+const (
+ maxSymbolValue = 255 // largest symbol value; per-symbol arrays in Scratch are sized maxSymbolValue+1
+
+ // zstandard limits tablelog to 11, see:
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description
+ tableLogMax = 11 // upper bound accepted for Scratch.TableLog
+ tableLogDefault = 11 // used by prepare when Scratch.TableLog is 0
+ minTablelog = 5 // lower bound accepted for Scratch.TableLog
+ huffNodesLen = 512 // prepare allocates huffNodesLen+1 capacity for Scratch.nodes
+
+ // BlockSizeMax is maximum input size for a single block uncompressed.
+ BlockSizeMax = 1<<18 - 1
+)
+
+var (
+ // ErrIncompressible is returned when input is judged to be too hard to compress.
+ ErrIncompressible = errors.New("input is not compressible")
+
+ // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
+ ErrUseRLE = errors.New("input is single value repeated")
+
+ // ErrTooBig is returned if input is too large for a single block.
+ ErrTooBig = errors.New("input too big")
+
+ // ErrMaxDecodedSizeExceeded is returned by decoders if the output would exceed the maximum allowed decoded size.
+ ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded")
+)
+
+type ReusePolicy uint8 // ReusePolicy is the type of Scratch.Reuse; it selects how a previous table may be re-used.
+
+const (
+ // ReusePolicyAllow will allow reuse if it produces smaller output.
+ // It is the zero value, so it is the policy of a zero Scratch.
+ ReusePolicyAllow ReusePolicy = iota
+ // ReusePolicyPrefer will re-use aggressively if possible.
+ // This will not check if a new table will produce smaller output,
+ // except if the current table is impossible to use or
+ // compressed output is bigger than input.
+ ReusePolicyPrefer
+
+ // ReusePolicyNone will disable re-use of tables.
+ // This is slightly faster than ReusePolicyAllow but may produce larger output.
+ ReusePolicyNone
+
+ // ReusePolicyMust must allow reuse and produce smaller output.
+ ReusePolicyMust
+)
+
+type Scratch struct { // Scratch holds per-block parameters, output buffers and reusable tables/state.
+ count [maxSymbolValue + 1]uint32
+
+ // Per block parameters.
+ // These can be used to override compression parameters of the block.
+ // Do not touch, unless you know what you are doing.
+
+ // Out is output buffer.
+ // If the scratch is re-used before the caller is done processing the output,
+ // set this field to nil.
+ // Otherwise the output buffer will be re-used for next Compression/Decompression step
+ // and allocation will be avoided.
+ Out []byte
+
+ // OutTable will contain the table data only, if a new table has been generated.
+ // Slice of the returned data.
+ OutTable []byte
+
+ // OutData will contain the compressed data.
+ // Slice of the returned data.
+ OutData []byte
+
+ // MaxDecodedSize will set the maximum allowed output size.
+ // This value will automatically be set to BlockSizeMax if not set.
+ // Decoders will return ErrMaxDecodedSizeExceeded if this limit is exceeded.
+ MaxDecodedSize int
+
+ br byteReader
+
+ // MaxSymbolValue will override the maximum symbol value of the next block.
+ MaxSymbolValue uint8
+
+ // TableLog will attempt to override the tablelog for the next block.
+ // Must be <= 11 and >= 5.
+ TableLog uint8
+
+ // Reuse will specify the reuse policy
+ Reuse ReusePolicy
+
+ // WantLogLess allows to specify a log 2 reduction that should at least be achieved,
+ // otherwise the block will be returned as incompressible.
+ // The reduction should then at least be (input size >> WantLogLess)
+ // If WantLogLess == 0 any improvement will do.
+ WantLogLess uint8
+
+ symbolLen uint16 // Length of active part of the symbol table.
+ maxCount int // count of the most probable symbol
+ clearCount bool // clear count
+ actualTableLog uint8 // Selected tablelog.
+ prevTableLog uint8 // Tablelog for previous table
+ prevTable cTable // Table used for previous compression.
+ cTable cTable // compression table
+ dt dTable // decompression table
+ nodes []nodeElt
+ tmpOut [4][]byte
+ fse *fse.Scratch
+ huffWeight [maxSymbolValue + 1]byte
+}
+
+// TransferCTable will copy the previously used compression table and its table log from src into s.
+func (s *Scratch) TransferCTable(src *Scratch) {
+ if cap(s.prevTable) < len(src.prevTable) { // existing storage too small: allocate room for a full symbol table
+ s.prevTable = make(cTable, 0, maxSymbolValue+1)
+ }
+ s.prevTable = s.prevTable[:len(src.prevTable)]
+ copy(s.prevTable, src.prevTable) // element copy, so s gets its own backing array for the table
+ s.prevTableLog = src.prevTableLog
+}
+
+func (s *Scratch) prepare(in []byte) (*Scratch, error) { // prepare validates in and the settings, fills in defaults and resets per-block state.
+ if len(in) > BlockSizeMax {
+ return nil, ErrTooBig
+ }
+ if s == nil { // a nil receiver is allowed: a fresh Scratch is created and returned
+ s = &Scratch{}
+ }
+ if s.MaxSymbolValue == 0 {
+ s.MaxSymbolValue = maxSymbolValue
+ }
+ if s.TableLog == 0 {
+ s.TableLog = tableLogDefault
+ }
+ if s.TableLog > tableLogMax || s.TableLog < minTablelog {
+ return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax)
+ }
+ if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax { // unset or out-of-range limits fall back to BlockSizeMax
+ s.MaxDecodedSize = BlockSizeMax
+ }
+ if s.clearCount && s.maxCount == 0 { // zero the histogram only when a clear was requested and maxCount is 0
+ for i := range s.count {
+ s.count[i] = 0
+ }
+ s.clearCount = false
+ }
+ if cap(s.Out) == 0 { // no reusable output buffer: allocate one with capacity len(in)
+ s.Out = make([]byte, 0, len(in))
+ }
+ s.Out = s.Out[:0]
+
+ s.OutTable = nil
+ s.OutData = nil
+ if cap(s.nodes) < huffNodesLen+1 {
+ s.nodes = make([]nodeElt, 0, huffNodesLen+1)
+ }
+ s.nodes = s.nodes[:0]
+ if s.fse == nil {
+ s.fse = &fse.Scratch{}
+ }
+ s.br.init(in)
+
+ return s, nil
+}
+
+type cTable []cTableEntry // cTable is a compression table; write indexes it by symbol value.
+
+func (c cTable) write(s *Scratch) error { // write appends the table's weights to s.Out, FSE-compressed when that is small enough, else packed 4 bits each.
+ var (
+ // precomputed conversion table
+ bitsToWeight [tableLogMax + 1]byte
+ huffLog = s.actualTableLog
+ // last weight is not saved.
+ maxSymbolValue = uint8(s.symbolLen - 1) // shadows the package constant: here it is the index of the last symbol in use
+ huffWeight = s.huffWeight[:256]
+ )
+ const (
+ maxFSETableLog = 6
+ )
+ // convert to weight
+ bitsToWeight[0] = 0
+ for n := uint8(1); n < huffLog+1; n++ {
+ bitsToWeight[n] = huffLog + 1 - n // shorter codes get larger weights; nBits == 0 keeps weight 0
+ }
+
+ // Acquire histogram for FSE.
+ hist := s.fse.Histogram()
+ hist = hist[:256]
+ for i := range hist[:16] { // weights are masked to 4 bits below, so only the first 16 counters are used
+ hist[i] = 0
+ }
+ for n := uint8(0); n < maxSymbolValue; n++ {
+ v := bitsToWeight[c[n].nBits] & 15
+ huffWeight[n] = v
+ hist[v]++
+ }
+
+ // FSE compress if feasible.
+ if maxSymbolValue >= 2 {
+ huffMaxCnt := uint32(0)
+ huffMax := uint8(0)
+ for i, v := range hist[:16] {
+ if v == 0 {
+ continue
+ }
+ huffMax = byte(i) // ends up as the largest weight that occurs
+ if v > huffMaxCnt {
+ huffMaxCnt = v // count of the most frequent weight
+ }
+ }
+ s.fse.HistogramFinished(huffMax, int(huffMaxCnt))
+ s.fse.TableLog = maxFSETableLog
+ b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse)
+ if err == nil && len(b) < int(s.symbolLen>>1) { // keep the FSE form only if it is shorter than symbolLen/2 bytes
+ s.Out = append(s.Out, uint8(len(b))) // header byte: length of the FSE-compressed weights
+ s.Out = append(s.Out, b...)
+ return nil
+ }
+ // Unable to compress (RLE/uncompressible)
+ }
+ // write raw values as 4-bits (max : 15)
+ if maxSymbolValue > (256 - 128) {
+ // should not happen : likely means source cannot be compressed
+ return ErrIncompressible
+ }
+ op := s.Out
+ // special case, pack weights 4 bits/weight.
+ op = append(op, 128|(maxSymbolValue-1)) // header byte with the top bit set marks the raw 4-bit form
+ // be sure it doesn't cause msan issue in final combination
+ huffWeight[maxSymbolValue] = 0
+ for n := uint16(0); n < uint16(maxSymbolValue); n += 2 {
+ op = append(op, (huffWeight[n]<<4)|huffWeight[n+1]) // two weights per byte, first weight in the high nibble
+ }
+ s.Out = op
+ return nil
+}
+
+// estimateSize returns the estimated size in bytes of the input represented in the
+// histogram supplied, computed as the sum of code length times count over the first len(hist) entries of c.
+func (c cTable) estimateSize(hist []uint32) int {
+ nbBits := uint32(7) // start at 7 so the >>3 below rounds up to whole bytes
+ for i, v := range c[:len(hist)] {
+ nbBits += uint32(v.nBits) * hist[i]
+ }
+ return int(nbBits >> 3)
+}
+
+// minSize returns the minimum possible size in bytes considering the shannon limit, using s.count as the histogram and total as the input length.
+func (s *Scratch) minSize(total int) int {
+ nbBits := float64(7) // start at 7 so the >>3 below rounds up to whole bytes
+ fTotal := float64(total)
+ for _, v := range s.count[:s.symbolLen] {
+ n := float64(v)
+ if n > 0 { // symbols that never occur contribute nothing
+ nbBits += math.Log2(fTotal/n) * n // n occurrences at log2(total/n) bits each
+ }
+ }
+ return int(nbBits) >> 3
+}
+
+func highBit32(val uint32) (n uint32) { // highBit32 returns the index of the highest set bit of val.
+ return uint32(bits.Len32(val) - 1) // bits.Len32(0) is 0, so val == 0 yields uint32(-1), i.e. math.MaxUint32
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
new file mode 100644
index 0000000..e7d7eb0
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -0,0 +1,417 @@
+# zstd
+
+[Zstandard](https://facebook.github.io/zstd/) is a real-time compression algorithm, providing high compression ratios.
+It offers a very wide range of compression / speed trade-off, while being backed by a very fast decoder.
+A high performance compression algorithm is implemented. For now focused on speed.
+
+This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content.
+
+This package is pure Go and without use of "unsafe".
+
+The `zstd` package is provided as open source software using a Go standard license.
+
+Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
+
+## Installation
+
+Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
+
+Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
+
+
+## Compressor
+
+### Status:
+
+STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively
+used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates.
+
+There may still be specific combinations of data types/size/settings that could lead to edge cases,
+so as always, testing is recommended.
+
+For now, a high speed (fastest) and medium-fast (default) compressor has been implemented.
+
+* The "Fastest" compression ratio is roughly equivalent to zstd level 1.
+* The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
+* The "Better" compression ratio is roughly equivalent to zstd level 7.
+* The "Best" compression ratio is roughly equivalent to zstd level 11.
+
+In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
+The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
+
+
+### Usage
+
+An Encoder can be used for either compressing a stream via the
+`io.WriteCloser` interface supported by the Encoder or as multiple independent
+tasks via the `EncodeAll` function.
+Smaller encodes are encouraged to use the EncodeAll function.
+Use `NewWriter` to create a new instance that can be used for both.
+
+To create a writer with default options, do this:
+
+```Go
+// Compress input to output.
+func Compress(in io.Reader, out io.Writer) error {
+ enc, err := zstd.NewWriter(out)
+ if err != nil {
+ return err
+ }
+ _, err = io.Copy(enc, in)
+ if err != nil {
+ enc.Close()
+ return err
+ }
+ return enc.Close()
+}
+```
+
+Now you can encode by writing data to `enc`. The output will be finished writing when `Close()` is called.
+Even if your encode fails, you should still call `Close()` to release any resources that may be held up.
+
+The above is fine for big encodes. However, whenever possible try to *reuse* the writer.
+
+To reuse the encoder, you can use the `Reset(io.Writer)` function to change to another output.
+This will allow the encoder to reuse all resources and avoid wasteful allocations.
+
+Currently stream encoding has 'light' concurrency, meaning up to 2 goroutines can be working on part
+of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is likely to change
+in the future. So if you want to limit concurrency for future updates, specify the concurrency
+you would like.
+
+You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined
+compression settings can be specified.
+
+#### Future Compatibility Guarantees
+
+This will be an evolving project. When using this package it is important to note that both the compression efficiency and speed may change.
+
+The goal will be to keep the default efficiency at the default zstd (level 3).
+However the encoding should never be assumed to remain the same,
+and you should not use hashes of compressed output for similarity checks.
+
+The Encoder can be assumed to produce the same output from the exact same code version.
+However, there may be modes in the future that break this,
+although they will not be enabled without an explicit option.
+
+This encoder is not designed to (and will probably never) output the exact same bitstream as the reference encoder.
+
+Also note, that the cgo decompressor currently does not [report all errors on invalid input](https://github.com/DataDog/zstd/issues/59),
+[omits error checks](https://github.com/DataDog/zstd/issues/61), [ignores checksums](https://github.com/DataDog/zstd/issues/43)
+and seems to ignore concatenated streams, even though [it is part of the spec](https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames).
+
+#### Blocks
+
+For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`.
+
+`EncodeAll` will encode all input in src and append it to dst.
+This function can be called concurrently, but each call will only run on a single goroutine.
+
+Encoded blocks can be concatenated and the result will be the combined input stream.
+Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`.
+
+Especially when encoding blocks you should take special care to reuse the encoder.
+This will effectively make it run without allocations after a warmup period.
+To make it run completely without allocations, supply a destination buffer with space for all content.
+
+```Go
+import "github.com/klauspost/compress/zstd"
+
+// Create a writer that caches compressors.
+// For this operation type we supply a nil Writer.
+var encoder, _ = zstd.NewWriter(nil)
+
+// Compress a buffer.
+// If you have a destination buffer, the allocation in the call can also be eliminated.
+func Compress(src []byte) []byte {
+ return encoder.EncodeAll(src, make([]byte, 0, len(src)))
+}
+```
+
+You can control the maximum number of concurrent encodes using the `WithEncoderConcurrency(n)`
+option when creating the writer.
+
+Using the Encoder for both a stream and individual blocks concurrently is safe.
+
+### Performance
+
+I have collected some speed examples to compare speed and compression against other compressors.
+
+* `file` is the input file.
+* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library.
+* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default"; 3 is "better", 4 is "best".
+* `insize`/`outsize` is the input/output size.
+* `millis` is the number of milliseconds used for compression.
+* `mb/s` is megabytes (2^20 bytes) per second.
+
+```
+Silesia Corpus:
+http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
+
+This package:
+file out level insize outsize millis mb/s
+silesia.tar zskp 1 211947520 73101992 643 313.87
+silesia.tar zskp 2 211947520 67504318 969 208.38
+silesia.tar zskp 3 211947520 64595893 2007 100.68
+silesia.tar zskp 4 211947520 60995370 7691 26.28
+
+cgo zstd:
+silesia.tar zstd 1 211947520 73605392 543 371.56
+silesia.tar zstd 3 211947520 66793289 864 233.68
+silesia.tar zstd 6 211947520 62916450 1913 105.66
+silesia.tar zstd 9 211947520 60212393 5063 39.92
+
+gzip, stdlib/this package:
+silesia.tar gzstd 1 211947520 80007735 1654 122.21
+silesia.tar gzkp 1 211947520 80369488 1168 173.06
+
+GOB stream of binary data. Highly compressible.
+https://files.klauspost.com/compress/gob-stream.7z
+
+file out level insize outsize millis mb/s
+gob-stream zskp 1 1911399616 235022249 3088 590.30
+gob-stream zskp 2 1911399616 205669791 3786 481.34
+gob-stream zskp 3 1911399616 175034659 9636 189.17
+gob-stream zskp 4 1911399616 167273881 29337 62.13
+gob-stream zstd 1 1911399616 249810424 2637 691.26
+gob-stream zstd 3 1911399616 208192146 3490 522.31
+gob-stream zstd 6 1911399616 193632038 6687 272.56
+gob-stream zstd 9 1911399616 177620386 16175 112.70
+gob-stream gzstd 1 1911399616 357382641 10251 177.82
+gob-stream gzkp 1 1911399616 362156523 5695 320.08
+
+The test data for the Large Text Compression Benchmark is the first
+10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
+http://mattmahoney.net/dc/textdata.html
+
+file out level insize outsize millis mb/s
+enwik9 zskp 1 1000000000 343848582 3609 264.18
+enwik9 zskp 2 1000000000 317276632 5746 165.97
+enwik9 zskp 3 1000000000 292243069 12162 78.41
+enwik9 zskp 4 1000000000 275241169 36430 26.18
+enwik9 zstd 1 1000000000 358072021 3110 306.65
+enwik9 zstd 3 1000000000 313734672 4784 199.35
+enwik9 zstd 6 1000000000 295138875 10290 92.68
+enwik9 zstd 9 1000000000 278348700 28549 33.40
+enwik9 gzstd 1 1000000000 382578136 9604 99.30
+enwik9 gzkp 1 1000000000 383825945 6544 145.73
+
+Highly compressible JSON file.
+https://files.klauspost.com/compress/github-june-2days-2019.json.zst
+
+file out level insize outsize millis mb/s
+github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
+github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
+github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
+github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
+github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
+github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
+github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
+github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
+github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
+github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
+
+VM Image, Linux mint with a few installed applications:
+https://files.klauspost.com/compress/rawstudio-mint14.7z
+
+file out level insize outsize millis mb/s
+rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
+rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
+rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
+rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
+rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
+rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
+rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
+rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
+rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
+rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
+
+CSV data:
+https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
+
+file out level insize outsize millis mb/s
+nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
+nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
+nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
+nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
+nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
+nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
+nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
+nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
+nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
+nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
+```
+
+## Decompressor
+
+Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
+
+This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
+kindly supplied by [fuzzit.dev](https://fuzzit.dev/).
+The main purpose of the fuzz testing is to ensure that it is not possible to crash the decoder,
+or run it past its limits with ANY input provided.
+
+### Usage
+
+The package has been designed for two main usages, big streams of data and smaller in-memory buffers.
+There are two main usages of the package for these. Both of them are accessed by creating a `Decoder`.
+
+For streaming use a simple setup could look like this:
+
+```Go
+import "github.com/klauspost/compress/zstd"
+
+func Decompress(in io.Reader, out io.Writer) error {
+ d, err := zstd.NewReader(in)
+ if err != nil {
+ return err
+ }
+ defer d.Close()
+
+ // Copy content...
+ _, err = io.Copy(out, d)
+ return err
+}
+```
+
+It is important to use the "Close" function when you no longer need the Reader to stop running goroutines.
+See "Allocation-less operation" below.
+
+For decoding buffers, it could look something like this:
+
+```Go
+import "github.com/klauspost/compress/zstd"
+
+// Create a reader that caches decompressors.
+// For this operation type we supply a nil Reader.
+var decoder, _ = zstd.NewReader(nil)
+
+// Decompress a buffer. We don't supply a destination buffer,
+// so it will be allocated by the decoder.
+func Decompress(src []byte) ([]byte, error) {
+ return decoder.DecodeAll(src, nil)
+}
+```
+
+Both of these cases should provide the functionality needed.
+The decoder can be used for *concurrent* decompression of multiple buffers.
+It will only allow a certain number of concurrent operations to run.
+To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
+
+### Dictionaries
+
+Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed.
+
+Dictionaries are added individually to Decoders.
+Dictionaries are generated by the `zstd --train` command and contain an initial state for the decoder.
+To add a dictionary use the `WithDecoderDicts(dicts ...[]byte)` option with the dictionary data.
+Several dictionaries can be added at once.
+
+The dictionary will be used automatically for the data that specifies them.
+A re-used Decoder will still contain the dictionaries registered.
+
+When registering multiple dictionaries with the same ID, the last one will be used.
+
+It is possible to use dictionaries when compressing data.
+
+To enable a dictionary use `WithEncoderDict(dict []byte)`. Here only one dictionary will be used
+and it will likely be used even if it doesn't improve compression.
+
+The same dictionary must be used to decompress the content.
+
+For any real gains, the dictionary should be built with similar data.
+If an unsuitable dictionary is used the output may be slightly larger than using no dictionary.
+Use the [zstd commandline tool](https://github.com/facebook/zstd/releases) to build a dictionary from sample data.
+For information see [zstd dictionary information](https://github.com/facebook/zstd#the-case-for-small-data-compression).
+
+For now there is a fixed startup performance penalty for compressing content with dictionaries.
+This will likely be improved over time. Just be aware to test performance when implementing.
+
+### Allocation-less operation
+
+The decoder has been designed to operate without allocations after a warmup.
+
+This means that you should *store* the decoder for best performance.
+To re-use a stream decoder, use the `Reset(r io.Reader) error` to switch to another stream.
+A decoder can safely be re-used even if the previous stream failed.
+
+To release the resources, you must call the `Close()` function on a decoder.
+After this it can *no longer be reused*, but all running goroutines will be stopped.
+So you *must* use this if you will no longer need the Reader.
+
+For decompressing smaller buffers a single decoder can be used.
+When decoding buffers, you can supply a destination slice with length 0 and your expected capacity.
+In this case no unneeded allocations should be made.
+
+### Concurrency
+
+The buffer decoder does everything on the same goroutine and does nothing concurrently.
+It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that.
+
+The stream decoder operates on several goroutines:
+
+* One goroutine reads input and splits the input to several block decoders.
+* A number of decoders will decode blocks.
+* A goroutine coordinates these blocks and sends history from one to the next.
+
+So effectively this also means the decoder will "read ahead" and prepare data to always be available for output.
+
+Since "blocks" are quite dependent on the output of the previous block, stream decoding will only have limited concurrency.
+
+In practice this means that concurrency is often limited to utilizing about 2 cores effectively.
+
+
+### Benchmarks
+
+These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
+
+The first two are streaming decodes and the last are smaller inputs.
+
+```
+BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op
+BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op
+
+BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op
+BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op
+
+Concurrent performance:
+
+BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op
+
+BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op
+```
+
+This reflects the performance around May 2020, but this may be out of date.
+
+# Contributions
+
+Contributions are always welcome.
+For new features/fixes, remember to add tests and for performance enhancements include benchmarks.
+
+For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files.
+
+For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan).
+
+This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go
new file mode 100644
index 0000000..8544585
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@@ -0,0 +1,136 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "encoding/binary"
+ "errors"
+ "io"
+ "math/bits"
+)
+
+// bitReader reads a bitstream in reverse, from the end of the input towards its start.
+// The highest set bit of the last byte indicates the start of the stream and is used
+// for aligning the input.
+type bitReader struct {
+ in []byte // input buffer; set to nil by close
+ off uint // next byte to read is at in[off - 1]
+ value uint64 // bit register. Maybe use [16]byte, but shifting is awkward.
+ bitsRead uint8 // bits of value already consumed; more than 64 means an overread
+}
+
+// init initializes and resets the bit reader to read in; it fails if in is empty or its last byte is 0.
+func (b *bitReader) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest set bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64 // register starts out empty
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill() // second call picks up the remainder when 5 to 7 bytes were supplied
+ }
+ b.bitsRead += 8 - uint8(highBits(uint32(v))) // skip the zero bits above the end mark and the mark itself
+ return nil
+}
+
+// getBits will return the next n bits and advance the reader. n can be 0, in which case 0 is returned.
+func (b *bitReader) getBits(n uint8) int {
+ if n == 0 /*|| b.bitsRead >= 64 */ {
+ return 0
+ }
+ return b.getBitsFast(n)
+}
+
+// getBitsFast returns the next n bits; it requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReader) getBitsFast(n uint8) int {
+ const regMask = 64 - 1 // keeps both shift counts within 0..63
+ v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) // shift out consumed bits, keep the top n
+ b.bitsRead += n
+ return int(v)
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available (b.off >= 4); this is not checked here.
+func (b *bitReader) fillFast() {
+ if b.bitsRead < 32 { // more than 32 unread bits are still in value
+ return
+ }
+ // 2 bounds checks.
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) // little-endian load
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart() loads the 8 bytes before b.off; it assumes the bitreader is empty and there is at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0 // all 64 bits of value are unread
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available, unless the input runs out first.
+func (b *bitReader) fill() {
+ if b.bitsRead < 32 { // more than 32 unread bits are still in value
+ return
+ }
+ if b.off >= 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) // little-endian load
+ b.value = (b.value << 32) | uint64(low)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 { // fewer than 4 bytes left: take them one at a time
+ b.value = (b.value << 8) | uint64(b.in[b.off-1])
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all input bytes have been loaded and all loaded bits have been read.
+func (b *bitReader) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// overread returns true if more bits have been requested than are on the stream.
+func (b *bitReader) overread() bool {
+ return b.bitsRead > 64
+}
+
+// remain returns the number of bits remaining: bytes not yet loaded plus unread bits in value.
+func (b *bitReader) remain() uint {
+ return b.off*8 + 64 - uint(b.bitsRead) // unsigned arithmetic: may wrap around if overread() is true
+}
+
+// close releases the input and returns io.ErrUnexpectedEOF if out-of-buffer reads occurred.
+func (b *bitReader) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 { // same condition as overread()
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+func highBits(val uint32) (n uint32) { // highBits returns the index of the highest set bit of val.
+ return uint32(bits.Len32(val) - 1) // bits.Len32(0) is 0, so val == 0 wraps to MaxUint32
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
new file mode 100644
index 0000000..303ae90
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go
@@ -0,0 +1,169 @@
+// Copyright 2018 Klaus Post. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
+
+package zstd
+
+import "fmt"
+
+// bitWriter will write bits.
+// First bit will be LSB of the first byte of output.
+type bitWriter struct {
+ bitContainer uint64 // pending bits not yet appended to out, lowest bit first
+ nBits uint8 // number of pending bits held in bitContainer
+ out []byte // output; the flush methods append to it
+}
+
+// bitMask16[n] masks the low n bits for n up to 16. It has extra slots so indexing with bits&31 avoids a bounds check.
+var bitMask16 = [32]uint16{
+ 0, 1, 3, 7, 0xF, 0x1F,
+ 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
+ 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
+ 0xFFFF, 0xFFFF} /* up to 16 bits */
+
+var bitMask32 = [32]uint32{ // bitMask32[n] masks the low n bits
+ 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF,
+ 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF,
+ 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF,
+ 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF,
+} // up to 31 bits: the last entry (index 31) is the largest mask
+
+// addBits16NC will add up to 16 bits, taken from the low bits of value; higher bits are masked off.
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
+ b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// addBits32NC will add up to 31 bits from the low bits of value (the mask is selected with bits&31, so 32 selects mask 0).
+// It will not check if there is space for them,
+// so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
+ b.bitContainer |= uint64(value&bitMask32[bits&31]) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// addBits16Clean will add up to 16 bits without masking. value may not contain more set bits than indicated.
+// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
+func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
+ b.bitContainer |= uint64(value) << (b.nBits & 63)
+ b.nBits += bits
+}
+
+// flush will append all pending full bytes to out, lowest byte first; it panics if more than 8 bytes are pending.
+// There will be at least 56 bits available for writing when this has been called.
+// Using flush32 is faster, but leaves less space for writing.
+func (b *bitWriter) flush() {
+ v := b.nBits >> 3 // number of complete bytes pending
+ switch v {
+ case 0: // nothing to write
+ case 1:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ )
+ case 2:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ )
+ case 3:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ )
+ case 4:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ )
+ case 5:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ )
+ case 6:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ )
+ case 7:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ )
+ case 8:
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24),
+ byte(b.bitContainer>>32),
+ byte(b.bitContainer>>40),
+ byte(b.bitContainer>>48),
+ byte(b.bitContainer>>56),
+ )
+ default:
+ panic(fmt.Errorf("bits (%d) > 64", b.nBits))
+ }
+ b.bitContainer >>= v << 3 // drop the bytes that were just written
+ b.nBits &= 7 // only the partial-byte remainder stays pending
+}
+
+// flush32 will write 4 bytes if 32 or more bits are pending, so there are at least 32 bits available for writing.
+func (b *bitWriter) flush32() {
+ if b.nBits < 32 { // fewer than 32 bits pending: enough room already
+ return
+ }
+ b.out = append(b.out,
+ byte(b.bitContainer),
+ byte(b.bitContainer>>8),
+ byte(b.bitContainer>>16),
+ byte(b.bitContainer>>24))
+ b.nBits -= 32
+ b.bitContainer >>= 32
+}
+
+// flushAlign will write all pending bits, rounded up to whole bytes, and leave the container empty.
+func (b *bitWriter) flushAlign() {
+ nbBytes := (b.nBits + 7) >> 3 // pending bits rounded up to bytes
+ for i := uint8(0); i < nbBytes; i++ {
+ b.out = append(b.out, byte(b.bitContainer>>(i*8)))
+ }
+ b.nBits = 0
+ b.bitContainer = 0
+}
+
+// close will write the end-mark bit (a single 1 bit) and write the final byte(s)
+// to the output. It always returns nil.
+func (b *bitWriter) close() error {
+ // End mark
+ b.addBits16Clean(1, 1)
+ // flush until next byte.
+ b.flushAlign()
+ return nil
+}
+
+// reset discards any pending bits and continues writing by appending to out.
+func (b *bitWriter) reset(out []byte) {
+ b.bitContainer = 0
+ b.nBits = 0
+ b.out = out
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
new file mode 100644
index 0000000..b51d922
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -0,0 +1,739 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "fmt"
+ "io"
+ "sync"
+
+ "github.com/klauspost/compress/huff0"
+ "github.com/klauspost/compress/zstd/internal/xxhash"
+)
+
+type blockType uint8 // blockType is the 2 bit type field read from a block header.
+
+//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex
+
+const (
+ blockTypeRaw blockType = iota // 0: the block data is the output
+ blockTypeRLE // 1: the output is a single byte repeated RLESize times
+ blockTypeCompressed // 2: the block holds literals and sequences
+ blockTypeReserved // 3: rejected when read from a stream; also used internally to return errors
+)
+
+type literalsBlockType uint8 // literalsBlockType is the low 2 bits of the first literals section byte.
+
+const (
+ literalsBlockRaw literalsBlockType = iota // 0: literals are stored as-is
+ literalsBlockRLE // 1: literals are a single byte repeated
+ literalsBlockCompressed // 2: Huffman compressed literals preceded by their table
+ literalsBlockTreeless // 3: Huffman compressed literals decoded with the table kept in the history
+)
+
+const (
+ // maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
+ maxCompressedBlockSize = 128 << 10
+
+ // maxBlockSize is the maximum possible block size (all Raw+Uncompressed), the largest value of the 21 bit header size field.
+ maxBlockSize = (1 << 21) - 1
+
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
+ maxCompressedLiteralSize = 1 << 18 // sizes of compressed literals use at most 18 bits
+ maxRLELiteralSize = 1 << 20 // sizes of raw and RLE literals use at most 20 bits
+ maxMatchLen = 131074
+ maxSequences = 0x7f00 + 0xffff // largest count the 3 byte sequence count encoding can hold
+
+ // We support slightly less than the reference decoder to be able to
+ // use ints on 32 bit archs.
+ maxOffsetBits = 30
+)
+
+// Package-level pools of reusable decoder state.
+var (
+	// huffDecoderPool provides Huffman scratch space for compressed literals.
+	huffDecoderPool = sync.Pool{New: func() interface{} { return &huff0.Scratch{} }}
+
+	// fseDecoderPool provides FSE decoders for sequence tables that are
+	// described in the block itself (RLE and FSE compression modes).
+	fseDecoderPool = sync.Pool{New: func() interface{} { return &fseDecoder{} }}
+)
+
+type blockDec struct {
+ // Raw source data of the block.
+ data []byte
+ dataStorage []byte // buffer handed to readBig when the block data is read
+
+ // Destination of the decoded data.
+ dst []byte
+
+ // Buffer for literals data.
+ literalBuf []byte
+
+ // Window size of the block.
+ WindowSize uint64
+
+ history chan *history // delivers the history to the decoder goroutine
+ input chan struct{} // signals the decoder goroutine that a block is ready to decode
+ result chan decodeOutput // receives the outcome of every decoded block
+ sequenceBuf []seq // reused between blocks, sized to the sequence count in decodeCompressed
+ err error // error returned as result for blockTypeReserved, set by sendErr
+ decWG sync.WaitGroup // tracks the decoder goroutine, Close waits on it
+
+ // Frame to use for singlethreaded decoding.
+ // Should not be used by the decoder itself since parent may be another frame.
+ localFrame *frameDec
+
+ // Block is RLE, this is the size.
+ RLESize uint32
+ tmp [4]byte
+
+ Type blockType // block type read from the block header
+
+ // Is this the last block of a frame?
+ Last bool
+
+ // Use less memory
+ lowMem bool
+}
+// String describes the block for debug output; a nil receiver yields "<nil>".
+func (b *blockDec) String() string {
+	if b == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("Stream Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize)
+}
+
+// newBlockDec creates a block decoder and starts its decoding goroutine.
+func newBlockDec(lowMem bool) *blockDec {
+	b := &blockDec{lowMem: lowMem}
+	// Every channel has room for exactly one pending element.
+	b.input = make(chan struct{}, 1)
+	b.history = make(chan *history, 1)
+	b.result = make(chan decodeOutput, 1)
+	b.decWG.Add(1)
+	go b.startDecoder()
+	return b
+}
+
+// reset will reset the block by reading the next block header and the block data from br.
+// Input must be a start of a block and will be at the end of the block when returned.
+func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
+ b.WindowSize = windowSize
+ tmp := br.readSmall(3) // the block header is 3 bytes, assembled little endian below
+ if tmp == nil {
+ if debug {
+ println("Reading block header:", io.ErrUnexpectedEOF)
+ }
+ return io.ErrUnexpectedEOF
+ }
+ bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
+ b.Last = bh&1 != 0 // bit 0: last block flag
+ b.Type = blockType((bh >> 1) & 3) // bits 1-2: block type
+ // find size.
+ cSize := int(bh >> 3) // bits 3-23: size field
+ maxSize := maxBlockSize // capacity wanted for b.dst; -1 when no destination is needed
+ switch b.Type {
+ case blockTypeReserved:
+ return ErrReservedBlockType
+ case blockTypeRLE:
+ b.RLESize = uint32(cSize) // for RLE the size field is the decoded size
+ if b.lowMem {
+ maxSize = cSize
+ }
+ cSize = 1 // only the byte to repeat is read from the stream
+ case blockTypeCompressed:
+ if debug {
+ println("Data size on stream:", cSize)
+ }
+ b.RLESize = 0
+ maxSize = maxCompressedBlockSize
+ if windowSize < maxCompressedBlockSize && b.lowMem {
+ maxSize = int(windowSize)
+ }
+ if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
+ if debug {
+ printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
+ }
+ return ErrCompressedSizeTooBig
+ }
+ case blockTypeRaw:
+ b.RLESize = 0
+ // We do not need a destination for raw blocks.
+ maxSize = -1
+ default:
+ panic("Invalid block type")
+ }
+
+ // Read block data.
+ if cap(b.dataStorage) < cSize {
+ if b.lowMem {
+ b.dataStorage = make([]byte, 0, cSize)
+ } else {
+ b.dataStorage = make([]byte, 0, maxBlockSize)
+ }
+ }
+ if cap(b.dst) <= maxSize { // never true for maxSize == -1
+ b.dst = make([]byte, 0, maxSize+1)
+ }
+ var err error
+ b.data, err = br.readBig(cSize, b.dataStorage)
+ if err != nil {
+ if debug {
+ println("Reading block:", err, "(", cSize, ")", len(b.data))
+ printf("%T", br)
+ }
+ return err
+ }
+ return nil
+}
+
+// sendErr will make the decoder goroutine return err as the result of this block.
+func (b *blockDec) sendErr(err error) {
+ b.Last = true
+ b.Type = blockTypeReserved // startDecoder returns b.err for reserved blocks
+ b.err = err
+ b.input <- struct{}{}
+}
+
+// Close will release resources: all channels are closed and the decoder goroutine is waited for.
+// Closed blockDec cannot be reset.
+func (b *blockDec) Close() {
+ close(b.input)
+ close(b.history)
+ close(b.result)
+ b.decWG.Wait()
+}
+
+// startDecoder will decode one block each time it receives input, until input is closed.
+// Every branch takes one history from b.history and sends one result on b.result.
+func (b *blockDec) startDecoder() {
+ defer b.decWG.Done()
+ for range b.input {
+ //println("blockDec: Got block input")
+ switch b.Type {
+ case blockTypeRLE:
+ if cap(b.dst) < int(b.RLESize) {
+ if b.lowMem {
+ b.dst = make([]byte, b.RLESize)
+ } else {
+ b.dst = make([]byte, maxBlockSize)
+ }
+ }
+ o := decodeOutput{
+ d: b,
+ b: b.dst[:b.RLESize],
+ err: nil,
+ }
+ v := b.data[0] // the byte to repeat
+ for i := range o.b {
+ o.b[i] = v
+ }
+ hist := <-b.history
+ hist.append(o.b)
+ b.result <- o
+ case blockTypeRaw:
+ o := decodeOutput{
+ d: b,
+ b: b.data, // raw data is returned without copying
+ err: nil,
+ }
+ hist := <-b.history
+ hist.append(o.b)
+ b.result <- o
+ case blockTypeCompressed:
+ b.dst = b.dst[:0]
+ err := b.decodeCompressed(nil) // nil: the history is fetched from b.history inside
+ o := decodeOutput{
+ d: b,
+ b: b.dst,
+ err: err,
+ }
+ if debug {
+ println("Decompressed to", len(b.dst), "bytes, error:", err)
+ }
+ b.result <- o
+ case blockTypeReserved:
+ // Used for returning errors.
+ <-b.history
+ b.result <- decodeOutput{
+ d: b,
+ b: nil,
+ err: b.err,
+ }
+ default:
+ panic("Invalid block type")
+ }
+ if debug {
+ println("blockDec: Finished block")
+ }
+ }
+}
+
+// decodeBuf decodes the block synchronously and adds the output to hist.
+// The supplied history is used directly, nothing is fetched from b.history.
+func (b *blockDec) decodeBuf(hist *history) error {
+	switch b.Type {
+	case blockTypeReserved:
+		// Used for returning errors.
+		return b.err
+	case blockTypeRaw:
+		hist.appendKeep(b.data)
+		return nil
+	case blockTypeRLE:
+		if cap(b.dst) < int(b.RLESize) {
+			// Low memory mode allocates only what this block needs.
+			size := maxBlockSize
+			if b.lowMem {
+				size = int(b.RLESize)
+			}
+			b.dst = make([]byte, size)
+		}
+		b.dst = b.dst[:b.RLESize]
+		fill := b.data[0]
+		for i := range b.dst {
+			b.dst[i] = fill
+		}
+		hist.appendKeep(b.dst)
+		return nil
+	case blockTypeCompressed:
+		// Decode with the history buffer as destination, then hand it back.
+		saved := b.dst
+		b.dst, hist.b = hist.b, nil
+		err := b.decodeCompressed(hist)
+		if debug {
+			println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
+		}
+		hist.b, b.dst = b.dst, saved
+		return err
+	default:
+		panic("Invalid block type")
+	}
+}
+
+// decodeCompressed will decode a compressed block and add the output to b.dst.
+// If no history is supplied the decoder will decode as much as possible
+// before fetching the history from blockDec.history
+func (b *blockDec) decodeCompressed(hist *history) error {
+	in := b.data
+	delayedHistory := hist == nil
+
+	if delayedHistory {
+		// We must always grab history.
+		defer func() {
+			if hist == nil {
+				<-b.history
+			}
+		}()
+	}
+	// There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header
+	if len(in) < 2 {
+		return ErrBlockTooSmall
+	}
+	litType := literalsBlockType(in[0] & 3)
+	var litRegenSize int
+	var litCompSize int
+	sizeFormat := (in[0] >> 2) & 3
+	var fourStreams bool
+	switch litType {
+	case literalsBlockRaw, literalsBlockRLE:
+		switch sizeFormat {
+		case 0, 2:
+			// Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte.
+			litRegenSize = int(in[0] >> 3)
+			in = in[1:]
+		case 1:
+			// Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes.
+			litRegenSize = int(in[0]>>4) + (int(in[1]) << 4)
+			in = in[2:]
+		case 3:
+			// Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes.
+			if len(in) < 3 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12)
+			in = in[3:]
+		}
+	case literalsBlockCompressed, literalsBlockTreeless:
+		switch sizeFormat {
+		case 0, 1:
+			// Both Regenerated_Size and Compressed_Size use 10 bits (0-1023).
+			if len(in) < 3 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12)
+			litRegenSize = int(n & 1023)
+			litCompSize = int(n >> 10)
+			fourStreams = sizeFormat == 1
+			in = in[3:]
+		case 2:
+			fourStreams = true
+			if len(in) < 4 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20)
+			litRegenSize = int(n & 16383)
+			litCompSize = int(n >> 14)
+			in = in[4:]
+		case 3:
+			fourStreams = true
+			if len(in) < 5 {
+				println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in))
+				return ErrBlockTooSmall
+			}
+			n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28)
+			litRegenSize = int(n & 262143)
+			litCompSize = int(n >> 18)
+			in = in[5:]
+		}
+	}
+	if debug {
+		println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
+	}
+	var literals []byte
+	var huff *huff0.Scratch
+	switch litType {
+	case literalsBlockRaw:
+		if len(in) < litRegenSize {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize)
+			return ErrBlockTooSmall
+		}
+		literals = in[:litRegenSize]
+		in = in[litRegenSize:]
+		//printf("Found %d uncompressed literals\n", litRegenSize)
+	case literalsBlockRLE:
+		if len(in) < 1 {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1)
+			return ErrBlockTooSmall
+		}
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, litRegenSize)
+			} else {
+				if litRegenSize > maxCompressedLiteralSize {
+					// Exceptional
+					b.literalBuf = make([]byte, litRegenSize)
+				} else {
+					b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
+				}
+			}
+		}
+		literals = b.literalBuf[:litRegenSize]
+		v := in[0]
+		for i := range literals {
+			literals[i] = v
+		}
+		in = in[1:]
+		if debug {
+			printf("Found %d RLE compressed literals\n", litRegenSize)
+		}
+	case literalsBlockTreeless:
+		if len(in) < litCompSize {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
+			return ErrBlockTooSmall
+		}
+		// Store compressed literals, so we defer decoding until we get history.
+		literals = in[:litCompSize]
+		in = in[litCompSize:]
+		if debug {
+			printf("Found %d compressed literals\n", litCompSize)
+		}
+	case literalsBlockCompressed:
+		if len(in) < litCompSize {
+			println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize)
+			return ErrBlockTooSmall
+		}
+		literals = in[:litCompSize]
+		in = in[litCompSize:]
+		huff = huffDecoderPool.Get().(*huff0.Scratch)
+		var err error
+		// Ensure we have space to store it.
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, 0, litRegenSize)
+			} else {
+				b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
+			}
+		}
+		if huff == nil {
+			huff = &huff0.Scratch{}
+		}
+		huff, literals, err = huff0.ReadTable(literals, huff)
+		if err != nil {
+			println("reading huffman table:", err)
+			return err
+		}
+		// Use our out buffer.
+		if fourStreams {
+			literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
+		} else {
+			literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
+		}
+		if err != nil {
+			println("decoding compressed literals:", err)
+			return err
+		}
+		// Make sure we don't leak our literals buffer
+		if len(literals) != litRegenSize {
+			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
+		}
+		if debug {
+			printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
+		}
+	}
+
+	// Decode Sequences
+	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section
+	if len(in) < 1 {
+		return ErrBlockTooSmall
+	}
+	seqHeader := in[0]
+	nSeqs := 0
+	switch {
+	case seqHeader == 0:
+		in = in[1:]
+	case seqHeader < 128:
+		nSeqs = int(seqHeader)
+		in = in[1:]
+	case seqHeader < 255:
+		if len(in) < 2 {
+			return ErrBlockTooSmall
+		}
+		nSeqs = int(seqHeader-128)<<8 | int(in[1])
+		in = in[2:]
+	case seqHeader == 255:
+		if len(in) < 3 {
+			return ErrBlockTooSmall
+		}
+		nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
+		in = in[3:]
+	}
+	// Allocate sequences
+	if cap(b.sequenceBuf) < nSeqs {
+		if b.lowMem {
+			b.sequenceBuf = make([]seq, nSeqs)
+		} else {
+			// Allocate max
+			b.sequenceBuf = make([]seq, nSeqs, maxSequences)
+		}
+	} else {
+		// Reuse buffer
+		b.sequenceBuf = b.sequenceBuf[:nSeqs]
+	}
+	var seqs = &sequenceDecs{}
+	if nSeqs > 0 {
+		if len(in) < 1 {
+			return ErrBlockTooSmall
+		}
+		br := byteReader{b: in, off: 0}
+		compMode := br.Uint8()
+		br.advance(1)
+		if debug {
+			printf("Compression modes: 0b%b", compMode)
+		}
+		for i := uint(0); i < 3; i++ {
+			mode := seqCompMode((compMode >> (6 - i*2)) & 3)
+			if debug {
+				println("Table", tableIndex(i), "is", mode)
+			}
+			var seq *sequenceDec
+			switch tableIndex(i) {
+			case tableLiteralLengths:
+				seq = &seqs.litLengths
+			case tableOffsets:
+				seq = &seqs.offsets
+			case tableMatchLengths:
+				seq = &seqs.matchLengths
+			default:
+				panic("unknown table")
+			}
+			switch mode {
+			case compModePredefined:
+				seq.fse = &fsePredef[i]
+			case compModeRLE:
+				if br.remain() < 1 {
+					return ErrBlockTooSmall
+				}
+				v := br.Uint8()
+				br.advance(1)
+				dec := fseDecoderPool.Get().(*fseDecoder)
+				symb, err := decSymbolValue(v, symbolTableX[i])
+				if err != nil {
+					printf("RLE Transform table (%v) error: %v", tableIndex(i), err)
+					return err
+				}
+				dec.setRLE(symb)
+				seq.fse = dec
+				if debug {
+					printf("RLE set to %+v, code: %v", symb, v)
+				}
+			case compModeFSE:
+				println("Reading table for", tableIndex(i))
+				dec := fseDecoderPool.Get().(*fseDecoder)
+				err := dec.readNCount(&br, uint16(maxTableSymbol[i]))
+				if err != nil {
+					println("Read table error:", err)
+					return err
+				}
+				err = dec.transform(symbolTableX[i])
+				if err != nil {
+					println("Transform table error:", err)
+					return err
+				}
+				if debug {
+					println("Read table ok", "symbolLen:", dec.symbolLen)
+				}
+				seq.fse = dec
+			case compModeRepeat:
+				seq.repeat = true
+			}
+			if br.overread() {
+				return io.ErrUnexpectedEOF
+			}
+		}
+		in = br.unread()
+	}
+
+	// Wait for history.
+	// All time spent after this is critical since it is strictly sequential.
+	if hist == nil {
+		hist = <-b.history
+		if hist.error {
+			return ErrDecoderClosed
+		}
+	}
+
+	// Decode treeless literal block.
+	if litType == literalsBlockTreeless {
+		// TODO: We could send the history early WITHOUT the stream history.
+		// This would allow decoding treeless literals before the byte history is available.
+		// Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
+		// So not much obvious gain here.
+
+		if hist.huffTree == nil {
+			return errors.New("literal block was treeless, but no history was defined")
+		}
+		// Ensure we have space to store it.
+		if cap(b.literalBuf) < litRegenSize {
+			if b.lowMem {
+				b.literalBuf = make([]byte, 0, litRegenSize)
+			} else {
+				b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
+			}
+		}
+		var err error
+		// Use our out buffer.
+		huff = hist.huffTree
+		if fourStreams {
+			literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
+		} else {
+			literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
+		}
+		// Make sure we don't leak our literals buffer
+		if err != nil {
+			println("decompressing literals:", err)
+			return err
+		}
+		if len(literals) != litRegenSize {
+			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
+		}
+	} else {
+		if hist.huffTree != nil && huff != nil {
+			if hist.dict == nil || hist.dict.litEnc != hist.huffTree {
+				huffDecoderPool.Put(hist.huffTree)
+			}
+			hist.huffTree = nil
+		}
+	}
+	if huff != nil {
+		hist.huffTree = huff
+	}
+	if debug {
+		println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
+	}
+
+	if nSeqs == 0 {
+		// Decompressed content is defined entirely as Literals Section content.
+		b.dst = append(b.dst, literals...)
+		if delayedHistory {
+			hist.append(literals)
+		}
+		return nil
+	}
+
+	seqs, err := seqs.mergeHistory(&hist.decoders)
+	if err != nil {
+		return err
+	}
+	if debug {
+		println("History merged ok")
+	}
+	br := &bitReader{}
+	if err := br.init(in); err != nil {
+		return err
+	}
+
+	// TODO: Investigate if sending history without decoders are faster.
+	// This would allow the sequences to be decoded async and only have to construct stream history.
+	// If only recent offsets were not transferred, this would be an obvious win.
+	// Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded.
+
+	hbytes := hist.b
+	if len(hbytes) > hist.windowSize {
+		hbytes = hbytes[len(hbytes)-hist.windowSize:]
+		// We do not need history any more.
+		if hist.dict != nil {
+			hist.dict.content = nil
+		}
+	}
+
+	if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
+		println("initializing sequences:", err)
+		return err
+	}
+
+	err = seqs.decode(nSeqs, br, hbytes)
+	if err != nil {
+		return err
+	}
+	if !br.finished() {
+		return fmt.Errorf("%d extra bits on block, should be 0", br.remain())
+	}
+
+	err = br.close()
+	if err != nil {
+		// The error must not be dropped: the block is not valid if closing the bit stream fails.
+		printf("Closing sequences: %v, %+v\n", err, *br)
+		return err
+	}
+	if len(b.data) > maxCompressedBlockSize {
+		return fmt.Errorf("compressed block size too large (%d)", len(b.data))
+	}
+	// Set output and release references.
+	b.dst = seqs.out
+	seqs.out, seqs.literals, seqs.hist = nil, nil, nil
+
+	if !delayedHistory {
+		// If we don't have delayed history, no need to update.
+		hist.recentOffsets = seqs.prevOffset
+		return nil
+	}
+	if b.Last {
+		// if last block we don't care about history.
+		println("Last block, no history returned")
+		hist.b = hist.b[:0]
+		return nil
+	}
+	hist.append(b.dst)
+	hist.recentOffsets = seqs.prevOffset
+	if debug {
+		println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
+	}
+
+	return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
new file mode 100644
index 0000000..e1be092
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -0,0 +1,871 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "fmt"
+ "math"
+ "math/bits"
+
+ "github.com/klauspost/compress/huff0"
+)
+
+type blockEnc struct {
+ size int
+ literals []byte // literal bytes collected for the block
+ sequences []seq // sequences collected for the block; when empty only literals are encoded
+ coders seqCoders // sequence encoders (ll, of and ml, each with a current and a previous one)
+ litEnc *huff0.Scratch // Huffman state used to compress the literals
+ dictLitEnc *huff0.Scratch // if set, its table is transferred to litEnc before literals are compressed
+ wr bitWriter
+
+ extraLits int
+ output []byte // encoded output of the block
+ recentOffsets [3]uint32 // the three most recent match offsets, updated by matchOffset
+ prevRecentOffsets [3]uint32 // backup of recentOffsets, written by pushOffsets
+
+ last bool // written as the 'last' flag of the block header
+ lowMem bool // makes init allocate smaller initial buffers
+}
+
+// init prepares the block for use and should be called once after creation.
+// If called more than once, the effect is the same as calling reset.
+func (b *blockEnc) init() {
+	// Minimum buffer capacities for regular operation.
+	var (
+		litCap = maxCompressedBlockSize
+		seqCap = 200
+		outCap = maxCompressedBlockSize
+	)
+	if b.lowMem {
+		// Start small: 1K literals, 20 sequences and 1K output.
+		litCap = 1 << 10
+		seqCap = 20
+		outCap = 1 << 10
+	}
+
+	// Existing buffers are kept when they are already big enough.
+	if cap(b.literals) < litCap {
+		b.literals = make([]byte, 0, litCap)
+	}
+	if cap(b.sequences) < seqCap {
+		b.sequences = make([]seq, 0, seqCap)
+	}
+	if cap(b.output) < outCap {
+		b.output = make([]byte, 0, outCap)
+	}
+
+	// The six sequence coders are created as a group,
+	// a missing mlEnc means none of them has been set up.
+	if c := &b.coders; c.mlEnc == nil {
+		c.mlEnc, c.mlPrev = &fseEncoder{}, &fseEncoder{}
+		c.ofEnc, c.ofPrev = &fseEncoder{}, &fseEncoder{}
+		c.llEnc, c.llPrev = &fseEncoder{}, &fseEncoder{}
+	}
+
+	// The literal encoder is always replaced by a fresh one.
+	b.litEnc = &huff0.Scratch{WantLogLess: 4}
+	// Clear the block content.
+	b.reset(nil)
+}
+
+// initNewEncode can be used to reset offsets and encoders to the initial state.
+func (b *blockEnc) initNewEncode() {
+ b.recentOffsets = [3]uint32{1, 4, 8} // initial recent offsets
+ b.litEnc.Reuse = huff0.ReusePolicyNone // encodeLits and encode switch this to ReusePolicyAllow again
+ b.coders.setPrev(nil, nil, nil)
+}
+
+// reset will reset the block for a new encode, but in the same stream,
+// meaning that encoder state will be carried over, but the block content is reset.
+// If a previous block is provided, its backed up recent offsets are carried over.
+func (b *blockEnc) reset(prev *blockEnc) {
+	if prev != nil {
+		b.recentOffsets = prev.prevRecentOffsets
+	}
+	// Truncate the buffers, keeping their capacity.
+	b.literals = b.literals[:0]
+	b.sequences = b.sequences[:0]
+	b.output = b.output[:0]
+	b.size, b.extraLits = 0, 0
+	b.last = false
+	b.dictLitEnc = nil
+}
+
+// swapEncoders will exchange the sequence coders and the literal encoder
+// of b with those of prev. No other state of the two blocks is touched.
+// prev must not be nil.
+func (b *blockEnc) swapEncoders(prev *blockEnc) {
+ b.coders.swap(&prev.coders)
+ b.litEnc, prev.litEnc = prev.litEnc, b.litEnc
+}
+
+// blockHeader contains the information for a block header in its low 24 bits:
+// bit 0 is the 'last' flag, bits 1-2 the block type and bits 3-23 the size.
+type blockHeader uint32
+
+// setLast sets or clears the 'last' indicator on a block.
+func (h *blockHeader) setLast(b bool) {
+	if b {
+		*h |= 1
+		return
+	}
+	*h &= (1<<24 - 1) &^ 1
+}
+
+// setSize will store the compressed size of a block, keeping flag and type.
+func (h *blockHeader) setSize(v uint32) {
+	flagAndType := *h & 7
+	*h = flagAndType | blockHeader(v<<3)
+}
+
+// setType sets the block type.
+func (h *blockHeader) setType(t blockType) {
+	const typeBits = 3 << 1
+	*h = *h&((1<<24-1)&^typeBits) | blockHeader(t<<1)
+}
+
+// appendTo will append the 3 bytes of the block header, low byte first, to a slice.
+func (h blockHeader) appendTo(b []byte) []byte {
+	return append(b, byte(h), byte(h>>8), byte(h>>16))
+}
+
+// String returns a string representation of the block header.
+func (h blockHeader) String() string {
+	return fmt.Sprintf("Type: %d, Size: %d, Last:%t", h>>1&3, h>>3, h&1 != 0)
+}
+
+// literalsHeader contains literals header information.
+// The header bytes start at bit 0 and bits 60-63 hold their count.
+type literalsHeader uint64
+
+// setType can be used to set the type of literal block.
+func (h *literalsHeader) setType(t literalsBlockType) {
+	*h = *h&(math.MaxUint64&^3) | literalsHeader(t)
+}
+
+// setSize can be used to set a single size, for uncompressed and RLE content.
+func (h *literalsHeader) setSize(regenLen int) {
+ inBits := bits.Len32(uint32(regenLen))
+ // Only retain 2 bits
+ const mask = 3
+ lh := uint64(*h & mask)
+ switch {
+ case inBits < 5:
+ lh |= (uint64(regenLen) << 3) | (1 << 60)
+ if debug {
+ got := int(lh>>3) & 0xff
+ if got != regenLen {
+ panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
+ }
+ }
+ case inBits < 12:
+ lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60)
+ case inBits < 20:
+ lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60)
+ default:
+ panic(fmt.Errorf("internal error: block too big (%d)", regenLen))
+ }
+ *h = literalsHeader(lh)
+}
+
+// setSizes will set the size of a compressed literals section and the input length.
+func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
+ compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen))
+ // Only retain 2 bits
+ const mask = 3
+ lh := uint64(*h & mask)
+ switch {
+ case compBits <= 10 && inBits <= 10:
+ if !single {
+ lh |= 1 << 2
+ }
+ lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
+ if debug {
+ const mmask = (1 << 24) - 1
+ n := (lh >> 4) & mmask
+ if int(n&1023) != inLen {
+ panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits))
+ }
+ if int(n>>10) != compLen {
+ panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits))
+ }
+ }
+ case compBits <= 14 && inBits <= 14:
+ lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60)
+ if single {
+ panic("single stream used with more than 10 bits length.")
+ }
+ case compBits <= 18 && inBits <= 18:
+ lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60)
+ if single {
+ panic("single stream used with more than 10 bits length.")
+ }
+ default:
+ panic("internal error: block too big")
+ }
+ *h = literalsHeader(lh)
+}
+
+// appendTo will append the literals header to a byte slice.
+// The number of header bytes, 1 to 5, is taken from the top 4 bits of h
+// and the bytes are written least significant byte first.
+// Any other size is an internal error and will panic.
+func (h literalsHeader) appendTo(b []byte) []byte {
+	size := uint8(h >> 60)
+	if size < 1 || size > 5 {
+		panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size))
+	}
+	// All five candidate bytes; only the first size bytes are emitted.
+	raw := [5]byte{
+		uint8(h),
+		uint8(h >> 8),
+		uint8(h >> 16),
+		uint8(h >> 24),
+		uint8(h >> 32),
+	}
+	return append(b, raw[:size]...)
+}
+
+// size returns the output size in bytes with currently set values, as stored in the top 4 bits.
+func (h literalsHeader) size() int {
+ return int(h >> 60)
+}
+// String returns the header fields for debug output. Size is converted to uint64 so %x does not call String again.
+func (h literalsHeader) String() string {
+	return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%x, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, uint64(h&((1<<60)-1)>>4), h>>60)
+}
+
+// pushOffsets will copy the recent offsets to the backup store (prevRecentOffsets).
+func (b *blockEnc) pushOffsets() {
+ b.prevRecentOffsets = b.recentOffsets
+}
+
+// popOffsets will restore the recent offsets from the backup store (prevRecentOffsets).
+func (b *blockEnc) popOffsets() {
+ b.recentOffsets = b.prevRecentOffsets
+}
+
+// matchOffset will adjust recent offsets and return the adjusted one,
+// if it matches a previous offset.
+//
+// The returned value is the offset as it is stored in a sequence: 1, 2 or 3
+// when offset is one of the repeat candidates, offset+3 otherwise.
+// With lits > 0 the candidates are the three recent offsets in order.
+// With lits == 0 they are the second recent offset, the third recent offset
+// and the most recent offset minus one.
+// Candidates are tested in that order, so the first matching one wins.
+// The recent offsets are updated so that the used offset is the most recent.
+func (b *blockEnc) matchOffset(offset, lits uint32) uint32 {
+	// Check if offset is one of the recent offsets.
+	// Adjusts the output offset accordingly.
+	// Gives a tiny bit of compression, typically around 1%.
+	rep := &b.recentOffsets
+	if lits > 0 {
+		// Codes 1-3 refer directly to the three recent offsets.
+		switch offset {
+		case rep[0]:
+			// Most recent offset repeated, the order is unchanged.
+			return 1
+		case rep[1]:
+			// Swap the two most recent offsets.
+			rep[1], rep[0] = rep[0], offset
+			return 2
+		case rep[2]:
+			// Move the third offset to the front.
+			rep[2], rep[1], rep[0] = rep[1], rep[0], offset
+			return 3
+		}
+	} else {
+		// Without literals the repeat codes are shifted by one.
+		switch offset {
+		case rep[1]:
+			// Swap the two most recent offsets.
+			rep[1], rep[0] = rep[0], offset
+			return 1
+		case rep[2]:
+			// Move the third offset to the front.
+			rep[2], rep[1], rep[0] = rep[1], rep[0], offset
+			return 2
+		case rep[0] - 1:
+			// Becomes the most recent offset, the oldest one is dropped.
+			rep[2], rep[1], rep[0] = rep[1], rep[0], offset
+			return 3
+		}
+	}
+	// Not a repeat candidate: the offset is stored with 3 added and
+	// becomes the most recent offset, the oldest one is dropped.
+	rep[2] = rep[1]
+	rep[1] = rep[0]
+	rep[0] = offset
+	return offset + 3
+}
+
+// encodeRaw can be used to set the output to a raw representation of supplied bytes.
+// Any previous content of the output is discarded.
+func (b *blockEnc) encodeRaw(a []byte) {
+	var hdr blockHeader
+	hdr.setLast(b.last)
+	hdr.setSize(uint32(len(a)))
+	hdr.setType(blockTypeRaw)
+	b.output = append(hdr.appendTo(b.output[:0]), a...)
+	if debug {
+		println("Adding RAW block, length", len(a), "last:", b.last)
+	}
+}
+
+// encodeRawTo appends a raw block holding src to dst and returns the result.
+// The output of the block itself is not touched.
+func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
+	var hdr blockHeader
+	hdr.setLast(b.last)
+	hdr.setSize(uint32(len(src)))
+	hdr.setType(blockTypeRaw)
+	dst = append(hdr.appendTo(dst), src...)
+	if debug {
+		println("Adding RAW block, length", len(src), "last:", b.last)
+	}
+	return dst
+}
+
+// encodeLits can be used if the block consists of literals only; the encoded block is appended to b.output.
+func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
+ var bh blockHeader
+ bh.setLast(b.last)
+ bh.setSize(uint32(len(lits))) // size for the raw and RLE cases, replaced below for compressed output
+
+ // Don't compress extremely small blocks
+ if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
+ if debug {
+ println("Adding RAW block, length", len(lits), "last:", b.last)
+ }
+ bh.setType(blockTypeRaw)
+ b.output = bh.appendTo(b.output)
+ b.output = append(b.output, lits...)
+ return nil
+ }
+
+ var (
+ out []byte
+ reUsed, single bool
+ err error
+ )
+ if b.dictLitEnc != nil {
+ b.litEnc.TransferCTable(b.dictLitEnc)
+ b.litEnc.Reuse = huff0.ReusePolicyAllow
+ b.dictLitEnc = nil // the dictionary table is only transferred once
+ }
+ if len(lits) >= 1024 {
+ // Use 4 Streams.
+ out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
+ } else if len(lits) > 32 {
+ // Use 1 stream
+ single = true
+ out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
+ } else {
+ err = huff0.ErrIncompressible // 32 bytes or less are stored raw
+ }
+
+ switch err {
+ case huff0.ErrIncompressible:
+ if debug {
+ println("Adding RAW block, length", len(lits), "last:", b.last)
+ }
+ bh.setType(blockTypeRaw)
+ b.output = bh.appendTo(b.output)
+ b.output = append(b.output, lits...)
+ return nil
+ case huff0.ErrUseRLE:
+ if debug {
+ println("Adding RLE block, length", len(lits))
+ }
+ bh.setType(blockTypeRLE)
+ b.output = bh.appendTo(b.output)
+ b.output = append(b.output, lits[0]) // an RLE block stores only the repeated byte
+ return nil
+ case nil:
+ default:
+ return err
+ }
+ // Compressed...
+ // Now, allow reuse
+ b.litEnc.Reuse = huff0.ReusePolicyAllow
+ bh.setType(blockTypeCompressed)
+ var lh literalsHeader
+ if reUsed {
+ if debug {
+ println("Reused tree, compressed to", len(out))
+ }
+ lh.setType(literalsBlockTreeless)
+ } else {
+ if debug {
+ println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
+ }
+ lh.setType(literalsBlockCompressed)
+ }
+ // Set sizes
+ lh.setSizes(len(out), len(lits), single)
+ bh.setSize(uint32(len(out) + lh.size() + 1)) // +1 for the sequence count byte written last
+
+ // Write block headers.
+ b.output = bh.appendTo(b.output)
+ b.output = lh.appendTo(b.output)
+ // Add compressed data.
+ b.output = append(b.output, out...)
+ // No sequences.
+ b.output = append(b.output, 0)
+ return nil
+}
+
+// fuzzFseEncoder can be used to fuzz the FSE encoder. It returns 1 if a table was built and written for data, otherwise 0.
+func fuzzFseEncoder(data []byte) int {
+	if len(data) > maxSequences || len(data) < 2 {
+		return 0
+	}
+	enc := fseEncoder{}
+	hist := enc.Histogram()[:256]
+	maxSym := uint8(0)
+	for i, v := range data {
+		v = v & 63 // symbols are limited to 0-63; data is modified in place
+		data[i] = v
+		hist[v]++
+		if v > maxSym {
+			maxSym = v
+		}
+	}
+	if maxSym == 0 {
+		// All 0
+		return 0
+	}
+	maxCount := func(a []uint32) int {
+		var max uint32
+		for _, v := range a {
+			if v > max {
+				max = v
+			}
+		}
+		return int(max)
+	}
+	cnt := maxCount(hist[:int(maxSym)+1]) // the count of maxSym itself must be included
+	if cnt == len(data) {
+		// RLE
+		return 0
+	}
+	enc.HistogramFinished(maxSym, cnt)
+	err := enc.normalizeCount(len(data))
+	if err != nil {
+		return 0
+	}
+	_, err = enc.writeCount(nil)
+	if err != nil {
+		panic(err)
+	}
+	return 1
+}
+
+// encode will encode the block and append the output in b.output.
+// Previous offset codes must be pushed if more blocks are expected.
+func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
+ if len(b.sequences) == 0 {
+ return b.encodeLits(b.literals, rawAllLits)
+ }
+ // We want some difference to at least account for the headers.
+ saved := b.size - len(b.literals) - (b.size >> 5)
+ if saved < 16 {
+ if org == nil {
+ return errIncompressible
+ }
+ b.popOffsets()
+ return b.encodeLits(org, rawAllLits)
+ }
+
+ var bh blockHeader
+ var lh literalsHeader
+ bh.setLast(b.last)
+ bh.setType(blockTypeCompressed)
+ // Store offset of the block header. Needed when we know the size.
+ bhOffset := len(b.output)
+ b.output = bh.appendTo(b.output)
+
+ var (
+ out []byte
+ reUsed, single bool
+ err error
+ )
+ if b.dictLitEnc != nil {
+ b.litEnc.TransferCTable(b.dictLitEnc)
+ b.litEnc.Reuse = huff0.ReusePolicyAllow
+ b.dictLitEnc = nil
+ }
+ if len(b.literals) >= 1024 && !raw {
+ // Use 4 Streams.
+ out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
+ } else if len(b.literals) > 32 && !raw {
+ // Use 1 stream
+ single = true
+ out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+ } else {
+ err = huff0.ErrIncompressible
+ }
+
+ switch err {
+ case huff0.ErrIncompressible:
+ lh.setType(literalsBlockRaw)
+ lh.setSize(len(b.literals))
+ b.output = lh.appendTo(b.output)
+ b.output = append(b.output, b.literals...)
+ if debug {
+ println("Adding literals RAW, length", len(b.literals))
+ }
+ case huff0.ErrUseRLE:
+ lh.setType(literalsBlockRLE)
+ lh.setSize(len(b.literals))
+ b.output = lh.appendTo(b.output)
+ b.output = append(b.output, b.literals[0])
+ if debug {
+ println("Adding literals RLE")
+ }
+ case nil:
+ // Compressed litLen...
+ if reUsed {
+ if debug {
+ println("reused tree")
+ }
+ lh.setType(literalsBlockTreeless)
+ } else {
+ if debug {
+ println("new tree, size:", len(b.litEnc.OutTable))
+ }
+ lh.setType(literalsBlockCompressed)
+ if debug {
+ _, _, err := huff0.ReadTable(out, nil)
+ if err != nil {
+ panic(err)
+ }
+ }
+ }
+ lh.setSizes(len(out), len(b.literals), single)
+ if debug {
+ printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
+ println("Adding literal header:", lh)
+ }
+ b.output = lh.appendTo(b.output)
+ b.output = append(b.output, out...)
+ b.litEnc.Reuse = huff0.ReusePolicyAllow
+ if debug {
+ println("Adding literals compressed")
+ }
+ default:
+ if debug {
+ println("Adding literals ERROR:", err)
+ }
+ return err
+ }
+ // Sequence compression
+
+ // Write the number of sequences
+ switch {
+ case len(b.sequences) < 128:
+ b.output = append(b.output, uint8(len(b.sequences)))
+ case len(b.sequences) < 0x7f00: // TODO: this could be wrong
+ n := len(b.sequences)
+ b.output = append(b.output, 128+uint8(n>>8), uint8(n))
+ default:
+ n := len(b.sequences) - 0x7f00
+ b.output = append(b.output, 255, uint8(n), uint8(n>>8))
+ }
+ if debug {
+ println("Encoding", len(b.sequences), "sequences")
+ }
+ b.genCodes()
+ llEnc := b.coders.llEnc
+ ofEnc := b.coders.ofEnc
+ mlEnc := b.coders.mlEnc
+ err = llEnc.normalizeCount(len(b.sequences))
+ if err != nil {
+ return err
+ }
+ err = ofEnc.normalizeCount(len(b.sequences))
+ if err != nil {
+ return err
+ }
+ err = mlEnc.normalizeCount(len(b.sequences))
+ if err != nil {
+ return err
+ }
+
+ // Choose the best compression mode for each type.
+ // Will evaluate the new vs predefined and previous.
+ chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) {
+ // See if predefined/previous is better
+ hist := cur.count[:cur.symbolLen]
+ nSize := cur.approxSize(hist) + cur.maxHeaderSize()
+ predefSize := preDef.approxSize(hist)
+ prevSize := prev.approxSize(hist)
+
+ // Add a small penalty for new encoders.
+ // Don't bother with extremely small (<2 byte gains).
+ nSize = nSize + (nSize+2*8*16)>>4
+ switch {
+ case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
+ if debug {
+ println("Using predefined", predefSize>>3, "<=", nSize>>3)
+ }
+ return preDef, compModePredefined
+ case prevSize <= nSize:
+ if debug {
+ println("Using previous", prevSize>>3, "<=", nSize>>3)
+ }
+ return prev, compModeRepeat
+ default:
+ if debug {
+ println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
+ println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
+ }
+ return cur, compModeFSE
+ }
+ }
+
+ // Write compression mode
+ var mode uint8
+ if llEnc.useRLE {
+ mode |= uint8(compModeRLE) << 6
+ llEnc.setRLE(b.sequences[0].llCode)
+ if debug {
+ println("llEnc.useRLE")
+ }
+ } else {
+ var m seqCompMode
+ llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths])
+ mode |= uint8(m) << 6
+ }
+ if ofEnc.useRLE {
+ mode |= uint8(compModeRLE) << 4
+ ofEnc.setRLE(b.sequences[0].ofCode)
+ if debug {
+ println("ofEnc.useRLE")
+ }
+ } else {
+ var m seqCompMode
+ ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets])
+ mode |= uint8(m) << 4
+ }
+
+ if mlEnc.useRLE {
+ mode |= uint8(compModeRLE) << 2
+ mlEnc.setRLE(b.sequences[0].mlCode)
+ if debug {
+ println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
+ }
+ } else {
+ var m seqCompMode
+ mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths])
+ mode |= uint8(m) << 2
+ }
+ b.output = append(b.output, mode)
+ if debug {
+ printf("Compression modes: 0b%b", mode)
+ }
+ b.output, err = llEnc.writeCount(b.output)
+ if err != nil {
+ return err
+ }
+ start := len(b.output)
+ b.output, err = ofEnc.writeCount(b.output)
+ if err != nil {
+ return err
+ }
+ if false {
+ println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount)
+ fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen)
+ for i, v := range ofEnc.norm[:ofEnc.symbolLen] {
+ fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v)
+ }
+ }
+ b.output, err = mlEnc.writeCount(b.output)
+ if err != nil {
+ return err
+ }
+
+ // Maybe in block?
+ wr := &b.wr
+ wr.reset(b.output)
+
+ var ll, of, ml cState
+
+ // Current sequence
+ seq := len(b.sequences) - 1
+ s := b.sequences[seq]
+ llEnc.setBits(llBitsTable[:])
+ mlEnc.setBits(mlBitsTable[:])
+ ofEnc.setBits(nil)
+
+ llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256]
+
+ // We have 3 bounds checks here (and in the loop).
+ // Since we are iterating backwards it is kinda hard to avoid.
+ llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
+ ll.init(wr, &llEnc.ct, llB)
+ of.init(wr, &ofEnc.ct, ofB)
+ wr.flush32()
+ ml.init(wr, &mlEnc.ct, mlB)
+
+ // Each of these lookups also generates a bounds check.
+ wr.addBits32NC(s.litLen, llB.outBits)
+ wr.addBits32NC(s.matchLen, mlB.outBits)
+ wr.flush32()
+ wr.addBits32NC(s.offset, ofB.outBits)
+ if debugSequences {
+ println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
+ }
+ seq--
+ if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
+ // No need to flush (common)
+ for seq >= 0 {
+ s = b.sequences[seq]
+ wr.flush32()
+ llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
+ // tabelog max is 8 for all.
+ of.encode(ofB)
+ ml.encode(mlB)
+ ll.encode(llB)
+ wr.flush32()
+
+ // We checked that all can stay within 32 bits
+ wr.addBits32NC(s.litLen, llB.outBits)
+ wr.addBits32NC(s.matchLen, mlB.outBits)
+ wr.addBits32NC(s.offset, ofB.outBits)
+
+ if debugSequences {
+ println("Encoded seq", seq, s)
+ }
+
+ seq--
+ }
+ } else {
+ for seq >= 0 {
+ s = b.sequences[seq]
+ wr.flush32()
+ llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
+ // tabelog max is below 8 for each.
+ of.encode(ofB)
+ ml.encode(mlB)
+ ll.encode(llB)
+ wr.flush32()
+
+ // ml+ll = max 32 bits total
+ wr.addBits32NC(s.litLen, llB.outBits)
+ wr.addBits32NC(s.matchLen, mlB.outBits)
+ wr.flush32()
+ wr.addBits32NC(s.offset, ofB.outBits)
+
+ if debugSequences {
+ println("Encoded seq", seq, s)
+ }
+
+ seq--
+ }
+ }
+ ml.flush(mlEnc.actualTableLog)
+ of.flush(ofEnc.actualTableLog)
+ ll.flush(llEnc.actualTableLog)
+ err = wr.close()
+ if err != nil {
+ return err
+ }
+ b.output = wr.out
+
+ if len(b.output)-3-bhOffset >= b.size {
+ // Maybe even add a bigger margin.
+ b.litEnc.Reuse = huff0.ReusePolicyNone
+ return errIncompressible
+ }
+
+ // Size is output minus block header.
+ bh.setSize(uint32(len(b.output)-bhOffset) - 3)
+ if debug {
+ println("Rewriting block header", bh)
+ }
+ _ = bh.appendTo(b.output[bhOffset:bhOffset])
+ b.coders.setPrev(llEnc, mlEnc, ofEnc)
+ return nil
+}
+
+var errIncompressible = errors.New("incompressible")
+
+func (b *blockEnc) genCodes() {
+ if len(b.sequences) == 0 {
+ // nothing to do
+ return
+ }
+
+ if len(b.sequences) > math.MaxUint16 {
+ panic("can only encode up to 64K sequences")
+ }
+ // No bounds checks after here:
+ llH := b.coders.llEnc.Histogram()[:256]
+ ofH := b.coders.ofEnc.Histogram()[:256]
+ mlH := b.coders.mlEnc.Histogram()[:256]
+ for i := range llH {
+ llH[i] = 0
+ }
+ for i := range ofH {
+ ofH[i] = 0
+ }
+ for i := range mlH {
+ mlH[i] = 0
+ }
+
+ var llMax, ofMax, mlMax uint8
+ for i, seq := range b.sequences {
+ v := llCode(seq.litLen)
+ seq.llCode = v
+ llH[v]++
+ if v > llMax {
+ llMax = v
+ }
+
+ v = ofCode(seq.offset)
+ seq.ofCode = v
+ ofH[v]++
+ if v > ofMax {
+ ofMax = v
+ }
+
+ v = mlCode(seq.matchLen)
+ seq.mlCode = v
+ mlH[v]++
+ if v > mlMax {
+ mlMax = v
+ if debugAsserts && mlMax > maxMatchLengthSymbol {
+ panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
+ }
+ }
+ b.sequences[i] = seq
+ }
+ maxCount := func(a []uint32) int {
+ var max uint32
+ for _, v := range a {
+ if v > max {
+ max = v
+ }
+ }
+ return int(max)
+ }
+ if debugAsserts && mlMax > maxMatchLengthSymbol {
+ panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
+ }
+ if debugAsserts && ofMax > maxOffsetBits {
+ panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
+ }
+ if debugAsserts && llMax > maxLiteralLengthSymbol {
+ panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
+ }
+
+ b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1]))
+ b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1]))
+ b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1]))
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/blocktype_string.go b/vendor/github.com/klauspost/compress/zstd/blocktype_string.go
new file mode 100644
index 0000000..01a01e4
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/blocktype_string.go
@@ -0,0 +1,85 @@
+// Code generated by "stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex"; DO NOT EDIT.
+
+package zstd
+
+import "strconv"
+
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[blockTypeRaw-0]
+ _ = x[blockTypeRLE-1]
+ _ = x[blockTypeCompressed-2]
+ _ = x[blockTypeReserved-3]
+}
+
+const _blockType_name = "blockTypeRawblockTypeRLEblockTypeCompressedblockTypeReserved"
+
+var _blockType_index = [...]uint8{0, 12, 24, 43, 60}
+
+func (i blockType) String() string {
+ if i >= blockType(len(_blockType_index)-1) {
+ return "blockType(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _blockType_name[_blockType_index[i]:_blockType_index[i+1]]
+}
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[literalsBlockRaw-0]
+ _ = x[literalsBlockRLE-1]
+ _ = x[literalsBlockCompressed-2]
+ _ = x[literalsBlockTreeless-3]
+}
+
+const _literalsBlockType_name = "literalsBlockRawliteralsBlockRLEliteralsBlockCompressedliteralsBlockTreeless"
+
+var _literalsBlockType_index = [...]uint8{0, 16, 32, 55, 76}
+
+func (i literalsBlockType) String() string {
+ if i >= literalsBlockType(len(_literalsBlockType_index)-1) {
+ return "literalsBlockType(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _literalsBlockType_name[_literalsBlockType_index[i]:_literalsBlockType_index[i+1]]
+}
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[compModePredefined-0]
+ _ = x[compModeRLE-1]
+ _ = x[compModeFSE-2]
+ _ = x[compModeRepeat-3]
+}
+
+const _seqCompMode_name = "compModePredefinedcompModeRLEcompModeFSEcompModeRepeat"
+
+var _seqCompMode_index = [...]uint8{0, 18, 29, 40, 54}
+
+func (i seqCompMode) String() string {
+ if i >= seqCompMode(len(_seqCompMode_index)-1) {
+ return "seqCompMode(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _seqCompMode_name[_seqCompMode_index[i]:_seqCompMode_index[i+1]]
+}
+func _() {
+ // An "invalid array index" compiler error signifies that the constant values have changed.
+ // Re-run the stringer command to generate them again.
+ var x [1]struct{}
+ _ = x[tableLiteralLengths-0]
+ _ = x[tableOffsets-1]
+ _ = x[tableMatchLengths-2]
+}
+
+const _tableIndex_name = "tableLiteralLengthstableOffsetstableMatchLengths"
+
+var _tableIndex_index = [...]uint8{0, 19, 31, 48}
+
+func (i tableIndex) String() string {
+ if i >= tableIndex(len(_tableIndex_index)-1) {
+ return "tableIndex(" + strconv.FormatInt(int64(i), 10) + ")"
+ }
+ return _tableIndex_name[_tableIndex_index[i]:_tableIndex_index[i+1]]
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
new file mode 100644
index 0000000..658ef78
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -0,0 +1,127 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+)
+
+type byteBuffer interface {
+ // Read up to 8 bytes.
+ // Returns nil if no more input is available.
+ readSmall(n int) []byte
+
+ // Read >8 bytes.
+ // MAY use the destination slice.
+ readBig(n int, dst []byte) ([]byte, error)
+
+ // Read a single byte.
+ readByte() (byte, error)
+
+ // Skip n bytes.
+ skipN(n int) error
+}
+
+// in-memory buffer
+type byteBuf []byte
+
+func (b *byteBuf) readSmall(n int) []byte {
+ if debugAsserts && n > 8 {
+ panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
+ }
+ bb := *b
+ if len(bb) < n {
+ return nil
+ }
+ r := bb[:n]
+ *b = bb[n:]
+ return r
+}
+
+func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
+ bb := *b
+ if len(bb) < n {
+ return nil, io.ErrUnexpectedEOF
+ }
+ r := bb[:n]
+ *b = bb[n:]
+ return r, nil
+}
+
+func (b *byteBuf) remain() []byte {
+ return *b
+}
+
+func (b *byteBuf) readByte() (byte, error) {
+ bb := *b
+ if len(bb) < 1 {
+ return 0, io.ErrUnexpectedEOF // no input left: report it (as readBig/skipN do) instead of yielding a fake 0 byte
+ }
+ r := bb[0]
+ *b = bb[1:]
+ return r, nil
+}
+
+func (b *byteBuf) skipN(n int) error {
+ bb := *b
+ if len(bb) < n {
+ return io.ErrUnexpectedEOF
+ }
+ *b = bb[n:]
+ return nil
+}
+
+// wrapper around a reader.
+type readerWrapper struct {
+ r io.Reader
+ tmp [8]byte
+}
+
+func (r *readerWrapper) readSmall(n int) []byte {
+ if debugAsserts && n > 8 {
+ panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
+ }
+ n2, err := io.ReadFull(r.r, r.tmp[:n])
+ // We only really care about the actual bytes read.
+ if n2 != n {
+ if debug {
+ println("readSmall: got", n2, "want", n, "err", err)
+ }
+ return nil
+ }
+ return r.tmp[:n]
+}
+
+func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
+ if cap(dst) < n {
+ dst = make([]byte, n)
+ }
+ n2, err := io.ReadFull(r.r, dst[:n])
+ if err == io.EOF && n > 0 {
+ err = io.ErrUnexpectedEOF
+ }
+ return dst[:n2], err
+}
+
+func (r *readerWrapper) readByte() (byte, error) {
+ n2, err := io.ReadFull(r.r, r.tmp[:1]) // ReadFull retries (0, nil) reads and keeps a byte that arrives together with io.EOF
+ if err != nil {
+ return 0, err
+ }
+ if n2 != 1 {
+ return 0, io.ErrUnexpectedEOF
+ }
+ return r.tmp[0], nil
+}
+
+func (r *readerWrapper) skipN(n int) error {
+ n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
+ if n2 != int64(n) {
+ err = io.ErrUnexpectedEOF
+ }
+ return err
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go
new file mode 100644
index 0000000..2c4fca1
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@@ -0,0 +1,88 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+// byteReader provides a byte reader that reads
+// little endian values from a byte stream.
+// The input stream is manually advanced.
+// The reader performs no bounds checks.
+type byteReader struct {
+ b []byte
+ off int
+}
+
+// init will initialize the reader and set the input.
+func (b *byteReader) init(in []byte) {
+ b.b = in
+ b.off = 0
+}
+
+// advance the stream by n bytes.
+func (b *byteReader) advance(n uint) {
+ b.off += int(n)
+}
+
+// overread returns whether we have advanced too far.
+func (b *byteReader) overread() bool {
+ return b.off > len(b.b)
+}
+
+// Int32 returns a little endian int32 starting at current offset.
+func (b byteReader) Int32() int32 {
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
+ v3 := int32(b2[3])
+ v2 := int32(b2[2])
+ v1 := int32(b2[1])
+ v0 := int32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// Uint8 returns the next byte
+func (b *byteReader) Uint8() uint8 {
+ v := b.b[b.off]
+ return v
+}
+
+// Uint32 returns a little endian uint32 starting at current offset.
+func (b byteReader) Uint32() uint32 {
+ if r := b.remain(); r < 4 {
+ // Very rare
+ v := uint32(0)
+ for i := 1; i <= r; i++ {
+ v = (v << 8) | uint32(b.b[len(b.b)-i])
+ }
+ return v
+ }
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
+ v3 := uint32(b2[3])
+ v2 := uint32(b2[2])
+ v1 := uint32(b2[1])
+ v0 := uint32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// Uint32NC returns a little endian uint32 starting at current offset.
+// The caller must be sure if there are at least 4 bytes left.
+func (b byteReader) Uint32NC() uint32 {
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
+ v3 := uint32(b2[3])
+ v2 := uint32(b2[2])
+ v1 := uint32(b2[1])
+ v0 := uint32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// unread returns the unread portion of the input.
+func (b byteReader) unread() []byte {
+ return b.b[b.off:]
+}
+
+// remain will return the number of bytes remaining.
+func (b byteReader) remain() int {
+ return len(b.b) - b.off
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
new file mode 100644
index 0000000..69736e8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@@ -0,0 +1,202 @@
+// Copyright 2020+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "bytes"
+ "errors"
+ "io"
+)
+
+// HeaderMaxSize is the maximum size of a Frame and Block Header.
+// If less is sent to Header.Decode it *may* still contain enough information.
+const HeaderMaxSize = 14 + 3
+
+// Header contains information about the first frame and block within that.
+type Header struct {
+ // WindowSize is the window of data to keep while decoding.
+ // Will only be set if SingleSegment is false.
+ WindowSize uint64
+
+ // Frame content size.
+ // Expected size of the entire frame.
+ FrameContentSize uint64
+
+ // Dictionary ID.
+ // If 0, no dictionary.
+ DictionaryID uint32
+
+ // First block information.
+ FirstBlock struct {
+ // OK will be set if first block could be decoded.
+ OK bool
+
+ // Is this the last block of a frame?
+ Last bool
+
+ // Is the data compressed?
+ // If true CompressedSize will be populated.
+ // Unfortunately DecompressedSize cannot be determined
+ // without decoding the blocks.
+ Compressed bool
+
+ // DecompressedSize is the expected decompressed size of the block.
+ // Will be 0 if it cannot be determined.
+ DecompressedSize int
+
+ // CompressedSize of the data in the block.
+ // Does not include the block header.
+ // Will be equal to DecompressedSize if not Compressed.
+ CompressedSize int
+ }
+
+ // Skippable will be true if the frame is meant to be skipped.
+ // No other information will be populated.
+ Skippable bool
+
+ // If set there is a checksum present for the block content.
+ HasCheckSum bool
+
+ // If this is true FrameContentSize will have a valid value
+ HasFCS bool
+
+ SingleSegment bool
+}
+
+// Decode the header from the beginning of the stream.
+// This will decode the frame header and the first block header if enough bytes are provided.
+// It is recommended to provide at least HeaderMaxSize bytes.
+// If the frame header cannot be read an error will be returned.
+// If there isn't enough input, io.ErrUnexpectedEOF is returned.
+// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
+func (h *Header) Decode(in []byte) error {
+ if len(in) < 4 {
+ return io.ErrUnexpectedEOF
+ }
+ b, in := in[:4], in[4:]
+ if !bytes.Equal(b, frameMagic) {
+ if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
+ return ErrMagicMismatch
+ }
+ *h = Header{Skippable: true}
+ return nil
+ }
+ if len(in) < 1 {
+ return io.ErrUnexpectedEOF
+ }
+
+ // Clear output
+ *h = Header{}
+ fhd, in := in[0], in[1:]
+ h.SingleSegment = fhd&(1<<5) != 0
+ h.HasCheckSum = fhd&(1<<2) != 0
+
+ if fhd&(1<<3) != 0 {
+ return errors.New("reserved bit set on frame header")
+ }
+
+ // Read Window_Descriptor
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
+ if !h.SingleSegment {
+ if len(in) < 1 {
+ return io.ErrUnexpectedEOF
+ }
+ var wd byte
+ wd, in = in[0], in[1:]
+ windowLog := 10 + (wd >> 3)
+ windowBase := uint64(1) << windowLog
+ windowAdd := (windowBase / 8) * uint64(wd&0x7)
+ h.WindowSize = windowBase + windowAdd
+ }
+
+ // Read Dictionary_ID
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
+ if size := fhd & 3; size != 0 {
+ if size == 3 {
+ size = 4
+ }
+ if len(in) < int(size) {
+ return io.ErrUnexpectedEOF
+ }
+ b, in = in[:size], in[size:]
+ if b == nil {
+ return io.ErrUnexpectedEOF
+ }
+ switch size {
+ case 1:
+ h.DictionaryID = uint32(b[0])
+ case 2:
+ h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
+ case 4:
+ h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ }
+ }
+
+ // Read Frame_Content_Size
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
+ var fcsSize int
+ v := fhd >> 6
+ switch v {
+ case 0:
+ if h.SingleSegment {
+ fcsSize = 1
+ }
+ default:
+ fcsSize = 1 << v
+ }
+
+ if fcsSize > 0 {
+ h.HasFCS = true
+ if len(in) < fcsSize {
+ return io.ErrUnexpectedEOF
+ }
+ b, in = in[:fcsSize], in[fcsSize:]
+ if b == nil {
+ return io.ErrUnexpectedEOF
+ }
+ switch fcsSize {
+ case 1:
+ h.FrameContentSize = uint64(b[0])
+ case 2:
+ // When FCS_Field_Size is 2, the offset of 256 is added.
+ h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
+ case 4:
+ h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
+ case 8:
+ d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
+ h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
+ }
+ }
+
+ // Frame Header done, we will not fail from now on.
+ if len(in) < 3 {
+ return nil
+ }
+ tmp := in[:3]
+ bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
+ h.FirstBlock.Last = bh&1 != 0
+ blockType := blockType((bh >> 1) & 3)
+ // find size.
+ cSize := int(bh >> 3)
+ switch blockType {
+ case blockTypeReserved:
+ return nil
+ case blockTypeRLE:
+ h.FirstBlock.Compressed = true
+ h.FirstBlock.DecompressedSize = cSize
+ h.FirstBlock.CompressedSize = 1
+ case blockTypeCompressed:
+ h.FirstBlock.Compressed = true
+ h.FirstBlock.CompressedSize = cSize
+ case blockTypeRaw:
+ h.FirstBlock.DecompressedSize = cSize
+ h.FirstBlock.CompressedSize = cSize
+ default:
+ panic("Invalid block type")
+ }
+
+ h.FirstBlock.OK = true
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
new file mode 100644
index 0000000..f593e46
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -0,0 +1,557 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "io"
+ "sync"
+)
+
+// Decoder provides decoding of zstandard streams.
+// The decoder has been designed to operate without allocations after a warmup.
+// This means that you should store the decoder for best performance.
+// To re-use a stream decoder, use the Reset(r io.Reader) error to switch to another stream.
+// A decoder can safely be re-used even if the previous stream failed.
+// To release the resources, you must call the Close() function on a decoder.
+type Decoder struct {
+ o decoderOptions
+
+ // Unreferenced decoders, ready for use.
+ decoders chan *blockDec
+
+ // Streams ready to be decoded.
+ stream chan decodeStream
+
+ // Current read position used for Reader functionality.
+ current decoderState
+
+ // Custom dictionaries.
+ // Always uses copies.
+ dicts map[uint32]dict
+
+ // streamWg is the waitgroup for all streams
+ streamWg sync.WaitGroup
+}
+
+// decoderState is used for maintaining state when the decoder
+// is used for streaming.
+type decoderState struct {
+ // current block being written to stream.
+ decodeOutput
+
+ // output in order to be written to stream.
+ output chan decodeOutput
+
+ // cancel remaining output.
+ cancel chan struct{}
+
+ flushed bool
+}
+
+var (
+ // Check the interfaces we want to support.
+ _ = io.WriterTo(&Decoder{})
+ _ = io.Reader(&Decoder{})
+)
+
+// NewReader creates a new decoder.
+// A nil Reader can be provided in which case Reset can be used to start a decode.
+//
+// A Decoder can be used in two modes:
+//
+// 1) As a stream, or
+// 2) For stateless decoding using DecodeAll.
+//
+// Only a single stream can be decoded concurrently, but the same decoder
+// can run multiple concurrent stateless decodes. It is even possible to
+// use stateless decodes while a stream is being decoded.
+//
+// The Reset function can be used to initiate a new stream, which will considerably
+// reduce the allocations normally caused by NewReader.
+func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
+ initPredefined()
+ var d Decoder
+ d.o.setDefault()
+ for _, o := range opts {
+ err := o(&d.o)
+ if err != nil {
+ return nil, err
+ }
+ }
+ d.current.output = make(chan decodeOutput, d.o.concurrent)
+ d.current.flushed = true
+
+ if r == nil {
+ d.current.err = ErrDecoderNilInput
+ }
+
+ // Transfer option dicts.
+ d.dicts = make(map[uint32]dict, len(d.o.dicts))
+ for _, dc := range d.o.dicts {
+ d.dicts[dc.id] = dc
+ }
+ d.o.dicts = nil
+
+ // Create decoders
+ d.decoders = make(chan *blockDec, d.o.concurrent)
+ for i := 0; i < d.o.concurrent; i++ {
+ dec := newBlockDec(d.o.lowMem)
+ dec.localFrame = newFrameDec(d.o)
+ d.decoders <- dec
+ }
+
+ if r == nil {
+ return &d, nil
+ }
+ return &d, d.Reset(r)
+}
+
+// Read bytes from the decompressed stream into p.
+// Returns the number of bytes written and any error that occurred.
+// When the stream is done, io.EOF will be returned.
+func (d *Decoder) Read(p []byte) (int, error) {
+ if d.stream == nil {
+ return 0, ErrDecoderNilInput
+ }
+ var n int
+ for {
+ if len(d.current.b) > 0 {
+ filled := copy(p, d.current.b)
+ p = p[filled:]
+ d.current.b = d.current.b[filled:]
+ n += filled
+ }
+ if len(p) == 0 {
+ break
+ }
+ if len(d.current.b) == 0 {
+ // We have an error and no more data
+ if d.current.err != nil {
+ break
+ }
+ if !d.nextBlock(n == 0) {
+ return n, nil
+ }
+ }
+ }
+ if len(d.current.b) > 0 {
+ if debug {
+ println("returning", n, "still bytes left:", len(d.current.b))
+ }
+ // Only return error at end of block
+ return n, nil
+ }
+ if d.current.err != nil {
+ d.drainOutput()
+ }
+ if debug {
+ println("returning", n, d.current.err, len(d.decoders))
+ }
+ return n, d.current.err
+}
+
+// Reset will reset the decoder to the supplied stream after the current has finished processing.
+// Note that this functionality cannot be used after Close has been called.
+// Reset can be called with a nil reader to release references to the previous reader.
+// After being called with a nil reader, no other operations than Reset or DecodeAll or Close
+// should be used.
+func (d *Decoder) Reset(r io.Reader) error {
+ if d.current.err == ErrDecoderClosed {
+ return d.current.err
+ }
+
+ d.drainOutput()
+
+ if r == nil {
+ d.current.err = ErrDecoderNilInput
+ d.current.flushed = true
+ return nil
+ }
+
+ if d.stream == nil {
+ d.stream = make(chan decodeStream, 1)
+ d.streamWg.Add(1)
+ go d.startStreamDecoder(d.stream)
+ }
+
+ // If bytes buffer and < 1MB, do sync decoding anyway.
+ if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
+ bb2 := bb
+ if debug {
+ println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
+ }
+ b := bb2.Bytes()
+ var dst []byte
+ if cap(d.current.b) > 0 {
+ dst = d.current.b
+ }
+
+ dst, err := d.DecodeAll(b, dst[:0])
+ if err == nil {
+ err = io.EOF
+ }
+ d.current.b = dst
+ d.current.err = err
+ d.current.flushed = true
+ if debug {
+ println("sync decode to", len(dst), "bytes, err:", err)
+ }
+ return nil
+ }
+
+ // Remove current block.
+ d.current.decodeOutput = decodeOutput{}
+ d.current.err = nil
+ d.current.cancel = make(chan struct{})
+ d.current.flushed = false
+ d.current.d = nil
+
+ d.stream <- decodeStream{
+ r: r,
+ output: d.current.output,
+ cancel: d.current.cancel,
+ }
+ return nil
+}
+
+// drainOutput will drain the output until errEndOfStream is sent.
+func (d *Decoder) drainOutput() {
+ if d.current.cancel != nil {
+ println("cancelling current")
+ close(d.current.cancel)
+ d.current.cancel = nil
+ }
+ if d.current.d != nil {
+ if debug {
+ printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders))
+ }
+ d.decoders <- d.current.d
+ d.current.d = nil
+ d.current.b = nil
+ }
+ if d.current.output == nil || d.current.flushed {
+ println("current already flushed")
+ return
+ }
+ for v := range d.current.output {
+ if v.d != nil {
+ if debug {
+ printf("re-adding decoder %p", v.d)
+ }
+ d.decoders <- v.d
+ }
+ if v.err == errEndOfStream {
+ println("current flushed")
+ d.current.flushed = true
+ return
+ }
+ }
+}
+
+// WriteTo writes data to w until there's no more data to write or when an error occurs.
+// The return value n is the number of bytes written.
+// Any error encountered during the write is also returned; a final io.EOF from the decoder is reported as nil.
+func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
+ if d.stream == nil {
+ return 0, ErrDecoderNilInput
+ }
+ var n int64
+ for {
+ if len(d.current.b) > 0 {
+ n2, err2 := w.Write(d.current.b)
+ n += int64(n2)
+ if err2 != nil && (d.current.err == nil || d.current.err == io.EOF) { // a pending io.EOF must not mask a failed write
+ d.current.err = err2
+ break
+ }
+ }
+ if d.current.err != nil {
+ break
+ }
+ d.nextBlock(true)
+ }
+ err := d.current.err
+ if err != nil {
+ d.drainOutput()
+ }
+ if err == io.EOF {
+ err = nil
+ }
+ return n, err
+}
+
+// DecodeAll allows stateless decoding of a blob of bytes.
+// Output will be appended to dst, so if the destination size is known
+// you can pre-allocate the destination slice to avoid allocations.
+// DecodeAll can be used concurrently.
+// The Decoder concurrency limits will be respected.
+func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
+ if d.current.err == ErrDecoderClosed {
+ return dst, ErrDecoderClosed
+ }
+
+ // Grab a block decoder and frame decoder.
+ block := <-d.decoders
+ frame := block.localFrame
+ defer func() {
+ if debug {
+ printf("re-adding decoder: %p", block)
+ }
+ frame.rawInput = nil
+ frame.bBuf = nil
+ d.decoders <- block
+ }()
+ frame.bBuf = input
+
+ for {
+ frame.history.reset()
+ err := frame.reset(&frame.bBuf)
+ if err == io.EOF {
+ if debug {
+ println("frame reset return EOF")
+ }
+ return dst, nil
+ }
+ if frame.DictionaryID != nil { // NOTE(review): runs before err is checked; confirm frame.reset clears DictionaryID on failure
+ dict, ok := d.dicts[*frame.DictionaryID]
+ if !ok {
+ return nil, ErrUnknownDictionary
+ }
+ frame.history.setDict(&dict)
+ }
+ if err != nil {
+ return dst, err
+ }
+ if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
+ return dst, ErrDecoderSizeExceeded
+ }
+ if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
+ // Never preallocate more than 1 GB up front.
+ if cap(dst)-len(dst) < int(frame.FrameContentSize) {
+ dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
+ copy(dst2, dst)
+ dst = dst2
+ }
+ }
+ if cap(dst) == 0 {
+ // Allocate len(input) * 2 by default if nothing is provided
+ // and we didn't get frame content size.
+ size := len(input) * 2
+ // Cap to 1 MB.
+ if size > 1<<20 {
+ size = 1 << 20
+ }
+ if uint64(size) > d.o.maxDecodedSize {
+ size = int(d.o.maxDecodedSize)
+ }
+ dst = make([]byte, 0, size)
+ }
+
+ dst, err = frame.runDecoder(dst, block)
+ if err != nil {
+ return dst, err
+ }
+ if len(frame.bBuf) == 0 {
+ if debug {
+ println("frame dbuf empty")
+ }
+ break
+ }
+ }
+ return dst, nil
+}
+
+// nextBlock returns the next block.
+// If an error occurs d.current.err will be set.
+// Optionally the function can block for new output.
+// If non-blocking mode is used the returned boolean will be false
+// if no data was available without blocking.
+func (d *Decoder) nextBlock(blocking bool) (ok bool) {
+ if d.current.d != nil {
+ if debug {
+ printf("re-adding current decoder %p", d.current.d)
+ }
+ d.decoders <- d.current.d
+ d.current.d = nil
+ }
+ if d.current.err != nil {
+ // Keep error state.
+ return blocking
+ }
+
+ if blocking {
+ d.current.decodeOutput = <-d.current.output
+ } else {
+ select {
+ case d.current.decodeOutput = <-d.current.output:
+ default:
+ return false
+ }
+ }
+ if debug {
+ println("got", len(d.current.b), "bytes, error:", d.current.err)
+ }
+ return true
+}
+
+// Close will release all resources.
+// It is NOT possible to reuse the decoder after this.
+func (d *Decoder) Close() {
+ if d.current.err == ErrDecoderClosed {
+ return
+ }
+ d.drainOutput()
+ if d.stream != nil {
+ close(d.stream)
+ d.streamWg.Wait()
+ d.stream = nil
+ }
+ if d.decoders != nil {
+ close(d.decoders)
+ for dec := range d.decoders {
+ dec.Close()
+ }
+ d.decoders = nil
+ }
+ if d.current.d != nil {
+ d.current.d.Close()
+ d.current.d = nil
+ }
+ d.current.err = ErrDecoderClosed
+}
+
+// IOReadCloser returns the decoder as an io.ReadCloser for convenience.
+// Any changes to the decoder will be reflected, so the returned ReadCloser
+// can be reused along with the decoder.
+// io.WriterTo is also supported by the returned ReadCloser.
+func (d *Decoder) IOReadCloser() io.ReadCloser {
+ return closeWrapper{d: d}
+}
+
+// closeWrapper wraps a function call as a closer.
+type closeWrapper struct {
+ d *Decoder
+}
+
+// WriteTo forwards WriteTo calls to the decoder.
+func (c closeWrapper) WriteTo(w io.Writer) (n int64, err error) {
+ return c.d.WriteTo(w)
+}
+
+// Read forwards read calls to the decoder.
+func (c closeWrapper) Read(p []byte) (n int, err error) {
+ return c.d.Read(p)
+}
+
+// Close closes the decoder.
+func (c closeWrapper) Close() error {
+ c.d.Close()
+ return nil
+}
+
+type decodeOutput struct {
+ d *blockDec
+ b []byte
+ err error
+}
+
+type decodeStream struct {
+ r io.Reader
+
+ // Blocks ready to be written to output.
+ output chan decodeOutput
+
+ // cancel reading from the input
+ cancel chan struct{}
+}
+
+// errEndOfStream indicates that everything from the stream was read.
+var errEndOfStream = errors.New("end-of-stream")
+
+// Create Decoder:
+// Spawn n block decoders. These accept tasks to decode a block.
+// Create goroutine that handles stream processing, this will send history to decoders as they are available.
+// Decoders update the history as they decode.
+// When a block is returned:
+// a) history is sent to the next decoder,
+// b) content written to CRC.
+// c) return data to WRITER.
+// d) wait for next block to return data.
+// Once WRITTEN, the decoders are returned by the writer to the frame decoder for re-use.
+func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
+ defer d.streamWg.Done()
+ frame := newFrameDec(d.o)
+ for stream := range inStream {
+ if debug {
+ println("got new stream")
+ }
+ br := readerWrapper{r: stream.r}
+ decodeStream:
+ for {
+ frame.history.reset()
+ err := frame.reset(&br)
+ if debug && err != nil {
+ println("Frame decoder returned", err)
+ }
+ if err == nil && frame.DictionaryID != nil {
+ dict, ok := d.dicts[*frame.DictionaryID]
+ if !ok {
+ err = ErrUnknownDictionary
+ } else {
+ frame.history.setDict(&dict)
+ }
+ }
+ if err != nil {
+ stream.output <- decodeOutput{
+ err: err,
+ }
+ break
+ }
+ if debug {
+ println("starting frame decoder")
+ }
+
+ // This goroutine will forward history between frames.
+ frame.frameDone.Add(1)
+ frame.initAsync()
+
+ go frame.startDecoder(stream.output)
+ decodeFrame:
+ // Go through all blocks of the frame.
+ for {
+ dec := <-d.decoders
+ select {
+ case <-stream.cancel:
+ if !frame.sendErr(dec, io.EOF) {
+ // To not let the decoder dangle, send it back.
+ stream.output <- decodeOutput{d: dec}
+ }
+ break decodeStream
+ default:
+ }
+ err := frame.next(dec)
+ switch err {
+ case io.EOF:
+ // End of current frame, no error
+ println("EOF on next block")
+ break decodeFrame
+ case nil:
+ continue
+ default:
+ println("block decoder returned", err)
+ break decodeStream
+ }
+ }
+ // All blocks have started decoding, check if there are more frames.
+ println("waiting for done")
+ frame.frameDone.Wait()
+ println("done waiting...")
+ }
+ frame.frameDone.Wait()
+ println("Sending EOS")
+ stream.output <- decodeOutput{err: errEndOfStream}
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
new file mode 100644
index 0000000..c0fd058
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -0,0 +1,83 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "runtime"
+)
+
+// DOption is an option for creating a decoder.
+type DOption func(*decoderOptions) error
+
+// decoderOptions retains accumulated state of multiple options.
+type decoderOptions struct {
+ lowMem bool
+ concurrent int
+ maxDecodedSize uint64
+ dicts []dict
+}
+
+func (o *decoderOptions) setDefault() {
+ *o = decoderOptions{
+ // use less ram: true for now, but may change.
+ lowMem: true,
+ concurrent: runtime.GOMAXPROCS(0),
+ }
+ o.maxDecodedSize = 1 << 63
+}
+
+// WithDecoderLowmem will set whether to use a lower amount of memory,
+// but possibly have to allocate more while running.
+func WithDecoderLowmem(b bool) DOption {
+ return func(o *decoderOptions) error { o.lowMem = b; return nil }
+}
+
+// WithDecoderConcurrency will set the concurrency,
+// meaning the maximum number of decoders to run concurrently.
+// The value supplied must be at least 1.
+// By default this will be set to GOMAXPROCS.
+func WithDecoderConcurrency(n int) DOption {
+ return func(o *decoderOptions) error {
+ if n <= 0 {
+ return errors.New("concurrency must be at least 1")
+ }
+ o.concurrent = n
+ return nil
+ }
+}
+
+// WithDecoderMaxMemory sets the maximum decoded size for in-memory
+// non-streaming operations, or the maximum window size for streaming operations.
+// This can be used to control memory usage of potentially hostile content.
+// For streaming operations, the maximum window size is capped at 1<<30 bytes.
+// n must be in the range 1 to 1 << 63; the default is 1 << 63 bytes.
+func WithDecoderMaxMemory(n uint64) DOption {
+ return func(o *decoderOptions) error {
+ if n == 0 {
+ return errors.New("WithDecoderMaxMemory must be at least 1")
+ }
+ if n > 1<<63 {
+ return errors.New("WithDecoderMaxMemory must be at most 1 << 63")
+ }
+ o.maxDecodedSize = n
+ return nil
+ }
+}
+
+// WithDecoderDicts allows to register one or more dictionaries for the decoder.
+// If several dictionaries with the same ID are provided the last one will be used.
+func WithDecoderDicts(dicts ...[]byte) DOption {
+ return func(o *decoderOptions) error {
+ for _, b := range dicts {
+ d, err := loadDict(b)
+ if err != nil {
+ return err
+ }
+ o.dicts = append(o.dicts, *d)
+ }
+ return nil
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
new file mode 100644
index 0000000..fa25a18
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -0,0 +1,122 @@
+package zstd
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/klauspost/compress/huff0"
+)
+
+type dict struct {
+ id uint32
+
+ litEnc *huff0.Scratch
+ llDec, ofDec, mlDec sequenceDec
+ //llEnc, ofEnc, mlEnc []*fseEncoder
+ offsets [3]int
+ content []byte
+}
+
+var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+
+// ID returns the dictionary id or 0 if d is nil.
+func (d *dict) ID() uint32 {
+ if d == nil {
+ return 0
+ }
+ return d.id
+}
+
+// DictContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) DictContentSize() int {
+ if d == nil {
+ return 0
+ }
+ return len(d.content)
+}
+
+// loadDict loads a dictionary as described in
+// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+func loadDict(b []byte) (*dict, error) {
+ // Check static field size.
+ if len(b) <= 8+(3*4) {
+ return nil, io.ErrUnexpectedEOF
+ }
+ d := dict{
+ llDec: sequenceDec{fse: &fseDecoder{}},
+ ofDec: sequenceDec{fse: &fseDecoder{}},
+ mlDec: sequenceDec{fse: &fseDecoder{}},
+ }
+ if !bytes.Equal(b[:4], dictMagic[:]) {
+ return nil, ErrMagicMismatch
+ }
+ d.id = binary.LittleEndian.Uint32(b[4:8])
+ if d.id == 0 {
+ return nil, errors.New("dictionaries cannot have ID 0")
+ }
+
+ // Read literal table
+ var err error
+ d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
+ if err != nil {
+ return nil, err
+ }
+ d.litEnc.Reuse = huff0.ReusePolicyMust
+
+ br := byteReader{
+ b: b,
+ off: 0,
+ }
+ readDec := func(i tableIndex, dec *fseDecoder) error {
+ if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil {
+ return err
+ }
+ if br.overread() {
+ return io.ErrUnexpectedEOF
+ }
+ err = dec.transform(symbolTableX[i])
+ if err != nil {
+ println("Transform table error:", err)
+ return err
+ }
+ if debug {
+ println("Read table ok", "symbolLen:", dec.symbolLen)
+ }
+ // Set decoders as predefined so they aren't reused.
+ dec.preDefined = true
+ return nil
+ }
+
+ if err := readDec(tableOffsets, d.ofDec.fse); err != nil {
+ return nil, err
+ }
+ if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil {
+ return nil, err
+ }
+ if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil {
+ return nil, err
+ }
+ if br.remain() < 12 {
+ return nil, io.ErrUnexpectedEOF
+ }
+
+ d.offsets[0] = int(br.Uint32())
+ br.advance(4)
+ d.offsets[1] = int(br.Uint32())
+ br.advance(4)
+ d.offsets[2] = int(br.Uint32())
+ br.advance(4)
+ if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 {
+ return nil, errors.New("invalid offset in dictionary")
+ }
+ d.content = make([]byte, br.remain())
+ copy(d.content, br.unread())
+ if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) {
+ return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets)
+ }
+
+ return &d, nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
new file mode 100644
index 0000000..60f2986
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -0,0 +1,178 @@
+package zstd
+
+import (
+ "fmt"
+ "math/bits"
+
+ "github.com/klauspost/compress/zstd/internal/xxhash"
+)
+
+const (
+ dictShardBits = 6
+)
+
+type fastBase struct {
+ // cur is the offset at the start of hist
+ cur int32
+ // maximum offset. Should be at least 2x block size.
+ maxMatchOff int32
+ hist []byte
+ crc *xxhash.Digest
+ tmp [8]byte
+ blk *blockEnc
+ lastDictID uint32
+ lowMem bool
+}
+
+// CRC returns the underlying CRC writer.
+func (e *fastBase) CRC() *xxhash.Digest {
+ return e.crc
+}
+
+// AppendCRC will append the CRC to the destination slice and return it.
+func (e *fastBase) AppendCRC(dst []byte) []byte {
+ crc := e.crc.Sum(e.tmp[:0])
+ dst = append(dst, crc[7], crc[6], crc[5], crc[4])
+ return dst
+}
+
+// WindowSize returns the window size of the encoder,
+// or a window size small enough to contain the input size, if > 0.
+func (e *fastBase) WindowSize(size int) int32 {
+ if size > 0 && size < int(e.maxMatchOff) {
+ b := int32(1) << uint(bits.Len(uint(size)))
+ // Keep minimum window.
+ if b < 1024 {
+ b = 1024
+ }
+ return b
+ }
+ return e.maxMatchOff
+}
+
+// Block returns the current block.
+func (e *fastBase) Block() *blockEnc {
+ return e.blk
+}
+
+func (e *fastBase) addBlock(src []byte) int32 {
+ if debugAsserts && e.cur > bufferReset {
+ panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+ }
+ // check if we have space already
+ if len(e.hist)+len(src) > cap(e.hist) {
+ if cap(e.hist) == 0 {
+ e.ensureHist(len(src))
+ } else {
+ if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
+ panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
+ }
+ // Move down
+ offset := int32(len(e.hist)) - e.maxMatchOff
+ copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
+ e.cur += offset
+ e.hist = e.hist[:e.maxMatchOff]
+ }
+ }
+ s := int32(len(e.hist))
+ e.hist = append(e.hist, src...)
+ return s
+}
+
+// ensureHist will ensure that history can keep at least this many bytes.
+func (e *fastBase) ensureHist(n int) {
+ if cap(e.hist) >= n {
+ return
+ }
+ l := e.maxMatchOff
+ if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
+ l += maxCompressedBlockSize
+ } else {
+ l += e.maxMatchOff
+ }
+ // Make it at least 1MB.
+ if l < 1<<20 && !e.lowMem {
+ l = 1 << 20
+ }
+ // Make it at least the requested size.
+ if l < int32(n) {
+ l = int32(n)
+ }
+ e.hist = make([]byte, 0, l)
+}
+
+// UseBlock will replace the block with the provided one,
+// but transfer recent offsets from the previous.
+func (e *fastBase) UseBlock(enc *blockEnc) {
+ enc.reset(e.blk)
+ e.blk = enc
+}
+
+func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:], src[t:]))
+}
+
+// matchlen returns matchLen(src[s:], src[t:]), the length of the match at s and t.
+// With debugAsserts enabled it first panics if s or t is negative, s-t exceeds
+// maxMatchOff, or more than maxCompressedBlockSize bytes remain after s.
+func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
+ if debugAsserts {
+ if s < 0 {
+ panic(fmt.Sprintf("s (%d) < 0", s))
+ }
+ if t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0", t))
+ }
+ if s-t > e.maxMatchOff {
+ panic(fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff))
+ }
+ if len(src)-int(s) > maxCompressedBlockSize {
+ panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
+ }
+ }
+
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:], src[t:]))
+}
+
+// Reset the encoding table.
+func (e *fastBase) resetBase(d *dict, singleBlock bool) {
+ if e.blk == nil {
+ e.blk = &blockEnc{lowMem: e.lowMem}
+ e.blk.init()
+ } else {
+ e.blk.reset(nil)
+ }
+ e.blk.initNewEncode()
+ if e.crc == nil {
+ e.crc = xxhash.New()
+ } else {
+ e.crc.Reset()
+ }
+ if d != nil {
+ low := e.lowMem
+ if singleBlock {
+ e.lowMem = true
+ }
+ e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+ e.lowMem = low
+ }
+
+ // We offset current position so everything will be out of reach.
+ // If above reset line, history will be purged.
+ if e.cur < bufferReset {
+ e.cur += e.maxMatchOff + int32(len(e.hist))
+ }
+ e.hist = e.hist[:0]
+ if d != nil {
+ // Set offsets (currently not used)
+ for i, off := range d.offsets {
+ e.blk.recentOffsets[i] = uint32(off)
+ e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
+ }
+ // Transfer litenc.
+ e.blk.dictLitEnc = d.litEnc
+ e.hist = append(e.hist, d.content...)
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
new file mode 100644
index 0000000..dc1eed5
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -0,0 +1,501 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "fmt"
+ "math/bits"
+)
+
+const (
+ bestLongTableBits = 20 // Bits used in the long match table
+ bestLongTableSize = 1 << bestLongTableBits // Size of the table
+
+ // Note: Increasing the short table bits or making the hash shorter
+ // can actually lead to compression degradation since it will 'steal' more from the
+ // long match table and match offsets are quite big.
+ // This greatly depends on the type of input.
+ bestShortTableBits = 16 // Bits used in the short match table
+ bestShortTableSize = 1 << bestShortTableBits // Size of the table
+)
+
+// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
+// The long match table contains the previous entry with the same hash,
+// effectively making it a "chain" of length 2.
+// When we find a long match we choose between the two values and select the longest.
+// When we find a short match, after checking the long, we check if we can find a long at n+1
+// and that it is longer (lazy matching).
+type bestFastEncoder struct {
+ fastBase
+ table [bestShortTableSize]prevEntry
+ longTable [bestLongTableSize]prevEntry
+ dictTable []prevEntry
+ dictLongTable []prevEntry
+}
+
+// Encode improves compression...
+func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 4
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = prevEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ v2 := e.table[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.table[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ const kSearchStrength = 10
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+ offset3 := int32(blk.recentOffsets[2])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ _ = addLiterals
+
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ type match struct {
+ offset int32
+ s int32
+ length int32
+ rep int32
+ }
+ matchAt := func(offset int32, s int32, first uint32, rep int32) match {
+ if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
+ return match{offset: offset, s: s}
+ }
+ return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
+ }
+
+ bestOf := func(a, b match) match {
+ aScore := b.s - a.s + a.length
+ bScore := a.s - b.s + b.length
+ if a.rep < 0 {
+ aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
+ }
+ if b.rep < 0 {
+ bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
+ }
+ if aScore >= bScore {
+ return a
+ }
+ return b
+ }
+ const goodEnough = 100
+
+ nextHashL := hash8(cv, bestLongTableBits)
+ nextHashS := hash4x64(cv, bestShortTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+ if canRepeat && best.length < goodEnough {
+ best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
+ best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
+ best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
+ if best.length > 0 {
+ best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
+ best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
+ best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
+ }
+ }
+ // Load next and check...
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
+ e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
+
+ // Look far ahead, unless we have a really long match already...
+ if best.length < goodEnough {
+ // No match found, move forward on input, no need to check forward...
+ if best.length < 4 {
+ s += 1 + (s-nextEmit)>>(kSearchStrength-1)
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+
+ s++
+ candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
+ cv = load6432(src, s)
+ cv2 := load6432(src, s+1)
+ candidateL = e.longTable[hash8(cv, bestLongTableBits)]
+ candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
+
+ best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
+ best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+
+ // See if we can find a better match by checking where the current best ends.
+ // Use that offset to see if we can find a better full match.
+ if sAt := best.s + best.length; sAt < sLimit {
+ nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
+ candidateEnd := e.longTable[nextHashL]
+ if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
+ bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
+ if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
+ bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+ }
+ best = bestEnd
+ }
+ }
+ }
+
+ // We have a match, we can store the forward value
+ if best.rep > 0 {
+ s = best.s
+ var seq seq
+ seq.matchLen = uint32(best.length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := best.s
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ repIndex := best.offset
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = uint32(best.rep)
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s
+ s = best.s + best.length
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, best.length)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ off := index0 + e.cur
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ h0 := hash8(cv0, bestLongTableBits)
+ h1 := hash4x64(cv0, bestShortTableBits)
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
+ off++
+ index0++
+ }
+ switch best.rep {
+ case 2:
+ offset1, offset2 = offset2, offset1
+ case 3:
+ offset1, offset2, offset3 = offset3, offset1, offset2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+
+ // A 4-byte match has been found. Update recent offsets.
+ // We'll later see if more than 4 bytes.
+ s = best.s
+ t := best.offset
+ offset1, offset2, offset3 = s-t, offset1, offset2
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := best.length
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ // every entry
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ h0 := hash8(cv0, bestLongTableBits)
+ h1 := hash4x64(cv0, bestShortTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
+ index0++
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash4x64(cv, bestShortTableBits)
+ nextHashL := hash8(cv, bestLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ blk.recentOffsets[2] = uint32(offset3)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// EncodeNoHist will encode a block with no history and no following blocks.
+// Most notable difference is that src will not be copied for history and
+// we do not need to check for max match length.
+func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ e.ensureHist(len(src))
+ e.Encode(blk, src)
+}
+
+// Reset will reset the encoder and set a dictionary if d is not nil.
+func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d == nil {
+ return
+ }
+ // Init or copy dict table
+ if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
+ // Start from a zeroed table: the loop below chains onto existing
+ // entries, so leftovers from a previous dictionary must not survive.
+ e.dictTable = make([]prevEntry, len(e.table))
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ for i := e.maxMatchOff; i < end; i += 4 {
+ const hashLog = bestShortTableBits
+
+ cv := load6432(d.content, i-e.maxMatchOff)
+ nextHash := hash4x64(cv, hashLog) // 0 -> 4
+ nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
+ nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
+ nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
+ e.dictTable[nextHash] = prevEntry{
+ prev: e.dictTable[nextHash].offset,
+ offset: i,
+ }
+ e.dictTable[nextHash1] = prevEntry{
+ prev: e.dictTable[nextHash1].offset,
+ offset: i + 1,
+ }
+ e.dictTable[nextHash2] = prevEntry{
+ prev: e.dictTable[nextHash2].offset,
+ offset: i + 2,
+ }
+ e.dictTable[nextHash3] = prevEntry{
+ prev: e.dictTable[nextHash3].offset,
+ offset: i + 3,
+ }
+ }
+ // e.lastDictID is updated after the long table below, which also tests it.
+ }
+
+ // Init or copy dict long table
+ if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
+ // Zeroed for the same reason as the short table: the loop chains onto
+ // existing entries, so a previous dictionary's entries must be dropped.
+ e.dictLongTable = make([]prevEntry, len(e.longTable))
+ if len(d.content) >= 8 {
+ cv := load6432(d.content, 0)
+ h := hash8(cv, bestLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: e.maxMatchOff,
+ prev: e.dictLongTable[h].offset,
+ }
+
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ off := 8 // First to read
+ for i := e.maxMatchOff + 1; i < end; i++ {
+ cv = cv>>8 | (uint64(d.content[off]) << 56)
+ h := hash8(cv, bestLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: i,
+ prev: e.dictLongTable[h].offset,
+ }
+ off++
+ }
+ }
+ e.lastDictID = d.id
+ }
+ // Reset table to initial state
+ copy(e.longTable[:], e.dictLongTable)
+
+ e.cur = e.maxMatchOff
+ // Reset table to initial state
+ copy(e.table[:], e.dictTable)
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
new file mode 100644
index 0000000..6049542
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -0,0 +1,1235 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import "fmt"
+
+const (
+ betterLongTableBits = 19 // log2 of the number of entries in the long match table
+ betterLongTableSize = 1 << betterLongTableBits // Number of entries in the long match table
+
+ // Note: Increasing the short table bits or making the hash shorter
+ // can actually lead to compression degradation since it will 'steal' more from the
+ // long match table and match offsets are quite big.
+ // This greatly depends on the type of input.
+ betterShortTableBits = 13 // log2 of the number of entries in the short match table
+ betterShortTableSize = 1 << betterShortTableBits // Number of entries in the short match table
+
+ betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of dirty-tracking shards in the long table
+ betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Entries per long-table shard
+
+ betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of dirty-tracking shards in the short table
+ betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Entries per short-table shard
+)
+
+type prevEntry struct {
+ offset int32 // most recent position stored in this hash slot
+ prev int32 // position the slot held before offset replaced it
+}
+
+// betterFastEncoder uses 2 tables, one for short matches (hashed over 5 bytes) and one for long matches (hashed over 8 bytes).
+// Each long table slot also keeps the entry it previously held for the same hash,
+// effectively making it a "chain" of length 2.
+// When a long match is found, both chain entries are tried and the longer one is selected.
+// When only a short match is found, a long match is additionally looked up at s+1
+// and used if it is longer (lazy matching).
+type betterFastEncoder struct {
+ fastBase
+ table [betterShortTableSize]tableEntry // short match table, indexed by hash5
+ longTable [betterLongTableSize]prevEntry // long match table, indexed by hash8
+}
+
+type betterFastEncoderDict struct {
+ betterFastEncoder
+ dictTable []tableEntry // short table image built from the dictionary; Reset copies it into table
+ dictLongTable []prevEntry // long table image built from the dictionary; Reset copies it into longTable
+ shortTableShardDirty [betterShortTableShardCnt]bool // shards of table written to since the last Reset
+ longTableShardDirty [betterLongTableShardCnt]bool // shards of longTable written to since the last Reset
+ allDirty bool // true when Reset must restore both tables completely
+}
+
+// Encode finds matches for src and appends the resulting literals and sequences to blk, probing the hash8 long table (two candidates per slot) and the hash5 short table.
+func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // From here on src is the whole history buffer and s indexes into it.
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 9
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // Repeat offsets are only tried once blk already holds more than two sequences.
+ canRepeat := len(blk.sequences) > 2
+ var matched int32
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ off := s + e.cur
+ e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
+ e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0: offset value 1 reuses offset1 (regular matches store distance+3 further down).
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s + repOff
+ s += lenght + repOff
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ const repOff2 = 1
+
+ // We deviate from the reference encoder and also check offset 2.
+ // Still slower and not much better, so disabled.
+ // repIndex = s - offset2 + repOff2
+ if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
+ // Consider history as well.
+ var seq seq
+ lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 2
+ seq.offset = 2
+ if debugSequences {
+ println("repeat sequence 2", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ index0 := s + repOff2
+ s += lenght + repOff2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ // Swap offsets
+ offset1, offset2 = offset2, offset1
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := candidateL.offset - e.cur
+ coffsetLP := candidateL.prev - e.cur
+
+ // Check if we have a long match.
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetL+8, src) + 8
+ t = coffsetL
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // The older chain entry matches at least 8 bytes too; keep whichever is longer.
+ prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
+ if prevMatch > matched {
+ matched = prevMatch
+ t = coffsetLP
+ }
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ }
+ break
+ }
+
+ // Check if we have a long match on prev.
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetLP+8, src) + 8
+ t = coffsetLP
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ coffsetS := candidateS.offset - e.cur
+
+ // Check if we have a short match.
+ if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ matched = e.matchlen(s+4, coffsetS+4, src) + 4
+
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, betterLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = candidateL.offset - e.cur
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+ }
+
+ // Check prev long...
+ coffsetL = candidateL.prev - e.cur
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match (after short)")
+ }
+ break
+ }
+ }
+ t = coffsetS
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+
+ // Try to find a better match by searching for a long match at the end of the current best match
+ if true && s+matched < sLimit {
+ nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+ cv := load3232(src, s)
+ candidateL := e.longTable[nextHashL]
+ coffsetL := candidateL.offset - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("long match at end-of-match")
+ }
+ }
+ }
+
+ // Check prev long...
+ if true {
+ coffsetL = candidateL.prev - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match at end-of-match")
+ }
+ }
+ }
+ }
+ }
+ // A match has been found. Update recent offsets.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := matched
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ index0 += 2
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// EncodeNoHist will encode a block with no history and no following blocks.
+// This encoder has no dedicated no-history path: it calls e.ensureHist(len(src))
+// (presumably to size the history buffer; confirm in fastBase) and then delegates to Encode.
+func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ e.ensureHist(len(src))
+ e.Encode(blk, src)
+}
+
+// Encode is the dictionary variant of betterFastEncoder.Encode: the same search, but every table write also marks the written shard dirty so Reset can restore it.
+func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ e.allDirty = true
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.allDirty = true
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // From here on src is the whole history buffer and s indexes into it.
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 9
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // Repeat offsets are only tried once blk already holds more than two sequences.
+ canRepeat := len(blk.sequences) > 2
+ var matched int32
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ off := s + e.cur
+ e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+ e.markShortShardDirty(nextHashS)
+
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0: offset value 1 reuses offset1 (regular matches store distance+3 further down).
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s + repOff
+ s += lenght + repOff
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ const repOff2 = 1
+
+ // We deviate from the reference encoder and also check offset 2.
+ // Still slower and not much better, so disabled.
+ // repIndex = s - offset2 + repOff2
+ if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
+ // Consider history as well.
+ var seq seq
+ lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+
+ seq.matchLen = uint32(lenght - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 2
+ seq.offset = 2
+ if debugSequences {
+ println("repeat sequence 2", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ index0 := s + repOff2
+ s += lenght + repOff2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ // Swap offsets
+ offset1, offset2 = offset2, offset1
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := candidateL.offset - e.cur
+ coffsetLP := candidateL.prev - e.cur
+
+ // Check if we have a long match.
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetL+8, src) + 8
+ t = coffsetL
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // The older chain entry matches at least 8 bytes too; keep whichever is longer.
+ prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
+ if prevMatch > matched {
+ matched = prevMatch
+ t = coffsetLP
+ }
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ }
+ break
+ }
+
+ // Check if we have a long match on prev.
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetLP+8, src) + 8
+ t = coffsetLP
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ coffsetS := candidateS.offset - e.cur
+
+ // Check if we have a short match.
+ if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ matched = e.matchlen(s+4, coffsetS+4, src) + 4
+
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, betterLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = candidateL.offset - e.cur
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
+ e.markLongShardDirty(nextHashL)
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+ }
+
+ // Check prev long...
+ coffsetL = candidateL.prev - e.cur
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match (after short)")
+ }
+ break
+ }
+ }
+ t = coffsetS
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // Try to find a better match by searching for a long match at the end of the current best match
+ if s+matched < sLimit {
+ nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+ cv := load3232(src, s)
+ candidateL := e.longTable[nextHashL]
+ coffsetL := candidateL.offset - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("long match at end-of-match")
+ }
+ }
+ }
+
+ // Check prev long...
+ if true {
+ coffsetL = candidateL.prev - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match at end-of-match")
+ }
+ }
+ }
+ }
+ }
+ // A match has been found. Update recent offsets.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := matched
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShortShardDirty(nextHashS)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// Reset resets the encoder through resetBase; this variant keeps no dictionary tables, so it panics when d is not nil.
+func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d != nil {
+ panic("betterFastEncoder: Reset with dict")
+ }
+}
+
+// Reset resets the encoder through resetBase and, for a non-nil d, rebuilds the dictionary tables if d.id changed and restores table/longTable from them.
+func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d == nil {
+ return
+ }
+ dictChanged := d.id != e.lastDictID // sampled once: the short-table rebuild below already updates lastDictID
+ if len(e.dictTable) != len(e.table) || dictChanged {
+ // Always start from an empty table: a slot the new dictionary never hits
+ // must not keep the val/offset pair cached for a previous dictionary.
+ e.dictTable = make([]tableEntry, len(e.table))
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ for i := e.maxMatchOff; i < end; i += 4 {
+ const hashLog = betterShortTableBits
+
+ cv := load6432(d.content, i-e.maxMatchOff)
+ nextHash := hash5(cv, hashLog) // 0 -> 4
+ nextHash1 := hash5(cv>>8, hashLog) // 1 -> 5
+ nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
+ nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
+ e.dictTable[nextHash] = tableEntry{
+ val: uint32(cv),
+ offset: i,
+ }
+ e.dictTable[nextHash1] = tableEntry{
+ val: uint32(cv >> 8),
+ offset: i + 1,
+ }
+ e.dictTable[nextHash2] = tableEntry{
+ val: uint32(cv >> 16),
+ offset: i + 2,
+ }
+ e.dictTable[nextHash3] = tableEntry{
+ val: uint32(cv >> 24),
+ offset: i + 3,
+ }
+ }
+ e.lastDictID = d.id
+ e.allDirty = true
+ }
+
+ // Init or copy dict long table
+ if len(e.dictLongTable) != len(e.longTable) || dictChanged {
+ // Fresh table for the same reason as above; it also keeps the prev
+ // chain from linking into positions of a previous dictionary.
+ e.dictLongTable = make([]prevEntry, len(e.longTable))
+ if len(d.content) >= 8 {
+ cv := load6432(d.content, 0)
+ h := hash8(cv, betterLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: e.maxMatchOff,
+ prev: e.dictLongTable[h].offset,
+ }
+
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ off := 8 // First to read
+ for i := e.maxMatchOff + 1; i < end; i++ {
+ cv = cv>>8 | (uint64(d.content[off]) << 56)
+ h := hash8(cv, betterLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: i,
+ prev: e.dictLongTable[h].offset,
+ }
+ off++
+ }
+ }
+ e.lastDictID = d.id
+ e.allDirty = true
+ }
+
+ // Reset short table to its dictionary state: full copy when most shards are dirty, else per shard.
+ {
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.shortTableShardDirty {
+ if e.shortTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+ const shardCnt = betterShortTableShardCnt
+ const shardSize = betterShortTableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.table[:], e.dictTable)
+ for i := range e.shortTableShardDirty {
+ e.shortTableShardDirty[i] = false
+ }
+ } else {
+ for i := range e.shortTableShardDirty {
+ if !e.shortTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ e.shortTableShardDirty[i] = false
+ }
+ }
+ }
+ {
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.longTableShardDirty {
+ if e.longTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+ const shardCnt = betterLongTableShardCnt
+ const shardSize = betterLongTableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.longTable[:], e.dictLongTable)
+ for i := range e.longTableShardDirty {
+ e.longTableShardDirty[i] = false
+ }
+ } else {
+ for i := range e.longTableShardDirty {
+ if !e.longTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
+ e.longTableShardDirty[i] = false
+ }
+ }
+ }
+ e.cur = e.maxMatchOff
+ e.allDirty = false
+}
+
+func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
+ e.longTableShardDirty[entryNum/betterLongTableShardSize] = true // flag the shard holding longTable slot entryNum so Reset restores it from dictLongTable
+}
+
+func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
+ e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true // flag the shard holding table slot entryNum so Reset restores it from dictTable
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
new file mode 100644
index 0000000..8629d43
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -0,0 +1,1121 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import "fmt"
+
+const (
+	dFastLongTableBits = 17                      // Bits used in the long match table
+	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
+	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+
+	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
+	dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt   // Size of an individual shard; uses the long table's own shard count so that Cnt*Size == dFastLongTableSize
+
+	dFastShortTableBits = tableBits                // Bits used in the short match table
+	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
+	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+)
+
+type doubleFastEncoder struct { // fastEncoder extended with a second, long-match table; its Reset rejects dictionaries
+	fastEncoder
+	longTable [dFastLongTableSize]tableEntry // indexed by hash8(cv, dFastLongTableBits) in Encode and EncodeNoHist
+}
+
+type doubleFastEncoderDict struct { // dictionary-aware counterpart of doubleFastEncoder
+	fastEncoderDict
+	longTable           [dFastLongTableSize]tableEntry // live long-match table, indexed by hash8(cv, dFastLongTableBits)
+	dictLongTable       []tableEntry                   // long table pre-filled from the dictionary content; Reset copies it into longTable
+	longTableShardDirty [dLongTableShardCnt]bool       // set by markLongShardDirty when Encode writes longTable; cleared by Reset
+}
+
+// Encode compresses src into blk (literals plus sequences), probing e.table via hash5 and e.longTable via hash8; it mimics zstd_dfast.c.
+func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
+	const (
+		// Input margin is the number of bytes we read (8)
+		// and the maximum we will read ahead (2)
+		inputMargin            = 8 + 2
+		minNonLiteralBlockSize = 16
+	)
+
+	// Protect against e.cur wraparound. Every path through the body ends in break, so it runs at most once.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.longTable[:] {
+				e.longTable[i] = tableEntry{}
+			}
+			e.cur = e.maxMatchOff
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v < minOff {
+				v = 0
+			} else {
+				v = v - e.cur + e.maxMatchOff
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.longTable[:] {
+			v := e.longTable[i].offset
+			if v < minOff {
+				v = 0
+			} else {
+				v = v - e.cur + e.maxMatchOff
+			}
+			e.longTable[i].offset = v
+		}
+		e.cur = e.maxMatchOff
+		break
+	}
+
+	s := e.addBlock(src) // s is used below as an index into e.hist (src is rebound to e.hist further down)
+	blk.size = len(src)
+	if len(src) < minNonLiteralBlockSize { // too short to search: store the whole block as literals
+		blk.extraLits = len(src)
+		blk.literals = blk.literals[:len(src)]
+		copy(blk.literals, src)
+		return
+	}
+
+	// Override src: from here on src is the history buffer, and s and t index into it.
+	src = e.hist
+	sLimit := int32(len(src)) - inputMargin
+	// stepSize is the number of bytes to skip on every main loop iteration.
+	// It should be >= 1.
+	const stepSize = 1
+
+	const kSearchStrength = 8
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := s
+	cv := load6432(src, s)
+
+	// Relative offsets
+	offset1 := int32(blk.recentOffsets[0])
+	offset2 := int32(blk.recentOffsets[1])
+
+	addLiterals := func(s *seq, until int32) { // appends src[nextEmit:until] to blk.literals and records its length in s.litLen
+		if until == nextEmit {
+			return
+		}
+		blk.literals = append(blk.literals, src[nextEmit:until]...)
+		s.litLen = uint32(until - nextEmit)
+	}
+	if debug {
+		println("recent offsets:", blk.recentOffsets)
+	}
+
+encodeLoop:
+	for {
+		var t int32
+		// We allow the encoder to optionally turn off repeat offsets across blocks
+		canRepeat := len(blk.sequences) > 2
+
+		for {
+			if debugAsserts && canRepeat && offset1 == 0 {
+				panic("offset0 was 0")
+			}
+
+			nextHashS := hash5(cv, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
+			candidateL := e.longTable[nextHashL]
+			candidateS := e.table[nextHashS]
+
+			const repOff = 1 // the repeat-offset probe is made repOff bytes ahead of s
+			repIndex := s - offset1 + repOff
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.table[nextHashS] = entry
+
+			if canRepeat {
+				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+					// Consider history as well.
+					var seq seq
+					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+					seq.matchLen = uint32(lenght - zstdMinMatch)
+
+					// We might be able to match backwards.
+					// Extend as long as we can.
+					start := s + repOff
+					// We end the search early, so we don't risk 0 literals
+					// and have to do special offset treatment.
+					startLimit := nextEmit + 1
+
+					tMin := s - e.maxMatchOff
+					if tMin < 0 {
+						tMin = 0
+					}
+					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+						repIndex--
+						start--
+						seq.matchLen++
+					}
+					addLiterals(&seq, start)
+
+					// rep 0
+					seq.offset = 1
+					if debugSequences {
+						println("repeat sequence", seq, "next s:", s)
+					}
+					blk.sequences = append(blk.sequences, seq)
+					s += lenght + repOff
+					nextEmit = s
+					if s >= sLimit {
+						if debug {
+							println("repeat ended", s, lenght)
+
+						}
+						break encodeLoop
+					}
+					cv = load6432(src, s)
+					continue
+				}
+			}
+			// Find the offsets of our two matches.
+			coffsetL := s - (candidateL.offset - e.cur)
+			coffsetS := s - (candidateS.offset - e.cur)
+
+			// Check if we have a long match.
+			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+				// Found a long match, likely at least 8 bytes.
+				// Reference encoder checks all 8 bytes, we only check 4,
+				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+				t = candidateL.offset - e.cur
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+				}
+				if debugAsserts && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debugMatches {
+					println("long match")
+				}
+				break
+			}
+
+			// Check if we have a short match.
+			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+				// found a regular match
+				// See if we can find a long match at s+1
+				const checkAt = 1
+				cv := load6432(src, s+checkAt) // shadows the outer cv for the rest of this branch only
+				nextHashL = hash8(cv, dFastLongTableBits)
+				candidateL = e.longTable[nextHashL]
+				coffsetL = s - (candidateL.offset - e.cur) + checkAt
+
+				// We can store it, since we have at least a 4 byte match.
+				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
+				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+					// Found a long match, likely at least 8 bytes.
+					// Reference encoder checks all 8 bytes, we only check 4,
+					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+					t = candidateL.offset - e.cur
+					s += checkAt
+					if debugMatches {
+						println("long match (after short)")
+					}
+					break
+				}
+
+				t = candidateS.offset - e.cur
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+				}
+				if debugAsserts && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debugAsserts && t < 0 {
+					panic("t<0")
+				}
+				if debugMatches {
+					println("short match")
+				}
+				break
+			}
+
+			// No match found, move forward in input. The step grows with the distance since the last emitted position.
+			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+			if s >= sLimit {
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+		}
+
+		// A 4-byte match has been found. Update recent offsets.
+		// We'll later see if more than 4 bytes.
+		offset2 = offset1
+		offset1 = s - t
+
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+		}
+
+		if debugAsserts && canRepeat && int(offset1) > len(src) {
+			panic("invalid offset")
+		}
+
+		// Extend the 4-byte match as long as possible.
+		l := e.matchlen(s+4, t+4, src) + 4
+
+		// Extend backwards
+		tMin := s - e.maxMatchOff
+		if tMin < 0 {
+			tMin = 0
+		}
+		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+			s--
+			t--
+			l++
+		}
+
+		// Write our sequence
+		var seq seq
+		seq.litLen = uint32(s - nextEmit)
+		seq.matchLen = uint32(l - zstdMinMatch)
+		if seq.litLen > 0 {
+			blk.literals = append(blk.literals, src[nextEmit:s]...)
+		}
+		seq.offset = uint32(s-t) + 3 // NOTE(review): +3 presumably keeps values 1-3 free for repeat-offset codes — confirm
+		s += l
+		if debugSequences {
+			println("sequence", seq, "next s:", s)
+		}
+		blk.sequences = append(blk.sequences, seq)
+		nextEmit = s
+		if s >= sLimit {
+			break encodeLoop
+		}
+
+		// Index match start+1 (long) and start+2 (short)
+		index0 := s - l + 1
+		// Index match end-2 (long) and end-1 (short)
+		index1 := s - 2
+
+		cv0 := load6432(src, index0)
+		cv1 := load6432(src, index1)
+		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
+		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
+		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
+		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
+		cv0 >>= 8
+		cv1 >>= 8
+		te0.offset++
+		te1.offset++
+		te0.val = uint32(cv0)
+		te1.val = uint32(cv1)
+		e.table[hash5(cv0, dFastShortTableBits)] = te0
+		e.table[hash5(cv1, dFastShortTableBits)] = te1
+
+		cv = load6432(src, s)
+
+		if !canRepeat {
+			continue
+		}
+
+		// Check offset 2
+		for {
+			o2 := s - offset2
+			if load3232(src, o2) != uint32(cv) {
+				// Do regular search
+				break
+			}
+
+			// Store this, since we have it.
+			nextHashS := hash5(cv, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
+
+			// We have at least 4 byte match.
+			// No need to check backwards. We come straight from a match
+			l := 4 + e.matchlen(s+4, o2+4, src)
+
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.table[nextHashS] = entry
+			seq.matchLen = uint32(l) - zstdMinMatch
+			seq.litLen = 0
+
+			// Since litlen is always 0, this is offset 1.
+			seq.offset = 1
+			s += l
+			nextEmit = s
+			if debugSequences {
+				println("sequence", seq, "next s:", s)
+			}
+			blk.sequences = append(blk.sequences, seq)
+
+			// Swap offset 1 and 2.
+			offset1, offset2 = offset2, offset1
+			if s >= sLimit {
+				// Finished
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+		}
+	}
+
+	if int(nextEmit) < len(src) { // whatever follows the last sequence is stored as trailing literals
+		blk.literals = append(blk.literals, src[nextEmit:]...)
+		blk.extraLits = len(src) - int(nextEmit)
+	}
+	blk.recentOffsets[0] = uint32(offset1)
+	blk.recentOffsets[1] = uint32(offset2)
+	if debug {
+		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+	}
+}
+
+// EncodeNoHist encodes src into blk as a standalone block: src is searched in place
+// instead of being copied into the history, match lengths are not capped, and
+// blk.recentOffsets is read for the initial offsets but not written back.
+func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+	const (
+		// Input margin is the number of bytes we read (8)
+		// and the maximum we will read ahead (2)
+		inputMargin            = 8 + 2
+		minNonLiteralBlockSize = 16
+	)
+
+	// Protect against e.cur wraparound.
+	if e.cur >= bufferReset {
+		for i := range e.table[:] {
+			e.table[i] = tableEntry{}
+		}
+		for i := range e.longTable[:] {
+			e.longTable[i] = tableEntry{}
+		}
+		e.cur = e.maxMatchOff
+	}
+
+	s := int32(0)
+	blk.size = len(src)
+	if len(src) < minNonLiteralBlockSize { // too short to search: store the whole block as literals
+		blk.extraLits = len(src)
+		blk.literals = blk.literals[:len(src)]
+		copy(blk.literals, src)
+		return
+	}
+
+	// src is searched directly; unlike Encode there is no history buffer to switch to.
+	sLimit := int32(len(src)) - inputMargin
+	// stepSize is the number of bytes to skip on every main loop iteration.
+	// It should be >= 1.
+	const stepSize = 1
+
+	const kSearchStrength = 8
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := s
+	cv := load6432(src, s)
+
+	// Relative offsets
+	offset1 := int32(blk.recentOffsets[0])
+	offset2 := int32(blk.recentOffsets[1])
+
+	addLiterals := func(s *seq, until int32) { // appends src[nextEmit:until] to blk.literals and records its length in s.litLen
+		if until == nextEmit {
+			return
+		}
+		blk.literals = append(blk.literals, src[nextEmit:until]...)
+		s.litLen = uint32(until - nextEmit)
+	}
+	if debug {
+		println("recent offsets:", blk.recentOffsets)
+	}
+
+encodeLoop:
+	for {
+		var t int32
+		for {
+
+			nextHashS := hash5(cv, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
+			candidateL := e.longTable[nextHashL]
+			candidateS := e.table[nextHashS]
+
+			const repOff = 1 // the repeat-offset probe is made repOff bytes ahead of s
+			repIndex := s - offset1 + repOff
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.table[nextHashS] = entry
+
+			if len(blk.sequences) > 2 {
+				if load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+					// Consider history as well.
+					var seq seq
+					//length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+					length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:]))
+
+					seq.matchLen = uint32(length - zstdMinMatch)
+
+					// We might be able to match backwards.
+					// Extend as long as we can.
+					start := s + repOff
+					// We end the search early, so we don't risk 0 literals
+					// and have to do special offset treatment.
+					startLimit := nextEmit + 1
+
+					tMin := s - e.maxMatchOff
+					if tMin < 0 {
+						tMin = 0
+					}
+					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] {
+						repIndex--
+						start--
+						seq.matchLen++
+					}
+					addLiterals(&seq, start)
+
+					// rep 0
+					seq.offset = 1
+					if debugSequences {
+						println("repeat sequence", seq, "next s:", s)
+					}
+					blk.sequences = append(blk.sequences, seq)
+					s += length + repOff
+					nextEmit = s
+					if s >= sLimit {
+						if debug {
+							println("repeat ended", s, length)
+
+						}
+						break encodeLoop
+					}
+					cv = load6432(src, s)
+					continue
+				}
+			}
+			// Find the offsets of our two matches.
+			coffsetL := s - (candidateL.offset - e.cur)
+			coffsetS := s - (candidateS.offset - e.cur)
+
+			// Check if we have a long match.
+			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+				// Found a long match, likely at least 8 bytes.
+				// Reference encoder checks all 8 bytes, we only check 4,
+				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+				t = candidateL.offset - e.cur
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
+				}
+				if debugAsserts && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debugMatches {
+					println("long match")
+				}
+				break
+			}
+
+			// Check if we have a short match.
+			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+				// found a regular match
+				// See if we can find a long match at s+1
+				const checkAt = 1
+				cv := load6432(src, s+checkAt) // shadows the outer cv for the rest of this branch only
+				nextHashL = hash8(cv, dFastLongTableBits)
+				candidateL = e.longTable[nextHashL]
+				coffsetL = s - (candidateL.offset - e.cur) + checkAt
+
+				// We can store it, since we have at least a 4 byte match.
+				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
+				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+					// Found a long match, likely at least 8 bytes.
+					// Reference encoder checks all 8 bytes, we only check 4,
+					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+					t = candidateL.offset - e.cur
+					s += checkAt
+					if debugMatches {
+						println("long match (after short)")
+					}
+					break
+				}
+
+				t = candidateS.offset - e.cur
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+				}
+				if debugAsserts && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debugAsserts && t < 0 {
+					panic("t<0")
+				}
+				if debugMatches {
+					println("short match")
+				}
+				break
+			}
+
+			// No match found, move forward in input. The step grows with the distance since the last emitted position.
+			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+			if s >= sLimit {
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+		}
+
+		// A 4-byte match has been found. Update recent offsets.
+		// We'll later see if more than 4 bytes.
+		offset2 = offset1
+		offset1 = s - t
+
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+		}
+
+		// Extend the 4-byte match as long as possible.
+		//l := e.matchlen(s+4, t+4, src) + 4
+		l := int32(matchLen(src[s+4:], src[t+4:])) + 4
+
+		// Extend backwards
+		tMin := s - e.maxMatchOff
+		if tMin < 0 {
+			tMin = 0
+		}
+		for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
+			s--
+			t--
+			l++
+		}
+
+		// Write our sequence
+		var seq seq
+		seq.litLen = uint32(s - nextEmit)
+		seq.matchLen = uint32(l - zstdMinMatch)
+		if seq.litLen > 0 {
+			blk.literals = append(blk.literals, src[nextEmit:s]...)
+		}
+		seq.offset = uint32(s-t) + 3 // NOTE(review): +3 presumably keeps values 1-3 free for repeat-offset codes — confirm
+		s += l
+		if debugSequences {
+			println("sequence", seq, "next s:", s)
+		}
+		blk.sequences = append(blk.sequences, seq)
+		nextEmit = s
+		if s >= sLimit {
+			break encodeLoop
+		}
+
+		// Index match start+1 (long) and start+2 (short)
+		index0 := s - l + 1
+		// Index match end-2 (long) and end-1 (short)
+		index1 := s - 2
+
+		cv0 := load6432(src, index0)
+		cv1 := load6432(src, index1)
+		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
+		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
+		e.longTable[hash8(cv0, dFastLongTableBits)] = te0
+		e.longTable[hash8(cv1, dFastLongTableBits)] = te1
+		cv0 >>= 8
+		cv1 >>= 8
+		te0.offset++
+		te1.offset++
+		te0.val = uint32(cv0)
+		te1.val = uint32(cv1)
+		e.table[hash5(cv0, dFastShortTableBits)] = te0
+		e.table[hash5(cv1, dFastShortTableBits)] = te1
+
+		cv = load6432(src, s)
+
+		if len(blk.sequences) <= 2 {
+			continue
+		}
+
+		// Check offset 2
+		for {
+			o2 := s - offset2
+			if load3232(src, o2) != uint32(cv) {
+				// Do regular search
+				break
+			}
+
+			// Store this, since we have it.
+			nextHashS := hash5(cv, dFastShortTableBits) // hash the value at s itself; cv1 is only in step with s on the first pass of this loop
+			nextHashL := hash8(cv, dFastLongTableBits)
+
+			// We have at least 4 byte match.
+			// No need to check backwards. We come straight from a match
+			//l := 4 + e.matchlen(s+4, o2+4, src)
+			l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
+
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.table[nextHashS] = entry
+			seq.matchLen = uint32(l) - zstdMinMatch
+			seq.litLen = 0
+
+			// Since litlen is always 0, this is offset 1.
+			seq.offset = 1
+			s += l
+			nextEmit = s
+			if debugSequences {
+				println("sequence", seq, "next s:", s)
+			}
+			blk.sequences = append(blk.sequences, seq)
+
+			// Swap offset 1 and 2.
+			offset1, offset2 = offset2, offset1
+			if s >= sLimit {
+				// Finished
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+		}
+	}
+
+	if int(nextEmit) < len(src) { // whatever follows the last sequence is stored as trailing literals
+		blk.literals = append(blk.literals, src[nextEmit:]...)
+		blk.extraLits = len(src) - int(nextEmit)
+	}
+	if debug {
+		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+	}
+
+	// We do not store history, so we must offset e.cur to avoid false matches for next user.
+	if e.cur < bufferReset {
+		e.cur += int32(len(src))
+	}
+}
+
+// Encode will encode the content, with a dictionary if initialized for it; every table write also marks the touched shard dirty for a later Reset.
+func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
+	const (
+		// Input margin is the number of bytes we read (8)
+		// and the maximum we will read ahead (2)
+		inputMargin            = 8 + 2
+		minNonLiteralBlockSize = 16
+	)
+
+	// Protect against e.cur wraparound. Every path through the body ends in break, so it runs at most once.
+	for e.cur >= bufferReset {
+		if len(e.hist) == 0 {
+			for i := range e.table[:] {
+				e.table[i] = tableEntry{}
+			}
+			for i := range e.longTable[:] {
+				e.longTable[i] = tableEntry{}
+			}
+			e.markAllShardsDirty()
+			e.cur = e.maxMatchOff
+			break
+		}
+		// Shift down everything in the table that isn't already too far away.
+		minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+		for i := range e.table[:] {
+			v := e.table[i].offset
+			if v < minOff {
+				v = 0
+			} else {
+				v = v - e.cur + e.maxMatchOff
+			}
+			e.table[i].offset = v
+		}
+		for i := range e.longTable[:] {
+			v := e.longTable[i].offset
+			if v < minOff {
+				v = 0
+			} else {
+				v = v - e.cur + e.maxMatchOff
+			}
+			e.longTable[i].offset = v
+		}
+		e.markAllShardsDirty()
+		e.cur = e.maxMatchOff
+		break
+	}
+
+	s := e.addBlock(src) // s is used below as an index into e.hist (src is rebound to e.hist further down)
+	blk.size = len(src)
+	if len(src) < minNonLiteralBlockSize { // too short to search: store the whole block as literals
+		blk.extraLits = len(src)
+		blk.literals = blk.literals[:len(src)]
+		copy(blk.literals, src)
+		return
+	}
+
+	// Override src: from here on src is the history buffer, and s and t index into it.
+	src = e.hist
+	sLimit := int32(len(src)) - inputMargin
+	// stepSize is the number of bytes to skip on every main loop iteration.
+	// It should be >= 1.
+	const stepSize = 1
+
+	const kSearchStrength = 8
+
+	// nextEmit is where in src the next emitLiteral should start from.
+	nextEmit := s
+	cv := load6432(src, s)
+
+	// Relative offsets
+	offset1 := int32(blk.recentOffsets[0])
+	offset2 := int32(blk.recentOffsets[1])
+
+	addLiterals := func(s *seq, until int32) { // appends src[nextEmit:until] to blk.literals and records its length in s.litLen
+		if until == nextEmit {
+			return
+		}
+		blk.literals = append(blk.literals, src[nextEmit:until]...)
+		s.litLen = uint32(until - nextEmit)
+	}
+	if debug {
+		println("recent offsets:", blk.recentOffsets)
+	}
+
+encodeLoop:
+	for {
+		var t int32
+		// We allow the encoder to optionally turn off repeat offsets across blocks
+		canRepeat := len(blk.sequences) > 2
+
+		for {
+			if debugAsserts && canRepeat && offset1 == 0 {
+				panic("offset0 was 0")
+			}
+
+			nextHashS := hash5(cv, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
+			candidateL := e.longTable[nextHashL]
+			candidateS := e.table[nextHashS]
+
+			const repOff = 1 // the repeat-offset probe is made repOff bytes ahead of s
+			repIndex := s - offset1 + repOff
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.markLongShardDirty(nextHashL)
+			e.table[nextHashS] = entry
+			e.markShardDirty(nextHashS)
+
+			if canRepeat {
+				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+					// Consider history as well.
+					var seq seq
+					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+					seq.matchLen = uint32(length - zstdMinMatch)
+
+					// We might be able to match backwards.
+					// Extend as long as we can.
+					start := s + repOff
+					// We end the search early, so we don't risk 0 literals
+					// and have to do special offset treatment.
+					startLimit := nextEmit + 1
+
+					tMin := s - e.maxMatchOff
+					if tMin < 0 {
+						tMin = 0
+					}
+					for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+						repIndex--
+						start--
+						seq.matchLen++
+					}
+					addLiterals(&seq, start)
+
+					// rep 0
+					seq.offset = 1
+					if debugSequences {
+						println("repeat sequence", seq, "next s:", s)
+					}
+					blk.sequences = append(blk.sequences, seq)
+					s += length + repOff
+					nextEmit = s
+					if s >= sLimit {
+						if debug {
+							println("repeat ended", s, length)
+
+						}
+						break encodeLoop
+					}
+					cv = load6432(src, s)
+					continue
+				}
+			}
+			// Find the offsets of our two matches.
+			coffsetL := s - (candidateL.offset - e.cur)
+			coffsetS := s - (candidateS.offset - e.cur)
+
+			// Check if we have a long match.
+			if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+				// Found a long match, likely at least 8 bytes.
+				// Reference encoder checks all 8 bytes, we only check 4,
+				// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+				t = candidateL.offset - e.cur
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+				}
+				if debugAsserts && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debugMatches {
+					println("long match")
+				}
+				break
+			}
+
+			// Check if we have a short match.
+			if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+				// found a regular match
+				// See if we can find a long match at s+1
+				const checkAt = 1
+				cv := load6432(src, s+checkAt) // shadows the outer cv for the rest of this branch only
+				nextHashL = hash8(cv, dFastLongTableBits)
+				candidateL = e.longTable[nextHashL]
+				coffsetL = s - (candidateL.offset - e.cur) + checkAt
+
+				// We can store it, since we have at least a 4 byte match.
+				e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
+				e.markLongShardDirty(nextHashL)
+				if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+					// Found a long match, likely at least 8 bytes.
+					// Reference encoder checks all 8 bytes, we only check 4,
+					// but the likelihood of both the first 4 bytes and the hash matching should be enough.
+					t = candidateL.offset - e.cur
+					s += checkAt
+					if debugMatches {
+						println("long match (after short)")
+					}
+					break
+				}
+
+				t = candidateS.offset - e.cur
+				if debugAsserts && s <= t {
+					panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+				}
+				if debugAsserts && s-t > e.maxMatchOff {
+					panic("s - t >e.maxMatchOff")
+				}
+				if debugAsserts && t < 0 {
+					panic("t<0")
+				}
+				if debugMatches {
+					println("short match")
+				}
+				break
+			}
+
+			// No match found, move forward in input. The step grows with the distance since the last emitted position.
+			s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+			if s >= sLimit {
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+		}
+
+		// A 4-byte match has been found. Update recent offsets.
+		// We'll later see if more than 4 bytes.
+		offset2 = offset1
+		offset1 = s - t
+
+		if debugAsserts && s <= t {
+			panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+		}
+
+		if debugAsserts && canRepeat && int(offset1) > len(src) {
+			panic("invalid offset")
+		}
+
+		// Extend the 4-byte match as long as possible.
+		l := e.matchlen(s+4, t+4, src) + 4
+
+		// Extend backwards
+		tMin := s - e.maxMatchOff
+		if tMin < 0 {
+			tMin = 0
+		}
+		for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+			s--
+			t--
+			l++
+		}
+
+		// Write our sequence
+		var seq seq
+		seq.litLen = uint32(s - nextEmit)
+		seq.matchLen = uint32(l - zstdMinMatch)
+		if seq.litLen > 0 {
+			blk.literals = append(blk.literals, src[nextEmit:s]...)
+		}
+		seq.offset = uint32(s-t) + 3 // NOTE(review): +3 presumably keeps values 1-3 free for repeat-offset codes — confirm
+		s += l
+		if debugSequences {
+			println("sequence", seq, "next s:", s)
+		}
+		blk.sequences = append(blk.sequences, seq)
+		nextEmit = s
+		if s >= sLimit {
+			break encodeLoop
+		}
+
+		// Index match start+1 (long) and start+2 (short)
+		index0 := s - l + 1
+		// Index match end-2 (long) and end-1 (short)
+		index1 := s - 2
+
+		cv0 := load6432(src, index0)
+		cv1 := load6432(src, index1)
+		te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
+		te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
+		longHash1 := hash8(cv0, dFastLongTableBits)
+		longHash2 := hash8(cv1, dFastLongTableBits) // te1 goes in the bucket for cv1; hashing cv0 again would overwrite te0
+		e.longTable[longHash1] = te0
+		e.longTable[longHash2] = te1
+		e.markLongShardDirty(longHash1)
+		e.markLongShardDirty(longHash2)
+		cv0 >>= 8
+		cv1 >>= 8
+		te0.offset++
+		te1.offset++
+		te0.val = uint32(cv0)
+		te1.val = uint32(cv1)
+		hashVal1 := hash5(cv0, dFastShortTableBits)
+		hashVal2 := hash5(cv1, dFastShortTableBits)
+		e.table[hashVal1] = te0
+		e.markShardDirty(hashVal1)
+		e.table[hashVal2] = te1
+		e.markShardDirty(hashVal2)
+
+		cv = load6432(src, s)
+
+		if !canRepeat {
+			continue
+		}
+
+		// Check offset 2
+		for {
+			o2 := s - offset2
+			if load3232(src, o2) != uint32(cv) {
+				// Do regular search
+				break
+			}
+
+			// Store this, since we have it.
+			nextHashS := hash5(cv, dFastShortTableBits)
+			nextHashL := hash8(cv, dFastLongTableBits)
+
+			// We have at least 4 byte match.
+			// No need to check backwards. We come straight from a match
+			l := 4 + e.matchlen(s+4, o2+4, src)
+
+			entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+			e.longTable[nextHashL] = entry
+			e.markLongShardDirty(nextHashL)
+			e.table[nextHashS] = entry
+			e.markShardDirty(nextHashS)
+			seq.matchLen = uint32(l) - zstdMinMatch
+			seq.litLen = 0
+
+			// Since litlen is always 0, this is offset 1.
+			seq.offset = 1
+			s += l
+			nextEmit = s
+			if debugSequences {
+				println("sequence", seq, "next s:", s)
+			}
+			blk.sequences = append(blk.sequences, seq)
+
+			// Swap offset 1 and 2.
+			offset1, offset2 = offset2, offset1
+			if s >= sLimit {
+				// Finished
+				break encodeLoop
+			}
+			cv = load6432(src, s)
+		}
+	}
+
+	if int(nextEmit) < len(src) { // whatever follows the last sequence is stored as trailing literals
+		blk.literals = append(blk.literals, src[nextEmit:]...)
+		blk.extraLits = len(src) - int(nextEmit)
+	}
+	blk.recentOffsets[0] = uint32(offset1)
+	blk.recentOffsets[1] = uint32(offset2)
+	if debug {
+		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+	}
+	// If we encoded more than 64K mark all dirty. Note that src is e.hist at this point, not the caller's block.
+	if len(src) > 64<<10 {
+		e.markAllShardsDirty()
+	}
+}
+
+// Reset resets the embedded fastEncoder; doubleFastEncoder cannot use a dictionary, so a non-nil d panics.
+func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
+	e.fastEncoder.Reset(d, singleBlock)
+	if d != nil {
+		panic("doubleFastEncoder: Reset with dict not supported")
+	}
+}
+
+// Reset resets the embedded encoder and, when d is non-nil, restores longTable from the dictionary-derived dictLongTable (rebuilt if d changed).
+func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
+	allDirty := e.allDirty
+	dictChanged := d != nil && e.lastDictID != d.id // both sampled before the embedded Reset, which may modify allDirty and lastDictID
+	e.fastEncoderDict.Reset(d, singleBlock)
+	if d == nil {
+		return
+	}
+	// Init or copy dict table. NOTE(review): a reused table keeps slots the new dictionary does not overwrite — confirm that is safe.
+	if len(e.dictLongTable) != len(e.longTable) || dictChanged {
+		if len(e.dictLongTable) != len(e.longTable) {
+			e.dictLongTable = make([]tableEntry, len(e.longTable))
+		}
+		if len(d.content) >= 8 {
+			cv := load6432(d.content, 0)
+			e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+				val:    uint32(cv),
+				offset: e.maxMatchOff,
+			}
+			end := int32(len(d.content)) - 8 + e.maxMatchOff
+			for i := e.maxMatchOff + 1; i < end; i++ {
+				cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56) // slide the 8-byte window forward by one byte
+				e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+					val:    uint32(cv),
+					offset: i,
+				}
+			}
+		}
+		e.lastDictID = d.id
+		allDirty = true // the rebuilt table must be copied in full below, during this same call
+	}
+	// Reset table to initial state
+	e.cur = e.maxMatchOff
+
+	dirtyShardCnt := 0
+	if !allDirty {
+		for i := range e.longTableShardDirty {
+			if e.longTableShardDirty[i] {
+				dirtyShardCnt++
+			}
+		}
+	}
+
+	if allDirty || dirtyShardCnt > dLongTableShardCnt/2 { // a full copy is used once more than half the shards are dirty
+		copy(e.longTable[:], e.dictLongTable)
+		for i := range e.longTableShardDirty {
+			e.longTableShardDirty[i] = false
+		}
+		return
+	}
+	for i := range e.longTableShardDirty {
+		if !e.longTableShardDirty[i] {
+			continue
+		}
+
+		copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+		e.longTableShardDirty[i] = false
+	}
+}
+
+func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
+	e.longTableShardDirty[entryNum/dLongTableShardSize] = true // flag the longTable shard that contains entry entryNum, for Reset
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
new file mode 100644
index 0000000..ba4a17e
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -0,0 +1,1018 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "fmt"
+ "math"
+ "math/bits"
+)
+
+// Sizing of the hash table used by the fast encoder, plus the match length cap.
+const (
+ tableBits = 15 // Bits used in the table
+ tableSize = 1 << tableBits // Size of the table
+ tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
+ tableShardSize = tableSize / tableShardCnt // Size of an individual shard
+ tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+ maxMatchLength = 131074 // Upper bound for a single match; checked when matches are extended backwards.
+)
+
+// tableEntry is one slot of an encoder hash table.
+type tableEntry struct {
+ // val holds the low 32 bits of the input value that was hashed into this slot.
+ val uint32
+ // offset is the position of that value; encoders store it with e.cur added.
+ offset int32
+}
+
+// fastEncoder finds matches using a single hash table of previously seen positions.
+type fastEncoder struct {
+ fastBase
+ // table is indexed by a hash of the upcoming input bytes and holds the last position stored for that hash.
+ table [tableSize]tableEntry
+}
+
+// fastEncoderDict is a fastEncoder that can be primed with a dictionary.
+type fastEncoderDict struct {
+ fastEncoder
+ // dictTable is the hash table built from the dictionary content; Reset copies it into table.
+ dictTable []tableEntry
+ // tableShardDirty records which shards of table were written to since the last Reset.
+ tableShardDirty [tableShardCnt]bool
+ // allDirty is set when writes to table were not tracked per shard; Reset then copies the whole table.
+ allDirty bool
+}
+
+// Encode mimics functionality in zstd_fast.c.
+// src is added to the encoder history through addBlock and searched for
+// matches; the literals and sequences found are appended to blk.
+// The recent offsets are read from blk.recentOffsets and written back on return.
+func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
+ const (
+ inputMargin = 8
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+
+ // Protect against e.cur wraparound.
+ // The loop body always ends in break, so this runs at most once.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ // s is the position returned by addBlock; encoding starts there once src is switched to e.hist below.
+ s := e.addBlock(src)
+ blk.size = len(src)
+ // Input shorter than minNonLiteralBlockSize is stored as literals only.
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 2.
+ const stepSize = 2
+
+ // TEMPLATE
+ const hashLog = tableBits
+ // seems global, but would be nice to tweak.
+ const kSearchStrength = 7
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ // addLiterals appends src[nextEmit:until] to the block literals and records its length in s.
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ // t will contain the match offset when we find one.
+ // When exiting the search loop, we have already checked 4 bytes.
+ var t int32
+
+ // We will not use repeat offsets across blocks.
+ // By not using them for the first 3 matches
+ canRepeat := len(blk.sequences) > 2
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHash := hash6(cv, hashLog)
+ nextHash2 := hash6(cv>>8, hashLog)
+ candidate := e.table[nextHash]
+ candidate2 := e.table[nextHash2]
+ repIndex := s - offset1 + 2
+
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
+
+ // Try the most recent offset first, two bytes ahead of s.
+ if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+ // Consider history as well.
+ var seq seq
+ var length int32
+ // length = 4 + e.matchlen(s+6, repIndex+4, src)
+ {
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + 2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ sMin := s - e.maxMatchOff
+ if sMin < 0 {
+ sMin = 0
+ }
+ for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += length + 2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, length)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ // Distances from s to the two table candidates.
+ coffset0 := s - (candidate.offset - e.cur)
+ coffset1 := s - (candidate2.offset - e.cur) + 1
+ if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
+ // found a regular match
+ t = candidate.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ break
+ }
+
+ if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
+ // found a regular match
+ t = candidate2.offset - e.cur
+ s++
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ break
+ }
+ // No match: advance, stepping further the longer we go without emitting.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // A 4-byte match has been found. We'll later see if more than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the 4-byte match as long as possible.
+ //l := e.matchlen(s+4, t+4, src) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence.
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ // Don't use repeat offsets
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+
+ // Check offset 2
+ if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ //l := 4 + e.matchlen(s+4, o2+4, src)
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Store this, since we have it.
+ nextHash := hash6(cv, hashLog)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ break encodeLoop
+ }
+ // Prepare next loop.
+ cv = load6432(src, s)
+ }
+ }
+
+ // Whatever follows the last sequence is stored as trailing literals.
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// EncodeNoHist will encode a block with no history and no following blocks.
+// Most notable difference is that src will not be copied for history and
+// we do not need to check for max match length.
+// The literals and sequences found are appended to blk. Unlike Encode, the
+// recent offsets are read from blk.recentOffsets but not written back.
+func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ const (
+ inputMargin = 8
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+ if debug {
+ if len(src) > maxBlockSize {
+ panic("src too big")
+ }
+ }
+
+ // Protect against e.cur wraparound.
+ if e.cur >= bufferReset {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = e.maxMatchOff
+ }
+
+ s := int32(0)
+ blk.size = len(src)
+ // Input shorter than minNonLiteralBlockSize is stored as literals only.
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 2.
+ const stepSize = 2
+
+ // TEMPLATE
+ const hashLog = tableBits
+ // seems global, but would be nice to tweak.
+ const kSearchStrength = 8
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ // addLiterals appends src[nextEmit:until] to the block literals and records its length in s.
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ // t will contain the match offset when we find one.
+ // When exiting the search loop, we have already checked 4 bytes.
+ var t int32
+
+ // We will not use repeat offsets across blocks.
+ // By not using them for the first 3 matches
+
+ for {
+ nextHash := hash6(cv, hashLog)
+ nextHash2 := hash6(cv>>8, hashLog)
+ candidate := e.table[nextHash]
+ candidate2 := e.table[nextHash2]
+ repIndex := s - offset1 + 2
+
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
+
+ // Try the most recent offset first, two bytes ahead of s.
+ if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
+ // Consider history as well.
+ var seq seq
+ // length := 4 + e.matchlen(s+6, repIndex+4, src)
+ // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
+ var length int32
+ {
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + 2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ sMin := s - e.maxMatchOff
+ if sMin < 0 {
+ sMin = 0
+ }
+ for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += length + 2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, length)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ // Distances from s to the two table candidates.
+ coffset0 := s - (candidate.offset - e.cur)
+ coffset1 := s - (candidate2.offset - e.cur) + 1
+ if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
+ // found a regular match
+ t = candidate.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
+ }
+ break
+ }
+
+ if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
+ // found a regular match
+ t = candidate2.offset - e.cur
+ s++
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ break
+ }
+ // No match: advance, stepping further the longer we go without emitting.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // A 4-byte match has been found. We'll later see if more than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0 ", t))
+ }
+ // Extend the 4-byte match as long as possible.
+ //l := e.matchlenNoHist(s+4, t+4, src) + 4
+ // l := int32(matchLen(src[s+4:], src[t+4:])) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence.
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ // Don't use repeat offsets
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+
+ // Check offset 2
+ if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ //l := 4 + e.matchlenNoHist(s+4, o2+4, src)
+ // l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Store this, since we have it.
+ nextHash := hash6(cv, hashLog)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ break encodeLoop
+ }
+ // Prepare next loop.
+ cv = load6432(src, s)
+ }
+ }
+
+ // Whatever follows the last sequence is stored as trailing literals.
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+ // We do not store history, so we must offset e.cur to avoid false matches for next user.
+ if e.cur < bufferReset {
+ e.cur += int32(len(src))
+ }
+}
+
+// Encode will encode the content, with a dictionary if initialized for it.
+// It follows the same steps as fastEncoder.Encode and additionally marks every
+// table shard it writes to as dirty. When e.allDirty is already set, or src is
+// larger than 32<<10 bytes, the work is handed to fastEncoder.Encode and the
+// whole table is flagged dirty instead.
+func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ inputMargin = 8
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
+ if e.allDirty || len(src) > 32<<10 {
+ e.fastEncoder.Encode(blk, src)
+ e.allDirty = true
+ return
+ }
+ // Protect against e.cur wraparound.
+ // The loop body always ends in break, so this runs at most once.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ // s is the position returned by addBlock; encoding starts there once src is switched to e.hist below.
+ s := e.addBlock(src)
+ blk.size = len(src)
+ // Input shorter than minNonLiteralBlockSize is stored as literals only.
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 2.
+ const stepSize = 2
+
+ // TEMPLATE
+ const hashLog = tableBits
+ // seems global, but would be nice to tweak.
+ const kSearchStrength = 7
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ // addLiterals appends src[nextEmit:until] to the block literals and records its length in s.
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ // t will contain the match offset when we find one.
+ // When exiting the search loop, we have already checked 4 bytes.
+ var t int32
+
+ // We will not use repeat offsets across blocks.
+ // By not using them for the first 3 matches
+ canRepeat := len(blk.sequences) > 2
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHash := hash6(cv, hashLog)
+ nextHash2 := hash6(cv>>8, hashLog)
+ candidate := e.table[nextHash]
+ candidate2 := e.table[nextHash2]
+ repIndex := s - offset1 + 2
+
+ // Every table write is paired with a dirty mark so Reset knows which shards to restore.
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShardDirty(nextHash)
+ e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
+ e.markShardDirty(nextHash2)
+
+ // Try the most recent offset first, two bytes ahead of s.
+ if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+ // Consider history as well.
+ var seq seq
+ var length int32
+ // length = 4 + e.matchlen(s+6, repIndex+4, src)
+ {
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + 2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ sMin := s - e.maxMatchOff
+ if sMin < 0 {
+ sMin = 0
+ }
+ for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += length + 2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, length)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ // Distances from s to the two table candidates.
+ coffset0 := s - (candidate.offset - e.cur)
+ coffset1 := s - (candidate2.offset - e.cur) + 1
+ if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
+ // found a regular match
+ t = candidate.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ break
+ }
+
+ if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
+ // found a regular match
+ t = candidate2.offset - e.cur
+ s++
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ break
+ }
+ // No match: advance, stepping further the longer we go without emitting.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // A 4-byte match has been found. We'll later see if more than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the 4-byte match as long as possible.
+ //l := e.matchlen(s+4, t+4, src) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence.
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ // Don't use repeat offsets
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+
+ // Check offset 2
+ if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ //l := 4 + e.matchlen(s+4, o2+4, src)
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Store this, since we have it.
+ nextHash := hash6(cv, hashLog)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShardDirty(nextHash)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ break encodeLoop
+ }
+ // Prepare next loop.
+ cv = load6432(src, s)
+ }
+ }
+
+ // Whatever follows the last sequence is stored as trailing literals.
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// Reset resets the shared encoder state through resetBase.
+// fastEncoder cannot work with a dictionary, so it panics when d is not nil;
+// the base state has already been reset at that point.
+func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
+	e.resetBase(d, singleBlock)
+	if d == nil {
+		return
+	}
+	panic("fastEncoder: Reset with dict")
+}
+
+// Reset resets the encoder and, when d is not nil, primes the hash table from it.
+//
+// The hash table derived from the dictionary is kept in e.dictTable and is
+// rebuilt only when it is missing or d has a different id than the previous
+// dictionary. e.table is then restored from it: with one full copy when all
+// entries may have changed or more than tableShardCnt*4/6 shards are dirty,
+// and shard by shard otherwise.
+func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
+	e.resetBase(d, singleBlock)
+	if d == nil {
+		return
+	}
+
+	// Init or copy dict table
+	sizeMismatch := len(e.dictTable) != len(e.table)
+	if sizeMismatch || d.id != e.lastDictID {
+		if sizeMismatch {
+			e.dictTable = make([]tableEntry, len(e.table))
+		}
+		// Each 8 byte load yields entries for three consecutive positions
+		// (bytes 0-5, 1-6 and 2-7), so the scan advances three bytes at a time.
+		last := e.maxMatchOff + int32(len(d.content)) - 8
+		for pos := e.maxMatchOff; pos < last; pos += 3 {
+			loaded := load6432(d.content, pos-e.maxMatchOff)
+			for k := int32(0); k < 3; k++ {
+				shifted := loaded >> (8 * uint(k))
+				e.dictTable[hash6(shifted, tableBits)] = tableEntry{
+					val:    uint32(shifted),
+					offset: pos + k,
+				}
+			}
+		}
+		e.lastDictID = d.id
+		e.allDirty = true
+	}
+
+	e.cur = e.maxMatchOff
+
+	// Decide between one full copy and copying only the dirty shards.
+	fullCopy := e.allDirty
+	if !fullCopy {
+		dirty := 0
+		for shard := range e.tableShardDirty {
+			if e.tableShardDirty[shard] {
+				dirty++
+			}
+		}
+		fullCopy = dirty > tableShardCnt*4/6
+	}
+
+	if fullCopy {
+		copy(e.table[:], e.dictTable)
+		e.tableShardDirty = [tableShardCnt]bool{}
+	} else {
+		for shard := range e.tableShardDirty {
+			if !e.tableShardDirty[shard] {
+				continue
+			}
+			lo, hi := shard*tableShardSize, (shard+1)*tableShardSize
+			copy(e.table[lo:hi], e.dictTable[lo:hi])
+			e.tableShardDirty[shard] = false
+		}
+	}
+	e.allDirty = false
+}
+
+// markAllShardsDirty flags the whole table as modified, which makes the next
+// Reset with a dictionary copy the complete dictionary table.
+func (e *fastEncoderDict) markAllShardsDirty() {
+ e.allDirty = true
+}
+
+// markShardDirty records that the table shard holding entryNum has been
+// written to, so that Reset restores it from the dictionary table.
+func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
+	shard := entryNum / tableShardSize
+	e.tableShardDirty[shard] = true
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
new file mode 100644
index 0000000..4871dd0
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -0,0 +1,576 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "crypto/rand"
+ "fmt"
+ "io"
+ rdebug "runtime/debug"
+ "sync"
+
+ "github.com/klauspost/compress/zstd/internal/xxhash"
+)
+
+// Encoder provides encoding to Zstandard.
+// An Encoder can be used for either compressing a stream via the
+// io.WriteCloser interface supported by the Encoder or as multiple independent
+// tasks via the EncodeAll function.
+// Smaller encodes are encouraged to use the EncodeAll function.
+// Use NewWriter to create a new instance.
+type Encoder struct {
+ // o holds the options; NewWriter applies the defaults and then the supplied EOptions.
+ o encoderOptions
+ // encoders is filled by initialize with e.o.concurrent block encoders.
+ encoders chan encoder
+ // state is the streaming state used by Reset, Write, ReadFrom and nextBlock.
+ state encoderState
+ // init presumably guards a one-time call to initialize; its use is outside this part of the file.
+ init sync.Once
+}
+
+// encoder is the set of operations Encoder needs from a block encoder.
+type encoder interface {
+ // Encode encodes src into blk.
+ Encode(blk *blockEnc, src []byte)
+ // EncodeNoHist encodes src into blk as a block with no history and no following blocks.
+ EncodeNoHist(blk *blockEnc, src []byte)
+ // Block returns the block that nextBlock passes to Encode.
+ Block() *blockEnc
+ // CRC returns the digest that Write and ReadFrom feed the input into when checksums are enabled.
+ CRC() *xxhash.Digest
+ // AppendCRC presumably appends the checksum to the given slice; no caller is visible in this part of the file.
+ AppendCRC([]byte) []byte
+ // WindowSize returns the window size; nextBlock stores WindowSize(0) in the frame header.
+ WindowSize(size int) int32
+ // UseBlock hands the encoder the block to use next; nextBlock calls it to carry the recent offsets over.
+ UseBlock(*blockEnc)
+ // Reset resets the encoder, with d as dictionary unless it is nil.
+ Reset(d *dict, singleBlock bool)
+}
+
+// encoderState is the state of a streaming encode on an Encoder.
+type encoderState struct {
+ // w is the destination of the compressed output, set by Reset.
+ w io.Writer
+ // filling collects input until a full block is available.
+ filling []byte
+ // current is the block handed to the encoding goroutine by nextBlock.
+ current []byte
+ // previous is the block that was current before the last rotation in nextBlock.
+ previous []byte
+ // encoder is the block encoder used for this stream.
+ encoder encoder
+ // writing is the block most recently handed to the writer goroutine.
+ writing *blockEnc
+ // err is the error reported by nextBlock; it is set by writes, reads and the encoding goroutine.
+ err error
+ // writeErr is set by the writer goroutine and copied into err by the next encode.
+ writeErr error
+ // nWritten counts the output bytes passed to w.
+ nWritten int64
+ // headerWritten is set once the frame header has been produced.
+ headerWritten bool
+ // eofWritten is set once the last block has been produced, so it is only written once.
+ eofWritten bool
+ // fullFrameWritten is set when nextBlock finished the whole frame in a single call.
+ fullFrameWritten bool
+
+ // This waitgroup indicates an encode is running.
+ wg sync.WaitGroup
+ // This waitgroup indicates we have a block encoding/writing.
+ wWg sync.WaitGroup
+}
+
+// NewWriter will create a new Zstandard encoder.
+// If the encoder will be used for encoding blocks a nil writer can be used.
+// The options are applied in order on top of the defaults; the first option
+// that fails aborts the construction and its error is returned.
+func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) {
+	initPredefined()
+	enc := new(Encoder)
+	enc.o.setDefault()
+	for _, apply := range opts {
+		if err := apply(&enc.o); err != nil {
+			return nil, err
+		}
+	}
+	if w == nil {
+		// Block-only use: the streaming state is left untouched.
+		return enc, nil
+	}
+	enc.Reset(w)
+	return enc, nil
+}
+
+// initialize creates e.encoders and fills it with e.o.concurrent block encoders.
+// The default options are applied first when no concurrency has been configured.
+func (e *Encoder) initialize() {
+	if e.o.concurrent == 0 {
+		e.o.setDefault()
+	}
+	remaining := e.o.concurrent
+	e.encoders = make(chan encoder, remaining)
+	for ; remaining > 0; remaining-- {
+		e.encoders <- e.o.encoder()
+	}
+}
+
+// Reset will re-initialize the writer and new writes will encode to the supplied writer
+// as a new, independent stream.
+// It waits for a running encode and a pending write to finish before touching the state.
+// Buffers, the block encoder and the output block are created on first use and reused afterwards.
+func (e *Encoder) Reset(w io.Writer) {
+	st := &e.state
+	st.wg.Wait()
+	st.wWg.Wait()
+
+	// Make sure the three block buffers exist, then empty them.
+	for _, buf := range []*[]byte{&st.filling, &st.current, &st.previous} {
+		if cap(*buf) == 0 {
+			*buf = make([]byte, 0, e.o.blockSize)
+		}
+		*buf = (*buf)[:0]
+	}
+
+	if st.encoder == nil {
+		st.encoder = e.o.encoder()
+	}
+	if st.writing == nil {
+		st.writing = &blockEnc{lowMem: e.o.lowMem}
+		st.writing.init()
+	}
+	st.writing.initNewEncode()
+	st.encoder.Reset(e.o.dict, false)
+
+	// Start the new stream with clean bookkeeping.
+	st.w = w
+	st.nWritten = 0
+	st.err, st.writeErr = nil, nil
+	st.headerWritten, st.eofWritten, st.fullFrameWritten = false, false, false
+}
+
+// Write data to the encoder.
+// Input data will be buffered and as the buffer fills up
+// content will be compressed and written to the output.
+// When done writing, use Close to flush the remaining output
+// and write CRC if requested.
+// It returns the number of bytes of p that were consumed and the error
+// reported by nextBlock, if any.
+func (e *Encoder) Write(p []byte) (n int, err error) {
+ s := &e.state
+ for len(p) > 0 {
+ // Everything fits without completing a block: just buffer it.
+ if len(p)+len(s.filling) < e.o.blockSize {
+ if e.o.crc {
+ _, _ = s.encoder.CRC().Write(p)
+ }
+ s.filling = append(s.filling, p...)
+ return n + len(p), nil
+ }
+ // Take only as much of p as is needed to complete the block.
+ add := p
+ if len(p)+len(s.filling) > e.o.blockSize {
+ add = add[:e.o.blockSize-len(s.filling)]
+ }
+ if e.o.crc {
+ _, _ = s.encoder.CRC().Write(add)
+ }
+ s.filling = append(s.filling, add...)
+ p = p[len(add):]
+ n += len(add)
+ if len(s.filling) < e.o.blockSize {
+ return n, nil
+ }
+ // The block is full; hand it to the encoder.
+ err := e.nextBlock(false)
+ if err != nil {
+ return n, err
+ }
+ if debugAsserts && len(s.filling) > 0 {
+ panic(len(s.filling))
+ }
+ }
+ return n, nil
+}
+
+// nextBlock will synchronize and start compressing input in e.state.filling.
+// If an error has occurred during encoding it will be returned.
+// final marks the input as the last of the stream. The frame header is written
+// on the first call; a stream that ends on that first call is either left
+// empty or encoded synchronously with EncodeAll. Otherwise the block is
+// encoded in a goroutine, which in turn starts a second goroutine that
+// produces the output and writes it to the destination.
+func (e *Encoder) nextBlock(final bool) error {
+ s := &e.state
+ // Wait for current block.
+ s.wg.Wait()
+ if s.err != nil {
+ return s.err
+ }
+ if len(s.filling) > e.o.blockSize {
+ return fmt.Errorf("block > maxStoreBlockSize")
+ }
+ if !s.headerWritten {
+ // If we have a single block encode, do a sync compression.
+ // With no input at all, and fullZero not set, nothing is written.
+ if final && len(s.filling) == 0 && !e.o.fullZero {
+ s.headerWritten = true
+ s.fullFrameWritten = true
+ s.eofWritten = true
+ return nil
+ }
+ if final && len(s.filling) > 0 {
+ s.current = e.EncodeAll(s.filling, s.current[:0])
+ var n2 int
+ n2, s.err = s.w.Write(s.current)
+ if s.err != nil {
+ return s.err
+ }
+ s.nWritten += int64(n2)
+ s.current = s.current[:0]
+ s.filling = s.filling[:0]
+ s.headerWritten = true
+ s.fullFrameWritten = true
+ s.eofWritten = true
+ return nil
+ }
+
+ // More blocks may follow: write a frame header without a content size.
+ var tmp [maxHeaderSize]byte
+ fh := frameHeader{
+ ContentSize: 0,
+ WindowSize: uint32(s.encoder.WindowSize(0)),
+ SingleSegment: false,
+ Checksum: e.o.crc,
+ DictID: e.o.dict.ID(),
+ }
+
+ dst, err := fh.appendTo(tmp[:0])
+ if err != nil {
+ return err
+ }
+ s.headerWritten = true
+ s.wWg.Wait()
+ var n2 int
+ n2, s.err = s.w.Write(dst)
+ if s.err != nil {
+ return s.err
+ }
+ s.nWritten += int64(n2)
+ }
+ if s.eofWritten {
+ // Ensure we only write it once.
+ final = false
+ }
+
+ if len(s.filling) == 0 {
+ // Final block, but no data.
+ if final {
+ enc := s.encoder
+ blk := enc.Block()
+ blk.reset(nil)
+ blk.last = true
+ blk.encodeRaw(nil)
+ s.wWg.Wait()
+ _, s.err = s.w.Write(blk.output)
+ s.nWritten += int64(len(blk.output))
+ s.eofWritten = true
+ }
+ return s.err
+ }
+
+ // Move blocks forward.
+ s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
+ s.wg.Add(1)
+ go func(src []byte) {
+ if debug {
+ println("Adding block,", len(src), "bytes, final:", final)
+ }
+ // A panic while encoding is turned into s.err instead of crashing the program.
+ defer func() {
+ if r := recover(); r != nil {
+ s.err = fmt.Errorf("panic while encoding: %v", r)
+ rdebug.PrintStack()
+ }
+ s.wg.Done()
+ }()
+ enc := s.encoder
+ blk := enc.Block()
+ enc.Encode(blk, src)
+ blk.last = final
+ if final {
+ s.eofWritten = true
+ }
+ // Wait for pending writes.
+ s.wWg.Wait()
+ if s.writeErr != nil {
+ s.err = s.writeErr
+ return
+ }
+ // Transfer encoders from previous write block.
+ blk.swapEncoders(s.writing)
+ // Transfer recent offsets to next.
+ enc.UseBlock(s.writing)
+ s.writing = blk
+ s.wWg.Add(1)
+ go func() {
+ // A panic while producing or writing the output is turned into s.writeErr.
+ defer func() {
+ if r := recover(); r != nil {
+ s.writeErr = fmt.Errorf("panic while encoding/writing: %v", r)
+ rdebug.PrintStack()
+ }
+ s.wWg.Done()
+ }()
+ err := errIncompressible
+ // If we got the exact same number of literals as input,
+ // assume the literals cannot be compressed.
+ if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
+ err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+ }
+ switch err {
+ case errIncompressible:
+ if debug {
+ println("Storing incompressible block as raw")
+ }
+ blk.encodeRaw(src)
+ // In fast mode, we do not transfer offsets, so we don't have to deal with changing them.
+ case nil:
+ default:
+ s.writeErr = err
+ return
+ }
+ _, s.writeErr = s.w.Write(blk.output)
+ s.nWritten += int64(len(blk.output))
+ }()
+ }(s.current)
+ return nil
+}
+
+// ReadFrom reads data from r until EOF or error.
+// The return value n is the number of bytes read.
+// Any error except io.EOF encountered during the read is also returned.
+// On io.EOF the bytes read so far stay in the filling buffer; no final block is written here.
+// The Copy function uses ReaderFrom if available.
+func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
+ if debug {
+ println("Using ReadFrom")
+ }
+
+ // Queue any already buffered input as a non-final block first.
+ if len(e.state.filling) > 0 {
+ if err := e.nextBlock(false); err != nil {
+ return 0, err
+ }
+ }
+ e.state.filling = e.state.filling[:e.o.blockSize] // expose one full block of buffer to read into
+ src := e.state.filling
+ for {
+ n2, err := r.Read(src)
+ if e.o.crc {
+ _, _ = e.state.encoder.CRC().Write(src[:n2]) // checksum covers exactly the bytes just read
+ }
+ // src is now the unfilled part...
+ src = src[n2:]
+ n += int64(n2)
+ switch err {
+ case io.EOF:
+ e.state.filling = e.state.filling[:len(e.state.filling)-len(src)] // trim to the bytes actually read
+ if debug {
+ println("ReadFrom: got EOF final block:", len(e.state.filling))
+ }
+ return n, nil
+ case nil:
+ default:
+ if debug {
+ println("ReadFrom: got error:", err)
+ }
+ e.state.err = err // remember the read error on the encoder state
+ return n, err
+ }
+ if len(src) > 0 {
+ if debug {
+ println("ReadFrom: got space left in source:", len(src))
+ }
+ continue
+ }
+ err = e.nextBlock(false) // block is full: hand it off and start filling the next one
+ if err != nil {
+ return n, err
+ }
+ e.state.filling = e.state.filling[:e.o.blockSize]
+ src = e.state.filling
+ }
+}
+
+// Flush will send the currently written data to output
+// and block until everything has been written, returning any stored encode or write error.
+// This should only be used on rare occasions where pushing the currently queued data is critical.
+func (e *Encoder) Flush() error {
+ s := &e.state
+ if len(s.filling) > 0 {
+ err := e.nextBlock(false) // queue buffered input as a non-final block
+ if err != nil {
+ return err
+ }
+ }
+ s.wg.Wait() // wait for the encode goroutine started by nextBlock
+ s.wWg.Wait() // wait for the pending write goroutine
+ if s.err != nil {
+ return s.err
+ }
+ return s.writeErr
+}
+
+// Close will flush the final output and close the stream.
+// The function will block until everything has been written, then append the CRC and padding if enabled.
+// The Encoder can still be re-used after calling this.
+func (e *Encoder) Close() error {
+ s := &e.state
+ if s.encoder == nil { // no encoder on the state: nothing to finish
+ return nil
+ }
+ err := e.nextBlock(true) // emit the final block
+ if err != nil {
+ return err
+ }
+ if e.state.fullFrameWritten { // nextBlock already marked the whole frame, header through EOF, as written
+ return s.err
+ }
+ s.wg.Wait() // wait for the encode goroutine
+ s.wWg.Wait() // wait for the write goroutine
+
+ if s.err != nil {
+ return s.err
+ }
+ if s.writeErr != nil {
+ return s.writeErr
+ }
+
+ // Write CRC
+ if e.o.crc && s.err == nil {
+ // heap alloc.
+ var tmp [4]byte
+ _, s.err = s.w.Write(s.encoder.AppendCRC(tmp[:0]))
+ s.nWritten += 4 // counted even when the write failed; s.err then stops the padding step below
+ }
+
+ // Add padding with content from crypto/rand.Reader
+ if s.err == nil && e.o.pad > 0 {
+ add := calcSkippableFrame(s.nWritten, int64(e.o.pad))
+ frame, err := skippableFrame(s.filling[:0], add, rand.Reader) // reuses the filling buffer as scratch space
+ if err != nil {
+ return err
+ }
+ _, s.err = s.w.Write(frame)
+ }
+ return s.err
+}
+
+// EncodeAll will encode all input in src and append it to dst.
+// This function can be called concurrently, but each call will only run on a single goroutine.
+// If empty input is given, dst is returned unchanged, unless WithZeroFrames is specified.
+// Encoded blocks can be concatenated and the result will be the combined input stream.
+// Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or DecodeAll.
+// Unexpected errors from header, block or padding encoding cause a panic, since no error is returned.
+func (e *Encoder) EncodeAll(src, dst []byte) []byte {
+ if len(src) == 0 {
+ if e.o.fullZero {
+ // Add frame header.
+ fh := frameHeader{
+ ContentSize: 0,
+ WindowSize: MinWindowSize,
+ SingleSegment: true,
+ // Adding a checksum would be a waste of space.
+ Checksum: false,
+ DictID: 0,
+ }
+ dst, _ = fh.appendTo(dst)
+
+ // Write raw block as last one only.
+ var blk blockHeader
+ blk.setSize(0)
+ blk.setType(blockTypeRaw)
+ blk.setLast(true)
+ dst = blk.appendTo(dst)
+ }
+ return dst
+ }
+ e.init.Do(e.initialize)
+ enc := <-e.encoders // borrow an encoder; the deferred func sends it back
+ defer func() {
+ // Release encoder reference to last block.
+ // If a non-single block is needed the encoder will reset again.
+ e.encoders <- enc
+ }()
+ // Use single segments when above minimum window and below 1MB.
+ single := len(src) < 1<<20 && len(src) > MinWindowSize
+ if e.o.single != nil { // an explicit WithSingleSegment choice wins over the size heuristic
+ single = *e.o.single
+ }
+ fh := frameHeader{
+ ContentSize: uint64(len(src)),
+ WindowSize: uint32(enc.WindowSize(len(src))),
+ SingleSegment: single,
+ Checksum: e.o.crc,
+ DictID: e.o.dict.ID(),
+ }
+
+ // If less than 1MB, allocate a buffer up front.
+ if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
+ dst = make([]byte, 0, len(src))
+ }
+ dst, err := fh.appendTo(dst)
+ if err != nil {
+ panic(err)
+ }
+
+ // If we can do everything in one block, prefer that.
+ if len(src) <= maxCompressedBlockSize {
+ enc.Reset(e.o.dict, true)
+ // Slightly faster with no history and everything in one block.
+ if e.o.crc {
+ _, _ = enc.CRC().Write(src)
+ }
+ blk := enc.Block()
+ blk.last = true
+ if e.o.dict == nil {
+ enc.EncodeNoHist(blk, src)
+ } else {
+ enc.Encode(blk, src)
+ }
+
+ // If we got the exact same number of literals as input,
+ // assume the literals cannot be compressed.
+ err := errIncompressible
+ oldout := blk.output // remember the block's own buffer; it is restored after writing into dst
+ if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
+ // Output directly to dst
+ blk.output = dst
+ err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
+ }
+
+ switch err {
+ case errIncompressible:
+ if debug {
+ println("Storing incompressible block as raw")
+ }
+ dst = blk.encodeRawTo(dst, src)
+ case nil:
+ dst = blk.output
+ default:
+ panic(err)
+ }
+ blk.output = oldout
+ } else {
+ enc.Reset(e.o.dict, false)
+ blk := enc.Block()
+ for len(src) > 0 {
+ todo := src // todo is the next slice of at most blockSize bytes
+ if len(todo) > e.o.blockSize {
+ todo = todo[:e.o.blockSize]
+ }
+ src = src[len(todo):]
+ if e.o.crc {
+ _, _ = enc.CRC().Write(todo)
+ }
+ blk.pushOffsets() // presumably saves the offsets so popOffsets can restore them if the block is stored raw
+ enc.Encode(blk, todo)
+ if len(src) == 0 {
+ blk.last = true
+ }
+ err := errIncompressible
+ // If we got the exact same number of literals as input,
+ // assume the literals cannot be compressed.
+ if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
+ err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
+ }
+
+ switch err {
+ case errIncompressible:
+ if debug {
+ println("Storing incompressible block as raw")
+ }
+ dst = blk.encodeRawTo(dst, todo)
+ blk.popOffsets()
+ case nil:
+ dst = append(dst, blk.output...)
+ default:
+ panic(err)
+ }
+ blk.reset(nil)
+ }
+ }
+ if e.o.crc {
+ dst = enc.AppendCRC(dst)
+ }
+ // Add padding with content from crypto/rand.Reader
+ if e.o.pad > 0 {
+ add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) // len(dst) includes any bytes the caller already had in dst
+ dst, err = skippableFrame(dst, add, rand.Reader)
+ if err != nil {
+ panic(err)
+ }
+ }
+ return dst
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
new file mode 100644
index 0000000..16d4ab6
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -0,0 +1,312 @@
+package zstd
+
+import (
+ "errors"
+ "fmt"
+ "runtime"
+ "strings"
+)
+
+// EOption is an option for creating an encoder; it mutates the options and may reject invalid values.
+type EOption func(*encoderOptions) error
+
+// encoderOptions retains the accumulated state of all applied options.
+type encoderOptions struct {
+ concurrent int // maximum number of encoders to run concurrently (WithEncoderConcurrency)
+ level EncoderLevel // selects the encoder implementation in encoder()
+ single *bool // explicit single-segment choice for EncodeAll; nil means decide by input size
+ pad int // pad output to a multiple of this many bytes; values <= 0 disable padding
+ blockSize int // never larger than windowSize once WithWindowSize has run
+ windowSize int // maximum back-reference distance
+ crc bool // append a checksum to each frame
+ fullZero bool // encode empty input as a full frame (WithZeroFrames)
+ noEntropy bool // WithNoEntropyCompression
+ allLitEntropy bool // WithAllLitEntropyCompression
+ customWindow bool // set by WithWindowSize; keeps WithEncoderLevel from changing windowSize
+ customALEntropy bool // set by WithAllLitEntropyCompression; keeps WithEncoderLevel from changing allLitEntropy
+ lowMem bool // WithLowerEncoderMem
+ dict *dict // dictionary loaded by WithEncoderDict, or nil
+}
+
+func (o *encoderOptions) setDefault() { // overwrites every field of o with the package defaults
+ *o = encoderOptions{
+ concurrent: runtime.GOMAXPROCS(0),
+ crc: true,
+ single: nil,
+ blockSize: 1 << 16, // 64 KiB
+ windowSize: 8 << 20, // 8 MiB, the same value WithEncoderLevel picks for SpeedDefault
+ level: SpeedDefault,
+ allLitEntropy: true,
+ lowMem: false,
+ }
+}
+
+// encoder returns a new encoder for o.level, using the dictionary variant when o.dict is set and one is listed.
+func (o encoderOptions) encoder() encoder {
+ switch o.level {
+ case SpeedFastest:
+ if o.dict != nil {
+ return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ }
+ return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+
+ case SpeedDefault:
+ if o.dict != nil {
+ return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+ }
+ return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ case SpeedBetterCompression:
+ if o.dict != nil {
+ return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ }
+ return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ case SpeedBestCompression: // no separate dictionary type is selected for this level
+ return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ }
+ panic("unknown compression level") // o.level matched none of the four Speed* levels
+}
+
+// WithEncoderCRC will add CRC value to output.
+// Output will be 4 bytes larger. The default set by setDefault is enabled.
+func WithEncoderCRC(b bool) EOption {
+ return func(o *encoderOptions) error { o.crc = b; return nil }
+}
+
+// WithEncoderConcurrency will set the concurrency,
+// meaning the maximum number of encoders to run concurrently.
+// The value supplied must be at least 1; otherwise the option returns an error.
+// By default this will be set to GOMAXPROCS.
+func WithEncoderConcurrency(n int) EOption {
+ return func(o *encoderOptions) error {
+ if n <= 0 {
+ return fmt.Errorf("concurrency must be at least 1")
+ }
+ o.concurrent = n
+ return nil
+ }
+}
+
+// WithWindowSize will set the maximum allowed back-reference distance.
+// The value must be a power of two between MinWindowSize and MaxWindowSize.
+// A larger value will enable better compression but allocate more memory and,
+// for above-default values, take considerably longer.
+// The default value is determined by the compression level.
+func WithWindowSize(n int) EOption {
+ return func(o *encoderOptions) error {
+ switch {
+ case n < MinWindowSize:
+ return fmt.Errorf("window size must be at least %d", MinWindowSize)
+ case n > MaxWindowSize:
+ return fmt.Errorf("window size must be at most %d", MaxWindowSize)
+ case (n & (n - 1)) != 0: // more than one bit set, so not a power of two
+ return errors.New("window size must be a power of 2")
+ }
+
+ o.windowSize = n
+ o.customWindow = true // a WithEncoderLevel applied afterwards will not replace this window size
+ if o.blockSize > o.windowSize { // shrink the block size so it never exceeds the window
+ o.blockSize = o.windowSize
+ }
+ return nil
+ }
+}
+
+// WithEncoderPadding will add padding to all output so the size will be a multiple of n.
+// This can be used to obfuscate the exact output size or make blocks of a certain size.
+// The contents will be a skippable frame, so it will be invisible by the decoder.
+// n must be > 0 and <= 1GB, 1<<30 bytes. n == 1 disables padding, as every size is a multiple of 1.
+// The padded area will be filled with data from crypto/rand.Reader.
+// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
+func WithEncoderPadding(n int) EOption {
+	return func(o *encoderOptions) error {
+		if n <= 0 {
+			return fmt.Errorf("padding must be at least 1")
+		}
+		if n > 1<<30 {
+			return fmt.Errorf("padding must be at most 1GB (1<<30 bytes)")
+		}
+		o.pad = n
+		if n == 1 {
+			// No need to waste our time: a multiple of 1 never needs padding, so switch it off.
+			o.pad = 0
+		}
+		return nil
+	}
+}
+
+// EncoderLevel predefines encoder compression levels.
+// Only use the constants made available, since the actual mapping
+// of these values is very likely to change and your compression could change
+// unpredictably when upgrading the library.
+type EncoderLevel int
+
+const (
+ speedNotSet EncoderLevel = iota // zero value; rejected by WithEncoderLevel
+
+ // SpeedFastest will choose the fastest reasonable compression.
+ // This is roughly equivalent to the fastest Zstandard mode.
+ SpeedFastest
+
+ // SpeedDefault is the default "pretty fast" compression option.
+ // This is roughly equivalent to the default Zstandard mode (level 3).
+ SpeedDefault
+
+ // SpeedBetterCompression will yield better compression than the default.
+ // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
+ // By using this, notice that CPU usage may go up in the future.
+ SpeedBetterCompression
+
+ // SpeedBestCompression will choose the best available compression option.
+ // This will offer the best compression no matter the CPU cost.
+ SpeedBestCompression
+
+ // speedLast should be kept as the last actual compression option.
+ // This is not for external usage, but is used to keep track of the valid options.
+ speedLast
+)
+
+// EncoderLevelFromString will convert a string representation of an encoding level back
+// to a compression level. The comparison against EncoderLevel.String is not case sensitive.
+// If the string wasn't recognized, (false, SpeedDefault) will be returned.
+func EncoderLevelFromString(s string) (bool, EncoderLevel) {
+ for l := speedNotSet + 1; l < speedLast; l++ { // every level strictly between the two unexported sentinels
+ if strings.EqualFold(s, l.String()) {
+ return true, l
+ }
+ }
+ return false, SpeedDefault
+}
+
+// EncoderLevelFromZstd will return an encoder level that closest matches the compression
+// ratio of a specific zstd compression level.
+// Many input values will provide the same compression level: <3, 3-5, 6-9 and >=10 each map to one level.
+func EncoderLevelFromZstd(level int) EncoderLevel {
+	switch {
+	case level < 3:
+		return SpeedFastest
+	case level < 6:
+		return SpeedDefault
+	case level < 10:
+		return SpeedBetterCompression
+	default:
+		// Levels 10 and up ask for the strongest compression on offer.
+		return SpeedBestCompression
+	}
+}
+
+// String provides a string representation of the compression level, or "invalid" for any other value.
+func (e EncoderLevel) String() string {
+ switch e {
+ case SpeedFastest:
+ return "fastest"
+ case SpeedDefault:
+ return "default"
+ case SpeedBetterCompression:
+ return "better"
+ case SpeedBestCompression:
+ return "best"
+ default: // speedNotSet, speedLast and anything out of range
+ return "invalid"
+ }
+}
+
+// WithEncoderLevel specifies a predefined compression level; it also picks the window size and literal entropy default unless those were set explicitly before.
+func WithEncoderLevel(l EncoderLevel) EOption {
+ return func(o *encoderOptions) error {
+ switch {
+ case l <= speedNotSet || l >= speedLast: // outside the range of exported levels
+ return fmt.Errorf("unknown encoder level")
+ }
+ o.level = l
+ if !o.customWindow { // WithWindowSize has not been applied: use the per-level window
+ switch o.level {
+ case SpeedFastest:
+ o.windowSize = 4 << 20
+ case SpeedDefault:
+ o.windowSize = 8 << 20
+ case SpeedBetterCompression:
+ o.windowSize = 16 << 20
+ case SpeedBestCompression:
+ o.windowSize = 32 << 20
+ }
+ }
+ if !o.customALEntropy { // WithAllLitEntropyCompression has not been applied
+ o.allLitEntropy = l > SpeedFastest
+ }
+
+ return nil
+ }
+}
+
+// WithZeroFrames will encode 0 length input as full frames.
+// This can be needed for compatibility with zstandard usage,
+// but is not needed for this package.
+func WithZeroFrames(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.fullZero = b // consulted by EncodeAll when src is empty
+ return nil
+ }
+}
+
+// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
+// Disabling this will skip incompressible data faster, but in cases with no matches but
+// skewed character distribution compression is lost.
+// Default value depends on the compression level selected.
+func WithAllLitEntropyCompression(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.customALEntropy = true // keeps a WithEncoderLevel applied afterwards from resetting allLitEntropy
+ o.allLitEntropy = b
+ return nil
+ }
+}
+
+// WithNoEntropyCompression will always skip entropy compression of literals.
+// This can be useful if content has matches, but unlikely to benefit from entropy
+// compression. Usually the slight speed improvement is not worth enabling this.
+func WithNoEntropyCompression(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.noEntropy = b // handed to the block encode call by the Encoder
+ return nil
+ }
+}
+
+// WithSingleSegment will set the "single segment" flag when EncodeAll is used.
+// If this flag is set, data must be regenerated within a single continuous memory segment.
+// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
+// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
+// In order to preserve the decoder from unreasonable memory requirements,
+// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
+// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
+// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
+// If this is not specified, block encodes will automatically choose this based on the input size.
+// This setting has no effect on streamed encodes.
+func WithSingleSegment(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.single = &b // a non-nil pointer overrides the size-based default in EncodeAll
+ return nil
+ }
+}
+
+// WithLowerEncoderMem will, in some cases, trade lower memory usage for
+// slower encoding speed.
+// This will not change the window size which is the primary function for reducing
+// memory usage. See WithWindowSize.
+func WithLowerEncoderMem(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.lowMem = b // copied into each encoder by encoder() and checked by EncodeAll before pre-allocating dst
+ return nil
+ }
+}
+
+// WithEncoderDict allows to register a dictionary that will be used for the encode.
+// The encoder *may* choose to use no dictionary instead for certain payloads.
+func WithEncoderDict(dict []byte) EOption {
+ return func(o *encoderOptions) error {
+ d, err := loadDict(dict) // parse the raw dictionary bytes; a failure aborts the option
+ if err != nil {
+ return err
+ }
+ o.dict = d
+ return nil
+ }
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
new file mode 100644
index 0000000..693c5f0
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -0,0 +1,494 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "bytes"
+ "encoding/hex"
+ "errors"
+ "hash"
+ "io"
+ "sync"
+
+ "github.com/klauspost/compress/zstd/internal/xxhash"
+)
+
+type frameDec struct { // frameDec holds the header fields and decoding state of one frame
+ o decoderOptions
+ crc hash.Hash64 // created lazily by reset when a frame carries a checksum
+ offset int64 // only advanced in debug builds, for logging
+
+ WindowSize uint64
+
+ // maxWindowSize is the maximum window size to support.
+ // should never be bigger than max-int.
+ maxWindowSize uint64
+
+ // In order queue of blocks being decoded.
+ decoding chan *blockDec
+
+ // Frame history passed between blocks
+ history history
+
+ rawInput byteBuffer // input the current frame is read from; set at the end of reset
+
+ // Byte buffer that can be reused for small input blocks.
+ bBuf byteBuf
+
+ FrameContentSize uint64 // 0 when the header carries no content size field
+ frameDone sync.WaitGroup // startDecoder calls Done on exit
+
+ DictionaryID *uint32 // nil when the header has no dictionary ID or the ID is 0
+ HasCheckSum bool
+ SingleSegment bool
+
+ // asyncRunning indicates whether the async routine processes input on 'decoding'.
+ asyncRunningMu sync.Mutex // guards asyncRunning
+ asyncRunning bool
+}
+
+const (
+ // MinWindowSize is the minimum Window_Size, 1 KB.
+ MinWindowSize = 1 << 10
+ MaxWindowSize = 1 << 29 // MaxWindowSize is the largest window size accepted, 512 MB.
+)
+
+var (
+ frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd} // the four bytes a regular frame must start with
+ skippableFrameMagic = []byte{0x2a, 0x4d, 0x18} // bytes 1-3 of a skippable frame; byte 0 has high nibble 0x5
+)
+
+func newFrameDec(o decoderOptions) *frameDec { // returns a frame decoder limited to min(MaxWindowSize, o.maxDecodedSize)
+ d := frameDec{
+ o: o,
+ maxWindowSize: MaxWindowSize,
+ }
+ if d.maxWindowSize > o.maxDecodedSize { // never accept a window larger than the allowed decoded size
+ d.maxWindowSize = o.maxDecodedSize
+ }
+ return &d
+}
+
+// reset will read the frame header and prepare for block decoding.
+// If nothing can be read from the input, io.EOF will be returned.
+// Any other error indicates that the stream contained data, but
+// there was a problem. Skippable frames before the header are skipped.
+func (d *frameDec) reset(br byteBuffer) error {
+ d.HasCheckSum = false
+ d.WindowSize = 0
+ var b []byte
+ for {
+ b = br.readSmall(4)
+ if b == nil {
+ return io.EOF
+ }
+ if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { // skippable magic: 0x5? 0x2a 0x4d 0x18
+ if debug {
+ println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic))
+ }
+ // Break if not skippable frame.
+ break
+ }
+ // Read size to skip
+ b = br.readSmall(4)
+ if b == nil {
+ println("Reading Frame Size EOF")
+ return io.ErrUnexpectedEOF
+ }
+ n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) // little-endian length
+ println("Skipping frame with", n, "bytes.")
+ err := br.skipN(int(n))
+ if err != nil {
+ if debug {
+ println("Reading discarded frame", err)
+ }
+ return err
+ }
+ }
+ if !bytes.Equal(b, frameMagic) {
+ println("Got magic numbers: ", b, "want:", frameMagic)
+ return ErrMagicMismatch
+ }
+
+ // Read Frame_Header_Descriptor
+ fhd, err := br.readByte()
+ if err != nil {
+ println("Reading Frame_Header_Descriptor", err)
+ return err
+ }
+ d.SingleSegment = fhd&(1<<5) != 0 // bit 5 selects single-segment mode
+
+ if fhd&(1<<3) != 0 { // bit 3 is reserved and must be clear
+ return errors.New("reserved bit set on frame header")
+ }
+
+ // Read Window_Descriptor
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
+ d.WindowSize = 0
+ if !d.SingleSegment {
+ wd, err := br.readByte()
+ if err != nil {
+ println("Reading Window_Descriptor", err)
+ return err
+ }
+ printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+ windowLog := 10 + (wd >> 3) // upper five bits: exponent, offset by 10
+ windowBase := uint64(1) << windowLog
+ windowAdd := (windowBase / 8) * uint64(wd&0x7) // lower three bits: mantissa in eighths of the base
+ d.WindowSize = windowBase + windowAdd
+ }
+
+ // Read Dictionary_ID
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
+ d.DictionaryID = nil
+ if size := fhd & 3; size != 0 { // low two bits give the field size: 1, 2 or (for 3) 4 bytes
+ if size == 3 {
+ size = 4
+ }
+ b = br.readSmall(int(size))
+ if b == nil {
+ if debug {
+ println("Reading Dictionary_ID", io.ErrUnexpectedEOF)
+ }
+ return io.ErrUnexpectedEOF
+ }
+ var id uint32
+ switch size {
+ case 1:
+ id = uint32(b[0])
+ case 2:
+ id = uint32(b[0]) | (uint32(b[1]) << 8)
+ case 4:
+ id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ }
+ if debug {
+ println("Dict size", size, "ID:", id)
+ }
+ if id > 0 {
+ // ID 0 means "sorry, no dictionary anyway".
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
+ d.DictionaryID = &id
+ }
+ }
+
+ // Read Frame_Content_Size
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
+ var fcsSize int
+ v := fhd >> 6 // top two bits select the field width
+ switch v {
+ case 0:
+ if d.SingleSegment { // width code 0 still means one byte when single-segment
+ fcsSize = 1
+ }
+ default:
+ fcsSize = 1 << v
+ }
+ d.FrameContentSize = 0
+ if fcsSize > 0 {
+ b := br.readSmall(fcsSize)
+ if b == nil {
+ println("Reading Frame content", io.ErrUnexpectedEOF)
+ return io.ErrUnexpectedEOF
+ }
+ switch fcsSize {
+ case 1:
+ d.FrameContentSize = uint64(b[0])
+ case 2:
+ // When FCS_Field_Size is 2, the offset of 256 is added.
+ d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256 // | and + share precedence in Go, so this is (lo|hi)+256
+ case 4:
+ d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
+ case 8:
+ d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
+ d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
+ }
+ if debug {
+ println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
+ }
+ }
+ // Move this to shared.
+ d.HasCheckSum = fhd&(1<<2) != 0 // bit 2: a checksum follows the last block
+ if d.HasCheckSum {
+ if d.crc == nil {
+ d.crc = xxhash.New()
+ }
+ d.crc.Reset()
+ }
+
+ if d.WindowSize == 0 && d.SingleSegment {
+ // We may not need window in this case.
+ d.WindowSize = d.FrameContentSize
+ if d.WindowSize < MinWindowSize {
+ d.WindowSize = MinWindowSize
+ }
+ }
+
+ if d.WindowSize > d.maxWindowSize {
+ printf("window size %d > max %d\n", d.WindowSize, d.maxWindowSize)
+ return ErrWindowSizeExceeded
+ }
+ // The minimum Window_Size is 1 KB.
+ if d.WindowSize < MinWindowSize {
+ println("got window size: ", d.WindowSize)
+ return ErrWindowSizeTooSmall
+ }
+ d.history.windowSize = int(d.WindowSize)
+ if d.o.lowMem && d.history.windowSize < maxBlockSize {
+ d.history.maxSize = d.history.windowSize * 2
+ } else {
+ d.history.maxSize = d.history.windowSize + maxBlockSize
+ }
+ // history contains input - maybe we do something
+ d.rawInput = br
+ return nil
+}
+
+// next will start decoding the next block from stream; while the async decoder runs it queues the block and returns io.EOF for the last one.
+func (d *frameDec) next(block *blockDec) error {
+ if debug {
+ printf("decoding new block %p:%p", block, block.data)
+ }
+ err := block.reset(d.rawInput, d.WindowSize)
+ if err != nil {
+ println("block error:", err)
+ // Signal the frame decoder we have a problem.
+ d.sendErr(block, err)
+ return err
+ }
+ block.input <- struct{}{}
+ if debug {
+ println("next block:", block)
+ }
+ d.asyncRunningMu.Lock()
+ defer d.asyncRunningMu.Unlock()
+ if !d.asyncRunning { // no async decoder is consuming d.decoding, so the block is not queued
+ return nil
+ }
+ if block.Last {
+ // We indicate the frame is done by sending io.EOF
+ d.decoding <- block
+ return io.EOF
+ }
+ d.decoding <- block
+ return nil
+}
+
+// sendErr will queue an error block on the frame.
+// This will cause the frame decoder to return when it encounters the block.
+// Returns true if the block was queued, false if the async decoder is not running.
+func (d *frameDec) sendErr(block *blockDec, err error) bool {
+ d.asyncRunningMu.Lock()
+ defer d.asyncRunningMu.Unlock()
+ if !d.asyncRunning {
+ return false
+ }
+
+ println("sending error", err.Error())
+ block.sendErr(err)
+ d.decoding <- block
+ return true
+}
+
+// checkCRC will check the checksum if the frame has one.
+// It returns ErrCRCMismatch on mismatch, io.ErrUnexpectedEOF if the 4 checksum bytes cannot be read, otherwise nil.
+func (d *frameDec) checkCRC() error {
+ if !d.HasCheckSum {
+ return nil
+ }
+ var tmp [4]byte
+ got := d.crc.Sum64()
+ // Flip to match file order: the low 32 bits of the hash, least significant byte first.
+ tmp[0] = byte(got >> 0)
+ tmp[1] = byte(got >> 8)
+ tmp[2] = byte(got >> 16)
+ tmp[3] = byte(got >> 24)
+
+ // We can overwrite upper tmp now
+ want := d.rawInput.readSmall(4) // the stored checksum follows the last block in the input
+ if want == nil {
+ println("CRC missing?")
+ return io.ErrUnexpectedEOF
+ }
+
+ if !bytes.Equal(tmp[:], want) {
+ if debug {
+ println("CRC Check Failed:", tmp[:], "!=", want)
+ }
+ return ErrCRCMismatch
+ }
+ if debug {
+ println("CRC ok", tmp[:])
+ }
+ return nil
+}
+
+func (d *frameDec) initAsync() { // sizes the history buffer and decoding queue, then marks the async decoder as running
+ if !d.o.lowMem && !d.SingleSegment {
+ // set max extra size history to 10MB.
+ d.history.maxSize = d.history.windowSize + maxBlockSize*5
+ }
+ // re-alloc if more than one extra block size.
+ if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize {
+ d.history.b = make([]byte, 0, d.history.maxSize)
+ }
+ if cap(d.history.b) < d.history.maxSize { // grow the buffer when it cannot hold maxSize
+ d.history.b = make([]byte, 0, d.history.maxSize)
+ }
+ if cap(d.decoding) < d.o.concurrent { // the queue must hold one block per concurrent decoder
+ d.decoding = make(chan *blockDec, d.o.concurrent)
+ }
+ if debug {
+ h := d.history
+ printf("history init. len: %d, cap: %d", len(h.b), cap(h.b))
+ }
+ d.asyncRunningMu.Lock()
+ d.asyncRunning = true
+ d.asyncRunningMu.Unlock()
+}
+
+// startDecoder will receive queued blocks from d.decoding and send their results on output.
+// The decoder will stop as soon as an error occurs or at end of frame.
+// On exit it drains any blocks still queued on d.decoding and then
+// signals completion by calling Done on frameDec.frameDone.
+func (d *frameDec) startDecoder(output chan decodeOutput) {
+ written := int64(0) // bytes produced for this frame so far
+
+ defer func() {
+ d.asyncRunningMu.Lock()
+ d.asyncRunning = false
+ d.asyncRunningMu.Unlock()
+
+ // Drain the currently decoding.
+ d.history.error = true
+ flushdone:
+ for {
+ select {
+ case b := <-d.decoding:
+ b.history <- &d.history
+ output <- <-b.result
+ default:
+ break flushdone
+ }
+ }
+ println("frame decoder done, signalling done")
+ d.frameDone.Done()
+ }()
+ // Get decoder for first block.
+ block := <-d.decoding
+ block.history <- &d.history
+ for {
+ var next *blockDec
+ // Get result
+ r := <-block.result
+ if r.err != nil {
+ println("Result contained error", r.err)
+ output <- r
+ return
+ }
+ if debug {
+ println("got result, from ", d.offset, "to", d.offset+int64(len(r.b)))
+ d.offset += int64(len(r.b))
+ }
+ if !block.Last {
+ // Send history to next block
+ select {
+ case next = <-d.decoding:
+ if debug {
+ println("Sending ", len(d.history.b), "bytes as history")
+ }
+ next.history <- &d.history
+ default:
+ // Wait until we have sent the block, so
+ // other decoders can potentially get the decoder.
+ next = nil
+ }
+ }
+
+ // Add checksum, async to decoding.
+ if d.HasCheckSum {
+ n, err := d.crc.Write(r.b)
+ if err != nil {
+ r.err = err
+ if n != len(r.b) {
+ r.err = io.ErrShortWrite
+ }
+ output <- r
+ return
+ }
+ }
+ written += int64(len(r.b))
+ if d.SingleSegment && uint64(written) > d.FrameContentSize { // frame produced more than its header declared
+ println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize)
+ r.err = ErrFrameSizeExceeded
+ output <- r
+ return
+ }
+ if block.Last {
+ r.err = d.checkCRC() // the last result carries the checksum verdict
+ output <- r
+ return
+ }
+ output <- r
+ if next == nil {
+ // There was no decoder available, we wait for one now that we have sent to the writer.
+ if debug {
+ println("Sending ", len(d.history.b), " bytes as history")
+ }
+ next = <-d.decoding
+ next.history <- &d.history
+ }
+ block = next
+ }
+}
+
+// runDecoder synchronously decodes the blocks of the current frame with dec, appending the output to dst and verifying the checksum, if any.
+func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
+	saved := d.history.b
+
+	// We use the history for output to avoid copying it.
+	d.history.b = dst
+	// Store input length, so we only check new data.
+	crcStart := len(dst)
+	var err error
+	for {
+		err = dec.reset(d.rawInput, d.WindowSize)
+		if err != nil {
+			break
+		}
+		if debug {
+			println("next block:", dec)
+		}
+		err = dec.decodeBuf(&d.history)
+		if err != nil || dec.Last {
+			break
+		}
+		if uint64(len(d.history.b)) > d.o.maxDecodedSize {
+			err = ErrDecoderSizeExceeded
+			break
+		}
+		if d.SingleSegment && uint64(len(d.history.b)-crcStart) > d.FrameContentSize { // this frame's output exceeds the size its header declared
+			println("runDecoder: single segment and", uint64(len(d.history.b)-crcStart), ">", d.FrameContentSize)
+			err = ErrFrameSizeExceeded
+			break
+		}
+	}
+	dst = d.history.b
+	if err == nil {
+		if d.HasCheckSum {
+			var n int
+			n, err = d.crc.Write(dst[crcStart:]) // hash only the bytes this frame appended
+			if err == nil {
+				if n != len(dst)-crcStart {
+					err = io.ErrShortWrite
+				} else {
+					err = d.checkCRC()
+				}
+			}
+		}
+	}
+	d.history.b = saved // hand the original history buffer back
+	return dst, err
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go
new file mode 100644
index 0000000..4ef7f5a
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@@ -0,0 +1,137 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "math"
+ "math/bits"
+)
+
+type frameHeader struct { // frameHeader holds the values appendTo serializes into a frame header
+ ContentSize uint64 // uncompressed size; omitted by appendTo when < 256 and not single-segment
+ WindowSize uint32 // only written when SingleSegment is false
+ SingleSegment bool
+ Checksum bool
+ DictID uint32 // 0 means no dictionary ID field is written
+}
+
+const maxHeaderSize = 14 // length of the scratch array the streaming encoder hands to appendTo
+
+func (f frameHeader) appendTo(dst []byte) ([]byte, error) { // appends magic, descriptor and optional fields to dst; the error is always nil here
+ dst = append(dst, frameMagic...)
+ var fhd uint8
+ if f.Checksum {
+ fhd |= 1 << 2 // bit 2: checksum present
+ }
+ if f.SingleSegment {
+ fhd |= 1 << 5 // bit 5: single segment
+ }
+
+ var dictIDContent []byte
+ if f.DictID > 0 {
+ var tmp [4]byte
+ if f.DictID < 256 {
+ fhd |= 1 // low two bits encode the dictionary ID width: 1, 2 or 4 bytes
+ tmp[0] = uint8(f.DictID)
+ dictIDContent = tmp[:1]
+ } else if f.DictID < 1<<16 {
+ fhd |= 2
+ binary.LittleEndian.PutUint16(tmp[:2], uint16(f.DictID))
+ dictIDContent = tmp[:2]
+ } else {
+ fhd |= 3
+ binary.LittleEndian.PutUint32(tmp[:4], f.DictID)
+ dictIDContent = tmp[:4]
+ }
+ }
+ var fcs uint8 // content size width code: 0 -> 0 or 1 byte, 1 -> 2, 2 -> 4, 3 -> 8 bytes
+ if f.ContentSize >= 256 {
+ fcs++
+ }
+ if f.ContentSize >= 65536+256 {
+ fcs++
+ }
+ if f.ContentSize >= 0xffffffff {
+ fcs++
+ }
+
+ fhd |= fcs << 6 // top two bits carry the width code
+
+ dst = append(dst, fhd)
+ if !f.SingleSegment {
+ const winLogMin = 10
+ windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3 // exponent in the upper five bits, mantissa left at 0
+ dst = append(dst, uint8(windowLog))
+ }
+ if f.DictID > 0 {
+ dst = append(dst, dictIDContent...)
+ }
+ switch fcs {
+ case 0:
+ if f.SingleSegment {
+ dst = append(dst, uint8(f.ContentSize))
+ }
+ // Unless SingleSegment is set, frame sizes < 256 are not stored.
+ case 1:
+ f.ContentSize -= 256 // the two-byte form stores the size minus 256
+ dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
+ case 2:
+ dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8), uint8(f.ContentSize>>16), uint8(f.ContentSize>>24))
+ case 3:
+ dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8), uint8(f.ContentSize>>16), uint8(f.ContentSize>>24),
+ uint8(f.ContentSize>>32), uint8(f.ContentSize>>40), uint8(f.ContentSize>>48), uint8(f.ContentSize>>56))
+ default:
+ panic("invalid fcs")
+ }
+ return dst, nil
+}
+
+const skippableFrameHeader = 4 + 4 // 4 magic bytes plus a 4 byte length, as written by skippableFrame
+
+// calcSkippableFrame will return a total size to be added for written
+// to be divisible by multiple.
+// The value will be 0 if written is already a multiple, otherwise >= skippableFrameHeader.
+// The function will panic if written < 0 or wantMultiple <= 0.
+func calcSkippableFrame(written, wantMultiple int64) int {
+ if wantMultiple <= 0 {
+ panic("wantMultiple <= 0")
+ }
+ if written < 0 {
+ panic("written < 0")
+ }
+ leftOver := written % wantMultiple
+ if leftOver == 0 {
+ return 0
+ }
+ toAdd := wantMultiple - leftOver
+ for toAdd < skippableFrameHeader { // too small to hold a frame header: pad up to a later multiple
+ toAdd += wantMultiple
+ }
+ return int(toAdd)
+}
+
+// skippableFrame will add a skippable frame with a total size of bytes, its payload read from r.
+// total must be 0 (nothing is added), or >= skippableFrameHeader and <= math.MaxUint32.
+func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) {
+ if total == 0 {
+ return dst, nil
+ }
+ if total < skippableFrameHeader {
+ return dst, fmt.Errorf("requested skippable frame (%d) < 8", total)
+ }
+ if int64(total) > math.MaxUint32 {
+ return dst, fmt.Errorf("requested skippable frame (%d) > max uint32", total)
+ }
+ dst = append(dst, 0x50, 0x2a, 0x4d, 0x18) // skippable frame magic
+ f := uint32(total - skippableFrameHeader) // payload length, excluding the 8 header bytes
+ dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24))
+ start := len(dst)
+ dst = append(dst, make([]byte, f)...)
+ _, err := io.ReadFull(r, dst[start:]) // fill the payload from r; dst is returned even on error
+ return dst, err
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
new file mode 100644
index 0000000..e6d3d49
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -0,0 +1,385 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "fmt"
+)
+
+const (
+ tablelogAbsoluteMax = 9
+)
+
+const (
+ /*!MEMORY_USAGE :
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+ * Increasing memory usage improves compression ratio
+ * Reduced memory usage can improve speed, due to cache effect
+ * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+ maxMemoryUsage = tablelogAbsoluteMax + 2
+
+ maxTableLog = maxMemoryUsage - 2
+ maxTablesize = 1 << maxTableLog
+ maxTableMask = (1 << maxTableLog) - 1
+ minTablelog = 5
+ maxSymbolValue = 255
+)
+
+// fseDecoder holds an FSE decoding table plus the scratch storage used to build it.
+type fseDecoder struct {
+ dt [maxTablesize]decSymbol // Decompression table.
+ symbolLen uint16 // Length of active part of the symbol table.
+ actualTableLog uint8 // Selected tablelog.
+ maxBits uint8 // Maximum number of additional bits
+
+ // used for table creation to avoid allocations.
+ stateTable [256]uint16 // per-symbol next-state counters while buildDtable runs
+ norm [maxSymbolValue + 1]int16 // normalized symbol counts filled in by readNCount
+ preDefined bool
+}
+
+// tableStep returns the position increment used when spreading symbols over a table with tableSize entries.
+func tableStep(tableSize uint32) uint32 {
+ return (tableSize >> 1) + (tableSize >> 3) + 3
+}
+
+// readNCount will read the symbol distribution so decoding tables can be constructed.
+func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
+ var (
+ charnum uint16
+ previous0 bool
+ )
+ if b.remain() < 4 {
+ return errors.New("input too small")
+ }
+ bitStream := b.Uint32NC()
+ nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
+ if nbBits > tablelogAbsoluteMax {
+ println("Invalid tablelog:", nbBits)
+ return errors.New("tableLog too large")
+ }
+ bitStream >>= 4
+ bitCount := uint(4)
+
+ s.actualTableLog = uint8(nbBits)
+ remaining := int32((1 << nbBits) + 1)
+ threshold := int32(1 << nbBits)
+ gotTotal := int32(0)
+ nbBits++
+
+ for remaining > 1 && charnum <= maxSymbol {
+ if previous0 {
+ //println("prev0")
+ n0 := charnum
+ for (bitStream & 0xFFFF) == 0xFFFF {
+ //println("24 x 0")
+ n0 += 24
+ if r := b.remain(); r > 5 {
+ b.advance(2)
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> bitCount
+ } else {
+ // end of bit stream
+ bitStream >>= 16
+ bitCount += 16
+ }
+ }
+ //printf("bitstream: %d, 0b%b", bitStream&3, bitStream)
+ for (bitStream & 3) == 3 {
+ n0 += 3
+ bitStream >>= 2
+ bitCount += 2
+ }
+ n0 += uint16(bitStream & 3)
+ bitCount += 2
+
+ if n0 > maxSymbolValue {
+ return errors.New("maxSymbolValue too small")
+ }
+ //println("inserting ", n0-charnum, "zeroes from idx", charnum, "ending before", n0)
+ for charnum < n0 {
+ s.norm[uint8(charnum)] = 0
+ charnum++
+ }
+
+ if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
+ b.advance(bitCount >> 3)
+ bitCount &= 7
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> bitCount
+ } else {
+ bitStream >>= 2
+ }
+ }
+
+ max := (2*threshold - 1) - remaining
+ var count int32
+
+ if int32(bitStream)&(threshold-1) < max {
+ count = int32(bitStream) & (threshold - 1)
+ if debugAsserts && nbBits < 1 {
+ panic("nbBits underflow")
+ }
+ bitCount += nbBits - 1
+ } else {
+ count = int32(bitStream) & (2*threshold - 1)
+ if count >= threshold {
+ count -= max
+ }
+ bitCount += nbBits
+ }
+
+ // extra accuracy
+ count--
+ if count < 0 {
+ // -1 means +1
+ remaining += count
+ gotTotal -= count
+ } else {
+ remaining -= count
+ gotTotal += count
+ }
+ s.norm[charnum&0xff] = int16(count)
+ charnum++
+ previous0 = count == 0
+ for remaining < threshold {
+ nbBits--
+ threshold >>= 1
+ }
+
+ if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
+ b.advance(bitCount >> 3)
+ bitCount &= 7
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> (bitCount & 31)
+ } else {
+ bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
+ b.off = len(b.b) - 4
+ bitStream = b.Uint32() >> (bitCount & 31)
+ }
+ }
+ s.symbolLen = charnum
+ if s.symbolLen <= 1 {
+ return fmt.Errorf("symbolLen (%d) too small", s.symbolLen)
+ }
+ if s.symbolLen > maxSymbolValue+1 {
+ return fmt.Errorf("symbolLen (%d) too big", s.symbolLen)
+ }
+ if remaining != 1 {
+ return fmt.Errorf("corruption detected (remaining %d != 1)", remaining)
+ }
+ if bitCount > 32 {
+ return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount)
+ }
+ if gotTotal != 1<<s.actualTableLog {
+ return fmt.Errorf("corruption detected (total %d != %d)", gotTotal, 1<<s.actualTableLog)
+ }
+ b.advance((bitCount + 7) >> 3)
+ // println(s.norm[:s.symbolLen], s.symbolLen)
+ return s.buildDtable()
+}
+
+// decSymbol contains information about a state entry,
+// Including the state offset base, the output symbol and
+// the number of bits to read for the low part of the destination state.
+// Using a composite uint64 is faster than a struct with separate members.
+type decSymbol uint64
+
+func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol {
+ return decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
+}
+
+func (d decSymbol) nbBits() uint8 {
+ return uint8(d)
+}
+
+func (d decSymbol) addBits() uint8 {
+ return uint8(d >> 8)
+}
+
+func (d decSymbol) newState() uint16 {
+ return uint16(d >> 16)
+}
+
+func (d decSymbol) baseline() uint32 {
+ return uint32(d >> 32)
+}
+
+func (d decSymbol) baselineInt() int {
+ return int(d >> 32)
+}
+
+func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
+ *d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
+}
+
+func (d *decSymbol) setNBits(nBits uint8) {
+ const mask = 0xffffffffffffff00
+ *d = (*d & mask) | decSymbol(nBits)
+}
+
+func (d *decSymbol) setAddBits(addBits uint8) {
+ const mask = 0xffffffffffff00ff
+ *d = (*d & mask) | (decSymbol(addBits) << 8)
+}
+
+func (d *decSymbol) setNewState(state uint16) {
+ const mask = 0xffffffff0000ffff
+ *d = (*d & mask) | decSymbol(state)<<16
+}
+
+func (d *decSymbol) setBaseline(baseline uint32) {
+ const mask = 0xffffffff
+ *d = (*d & mask) | decSymbol(baseline)<<32
+}
+
+func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
+ const mask = 0xffff00ff
+ *d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
+}
+
+// decSymbolValue returns the transformed decSymbol for the given symbol.
+func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) {
+ if int(symb) >= len(t) {
+ return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t))
+ }
+ lu := t[symb]
+ return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil
+}
+
+// setRLE will set the decoder to RLE mode: table log 0 with symbol as the only table entry.
+func (s *fseDecoder) setRLE(symbol decSymbol) {
+ s.actualTableLog = 0
+ s.maxBits = symbol.addBits()
+ s.dt[0] = symbol
+}
+
+// buildDtable will build the decoding table.
+func (s *fseDecoder) buildDtable() error {
+ tableSize := uint32(1 << s.actualTableLog)
+ highThreshold := tableSize - 1
+ symbolNext := s.stateTable[:256]
+
+ // Init, lay down lowprob symbols
+ {
+ for i, v := range s.norm[:s.symbolLen] {
+ if v == -1 {
+ s.dt[highThreshold].setAddBits(uint8(i))
+ highThreshold--
+ symbolNext[i] = 1
+ } else {
+ symbolNext[i] = uint16(v)
+ }
+ }
+ }
+ // Spread symbols
+ {
+ tableMask := tableSize - 1
+ step := tableStep(tableSize)
+ position := uint32(0)
+ for ss, v := range s.norm[:s.symbolLen] {
+ for i := 0; i < int(v); i++ {
+ s.dt[position].setAddBits(uint8(ss))
+ position = (position + step) & tableMask
+ for position > highThreshold {
+ // lowprob area
+ position = (position + step) & tableMask
+ }
+ }
+ }
+ if position != 0 {
+ // position must reach all cells once, otherwise normalizedCounter is incorrect
+ return errors.New("corrupted input (position != 0)")
+ }
+ }
+
+ // Build Decoding table
+ {
+ tableSize := uint16(1 << s.actualTableLog)
+ for u, v := range s.dt[:tableSize] {
+ symbol := v.addBits()
+ nextState := symbolNext[symbol]
+ symbolNext[symbol] = nextState + 1
+ nBits := s.actualTableLog - byte(highBits(uint32(nextState)))
+ s.dt[u&maxTableMask].setNBits(nBits)
+ newState := (nextState << nBits) - tableSize
+ if newState > tableSize {
+ return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize)
+ }
+ if newState == uint16(u) && nBits == 0 {
+ // Seems weird that this is possible with nbits > 0.
+ return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u)
+ }
+ s.dt[u&maxTableMask].setNewState(newState)
+ }
+ }
+ return nil
+}
+
+// transform will transform the decoder table into a table usable for
+// decoding without having to apply the transformation while decoding.
+// The state will contain the base value and the number of bits to read.
+func (s *fseDecoder) transform(t []baseOffset) error {
+ tableSize := uint16(1 << s.actualTableLog)
+ s.maxBits = 0
+ for i, v := range s.dt[:tableSize] {
+ add := v.addBits()
+ if int(add) >= len(t) {
+ return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t))
+ }
+ lu := t[add]
+ if lu.addBits > s.maxBits {
+ s.maxBits = lu.addBits
+ }
+ v.setExt(lu.addBits, lu.baseLine)
+ s.dt[i] = v
+ }
+ return nil
+}
+
+type fseState struct {
+ dt []decSymbol
+ state decSymbol
+}
+
+// init stores dt and, after br.fill(), loads the first state from dt at index br.getBits(tableLog).
+func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
+ s.dt = dt
+ br.fill()
+ s.state = dt[br.getBits(tableLog)]
+}
+
+// next reads the low bits for the current state from br and moves to the next state; it returns nothing.
+// At least tablelog bits must be available in the bit reader.
+func (s *fseState) next(br *bitReader) {
+ lowBits := uint16(br.getBits(s.state.nbBits()))
+ s.state = s.dt[s.state.newState()+lowBits]
+}
+
+// finished returns true if all bits have been read from the bitstream
+// and the next state would require reading bits from the input.
+func (s *fseState) finished(br *bitReader) bool {
+ return br.finished() && s.state.nbBits() > 0
+}
+
+// final returns the current state symbol without decoding the next.
+func (s *fseState) final() (int, uint8) {
+ return s.state.baselineInt(), s.state.addBits()
+}
+
+// final returns the baseline (as int) and the addBits value packed in s.
+func (s decSymbol) final() (int, uint8) {
+ return s.baselineInt(), s.addBits()
+}
+
+// nextFast returns the next symbol and sets the next state.
+// This can only be used if no symbols are 0 bits.
+// At least tablelog bits must be available in the bit reader.
+func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
+ lowBits := uint16(br.getBitsFast(s.state.nbBits()))
+ s.state = s.dt[s.state.newState()+lowBits]
+ return s.state.baseline(), s.state.addBits()
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
new file mode 100644
index 0000000..c74681b
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -0,0 +1,725 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "fmt"
+ "math"
+)
+
+const (
+ // For encoding we only support table logs from minEncTablelog up to maxEncTableLog.
+ maxEncTableLog = 8
+ maxEncTablesize = 1 << maxTableLog // NOTE(review): derived from maxTableLog, not maxEncTableLog - confirm this is intended
+ maxEncTableMask = (1 << maxTableLog) - 1 // NOTE(review): same as above, uses maxTableLog
+ minEncTablelog = 5
+ maxEncSymbolValue = maxMatchLengthSymbol
+)
+
+// fseEncoder holds the state, tables and scratch storage used for FSE compression.
+type fseEncoder struct {
+ symbolLen uint16 // Length of active part of the symbol table.
+ actualTableLog uint8 // Selected tablelog.
+ ct cTable // Compression tables.
+ maxCount int // count of the most probable symbol
+ zeroBits bool // Set by buildCTable when a symbol's normalized count exceeds half the table, so 0 bit output is possible.
+ clearCount bool // count is zeroed by prepare when this is set and maxCount is 0
+ useRLE bool // This encoder is for RLE
+ preDefined bool // This encoder is predefined.
+ reUsed bool // Set to know when the encoder has been reused.
+ rleVal uint8 // RLE Symbol
+ maxBits uint8 // Maximum output bits after transform.
+
+ // TODO: Technically zstd should be fine with 64 bytes.
+ count [256]uint32 // symbol histogram, exposed through Histogram
+ norm [256]int16 // normalized counts produced by normalizeCount
+}
+
+// cTable contains tables used for compression.
+type cTable struct {
+ tableSymbol []byte
+ stateTable []uint16
+ symbolTT []symbolTransform
+}
+
+// symbolTransform contains the state transform for a symbol.
+type symbolTransform struct {
+ deltaNbBits uint32
+ deltaFindState int16
+ outBits uint8
+}
+
+// String prints values as a human readable string.
+func (s symbolTransform) String() string {
+ return fmt.Sprintf("{deltabits: %08x, findstate:%d outbits:%d}", s.deltaNbBits, s.deltaFindState, s.outBits)
+}
+
+// Histogram allows to populate the histogram and skip that step in the compression,
+// It otherwise allows to inspect the histogram when compression is done.
+// To indicate that you have populated the histogram call HistogramFinished
+// with the value of the highest populated symbol, as well as the number of entries
+// in the most populated entry. These are accepted at face value.
+// The returned slice will always be length 256.
+func (s *fseEncoder) Histogram() []uint32 {
+ return s.count[:]
+}
+
+// HistogramFinished can be called to indicate that the histogram has been populated.
+// maxSymbol is the index of the highest set symbol of the next data segment.
+// maxCount is the number of entries in the most populated entry.
+// These are accepted at face value.
+func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
+ s.maxCount = maxCount
+ s.symbolLen = uint16(maxSymbol) + 1
+ s.clearCount = maxCount != 0
+}
+
+// prepare readies the encoder for use: it allocates one if s is nil, clears RLE mode and zeroes the histogram when clearCount is set and maxCount is 0. The returned error is always nil.
+func (s *fseEncoder) prepare() (*fseEncoder, error) {
+ if s == nil {
+ s = &fseEncoder{}
+ }
+ s.useRLE = false
+ if s.clearCount && s.maxCount == 0 {
+ for i := range s.count {
+ s.count[i] = 0
+ }
+ s.clearCount = false
+ }
+ return s, nil
+}
+
+// allocCtable will allocate tables needed for compression.
+// If existing tables are big enough, they are simply re-used.
+func (s *fseEncoder) allocCtable() {
+ tableSize := 1 << s.actualTableLog
+ // get tableSymbol that is big enough.
+ if cap(s.ct.tableSymbol) < tableSize {
+ s.ct.tableSymbol = make([]byte, tableSize)
+ }
+ s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
+
+ ctSize := tableSize
+ if cap(s.ct.stateTable) < ctSize {
+ s.ct.stateTable = make([]uint16, ctSize)
+ }
+ s.ct.stateTable = s.ct.stateTable[:ctSize]
+
+ if cap(s.ct.symbolTT) < 256 { // symbolTT always has 256 entries, independent of the table log
+ s.ct.symbolTT = make([]symbolTransform, 256)
+ }
+ s.ct.symbolTT = s.ct.symbolTT[:256]
+}
+
+// buildCTable will populate the compression table so it is ready to be used.
+func (s *fseEncoder) buildCTable() error {
+ tableSize := uint32(1 << s.actualTableLog)
+ highThreshold := tableSize - 1
+ var cumul [256]int16
+
+ s.allocCtable()
+ tableSymbol := s.ct.tableSymbol[:tableSize]
+ // symbol start positions
+ {
+ cumul[0] = 0
+ for ui, v := range s.norm[:s.symbolLen-1] {
+ u := byte(ui) // one less than reference
+ if v == -1 {
+ // Low proba symbol
+ cumul[u+1] = cumul[u] + 1
+ tableSymbol[highThreshold] = u
+ highThreshold--
+ } else {
+ cumul[u+1] = cumul[u] + v
+ }
+ }
+ // Encode last symbol separately to avoid overflowing u
+ u := int(s.symbolLen - 1)
+ v := s.norm[s.symbolLen-1]
+ if v == -1 {
+ // Low proba symbol
+ cumul[u+1] = cumul[u] + 1
+ tableSymbol[highThreshold] = byte(u)
+ highThreshold--
+ } else {
+ cumul[u+1] = cumul[u] + v
+ }
+ if uint32(cumul[s.symbolLen]) != tableSize {
+ return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
+ }
+ cumul[s.symbolLen] = int16(tableSize) + 1
+ }
+ // Spread symbols
+ s.zeroBits = false
+ {
+ step := tableStep(tableSize)
+ tableMask := tableSize - 1
+ var position uint32
+ // if any symbol > largeLimit, we may have 0 bits output.
+ largeLimit := int16(1 << (s.actualTableLog - 1))
+ for ui, v := range s.norm[:s.symbolLen] {
+ symbol := byte(ui)
+ if v > largeLimit {
+ s.zeroBits = true
+ }
+ for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
+ tableSymbol[position] = symbol
+ position = (position + step) & tableMask
+ for position > highThreshold {
+ position = (position + step) & tableMask
+ } /* Low proba area */
+ }
+ }
+
+ // Check if we have gone through all positions
+ if position != 0 {
+ return errors.New("position!=0")
+ }
+ }
+
+ // Build table
+ table := s.ct.stateTable
+ {
+ tsi := int(tableSize)
+ for u, v := range tableSymbol {
+ // TableU16 : sorted by symbol order; gives next state value
+ table[cumul[v]] = uint16(tsi + u)
+ cumul[v]++
+ }
+ }
+
+ // Build Symbol Transformation Table
+ {
+ total := int16(0)
+ symbolTT := s.ct.symbolTT[:s.symbolLen]
+ tableLog := s.actualTableLog
+ tl := (uint32(tableLog) << 16) - (1 << tableLog)
+ for i, v := range s.norm[:s.symbolLen] {
+ switch v {
+ case 0:
+ case -1, 1:
+ symbolTT[i].deltaNbBits = tl
+ symbolTT[i].deltaFindState = total - 1
+ total++
+ default:
+ maxBitsOut := uint32(tableLog) - highBit(uint32(v-1))
+ minStatePlus := uint32(v) << maxBitsOut
+ symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
+ symbolTT[i].deltaFindState = total - v
+ total += v
+ }
+ }
+ if total != int16(tableSize) {
+ return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
+ }
+ }
+ return nil
+}
+
+var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
+
+func (s *fseEncoder) setRLE(val byte) {
+ s.allocCtable()
+ s.actualTableLog = 0
+ s.ct.stateTable = s.ct.stateTable[:1]
+ s.ct.symbolTT[val] = symbolTransform{
+ deltaFindState: 0,
+ deltaNbBits: 0,
+ }
+ if debug {
+ println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
+ }
+ s.rleVal = val
+ s.useRLE = true
+}
+
+// setBits will set output bits for the transform.
+// if nil is provided, the number of bits is equal to the index.
+func (s *fseEncoder) setBits(transform []byte) {
+ if s.reUsed || s.preDefined {
+ return
+ }
+ if s.useRLE {
+ if transform == nil {
+ s.ct.symbolTT[s.rleVal].outBits = s.rleVal
+ s.maxBits = s.rleVal
+ return
+ }
+ s.maxBits = transform[s.rleVal]
+ s.ct.symbolTT[s.rleVal].outBits = s.maxBits
+ return
+ }
+ if transform == nil {
+ for i := range s.ct.symbolTT[:s.symbolLen] {
+ s.ct.symbolTT[i].outBits = uint8(i)
+ }
+ s.maxBits = uint8(s.symbolLen - 1)
+ return
+ }
+ s.maxBits = 0
+ for i, v := range transform[:s.symbolLen] {
+ s.ct.symbolTT[i].outBits = v
+ if v > s.maxBits {
+ // We could assume bits always going up, but we play safe.
+ s.maxBits = v
+ }
+ }
+}
+
+// normalizeCount will normalize the count of the symbols so
+// the total is equal to the table size.
+// If successful, compression tables will also be made ready.
+func (s *fseEncoder) normalizeCount(length int) error {
+ if s.reUsed {
+ return nil
+ }
+ s.optimalTableLog(length)
+ var (
+ tableLog = s.actualTableLog
+ scale = 62 - uint64(tableLog)
+ step = (1 << 62) / uint64(length)
+ vStep = uint64(1) << (scale - 20)
+ stillToDistribute = int16(1 << tableLog)
+ largest int
+ largestP int16
+ lowThreshold = (uint32)(length >> tableLog)
+ )
+ if s.maxCount == length {
+ s.useRLE = true
+ return nil
+ }
+ s.useRLE = false
+ for i, cnt := range s.count[:s.symbolLen] {
+ // already handled
+ // if (count[s] == s.length) return 0; /* rle special case */
+
+ if cnt == 0 {
+ s.norm[i] = 0
+ continue
+ }
+ if cnt <= lowThreshold {
+ s.norm[i] = -1
+ stillToDistribute--
+ } else {
+ proba := (int16)((uint64(cnt) * step) >> scale)
+ if proba < 8 {
+ restToBeat := vStep * uint64(rtbTable[proba])
+ v := uint64(cnt)*step - (uint64(proba) << scale)
+ if v > restToBeat {
+ proba++
+ }
+ }
+ if proba > largestP {
+ largestP = proba
+ largest = i
+ }
+ s.norm[i] = proba
+ stillToDistribute -= proba
+ }
+ }
+
+ if -stillToDistribute >= (s.norm[largest] >> 1) {
+ // corner case, need another normalization method
+ err := s.normalizeCount2(length)
+ if err != nil {
+ return err
+ }
+ if debugAsserts {
+ err = s.validateNorm()
+ if err != nil {
+ return err
+ }
+ }
+ return s.buildCTable()
+ }
+ s.norm[largest] += stillToDistribute
+ if debugAsserts {
+ err := s.validateNorm()
+ if err != nil {
+ return err
+ }
+ }
+ return s.buildCTable()
+}
+
+// Secondary normalization method.
+// To be used when primary method fails.
+func (s *fseEncoder) normalizeCount2(length int) error {
+ const notYetAssigned = -2
+ var (
+ distributed uint32
+ total = uint32(length)
+ tableLog = s.actualTableLog
+ lowThreshold = total >> tableLog
+ lowOne = (total * 3) >> (tableLog + 1)
+ )
+ for i, cnt := range s.count[:s.symbolLen] {
+ if cnt == 0 {
+ s.norm[i] = 0
+ continue
+ }
+ if cnt <= lowThreshold {
+ s.norm[i] = -1
+ distributed++
+ total -= cnt
+ continue
+ }
+ if cnt <= lowOne {
+ s.norm[i] = 1
+ distributed++
+ total -= cnt
+ continue
+ }
+ s.norm[i] = notYetAssigned
+ }
+ toDistribute := (1 << tableLog) - distributed
+
+ if (total / toDistribute) > lowOne {
+ // risk of rounding to zero
+ lowOne = (total * 3) / (toDistribute * 2)
+ for i, cnt := range s.count[:s.symbolLen] {
+ if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
+ s.norm[i] = 1
+ distributed++
+ total -= cnt
+ continue
+ }
+ }
+ toDistribute = (1 << tableLog) - distributed
+ }
+ if distributed == uint32(s.symbolLen)+1 {
+ // all values are pretty poor;
+ // probably incompressible data (should have already been detected);
+ // find max, then give all remaining points to max
+ var maxV int
+ var maxC uint32
+ for i, cnt := range s.count[:s.symbolLen] {
+ if cnt > maxC {
+ maxV = i
+ maxC = cnt
+ }
+ }
+ s.norm[maxV] += int16(toDistribute)
+ return nil
+ }
+
+ if total == 0 {
+ // all of the symbols were low enough for the lowOne or lowThreshold
+ for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
+ if s.norm[i] > 0 {
+ toDistribute--
+ s.norm[i]++
+ }
+ }
+ return nil
+ }
+
+ var (
+ vStepLog = 62 - uint64(tableLog)
+ mid = uint64((1 << (vStepLog - 1)) - 1)
+ rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
+ tmpTotal = mid
+ )
+ for i, cnt := range s.count[:s.symbolLen] {
+ if s.norm[i] == notYetAssigned {
+ var (
+ end = tmpTotal + uint64(cnt)*rStep
+ sStart = uint32(tmpTotal >> vStepLog)
+ sEnd = uint32(end >> vStepLog)
+ weight = sEnd - sStart
+ )
+ if weight < 1 {
+ return errors.New("weight < 1")
+ }
+ s.norm[i] = int16(weight)
+ tmpTotal = end
+ }
+ }
+ return nil
+}
+
+// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
+func (s *fseEncoder) optimalTableLog(length int) {
+ tableLog := uint8(maxEncTableLog)
+ minBitsSrc := highBit(uint32(length)) + 1
+ minBitsSymbols := highBit(uint32(s.symbolLen-1)) + 2
+ minBits := uint8(minBitsSymbols)
+ if minBitsSrc < minBitsSymbols {
+ minBits = uint8(minBitsSrc)
+ }
+
+ maxBitsSrc := uint8(highBit(uint32(length-1))) - 2
+ if maxBitsSrc < tableLog {
+ // Accuracy can be reduced
+ tableLog = maxBitsSrc
+ }
+ if minBits > tableLog {
+ tableLog = minBits
+ }
+ // Need a minimum to safely represent all symbol values
+ if tableLog < minEncTablelog {
+ tableLog = minEncTablelog
+ }
+ if tableLog > maxEncTableLog {
+ tableLog = maxEncTableLog
+ }
+ s.actualTableLog = tableLog
+}
+
+// validateNorm validates the normalized histogram table.
+func (s *fseEncoder) validateNorm() (err error) {
+ var total int
+ for _, v := range s.norm[:s.symbolLen] {
+ if v >= 0 {
+ total += int(v)
+ } else {
+ total -= int(v)
+ }
+ }
+ defer func() {
+ if err == nil {
+ return
+ }
+ fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
+ for i, v := range s.norm[:s.symbolLen] {
+ fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
+ }
+ }()
+ if total != (1 << s.actualTableLog) {
+ return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
+ }
+ for i, v := range s.count[s.symbolLen:] {
+ if v != 0 {
+ return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
+ }
+ }
+ return nil
+}
+
+// writeCount will write the normalized histogram count to header.
+// This is read back by readNCount.
+func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
+ if s.useRLE {
+ return append(out, s.rleVal), nil
+ }
+ if s.preDefined || s.reUsed {
+ // Never write predefined.
+ return out, nil
+ }
+
+ var (
+ tableLog = s.actualTableLog
+ tableSize = 1 << tableLog
+ previous0 bool
+ charnum uint16
+
+ // maximum header size plus 2 extra bytes for final output if bitCount == 0.
+ maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2
+
+ // Write Table Size
+ bitStream = uint32(tableLog - minEncTablelog)
+ bitCount = uint(4)
+ remaining = int16(tableSize + 1) /* +1 for extra accuracy */
+ threshold = int16(tableSize)
+ nbBits = uint(tableLog + 1)
+ outP = len(out)
+ )
+ if cap(out) < outP+maxHeaderSize {
+ out = append(out, make([]byte, maxHeaderSize*3)...)
+ out = out[:len(out)-maxHeaderSize*3]
+ }
+ out = out[:outP+maxHeaderSize]
+
+ // stops at 1
+ for remaining > 1 {
+ if previous0 {
+ start := charnum
+ for s.norm[charnum] == 0 {
+ charnum++
+ }
+ for charnum >= start+24 {
+ start += 24
+ bitStream += uint32(0xFFFF) << bitCount
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += 2
+ bitStream >>= 16
+ }
+ for charnum >= start+3 {
+ start += 3
+ bitStream += 3 << bitCount
+ bitCount += 2
+ }
+ bitStream += uint32(charnum-start) << bitCount
+ bitCount += 2
+ if bitCount > 16 {
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += 2
+ bitStream >>= 16
+ bitCount -= 16
+ }
+ }
+
+ count := s.norm[charnum]
+ charnum++
+ max := (2*threshold - 1) - remaining
+ if count < 0 {
+ remaining += count
+ } else {
+ remaining -= count
+ }
+ count++ // +1 for extra accuracy
+ if count >= threshold {
+ count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
+ }
+ bitStream += uint32(count) << bitCount
+ bitCount += nbBits
+ if count < max {
+ bitCount--
+ }
+
+ previous0 = count == 1
+ if remaining < 1 {
+ return nil, errors.New("internal error: remaining < 1")
+ }
+ for remaining < threshold {
+ nbBits--
+ threshold >>= 1
+ }
+
+ if bitCount > 16 {
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += 2
+ bitStream >>= 16
+ bitCount -= 16
+ }
+ }
+
+ if outP+2 > len(out) {
+ return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen])
+ }
+ out[outP] = byte(bitStream)
+ out[outP+1] = byte(bitStream >> 8)
+ outP += int((bitCount + 7) / 8)
+
+ if charnum > s.symbolLen {
+ return nil, errors.New("internal error: charnum > s.symbolLen")
+ }
+ return out[:outP], nil
+}
+
+// bitCost returns the approximate cost of a symbol as a fractional value in fixed-point format (accuracyLog fractional bits).
+// note 1 : assumes symbolValue is valid (<= maxSymbolValue)
+// note 2 : if freq[symbolValue]==0, the result is a fake cost of tableLog+1 bits
+func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 {
+ minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16
+ threshold := (minNbBits + 1) << 16
+ if debugAsserts {
+ if !(s.actualTableLog < 16) {
+ panic("!s.actualTableLog < 16")
+ }
+ // ensure enough room for renormalization double shift
+ if !(uint8(accuracyLog) < 31-s.actualTableLog) {
+ panic("!uint8(accuracyLog) < 31-s.actualTableLog")
+ }
+ }
+ tableSize := uint32(1) << s.actualTableLog
+ deltaFromThreshold := threshold - (s.ct.symbolTT[symbolValue].deltaNbBits + tableSize)
+ // linear interpolation (very approximate)
+ normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog
+ bitMultiplier := uint32(1) << accuracyLog
+ if debugAsserts {
+ if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold {
+ panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold")
+ }
+ if normalizedDeltaFromThreshold > bitMultiplier {
+ panic("normalizedDeltaFromThreshold > bitMultiplier")
+ }
+ }
+ return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold
+}
+
+// approxSize returns the approximate cost in bits of encoding the histogram hist using this encoder's table.
+// hist should only extend up to the last non-zero symbol.
+// It returns math.MaxUint32 if the table cannot represent all symbols in hist, or if this is an RLE encoder.
+func (s *fseEncoder) approxSize(hist []uint32) uint32 {
+ if int(s.symbolLen) < len(hist) {
+ // More symbols than we have.
+ return math.MaxUint32
+ }
+ if s.useRLE {
+ // We will never reuse RLE encoders.
+ return math.MaxUint32
+ }
+ const kAccuracyLog = 8
+ badCost := (uint32(s.actualTableLog) + 1) << kAccuracyLog
+ var cost uint32
+ for i, v := range hist {
+ if v == 0 {
+ continue
+ }
+ if s.norm[i] == 0 { // symbol occurs in hist but has no entry in the table
+ return math.MaxUint32
+ }
+ bitCost := s.bitCost(uint8(i), kAccuracyLog)
+ if bitCost > badCost {
+ return math.MaxUint32
+ }
+ cost += v * bitCost
+ }
+ return cost >> kAccuracyLog
+}
+
+// maxHeaderSize returns the maximum header size in bits.
+// This is not exact size, but we want a penalty for new tables anyway.
+func (s *fseEncoder) maxHeaderSize() uint32 {
+ if s.preDefined {
+ return 0
+ }
+ if s.useRLE {
+ return 8
+ }
+ return (((uint32(s.symbolLen) * uint32(s.actualTableLog)) >> 3) + 3) * 8
+}
+
+// cState contains the compression state of a stream.
+type cState struct {
+ bw *bitWriter
+ stateTable []uint16
+ state uint16
+}
+
+// init will initialize the compression state to the first symbol of the stream.
+func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
+ c.bw = bw
+ c.stateTable = ct.stateTable
+ if len(c.stateTable) == 1 {
+ // RLE
+ c.stateTable[0] = uint16(0)
+ c.state = 0
+ return
+ }
+ nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
+ im := int32((nbBitsOut << 16) - first.deltaNbBits)
+ lu := (im >> nbBitsOut) + int32(first.deltaFindState)
+ c.state = c.stateTable[lu]
+}
+
+// encode the output symbol provided and write it to the bitstream.
+func (c *cState) encode(symbolTT symbolTransform) {
+ nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
+ dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
+ c.bw.addBits16NC(c.state, uint8(nbBitsOut))
+ c.state = c.stateTable[dstState]
+}
+
+// flush calls flush32 on the bit writer and then writes the final state using tableLog bits.
+func (c *cState) flush(tableLog uint8) {
+ c.bw.flush32()
+ c.bw.addBits16NC(c.state, tableLog)
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
new file mode 100644
index 0000000..474cb77
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
@@ -0,0 +1,158 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "fmt"
+ "math"
+ "sync"
+)
+
+var (
+ // fsePredef holds the predefined fse decoding tables as defined here:
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
+ // They are built and transformed by initPredefined and are zero values before it has run.
+ fsePredef [3]fseDecoder
+
+ // fsePredefEnc holds the predefined encoders based on the fse tables defined here:
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
+ // They are built by initPredefined (buildCTable + setBits) from the same normalized counts as fsePredef.
+ fsePredefEnc [3]fseEncoder
+
+ // symbolTableX contains the transformations needed for each table type as defined in
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
+ symbolTableX [3][]baseOffset
+
+ // maxTableSymbol is the biggest supported symbol for each table type, indexed by tableIndex.
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets
+ maxTableSymbol = [3]uint8{tableLiteralLengths: maxLiteralLengthSymbol, tableOffsets: maxOffsetLengthSymbol, tableMatchLengths: maxMatchLengthSymbol}
+
+ // bitTables is the bits table for each table type, indexed by tableIndex; the offsets entry is nil.
+ bitTables = [3][]byte{tableLiteralLengths: llBitsTable[:], tableOffsets: nil, tableMatchLengths: mlBitsTable[:]}
+)
+
+type tableIndex uint8 // tableIndex selects one of the three table types below
+
+const (
+ // Indexes into fsePredef, fsePredefEnc, symbolTableX, maxTableSymbol and bitTables.
+ tableLiteralLengths tableIndex = 0
+ tableOffsets tableIndex = 1
+ tableMatchLengths tableIndex = 2
+
+ maxLiteralLengthSymbol = 35 // entry of maxTableSymbol for tableLiteralLengths
+ maxOffsetLengthSymbol = 30 // entry of maxTableSymbol for tableOffsets
+ maxMatchLengthSymbol = 52 // entry of maxTableSymbol for tableMatchLengths
+)
+
+// baseOffset is one entry of a symbol transformation table: a starting value plus a count of additional bits.
+type baseOffset struct {
+ baseLine uint32 // starting value for the symbol
+ addBits uint8 // number of extra bits; fillBase advances the running base by 1<<addBits per entry
+}
+
+// fillBase fills dst so that entry i gets the running base and bits[i] extra bits; the base starts at base and grows by 1<<bits[i] after each entry. It panics on a length mismatch or if a base exceeds math.MaxInt32.
+func fillBase(dst []baseOffset, base uint32, bits ...uint8) {
+ if len(bits) != len(dst) {
+ panic(fmt.Sprintf("len(dst) (%d) != len(bits) (%d)", len(dst), len(bits)))
+ }
+ for i, bit := range bits {
+ if base > math.MaxInt32 { // checked before each entry is stored
+ panic("invalid decoding table, base overflows int32")
+ }
+
+ dst[i] = baseOffset{
+ baseLine: base,
+ addBits: bit,
+ }
+ base += 1 << bit // the next entry starts right after the 1<<bit values covered by this one
+ }
+}
+
+var predef sync.Once // predef ensures the body of initPredefined runs only once
+// initPredefined fills symbolTableX, fsePredef and fsePredefEnc. It panics if any predefined table cannot be built.
+func initPredefined() {
+ predef.Do(func() {
+ // Literals length codes: entries 0-15 map to their own value with no extra bits.
+ tmp := make([]baseOffset, 36)
+ for i := range tmp[:16] {
+ tmp[i] = baseOffset{
+ baseLine: uint32(i),
+ addBits: 0,
+ }
+ }
+ fillBase(tmp[16:], 16, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+ symbolTableX[tableLiteralLengths] = tmp
+
+ // Match length codes: entries 0-31 map to value+3 with no extra bits.
+ tmp = make([]baseOffset, 53)
+ for i := range tmp[:32] {
+ tmp[i] = baseOffset{
+ // The transformation adds 3 to the length.
+ baseLine: uint32(i) + 3,
+ addBits: 0,
+ }
+ }
+ fillBase(tmp[32:], 35, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
+ symbolTableX[tableMatchLengths] = tmp
+
+ // Offset codes: entry 0 stays the zero value, entry 1 is set explicitly and fillBase fills the rest.
+ tmp = make([]baseOffset, maxOffsetBits+1)
+ tmp[1] = baseOffset{
+ baseLine: 1,
+ addBits: 1,
+ }
+ fillBase(tmp[2:], 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)
+ symbolTableX[tableOffsets] = tmp
+
+ // Fill predefined tables and transform them.
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions
+ for i := range fsePredef[:] {
+ f := &fsePredef[i]
+ switch tableIndex(i) {
+ case tableLiteralLengths:
+ // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L243
+ f.actualTableLog = 6
+ copy(f.norm[:], []int16{4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
+ -1, -1, -1, -1})
+ f.symbolLen = 36
+ case tableOffsets:
+ // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L281
+ f.actualTableLog = 5
+ copy(f.norm[:], []int16{
+ 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1})
+ f.symbolLen = 29
+ case tableMatchLengths:
+ // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L304
+ f.actualTableLog = 6
+ copy(f.norm[:], []int16{
+ 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
+ -1, -1, -1, -1, -1})
+ f.symbolLen = 53
+ }
+ if err := f.buildDtable(); err != nil {
+ panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
+ }
+ if err := f.transform(symbolTableX[i]); err != nil {
+ panic(fmt.Errorf("building table %v: %v", tableIndex(i), err))
+ }
+ f.preDefined = true
+
+ // Create the matching encoder from the same normalized counts, symbol count and table log.
+ enc := &fsePredefEnc[i]
+ copy(enc.norm[:], f.norm[:])
+ enc.symbolLen = f.symbolLen
+ enc.actualTableLog = f.actualTableLog
+ if err := enc.buildCTable(); err != nil {
+ panic(fmt.Errorf("building encoding table %v: %v", tableIndex(i), err))
+ }
+ enc.setBits(bitTables[i])
+ enc.preDefined = true
+ }
+ })
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go
new file mode 100644
index 0000000..4a75206
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/hash.go
@@ -0,0 +1,77 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+const ( // multipliers for the hash functions in this file; primeNbytes is used when hashing N bytes
+ prime3bytes = 506832829
+ prime4bytes = 2654435761
+ prime5bytes = 889523592379
+ prime6bytes = 227718039650203
+ prime7bytes = 58295818150454627
+ prime8bytes = 0xcf1bbcdcb7a56463
+)
+
+// hashLen returns a hash of the lowest mls bytes of u to fit in a hash table with hashLog bits.
+// mls must be >=4 and <=8. Any other value will return hash for 4 bytes.
+// hashLog should always be <32.
+// Preferably hashLog and mls should be constants.
+// FIXME: This does NOT get resolved, if 'mls' is constant,
+// so this cannot be used.
+func hashLen(u uint64, hashLog, mls uint8) uint32 {
+ switch mls {
+ case 5:
+ return hash5(u, hashLog)
+ case 6:
+ return hash6(u, hashLog)
+ case 7:
+ return hash7(u, hashLog)
+ case 8:
+ return hash8(u, hashLog)
+ default: // 4, and every value outside 5..8
+ return hash4x64(u, hashLog)
+ }
+}
+
+// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash3(u uint32, h uint8) uint32 {
+ return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31) // shift out the top byte, multiply, keep the upper bits; &31 bounds the shift count
+}
+
+// hash4 returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4(u uint32, h uint8) uint32 {
+ return (u * prime4bytes) >> ((32 - h) & 31) // multiply, keep the upper bits; &31 bounds the shift count
+}
+
+// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <32.
+func hash4x64(u uint64, h uint8) uint32 {
+ return (uint32(u) * prime4bytes) >> ((32 - h) & 31) // uint32(u) drops the upper 4 bytes; otherwise the same computation as hash4
+}
+
+// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64. The result is truncated to 32 bits.
+func hash5(u uint64, h uint8) uint32 {
+ return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63)) // shift out the top 3 bytes, multiply, keep the upper bits
+}
+
+// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64. The result is truncated to 32 bits.
+func hash6(u uint64, h uint8) uint32 {
+ return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) // shift out the top 2 bytes, multiply, keep the upper bits
+}
+
+// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64. The result is truncated to 32 bits.
+func hash7(u uint64, h uint8) uint32 {
+ return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) // shift out the top byte, multiply, keep the upper bits
+}
+
+// hash8 returns the hash of u to fit in a hash table with h bits.
+// Preferably h should be a constant and should always be <64. The result is truncated to 32 bits.
+func hash8(u uint64, h uint8) uint32 {
+ return uint32((u * prime8bytes) >> ((64 - h) & 63)) // all 8 bytes contribute; multiply, keep the upper bits
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
new file mode 100644
index 0000000..f783e32
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -0,0 +1,89 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "github.com/klauspost/compress/huff0"
+)
+
+// history contains the information transferred between blocks.
+type history struct {
+ b []byte // history bytes; grown by append and appendKeep, truncated by reset
+ huffTree *huff0.Scratch // set from dict.litEnc in setDict; reset returns it to huffDecoderPool unless it belongs to the dictionary
+ recentOffsets [3]int // {1, 4, 8} after reset, or the dictionary's offsets after setDict
+ decoders sequenceDecs // cleared by reset; setDict installs the dictionary's decoders
+ windowSize int // append keeps at most this many bytes in b
+ maxSize int
+ error bool // cleared by reset
+ dict *dict // dictionary installed by setDict; nil after reset
+}
+
+// reset will reset the history to initial state of a frame, returning pooled decoders to their pools.
+// The history must already have been initialized to the desired size.
+func (h *history) reset() {
+ h.b = h.b[:0] // keeps the allocated capacity
+ h.error = false
+ h.recentOffsets = [3]int{1, 4, 8}
+ if f := h.decoders.litLengths.fse; f != nil && !f.preDefined { // predefined decoders are never put in the pool
+ fseDecoderPool.Put(f)
+ }
+ if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ }
+ if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
+ fseDecoderPool.Put(f)
+ }
+ h.decoders = sequenceDecs{}
+ if h.huffTree != nil {
+ if h.dict == nil || h.dict.litEnc != h.huffTree { // a tree that is the dictionary's litEnc is not pooled
+ huffDecoderPool.Put(h.huffTree)
+ }
+ }
+ h.huffTree = nil
+ h.dict = nil
+ //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
+}
+// setDict installs dict's decoders, offsets and literal tree into the history. A nil dict is ignored.
+func (h *history) setDict(dict *dict) {
+ if dict == nil {
+ return
+ }
+ h.dict = dict
+ h.decoders.litLengths = dict.llDec
+ h.decoders.offsets = dict.ofDec
+ h.decoders.matchLengths = dict.mlDec
+ h.recentOffsets = dict.offsets
+ h.huffTree = dict.litEnc // reset compares against dict.litEnc before returning the tree to the pool
+}
+
+// append adds b to the history, discarding the oldest bytes when b does not fit.
+// This function will make sure there is space for it,
+// if the buffer has been allocated with enough extra space.
+func (h *history) append(b []byte) {
+ if len(b) >= h.windowSize {
+ // Discard all history by simply overwriting with the last windowSize bytes of b.
+ h.b = h.b[:h.windowSize]
+ copy(h.b, b[len(b)-h.windowSize:])
+ return
+ }
+
+ // If there is space, append it.
+ if len(b) < cap(h.b)-len(h.b) {
+ h.b = append(h.b, b...)
+ return
+ }
+
+ // Move data down so we only have window size left.
+ // We know we have less than window size in b at this point.
+ discard := len(b) + len(h.b) - h.windowSize // number of oldest bytes to drop
+ copy(h.b, h.b[discard:])
+ h.b = h.b[:h.windowSize]
+ copy(h.b[h.windowSize-len(b):], b) // b becomes the tail of the window
+}
+
+// appendKeep appends b to the history without ever discarding anything.
+func (h *history) appendKeep(b []byte) {
+ h.b = append(h.b, b...) // may grow the buffer beyond its current capacity
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt
new file mode 100644
index 0000000..24b5306
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt
@@ -0,0 +1,22 @@
+Copyright (c) 2016 Caleb Spare
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
new file mode 100644
index 0000000..69aa3bb
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md
@@ -0,0 +1,58 @@
+# xxhash
+
+VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
+
+
+[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
+[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
+
+xxhash is a Go implementation of the 64-bit
+[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
+high-quality hashing algorithm that is much faster than anything in the Go
+standard library.
+
+This package provides a straightforward API:
+
+```
+func Sum64(b []byte) uint64
+func Sum64String(s string) uint64
+type Digest struct{ ... }
+ func New() *Digest
+```
+
+The `Digest` type implements hash.Hash64. Its key methods are:
+
+```
+func (*Digest) Write([]byte) (int, error)
+func (*Digest) WriteString(string) (int, error)
+func (*Digest) Sum64() uint64
+```
+
+This package provides a fast pure-Go implementation and an even faster
+assembly implementation for amd64.
+
+## Benchmarks
+
+Here are some quick benchmarks comparing the pure-Go and assembly
+implementations of Sum64.
+
+| input size | purego | asm |
+| --- | --- | --- |
+| 5 B | 979.66 MB/s | 1291.17 MB/s |
+| 100 B | 7475.26 MB/s | 7973.40 MB/s |
+| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
+| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
+
+These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
+the following commands under Go 1.11.2:
+
+```
+$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
+$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
+```
+
+## Projects using this package
+
+- [InfluxDB](https://github.com/influxdata/influxdb)
+- [Prometheus](https://github.com/prometheus/prometheus)
+- [FreeCache](https://github.com/coocood/freecache)
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
new file mode 100644
index 0000000..426b9ca
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go
@@ -0,0 +1,238 @@
+// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
+// at http://cyan4973.github.io/xxHash/.
+// THIS IS VENDORED: Go to github.com/cespare/xxhash for original package.
+
+package xxhash
+
+import (
+ "encoding/binary"
+ "errors"
+ "math/bits"
+)
+
+const ( // the five primes used by the hash computation in this package
+ prime1 uint64 = 11400714785074694791
+ prime2 uint64 = 14029467366897019727
+ prime3 uint64 = 1609587929392839161
+ prime4 uint64 = 9650029242287828579
+ prime5 uint64 = 2870177450012600261
+)
+
+// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
+// possible in the Go code is worth a small (but measurable) performance boost
+// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
+// convenience in the Go code in a few places where we need to intentionally
+// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
+// result overflows a uint64).
+var ( // variable copies of the constants above
+ prime1v = prime1
+ prime2v = prime2
+ prime3v = prime3
+ prime4v = prime4
+ prime5v = prime5
+)
+
+// Digest implements hash.Hash64.
+type Digest struct {
+ v1 uint64 // v1..v4 are the four accumulators updated once per 32-byte block
+ v2 uint64
+ v3 uint64
+ v4 uint64
+ total uint64 // total number of bytes passed to Write
+ mem [32]byte // buffered bytes of a block that is not yet complete
+ n int // how much of mem is used
+}
+
+// New creates a new Digest that computes the 64-bit xxHash algorithm.
+func New() *Digest {
+ var d Digest
+ d.Reset() // the zero value is not a valid initial state; Reset sets the accumulators
+ return &d
+}
+
+// Reset clears the Digest's state so that it can be reused.
+func (d *Digest) Reset() {
+ d.v1 = prime1v + prime2 // uses the variable copy so the wrapping sum is not a constant expression
+ d.v2 = prime2
+ d.v3 = 0
+ d.v4 = -prime1v // unsigned negation, i.e. 0 - prime1 modulo 2^64
+ d.total = 0
+ d.n = 0 // mem is not cleared; only the first n bytes are ever read
+}
+
+// Size returns the number of bytes Sum appends, which is always 8.
+func (d *Digest) Size() int { return 8 }
+
+// BlockSize returns the size of the blocks the hash processes, which is always 32 bytes.
+func (d *Digest) BlockSize() int { return 32 }
+
+// Write adds more data to d. It always returns len(b), nil.
+func (d *Digest) Write(b []byte) (n int, err error) {
+ n = len(b)
+ d.total += uint64(n)
+
+ if d.n+n < 32 {
+ // This new data doesn't even fill the current block; buffer it and wait for more.
+ copy(d.mem[d.n:], b)
+ d.n += n
+ return
+ }
+
+ if d.n > 0 {
+ // Finish off the partial block: top up mem to 32 bytes and run one round per accumulator.
+ copy(d.mem[d.n:], b)
+ d.v1 = round(d.v1, u64(d.mem[0:8]))
+ d.v2 = round(d.v2, u64(d.mem[8:16]))
+ d.v3 = round(d.v3, u64(d.mem[16:24]))
+ d.v4 = round(d.v4, u64(d.mem[24:32]))
+ b = b[32-d.n:] // skip the bytes that were used to complete the block
+ d.n = 0
+ }
+
+ if len(b) >= 32 {
+ // One or more full blocks left; writeBlocks reports how many bytes it consumed.
+ nw := writeBlocks(d, b)
+ b = b[nw:]
+ }
+
+ // Store any remaining partial block.
+ copy(d.mem[:], b)
+ d.n = len(b)
+
+ return
+}
+
+// Sum appends the current hash to b, most significant byte first, and returns the resulting slice.
+func (d *Digest) Sum(b []byte) []byte {
+ s := d.Sum64() // Sum64 does not modify d
+ return append(
+ b,
+ byte(s>>56),
+ byte(s>>48),
+ byte(s>>40),
+ byte(s>>32),
+ byte(s>>24),
+ byte(s>>16),
+ byte(s>>8),
+ byte(s),
+ )
+}
+
+// Sum64 returns the current hash. It only reads d, so more data can be written afterwards.
+func (d *Digest) Sum64() uint64 {
+ var h uint64
+
+ if d.total >= 32 {
+ v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
+ h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
+ h = mergeRound(h, v1)
+ h = mergeRound(h, v2)
+ h = mergeRound(h, v3)
+ h = mergeRound(h, v4)
+ } else {
+ h = d.v3 + prime5 // Write has not completed a block yet, so v3 still has the value Reset gave it
+ }
+
+ h += d.total
+
+ i, end := 0, d.n // the remaining steps consume the buffered bytes in mem[:n]
+ for ; i+8 <= end; i += 8 {
+ k1 := round(0, u64(d.mem[i:i+8]))
+ h ^= k1
+ h = rol27(h)*prime1 + prime4
+ }
+ if i+4 <= end {
+ h ^= uint64(u32(d.mem[i:i+4])) * prime1
+ h = rol23(h)*prime2 + prime3
+ i += 4
+ }
+ for i < end {
+ h ^= uint64(d.mem[i]) * prime5
+ h = rol11(h) * prime1
+ i++
+ }
+
+ h ^= h >> 33 // final mixing of the accumulated hash
+ h *= prime2
+ h ^= h >> 29
+ h *= prime3
+ h ^= h >> 32
+
+ return h
+}
+
+const (
+ magic = "xxh\x06" // identifier at the start of a marshaled Digest
+ marshaledSize = len(magic) + 8*5 + 32 // magic, five uint64 fields (v1..v4, total), and the 32-byte mem
+)
+
+// MarshalBinary implements the encoding.BinaryMarshaler interface. The result is always marshaledSize bytes and the error is always nil.
+func (d *Digest) MarshalBinary() ([]byte, error) {
+ b := make([]byte, 0, marshaledSize)
+ b = append(b, magic...)
+ b = appendUint64(b, d.v1)
+ b = appendUint64(b, d.v2)
+ b = appendUint64(b, d.v3)
+ b = appendUint64(b, d.v4)
+ b = appendUint64(b, d.total)
+ b = append(b, d.mem[:d.n]...) // only the used part of mem is copied
+ b = b[:len(b)+len(d.mem)-d.n] // extend to marshaledSize; the unused part of mem stays as the zero bytes from make
+ return b, nil
+}
+
+// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. It rejects input that lacks the magic prefix or is not exactly marshaledSize bytes.
+func (d *Digest) UnmarshalBinary(b []byte) error {
+ if len(b) < len(magic) || string(b[:len(magic)]) != magic {
+ return errors.New("xxhash: invalid hash state identifier")
+ }
+ if len(b) != marshaledSize {
+ return errors.New("xxhash: invalid hash state size")
+ }
+ b = b[len(magic):]
+ b, d.v1 = consumeUint64(b) // fields are read in the order MarshalBinary wrote them
+ b, d.v2 = consumeUint64(b)
+ b, d.v3 = consumeUint64(b)
+ b, d.v4 = consumeUint64(b)
+ b, d.total = consumeUint64(b)
+ copy(d.mem[:], b)
+ b = b[len(d.mem):] // b is not read after this point
+ d.n = int(d.total % uint64(len(d.mem))) // n is not stored; it is recomputed as total modulo the block size
+ return nil
+}
+// appendUint64 appends the 8-byte little-endian encoding of x to b and returns the extended slice.
+func appendUint64(b []byte, x uint64) []byte {
+ var a [8]byte
+ binary.LittleEndian.PutUint64(a[:], x)
+ return append(b, a[:]...)
+}
+// consumeUint64 decodes a little-endian uint64 from the first 8 bytes of b and returns the rest of b along with the value.
+func consumeUint64(b []byte) ([]byte, uint64) {
+ x := u64(b)
+ return b[8:], x
+}
+// u64 and u32 decode a little-endian value from the start of b.
+func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
+func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
+// round mixes input into acc and returns rol31(acc + input*prime2) * prime1.
+func round(acc, input uint64) uint64 {
+ acc += input * prime2
+ acc = rol31(acc)
+ acc *= prime1
+ return acc
+}
+// mergeRound folds val into acc and returns (acc ^ round(0, val))*prime1 + prime4.
+func mergeRound(acc, val uint64) uint64 {
+ val = round(0, val)
+ acc ^= val
+ acc = acc*prime1 + prime4
+ return acc
+}
+// rolN returns x rotated left by N bits.
+func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) }
+func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) }
+func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
+func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
+func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
+func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
+func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
+func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
new file mode 100644
index 0000000..35318d7
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go
@@ -0,0 +1,13 @@
+// +build !appengine
+// +build gc
+// +build !purego
+
+package xxhash
+
+// Sum64 computes the 64-bit xxHash digest of b. This declaration has no Go body; it is implemented in assembly.
+//
+//go:noescape
+func Sum64(b []byte) uint64
+// writeBlocks is implemented in assembly; it updates d's accumulators from b and returns the number of bytes consumed.
+//go:noescape
+func writeBlocks(*Digest, []byte) int
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
new file mode 100644
index 0000000..2c9c535
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@@ -0,0 +1,215 @@
+// +build !appengine
+// +build gc
+// +build !purego
+
+#include "textflag.h"
+
+// Register allocation:
+// AX h
+// CX pointer to advance through b
+// DX n
+// BX loop end
+// R8 v1, k1
+// R9 v2
+// R10 v3
+// R11 v4
+// R12 tmp
+// R13 prime1v
+// R14 prime2v
+// R15 prime4v
+
+// round reads 8 bytes from the buffer pointer in CX, advances CX by 8 and mixes the value into register r.
+// It assumes that R13 has prime1v and R14 has prime2v. R12 is overwritten.
+#define round(r) \
+ MOVQ (CX), R12 \
+ ADDQ $8, CX \
+ IMULQ R14, R12 \
+ ADDQ R12, r \
+ ROLQ $31, r \
+ IMULQ R13, r
+
+// mergeRound applies a merge round on the two registers acc and val. The result is left in acc and val is overwritten.
+// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v.
+#define mergeRound(acc, val) \
+ IMULQ R14, val \
+ ROLQ $31, val \
+ IMULQ R13, val \
+ XORQ val, acc \
+ IMULQ R13, acc \
+ ADDQ R15, acc
+
+// func Sum64(b []byte) uint64
+TEXT ·Sum64(SB), NOSPLIT, $0-32
+ // Load fixed primes.
+ MOVQ ·prime1v(SB), R13
+ MOVQ ·prime2v(SB), R14
+ MOVQ ·prime4v(SB), R15
+
+ // Load slice.
+ MOVQ b_base+0(FP), CX
+ MOVQ b_len+8(FP), DX
+ LEAQ (CX)(DX*1), BX // BX = address one past the end of b
+
+ // The first loop limit will be len(b)-32.
+ SUBQ $32, BX
+
+ // Check whether we have at least one block.
+ CMPQ DX, $32
+ JLT noBlocks
+
+ // Set up initial state (v1, v2, v3, v4).
+ MOVQ R13, R8
+ ADDQ R14, R8 // v1 = prime1 + prime2
+ MOVQ R14, R9 // v2 = prime2
+ XORQ R10, R10 // v3 = 0
+ XORQ R11, R11
+ SUBQ R13, R11 // v4 = -prime1
+
+ // Loop until CX > BX. Each iteration consumes one 32-byte block.
+blockLoop:
+ round(R8)
+ round(R9)
+ round(R10)
+ round(R11)
+
+ CMPQ CX, BX
+ JLE blockLoop
+
+ MOVQ R8, AX // h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4), built up in AX
+ ROLQ $1, AX
+ MOVQ R9, R12
+ ROLQ $7, R12
+ ADDQ R12, AX
+ MOVQ R10, R12
+ ROLQ $12, R12
+ ADDQ R12, AX
+ MOVQ R11, R12
+ ROLQ $18, R12
+ ADDQ R12, AX
+
+ mergeRound(AX, R8)
+ mergeRound(AX, R9)
+ mergeRound(AX, R10)
+ mergeRound(AX, R11)
+
+ JMP afterBlocks
+
+noBlocks:
+ MOVQ ·prime5v(SB), AX // fewer than 32 bytes: h starts as prime5
+
+afterBlocks:
+ ADDQ DX, AX // h += len(b)
+
+ // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8.
+ ADDQ $24, BX
+
+ CMPQ CX, BX
+ JG fourByte
+
+wordLoop:
+ // Calculate k1 from the next 8 bytes.
+ MOVQ (CX), R8
+ ADDQ $8, CX
+ IMULQ R14, R8
+ ROLQ $31, R8
+ IMULQ R13, R8
+
+ XORQ R8, AX
+ ROLQ $27, AX
+ IMULQ R13, AX
+ ADDQ R15, AX
+
+ CMPQ CX, BX
+ JLE wordLoop
+
+fourByte:
+ ADDQ $4, BX // BX = end of b minus 4
+ CMPQ CX, BX
+ JG singles
+
+ MOVL (CX), R8 // one 4-byte word
+ ADDQ $4, CX
+ IMULQ R13, R8
+ XORQ R8, AX
+
+ ROLQ $23, AX
+ IMULQ R14, AX
+ ADDQ ·prime3v(SB), AX
+
+singles:
+ ADDQ $4, BX // BX = end of b
+ CMPQ CX, BX
+ JGE finalize
+
+singlesLoop:
+ MOVBQZX (CX), R12 // one byte at a time, zero-extended
+ ADDQ $1, CX
+ IMULQ ·prime5v(SB), R12
+ XORQ R12, AX
+
+ ROLQ $11, AX
+ IMULQ R13, AX
+
+ CMPQ CX, BX
+ JL singlesLoop
+
+finalize:
+ MOVQ AX, R12 // h ^= h >> 33
+ SHRQ $33, R12
+ XORQ R12, AX
+ IMULQ R14, AX // h *= prime2
+ MOVQ AX, R12 // h ^= h >> 29
+ SHRQ $29, R12
+ XORQ R12, AX
+ IMULQ ·prime3v(SB), AX // h *= prime3
+ MOVQ AX, R12 // h ^= h >> 32
+ SHRQ $32, R12
+ XORQ R12, AX
+
+ MOVQ AX, ret+24(FP)
+ RET
+
+// writeBlocks uses the same registers as above except that it uses AX to store
+// the d pointer. It processes every full 32-byte block of b and returns the number of bytes consumed.
+
+// func writeBlocks(d *Digest, b []byte) int
+TEXT ·writeBlocks(SB), NOSPLIT, $0-40
+ // Load fixed primes needed for round.
+ MOVQ ·prime1v(SB), R13
+ MOVQ ·prime2v(SB), R14
+
+ // Load slice.
+ MOVQ arg1_base+8(FP), CX
+ MOVQ arg1_len+16(FP), DX
+ LEAQ (CX)(DX*1), BX
+ SUBQ $32, BX // BX = start of the last position where a full block can begin
+
+ // Load vN from d.
+ MOVQ arg+0(FP), AX
+ MOVQ 0(AX), R8 // v1
+ MOVQ 8(AX), R9 // v2
+ MOVQ 16(AX), R10 // v3
+ MOVQ 24(AX), R11 // v4
+
+ // We don't need to check the loop condition here; this function is
+ // always called with at least one block of data to process.
+blockLoop:
+ round(R8)
+ round(R9)
+ round(R10)
+ round(R11)
+
+ CMPQ CX, BX
+ JLE blockLoop
+
+ // Copy vN back to d.
+ MOVQ R8, 0(AX)
+ MOVQ R9, 8(AX)
+ MOVQ R10, 16(AX)
+ MOVQ R11, 24(AX)
+
+ // The number of bytes written is CX minus the old base pointer.
+ SUBQ arg1_base+8(FP), CX
+ MOVQ CX, ret+32(FP)
+
+ RET
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
new file mode 100644
index 0000000..4a5a821
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go
@@ -0,0 +1,76 @@
+// +build !amd64 appengine !gc purego
+
+package xxhash
+
+// Sum64 computes the 64-bit xxHash digest of b. This is the pure Go implementation.
+func Sum64(b []byte) uint64 {
+ // A simpler version would be
+ // d := New()
+ // d.Write(b)
+ // return d.Sum64()
+ // but this is faster, particularly for small inputs.
+
+ n := len(b) // saved because b is resliced below
+ var h uint64
+
+ if n >= 32 {
+ v1 := prime1v + prime2
+ v2 := prime2
+ v3 := uint64(0)
+ v4 := -prime1v
+ for len(b) >= 32 { // one iteration per full 32-byte block
+ v1 = round(v1, u64(b[0:8:len(b)]))
+ v2 = round(v2, u64(b[8:16:len(b)]))
+ v3 = round(v3, u64(b[16:24:len(b)]))
+ v4 = round(v4, u64(b[24:32:len(b)]))
+ b = b[32:len(b):len(b)]
+ }
+ h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
+ h = mergeRound(h, v1)
+ h = mergeRound(h, v2)
+ h = mergeRound(h, v3)
+ h = mergeRound(h, v4)
+ } else {
+ h = prime5
+ }
+
+ h += uint64(n)
+
+ i, end := 0, len(b) // b now holds only the bytes after the last full block
+ for ; i+8 <= end; i += 8 {
+ k1 := round(0, u64(b[i:i+8:len(b)]))
+ h ^= k1
+ h = rol27(h)*prime1 + prime4
+ }
+ if i+4 <= end {
+ h ^= uint64(u32(b[i:i+4:len(b)])) * prime1
+ h = rol23(h)*prime2 + prime3
+ i += 4
+ }
+ for ; i < end; i++ {
+ h ^= uint64(b[i]) * prime5
+ h = rol11(h) * prime1
+ }
+
+ h ^= h >> 33 // final mixing of the accumulated hash
+ h *= prime2
+ h ^= h >> 29
+ h *= prime3
+ h ^= h >> 32
+
+ return h
+}
+// writeBlocks runs the round function over every full 32-byte block of b, stores the accumulators back into d and returns the number of bytes consumed.
+func writeBlocks(d *Digest, b []byte) int {
+ v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
+ n := len(b)
+ for len(b) >= 32 {
+ v1 = round(v1, u64(b[0:8:len(b)]))
+ v2 = round(v2, u64(b[8:16:len(b)]))
+ v3 = round(v3, u64(b[16:24:len(b)]))
+ v4 = round(v4, u64(b[24:32:len(b)]))
+ b = b[32:len(b):len(b)]
+ }
+ d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4
+ return n - len(b) // a multiple of 32; any tail shorter than a block is left for the caller
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go
new file mode 100644
index 0000000..6f3b0cb
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go
@@ -0,0 +1,11 @@
+package xxhash
+
+// Sum64String computes the 64-bit xxHash digest of s.
+func Sum64String(s string) uint64 {
+ return Sum64([]byte(s)) // plain conversion; no unsafe tricks in this file
+}
+
+// WriteString adds more data to d. It always returns len(s), nil.
+func (d *Digest) WriteString(s string) (n int, err error) {
+ return d.Write([]byte(s)) // delegates to Write after converting s to bytes
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
new file mode 100644
index 0000000..1dd39e6
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -0,0 +1,492 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "errors"
+ "fmt"
+ "io"
+)
+
+type seq struct {
+	litLen   uint32 // number of literal bytes emitted before the match
+	matchLen uint32 // match length minus zstdMinMatch (String adds it back)
+	offset   uint32 // 0 is invalid, 1-3 denote repeat offsets, larger values are the real offset plus 3
+
+	// Codes are stored here for the encoder
+	// so they only have to be looked up once.
+	llCode, mlCode, ofCode uint8
+}
+
+func (s seq) String() string { // debug rendering: offset 0 is invalid, 1-3 are repeats, others are stored +3
+	switch ml := s.matchLen + zstdMinMatch; {
+	case s.offset == 0:
+		return fmt.Sprint("litLen:", s.litLen, ", matchLen:", ml, ", offset: INVALID (0)")
+	case s.offset <= 3:
+		return fmt.Sprint("litLen:", s.litLen, ", matchLen:", ml, ", offset:", s.offset, " (repeat)")
+	}
+	return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)")
+}
+
+type seqCompMode uint8 // NOTE(review): presumably the per-table compression mode of the sequences section — confirm at use sites
+
+const (
+	compModePredefined seqCompMode = iota // 0
+	compModeRLE                           // 1
+	compModeFSE                           // 2
+	compModeRepeat                        // 3
+)
+
+type sequenceDec struct {
+	// decoder keeps track of the current state and updates it from the bitstream.
+	fse    *fseDecoder // decoding table; init fails while this is nil
+	state  fseState    // current state, seeded by init from the first 1<<actualTableLog entries of fse.dt
+	repeat bool        // when set, mergeHistory keeps the table already held by the history
+}
+
+// init seeds s.state from br using the assigned table; it fails when no FSE decoder has been set.
+func (s *sequenceDec) init(br *bitReader) error {
+	if dec := s.fse; dec != nil {
+		s.state.init(br, dec.actualTableLog, dec.dt[:1<<dec.actualTableLog])
+		return nil
+	}
+	return errors.New("sequence decoder not defined")
+}
+
+// sequenceDecs contains all 3 sequence decoders and their state.
+type sequenceDecs struct {
+	litLengths   sequenceDec
+	offsets      sequenceDec
+	matchLengths sequenceDec
+	prevOffset   [3]int // recent offsets consulted by repeat codes; loaded from history.recentOffsets
+	hist         []byte // history.b as captured by initialize
+	dict         []byte // dictionary content; nil when the history carries no dictionary
+	literals     []byte // literals still to be emitted; decode consumes them from the front
+	out          []byte // output buffer that decode appends to
+	windowSize   int    // window size taken from the history; offsets beyond it are only resolved via dict
+	maxBits      uint8  // sum of the three decoders' fse.maxBits
+}
+
+// initialize all 3 decoders from the stream input.
+func (s *sequenceDecs) initialize(br *bitReader, hist *history, literals, out []byte) error {
+ if err := s.litLengths.init(br); err != nil {
+ return errors.New("litLengths:" + err.Error())
+ }
+ if err := s.offsets.init(br); err != nil {
+ return errors.New("offsets:" + err.Error())
+ }
+ if err := s.matchLengths.init(br); err != nil {
+ return errors.New("matchLengths:" + err.Error())
+ }
+ s.literals = literals
+ s.hist = hist.b
+ s.prevOffset = hist.recentOffsets
+ s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
+ s.windowSize = hist.windowSize
+ s.out = out
+ s.dict = nil
+ if hist.dict != nil {
+ s.dict = hist.dict.content
+ }
+ return nil
+}
+
+// decode runs seqs sequences from br, appending to s.out; hist is used only for its length when validating offsets.
+func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
+	startSize := len(s.out)
+	// Grab full sizes tables, to avoid bounds checks.
+	llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
+	llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
+
+	for i := seqs - 1; i >= 0; i-- {
+		if br.overread() {
+			printf("reading sequence %d, exceeded available data\n", seqs-i)
+			return io.ErrUnexpectedEOF
+		}
+		var ll, mo, ml int
+		if br.off > 4+((maxOffsetBits+16+16)>>3) {
+			// inlined function:
+			// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
+
+			// Final will not read from stream.
+			var llB, mlB, moB uint8
+			ll, llB = llState.final()
+			ml, mlB = mlState.final()
+			mo, moB = ofState.final()
+
+			// extra bits are stored in reverse order.
+			br.fillFast()
+			mo += br.getBits(moB)
+			if s.maxBits > 32 {
+				br.fillFast()
+			}
+			ml += br.getBits(mlB)
+			ll += br.getBits(llB)
+
+			if moB > 1 {
+				s.prevOffset[2] = s.prevOffset[1]
+				s.prevOffset[1] = s.prevOffset[0]
+				s.prevOffset[0] = mo
+			} else {
+				// mo = s.adjustOffset(mo, ll, moB)
+				// Inlined for rather big speedup
+				if ll == 0 {
+					// There is an exception though, when current sequence's literals_length = 0.
+					// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+					// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+					mo++
+				}
+
+				if mo == 0 {
+					mo = s.prevOffset[0]
+				} else {
+					var temp int
+					if mo == 3 {
+						temp = s.prevOffset[0] - 1
+					} else {
+						temp = s.prevOffset[mo]
+					}
+
+					if temp == 0 {
+						// 0 is not valid; input is corrupted; force offset to 1
+						println("temp was 0")
+						temp = 1
+					}
+
+					if mo != 1 {
+						s.prevOffset[2] = s.prevOffset[1]
+					}
+					s.prevOffset[1] = s.prevOffset[0]
+					s.prevOffset[0] = temp
+					mo = temp
+				}
+			}
+			br.fillFast()
+		} else {
+			ll, mo, ml = s.next(br, llState, mlState, ofState)
+			br.fill()
+		}
+
+		if debugSequences {
+			println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
+		}
+
+		if ll > len(s.literals) {
+			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
+		}
+		size := ll + ml + len(s.out)
+		if size-startSize > maxBlockSize {
+			return fmt.Errorf("output (%d) bigger than max block size", size)
+		}
+		if size > cap(s.out) {
+			// Not enough capacity. This can happen under high volume block streaming conditions,
+			// or when the destination slice is too small for sync operations.
+			// Over-allocating here can create a large amount of GC pressure, so we try to keep
+			// the growth as contained as possible.
+			used := len(s.out) - startSize
+			addBytes := 256 + ll + ml + used>>2
+			// Clamp to max block size.
+			if used+addBytes > maxBlockSize {
+				addBytes = maxBlockSize - used
+			}
+			s.out = append(s.out, make([]byte, addBytes)...)
+			s.out = s.out[:len(s.out)-addBytes] // keep the extra capacity, restore the length
+		}
+		if ml > maxMatchLen {
+			return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
+		}
+
+		// Add literals
+		s.out = append(s.out, s.literals[:ll]...)
+		s.literals = s.literals[ll:]
+		out := s.out
+
+		if mo == 0 && ml > 0 {
+			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+		}
+
+		if mo > len(s.out)+len(hist) || mo > s.windowSize {
+			if len(s.dict) == 0 {
+				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+			}
+
+			// we may be in dictionary.
+			dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
+			if dictO < 0 || dictO >= len(s.dict) {
+				return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+			}
+			end := dictO + ml
+			if end > len(s.dict) {
+				out = append(out, s.dict[dictO:]...)
+				mo -= len(s.dict) - dictO
+				ml -= len(s.dict) - dictO
+			} else {
+				out = append(out, s.dict[dictO:end]...)
+				mo = 0
+				ml = 0
+			}
+		}
+
+		// Copy from history.
+		// TODO: Blocks without history could be made to ignore this completely.
+		if v := mo - len(s.out); v > 0 {
+			// v is the start position in history from end.
+			start := len(s.hist) - v
+			if ml > v {
+				// Some goes into current block.
+				// Copy remainder of history
+				out = append(out, s.hist[start:]...)
+				mo -= v
+				ml -= v
+			} else {
+				out = append(out, s.hist[start:start+ml]...)
+				ml = 0
+			}
+		}
+		// We must be in current buffer now
+		if ml > 0 {
+			start := len(s.out) - mo
+			if ml <= len(s.out)-start {
+				// No overlap
+				out = append(out, s.out[start:start+ml]...)
+			} else {
+				// Overlapping copy
+				// Extend destination slice and copy one byte at the time.
+				out = out[:len(out)+ml]
+				src := out[start : start+ml]
+				// Destination is the space we just added.
+				dst := out[len(out)-ml:]
+				dst = dst[:len(src)]
+				for i := range src {
+					dst[i] = src[i]
+				}
+			}
+		}
+		s.out = out
+		if i == 0 {
+			// This is the last sequence, so we shouldn't update state.
+			break
+		}
+
+		// Manually inlined, ~ 5-20% faster
+		// Update all 3 states at once. Approx 20% faster.
+		nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
+		if nBits == 0 {
+			llState = llTable[llState.newState()&maxTableMask]
+			mlState = mlTable[mlState.newState()&maxTableMask]
+			ofState = ofTable[ofState.newState()&maxTableMask]
+		} else {
+			bits := br.getBitsFast(nBits)
+			lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
+			llState = llTable[(llState.newState()+lowBits)&maxTableMask]
+
+			lowBits = uint16(bits >> (ofState.nbBits() & 31))
+			lowBits &= bitMask[mlState.nbBits()&15]
+			mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
+
+			lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
+			ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
+		}
+	}
+
+	// Add final literals
+	s.out = append(s.out, s.literals...)
+	return nil
+}
+
+// update advances all three decoder states by reading from br, in the order
+// literal lengths, match lengths, offsets.
+// At least 27 bits must be available.
+// The per-call limits below are the original author's notes.
+func (s *sequenceDecs) update(br *bitReader) {
+	s.litLengths.state.next(br)   // Max 8 bits
+	s.matchLengths.state.next(br) // Max 9 bits
+	s.offsets.state.next(br)      // Max 8 bits
+}
+
+var bitMask [16]uint16 // bitMask[n] has the n lowest bits set (n = 0..15); filled in by init below
+
+func init() {
+	for n := uint(0); n < uint(len(bitMask)); n++ {
+		bitMask[n] = uint16(1)<<n - 1
+	}
+}
+
+// updateAlt advances all three states with a single bit read; at least 27 bits must be available.
+func (s *sequenceDecs) updateAlt(br *bitReader) {
+	llS, mlS, ofS := &s.litLengths.state, &s.matchLengths.state, &s.offsets.state
+	ll, ml, of := llS.state, mlS.state, ofS.state
+	// Update all 3 states at once. Approx 20% faster.
+	total := ll.nbBits() + ml.nbBits() + of.nbBits()
+	if total == 0 {
+		llS.state = llS.dt[ll.newState()]
+		mlS.state = mlS.dt[ml.newState()]
+		ofS.state = ofS.dt[of.newState()]
+		return
+	}
+	// The combined read is laid out as [ll bits | ml bits | of bits], with the offset bits lowest.
+	bits := br.getBitsFast(total)
+	llLow := uint16(bits >> ((of.nbBits() + ml.nbBits()) & 31))
+	llS.state = llS.dt[ll.newState()+llLow]
+
+	mlLow := uint16(bits>>(of.nbBits()&31)) & bitMask[ml.nbBits()&15]
+	mlS.state = mlS.dt[ml.newState()+mlLow]
+
+	ofLow := uint16(bits) & bitMask[of.nbBits()&15]
+	ofS.state = ofS.dt[of.newState()+ofLow]
+}
+
+// nextFast decodes one sequence with unchecked fills; it needs at least 4 unused bytes left on the stream when done.
+func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
+	// Final will not read from stream.
+	ll, llB := llState.final()
+	ml, mlB := mlState.final()
+	mo, moB := ofState.final()
+
+	// extra bits are stored in reverse order.
+	br.fillFast()
+	mo += br.getBits(moB)
+	if s.maxBits > 32 {
+		br.fillFast()
+	}
+	ml += br.getBits(mlB)
+	ll += br.getBits(llB)
+
+	if moB > 1 {
+		s.prevOffset[2] = s.prevOffset[1]
+		s.prevOffset[1] = s.prevOffset[0]
+		s.prevOffset[0] = mo
+		return
+	}
+	// mo = s.adjustOffset(mo, ll, moB)
+	// Inlined for rather big speedup
+	if ll == 0 {
+		// There is an exception though, when current sequence's literals_length = 0.
+		// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+		// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+		mo++
+	}
+
+	if mo == 0 {
+		mo = s.prevOffset[0]
+		return
+	}
+	var temp int
+	if mo == 3 {
+		temp = s.prevOffset[0] - 1 // 3 has no slot in prevOffset: most recent offset minus one
+	} else {
+		temp = s.prevOffset[mo]
+	}
+
+	if temp == 0 {
+		// 0 is not valid; input is corrupted; force offset to 1
+		println("temp was 0")
+		temp = 1
+	}
+
+	if mo != 1 {
+		s.prevOffset[2] = s.prevOffset[1]
+	}
+	s.prevOffset[1] = s.prevOffset[0]
+	s.prevOffset[0] = temp
+	mo = temp
+	return
+}
+
+// next decodes one sequence from the given states, refilling with br.fill rather than br.fillFast.
+// It returns the literal length, the match offset after repeat-offset resolution (adjustOffset also
+// updates s.prevOffset) and the match length as produced by the tables.
+func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
+	// Final will not read from stream.
+	ll, llB := llState.final()
+	ml, mlB := mlState.final()
+	mo, moB := ofState.final()
+
+	// extra bits are stored in reverse order.
+	br.fill()
+	mo += br.getBits(moB)
+	if s.maxBits > 32 {
+		// More than 32 extra bits are possible in total, so refill before
+		// matchlength+literal length, max 32 bits
+		br.fill()
+	}
+	ml += br.getBits(mlB)
+	ll += br.getBits(llB)
+
+	mo = s.adjustOffset(mo, ll, moB)
+	return ll, mo, ml
+}
+
+func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int {
+ if offsetB > 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = offset
+ return offset
+ }
+
+ if litLen == 0 {
+ // There is an exception though, when current sequence's literals_length = 0.
+ // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+ // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+ offset++
+ }
+
+ if offset == 0 {
+ return s.prevOffset[0]
+ }
+ var temp int
+ if offset == 3 {
+ temp = s.prevOffset[0] - 1
+ } else {
+ temp = s.prevOffset[offset]
+ }
+
+ if temp == 0 {
+ // 0 is not valid; input is corrupted; force offset to 1
+ println("temp was 0")
+ temp = 1
+ }
+
+ if offset != 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ }
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = temp
+ return temp
+}
+
+// mergeHistory will merge history.
+func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) {
+ for i := uint(0); i < 3; i++ {
+ var sNew, sHist *sequenceDec
+ switch i {
+ default:
+ // same as "case 0":
+ sNew = &s.litLengths
+ sHist = &hist.litLengths
+ case 1:
+ sNew = &s.offsets
+ sHist = &hist.offsets
+ case 2:
+ sNew = &s.matchLengths
+ sHist = &hist.matchLengths
+ }
+ if sNew.repeat {
+ if sHist.fse == nil {
+ return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i)
+ }
+ continue
+ }
+ if sNew.fse == nil {
+ return nil, fmt.Errorf("sequence stream %d, no fse found", i)
+ }
+ if sHist.fse != nil && !sHist.fse.preDefined {
+ fseDecoderPool.Put(sHist.fse)
+ }
+ sHist.fse = sNew.fse
+ }
+ return hist, nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go
new file mode 100644
index 0000000..8014174
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go
@@ -0,0 +1,114 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import "math/bits"
+
+type seqCoders struct { // FSE encoders for literal lengths (ll), offsets (of) and match lengths (ml)
+	llEnc, ofEnc, mlEnc    *fseEncoder // encoders in the main slot
+	llPrev, ofPrev, mlPrev *fseEncoder // encoders kept as history; see setPrev
+}
+
+// swap exchanges all six encoder pointers of s with those of another block's coders.
+func (s *seqCoders) swap(other *seqCoders) {
+	*s, *other = *other, *s
+}
+
+// setPrev will update the previous encoders to the actually used ones
+// and make sure a fresh one is in the main slot.
+// ll, ml and of are the encoders that were actually used; each is compared
+// by pointer against the current and previous slot of its kind.
+func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) {
+	rotate := func(used *fseEncoder, current, prev **fseEncoder) {
+		switch used {
+		case *current:
+			// We used the new one: move current to history and reuse the previous history
+			// as the encoder in the main slot.
+			*current, *prev = *prev, *current
+			(*current).reUsed = false
+			(*prev).reUsed = true
+		case *prev:
+			// The previous encoder was used again; both slots stay as they are.
+		default:
+			// Ensure we cannot reuse by accident
+			(*prev).symbolLen = 0
+		}
+	}
+	rotate(ll, &s.llEnc, &s.llPrev)
+	rotate(ml, &s.mlEnc, &s.mlPrev)
+	rotate(of, &s.ofEnc, &s.ofPrev)
+}
+
+func highBit(val uint32) (n uint32) {
+	return uint32(bits.Len32(val)) - 1 // index of the highest set bit; wraps to MaxUint32 for val == 0
+}
+
+var llCodeTable = [64]byte{0, 1, 2, 3, 4, 5, 6, 7,
+	8, 9, 10, 11, 12, 13, 14, 15,
+	16, 16, 17, 17, 18, 18, 19, 19,
+	20, 20, 20, 20, 21, 21, 21, 21,
+	22, 22, 22, 22, 22, 22, 22, 22,
+	23, 23, 23, 23, 23, 23, 23, 23,
+	24, 24, 24, 24, 24, 24, 24, 24,
+	24, 24, 24, 24, 24, 24, 24, 24} // indexed by literal length 0..63; see llCode
+
+// maxLLCode is the highest literal length code. Up to 6 bits
+const maxLLCode = 35
+
+// llBitsTable translates from ll code to number of bits.
+var llBitsTable = [maxLLCode + 1]byte{
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 2, 2, 3, 3,
+	4, 6, 7, 8, 9, 10, 11, 12,
+	13, 14, 15, 16}
+
+// llCode returns the code that represents the literal length requested:
+// a table lookup up to 63, otherwise highBit(litLength) plus a fixed delta.
+func llCode(litLength uint32) uint8 {
+	if litLength > 63 {
+		const llDeltaCode = 19
+		return uint8(highBit(litLength)) + llDeltaCode
+	}
+	return llCodeTable[litLength&63] // Compiler insists on bounds check (Go 1.12); the mask never changes the index here
+}
+
+var mlCodeTable = [128]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+	16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+	32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+	38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+	40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+	41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+	42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+	42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42} // indexed by mlBase 0..127; see mlCode
+
+// maxMLCode is the highest match length code. Up to 6 bits
+const maxMLCode = 52
+
+// mlBitsTable translates from ml code to number of bits.
+var mlBitsTable = [maxMLCode + 1]byte{
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0,
+	1, 1, 1, 1, 2, 2, 3, 3,
+	4, 4, 5, 7, 8, 9, 10, 11,
+	12, 13, 14, 15, 16}
+
+// mlCode returns the code for a match length. Note: mlBase = matchLength - MINMATCH,
+// because that is the format in which it is stored in seqStore->sequences.
+func mlCode(mlBase uint32) uint8 {
+	if mlBase > 127 {
+		const mlDeltaCode = 36
+		return uint8(highBit(mlBase)) + mlDeltaCode
+	}
+	// Compiler insists on bounds check (Go 1.12); the mask never changes the index here.
+	return mlCodeTable[mlBase&127]
+}
+
+// ofCode returns the offset code: the index of the highest set bit of offset.
+func ofCode(offset uint32) uint8 {
+	return uint8(highBit(offset)) // A valid offset will always be > 0.
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
new file mode 100644
index 0000000..9d9d1d5
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -0,0 +1,435 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "encoding/binary"
+ "errors"
+ "hash/crc32"
+ "io"
+
+ "github.com/golang/snappy"
+ "github.com/klauspost/compress/huff0"
+)
+
+const ( // element types, held in the low two bits of a tag byte (see decodeSnappy)
+	snappyTagLiteral = 0x00 // literal run; the length follows in the tag or in 1-4 extra bytes
+	snappyTagCopy1   = 0x01 // copy, 2-byte element: 3-bit length (+4), 11-bit offset
+	snappyTagCopy2   = 0x02 // copy, 3-byte element: 6-bit length (+1), 16-bit offset
+	snappyTagCopy4   = 0x03 // copy, 5-byte element: 6-bit length (+1), 32-bit offset
+)
+
+const (
+	snappyChecksumSize = 4        // bytes of checksum at the start of each data chunk body
+	snappyMagicBody    = "sNaPpY" // required body of the stream identifier chunk
+
+	// snappyMaxBlockSize is the maximum size of the input to encodeBlock. It is not
+	// part of the wire format per se, but some parts of the encoder assume
+	// that an offset fits into a uint16.
+	//
+	// Also, for the framing format (Writer type instead of Encode function),
+	// https://github.com/google/snappy/blob/master/framing_format.txt says
+	// that "the uncompressed data in a chunk must be no longer than 65536
+	// bytes".
+	snappyMaxBlockSize = 65536
+
+	// snappyMaxEncodedLenOfMaxBlockSize equals MaxEncodedLen(snappyMaxBlockSize), but is
+	// hard coded to be a const instead of a variable, so that obufLen can also
+	// be a const. Their equivalence is confirmed by
+	// TestMaxEncodedLenOfMaxBlockSize.
+	snappyMaxEncodedLenOfMaxBlockSize = 76490
+)
+
+const ( // chunk types of the Snappy framing format, read from the first byte of each chunk header
+	chunkTypeCompressedData   = 0x00 // Section 4.2
+	chunkTypeUncompressedData = 0x01 // Section 4.3
+	chunkTypePadding          = 0xfe // Section 4.4
+	chunkTypeStreamIdentifier = 0xff // Section 4.1
+)
+
+var (
+	// ErrSnappyCorrupt reports that the input is invalid.
+	ErrSnappyCorrupt = errors.New("snappy: corrupt input")
+	// ErrSnappyTooLarge reports that the uncompressed length is too large.
+	ErrSnappyTooLarge = errors.New("snappy: decoded block is too large")
+	// ErrSnappyUnsupported reports that the input isn't supported.
+	ErrSnappyUnsupported = errors.New("snappy: unsupported input")
+
+	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") // literal length not positive as an int
+)
+
+// SnappyConverter can read Snappy-compressed streams and convert them to zstd.
+// Conversion is done by converting the stream directly from Snappy without intermediate
+// full decoding.
+// Therefore the compression ratio is much less than what can be done by a full decompression
+// and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without
+// any errors being generated.
+// No CRC value is being generated and not all CRC values of the Snappy stream are checked.
+// However, it provides really fast recompression of Snappy streams.
+// The converter can be reused to avoid allocations, even after errors.
+type SnappyConverter struct {
+	r     io.Reader // input of the current Convert call
+	err   error     // last read/convert error; reset at the start of Convert
+	buf   []byte    // scratch buffer for chunk headers and bodies
+	block *blockEnc // block encoder, created on first use and reused across calls
+}
+
+// Convert reads the framed Snappy stream from 'in' and writes the equivalent zStandard frame to 'w',
+// returning the number of bytes written. Errors on the Snappy stream and write failures are returned;
+// note that a stream ending cleanly at a chunk boundary yields io.EOF (left by the last header read), not nil.
+func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
+	initPredefined()
+	r.err = nil
+	r.r = in
+	if r.block == nil {
+		r.block = &blockEnc{}
+		r.block.init()
+	}
+	r.block.initNewEncode()
+	if len(r.buf) != snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize {
+		r.buf = make([]byte, snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize)
+	}
+	r.block.litEnc.Reuse = huff0.ReusePolicyNone
+	var written int64
+	var readHeader bool
+	{
+		var n int
+		header, err := frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0])
+		if err == nil { // only write a header that was built successfully
+			n, err = w.Write(header)
+		}
+		if r.err = err; r.err != nil {
+			return written, r.err
+		}
+		written += int64(n)
+	}
+
+	for {
+		if !r.readFull(r.buf[:4], true) {
+			// Add empty last block
+			r.block.reset(nil)
+			r.block.last = true
+			err := r.block.encodeLits(r.block.literals, false)
+			if err != nil {
+				return written, err
+			}
+			n, err := w.Write(r.block.output)
+			if err != nil {
+				return written, err
+			}
+			written += int64(n)
+
+			return written, r.err
+		}
+		chunkType := r.buf[0]
+		if !readHeader {
+			if chunkType != chunkTypeStreamIdentifier {
+				println("chunkType != chunkTypeStreamIdentifier", chunkType)
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			readHeader = true
+		}
+		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
+		if chunkLen > len(r.buf) {
+			println("chunkLen > len(r.buf)", chunkType)
+			r.err = ErrSnappyUnsupported
+			return written, r.err
+		}
+
+		// The chunk types are specified at
+		// https://github.com/google/snappy/blob/master/framing_format.txt
+		switch chunkType {
+		case chunkTypeCompressedData:
+			// Section 4.2. Compressed data (chunk type 0x00).
+			if chunkLen < snappyChecksumSize {
+				println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize)
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			buf := r.buf[:chunkLen]
+			if !r.readFull(buf, false) {
+				return written, r.err
+			}
+			//checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			buf = buf[snappyChecksumSize:]
+
+			n, hdr, err := snappyDecodedLen(buf)
+			if err != nil {
+				r.err = err
+				return written, r.err
+			}
+			buf = buf[hdr:]
+			if n > snappyMaxBlockSize {
+				println("n > snappyMaxBlockSize", n, snappyMaxBlockSize)
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			r.block.reset(nil)
+			r.block.pushOffsets()
+			if err := decodeSnappy(r.block, buf); err != nil {
+				r.err = err
+				return written, r.err
+			}
+			if r.block.size+r.block.extraLits != n {
+				printf("invalid size, want %d, got %d\n", n, r.block.size+r.block.extraLits)
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			err = r.block.encode(nil, false, false)
+			switch err {
+			case errIncompressible:
+				r.block.popOffsets()
+				r.block.reset(nil)
+				r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen])
+				if err != nil {
+					return written, err
+				}
+				err = r.block.encodeLits(r.block.literals, false)
+				if err != nil {
+					return written, err
+				}
+			case nil:
+			default:
+				return written, err
+			}
+
+			n, r.err = w.Write(r.block.output)
+			if r.err != nil {
+				return written, r.err // err is nil here; the write failure is in r.err
+			}
+			written += int64(n)
+			continue
+		case chunkTypeUncompressedData:
+			if debug {
+				println("Uncompressed, chunklen", chunkLen)
+			}
+			// Section 4.3. Uncompressed data (chunk type 0x01).
+			if chunkLen < snappyChecksumSize {
+				println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize)
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			r.block.reset(nil)
+			buf := r.buf[:snappyChecksumSize]
+			if !r.readFull(buf, false) {
+				return written, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			// Read directly into the block's literals instead of via r.buf.
+			n := chunkLen - snappyChecksumSize
+			if n > snappyMaxBlockSize {
+				println("n > snappyMaxBlockSize", n, snappyMaxBlockSize)
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			r.block.literals = r.block.literals[:n]
+			if !r.readFull(r.block.literals, false) {
+				return written, r.err
+			}
+			if snappyCRC(r.block.literals) != checksum {
+				println("literals crc mismatch")
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			err := r.block.encodeLits(r.block.literals, false)
+			if err != nil {
+				return written, err
+			}
+			n, r.err = w.Write(r.block.output)
+			if r.err != nil {
+				return written, r.err // err is nil here; the write failure is in r.err
+			}
+			written += int64(n)
+			continue
+
+		case chunkTypeStreamIdentifier:
+			if debug {
+				println("stream id", chunkLen, len(snappyMagicBody))
+			}
+			// Section 4.1. Stream identifier (chunk type 0xff).
+			if chunkLen != len(snappyMagicBody) {
+				println("chunkLen != len(snappyMagicBody)", chunkLen, len(snappyMagicBody))
+				r.err = ErrSnappyCorrupt
+				return written, r.err
+			}
+			if !r.readFull(r.buf[:len(snappyMagicBody)], false) {
+				return written, r.err
+			}
+			for i := 0; i < len(snappyMagicBody); i++ {
+				if r.buf[i] != snappyMagicBody[i] {
+					println("r.buf[i] != snappyMagicBody[i]", r.buf[i], snappyMagicBody[i], i)
+					r.err = ErrSnappyCorrupt
+					return written, r.err
+				}
+			}
+			continue
+		}
+
+		if chunkType <= 0x7f {
+			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
+			println("chunkType <= 0x7f")
+			r.err = ErrSnappyUnsupported
+			return written, r.err
+		}
+		// Section 4.4 Padding (chunk type 0xfe).
+		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
+		if !r.readFull(r.buf[:chunkLen], false) {
+			return written, r.err
+		}
+	}
+}
+
+// decodeSnappy converts the Snappy block body in src into literals and sequences on blk. It assumes that
+// the varint-encoded length of the decompressed bytes has already been read; corrupt input yields an error.
+func decodeSnappy(blk *blockEnc, src []byte) error {
+	// s indexes src; lits counts literal bytes seen since the last copy (seeded from blk.extraLits).
+	var s, length int
+	lits := blk.extraLits
+	var offset uint32
+	for s < len(src) {
+		switch src[s] & 0x03 {
+		case snappyTagLiteral:
+			x := uint32(src[s] >> 2)
+			switch {
+			case x < 60:
+				s++
+			case x == 60:
+				s += 2
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					println("uint(s) > uint(len(src)", s, src)
+					return ErrSnappyCorrupt
+				}
+				x = uint32(src[s-1])
+			case x == 61:
+				s += 3
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					println("uint(s) > uint(len(src)", s, src)
+					return ErrSnappyCorrupt
+				}
+				x = uint32(src[s-2]) | uint32(src[s-1])<<8
+			case x == 62:
+				s += 4
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					println("uint(s) > uint(len(src)", s, src)
+					return ErrSnappyCorrupt
+				}
+				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
+			case x == 63:
+				s += 5
+				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+					println("uint(s) > uint(len(src)", s, src)
+					return ErrSnappyCorrupt
+				}
+				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+			}
+			if x > snappyMaxBlockSize {
+				println("x > snappyMaxBlockSize", x, snappyMaxBlockSize)
+				return ErrSnappyCorrupt
+			}
+			length = int(x) + 1
+			if length <= 0 {
+				println("length <= 0 ", length)
+
+				return errUnsupportedLiteralLength
+			}
+			if length > len(src)-s { // literal runs past the block: slicing would panic or read stale bytes beyond len(src)
+				return ErrSnappyCorrupt
+			}
+
+			blk.literals = append(blk.literals, src[s:s+length]...)
+			//println(length, "litLen")
+			lits += length
+			s += length
+			continue
+
+		case snappyTagCopy1:
+			s += 2
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				println("uint(s) > uint(len(src)", s, len(src))
+				return ErrSnappyCorrupt
+			}
+			length = 4 + int(src[s-2])>>2&0x7
+			offset = uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])
+
+		case snappyTagCopy2:
+			s += 3
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				println("uint(s) > uint(len(src)", s, len(src))
+				return ErrSnappyCorrupt
+			}
+			length = 1 + int(src[s-3])>>2
+			offset = uint32(src[s-2]) | uint32(src[s-1])<<8
+
+		case snappyTagCopy4:
+			s += 5
+			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
+				println("uint(s) > uint(len(src)", s, len(src))
+				return ErrSnappyCorrupt
+			}
+			length = 1 + int(src[s-5])>>2
+			offset = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
+		}
+
+		if offset <= 0 || blk.size+lits < int(offset) /*|| length > len(blk)-d */ {
+			println("offset <= 0 || blk.size+lits < int(offset)", offset, blk.size+lits, int(offset), blk.size, lits)
+
+			return ErrSnappyCorrupt
+		}
+
+		// Check if offset is one of the recent offsets.
+		// Adjusts the output offset accordingly.
+		// Gives a tiny bit of compression, typically around 1%.
+		if false {
+			offset = blk.matchOffset(offset, uint32(lits))
+		} else {
+			offset += 3
+		}
+
+		blk.sequences = append(blk.sequences, seq{
+			litLen:   uint32(lits),
+			offset:   offset,
+			matchLen: uint32(length) - zstdMinMatch, // NOTE(review): wraps if length < zstdMinMatch (copy2/copy4 can encode 1-2) — confirm
+		})
+		blk.size += length + lits
+		lits = 0
+	}
+	blk.extraLits = lits
+	return nil
+}
+
+// readFull fills p from r.r and stores the outcome in r.err; short reads, and EOF unless allowEOF, become ErrSnappyCorrupt.
+func (r *SnappyConverter) readFull(p []byte, allowEOF bool) (ok bool) {
+	_, err := io.ReadFull(r.r, p)
+	if err == io.ErrUnexpectedEOF || (err == io.EOF && !allowEOF) {
+		err = ErrSnappyCorrupt
+	}
+	r.err = err
+	return err == nil
+}
+
+var crcTable = crc32.MakeTable(crc32.Castagnoli)
+
+// snappyCRC implements the masked CRC-32C checksum specified in section 3 of
+// https://github.com/google/snappy/blob/master/framing_format.txt
+func snappyCRC(b []byte) uint32 {
+	sum := crc32.Checksum(b, crcTable)
+	return (sum>>15 | sum<<17) + 0xa282ead8 // rotate right by 15 bits, then add the mask constant
+}
+
+// snappyDecodedLen returns the length of the decoded block and the number of bytes
+// that the length header occupied. A malformed varint or a length above 32 bits is
+// ErrSnappyCorrupt; a length that does not fit a 32-bit int is ErrSnappyTooLarge.
+func snappyDecodedLen(src []byte) (blockLen, headerLen int, err error) {
+	const wordSize = 32 << (^uint(0) >> 32 & 1) // 32 or 64: the width of uint on this platform
+	size, used := binary.Uvarint(src)
+	switch {
+	case used <= 0 || size > 0xffffffff:
+		return 0, 0, ErrSnappyCorrupt
+	case wordSize == 32 && size > 0x7fffffff:
+		return 0, 0, ErrSnappyTooLarge
+	}
+	return int(size), used, nil
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go
new file mode 100644
index 0000000..e35a0a2
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@@ -0,0 +1,120 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "errors"
+ "io"
+ "sync"
+)
+
+// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip.
+// See https://www.winzip.com/win/en/comp_info.html
+const ZipMethodWinZip = 93
+
+// ZipMethodPKWare is the method number used by PKWARE to indicate Zstandard compression.
+// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.7.TXT
+const ZipMethodPKWare = 20
+
+var zipReaderPool sync.Pool // idle *Decoder values for newZipReader; refilled by pooledZipReader.Close
+
+// newZipReader wraps r in a decoder taken from zipReaderPool, or in a new one when the pool is empty.
+// It cannot be used since we would leak goroutines...
+func newZipReader(r io.Reader) io.ReadCloser {
+	if dec, ok := zipReaderPool.Get().(*Decoder); ok {
+		dec.Reset(r)
+		return &pooledZipReader{dec: dec}
+	}
+	dec, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
+	if err != nil {
+		panic(err)
+	}
+	return &pooledZipReader{dec: dec}
+}
+
+type pooledZipReader struct { // io.ReadCloser returned by newZipReader
+	mu  sync.Mutex // guards Close and Read
+	dec *Decoder   // nil once Close has returned the decoder to zipReaderPool
+}
+
+// Read decompresses into p. Once Close has been called it fails with "Read after Close".
+// The mutex serializes it against a concurrent Close.
+func (r *pooledZipReader) Read(p []byte) (n int, err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if d := r.dec; d != nil {
+		return d.Read(p)
+	}
+	return 0, errors.New("Read after Close")
+}
+
+func (r *pooledZipReader) Close() error {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ var err error
+ if r.dec != nil {
+ err = r.dec.Reset(nil)
+ zipReaderPool.Put(r.dec)
+ r.dec = nil
+ }
+ return err
+}
+
+type pooledZipWriter struct { // io.WriteCloser handed out by ZipCompressor
+	mu   sync.Mutex // guards Close and Write
+	enc  *Encoder   // nil once closed
+	pool *sync.Pool // encoder pool of the ZipCompressor that created this writer
+}
+
+func (w *pooledZipWriter) Write(p []byte) (n int, err error) { // fails once the writer is closed
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	if w.enc == nil {
+		return 0, errors.New("Write after Close")
+	}
+	return w.enc.Write(p)
+}
+
+// Close finishes the stream and returns the encoder to the pool it was taken from; later calls are no-ops.
+func (w *pooledZipWriter) Close() error {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	if w.enc == nil {
+		return nil
+	}
+	err := w.enc.Close()
+	w.pool.Put(w.enc) // must be the encoder pool: zipReaderPool only holds decoders
+	w.enc = nil
+	return err
+}
+
+// ZipCompressor returns a compressor that can be registered with zip libraries.
+// The provided encoder options will be used on all encodes.
+func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
+	var pool sync.Pool
+	return func(w io.Writer) (io.WriteCloser, error) {
+		if enc, ok := pool.Get().(*Encoder); ok {
+			enc.Reset(w)
+			return &pooledZipWriter{enc: enc, pool: &pool}, nil
+		}
+		enc, err := NewWriter(w, opts...)
+		if err != nil {
+			return nil, err
+		}
+		return &pooledZipWriter{enc: enc, pool: &pool}, nil
+	}
+}
+
+// ZipDecompressor returns a decompressor that can be registered with zip libraries; it panics if NewReader fails.
+func ZipDecompressor() func(r io.Reader) io.ReadCloser {
+	open := func(r io.Reader) io.ReadCloser {
+		dec, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
+		if err != nil {
+			panic(err)
+		}
+		return dec.IOReadCloser()
+	}
+	return open
+}
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
new file mode 100644
index 0000000..1ba308c
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -0,0 +1,146 @@
+// Package zstd provides compression and decompression of zstandard content.
+//
+// For advanced usage and examples, go to the README: https://github.com/klauspost/compress/tree/master/zstd#zstd
+package zstd
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "log"
+ "math"
+ "math/bits"
+)
+
+// debug enables debug printing through println and printf.
+const debug = false
+
+// debugAsserts enables extra assertions; it is switched on whenever debug is.
+const debugAsserts = debug || false
+
+// debugSequences enables printing of sequence details.
+const debugSequences = false
+
+// debugMatches enables printing of detailed matching information.
+const debugMatches = false
+
+// forcePreDef forces the encoder to use predefined tables.
+const forcePreDef = false
+
+// zstdMinMatch is the minimum zstd match length.
+const zstdMinMatch = 3
+
+// bufferReset is the buffer offset at which the offset is reset.
+const bufferReset = math.MaxInt32 - MaxWindowSize
+
+var (
+ // ErrReservedBlockType is returned when a reserved block type is found.
+ // Typically this indicates wrong or corrupted input.
+ ErrReservedBlockType = errors.New("invalid input: reserved block type encountered")
+
+ // ErrCompressedSizeTooBig is returned when a block's compressed size is bigger than allowed.
+ // Typically this indicates wrong or corrupted input.
+ ErrCompressedSizeTooBig = errors.New("invalid input: compressed size too big")
+
+ // ErrBlockTooSmall is returned when a block is too small to be decoded.
+ // Typically returned on invalid input.
+ ErrBlockTooSmall = errors.New("block too small")
+
+ // ErrMagicMismatch is returned when a "magic" number isn't what is expected.
+ // Typically this indicates wrong or corrupted input.
+ ErrMagicMismatch = errors.New("invalid input: magic number mismatch")
+
+ // ErrWindowSizeExceeded is returned when a reference exceeds the valid window size.
+ // Typically this indicates wrong or corrupted input.
+ ErrWindowSizeExceeded = errors.New("window size exceeded")
+
+ // ErrWindowSizeTooSmall is returned when the window size is too small or not specified.
+ // Typically this indicates wrong or corrupted input.
+ ErrWindowSizeTooSmall = errors.New("invalid input: window size was too small")
+
+ // ErrDecoderSizeExceeded is returned if decompressed size exceeds the configured limit.
+ ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit")
+
+ // ErrUnknownDictionary is returned if the dictionary ID is unknown.
+ // For the time being dictionaries are not supported.
+ ErrUnknownDictionary = errors.New("unknown dictionary")
+
+ // ErrFrameSizeExceeded is returned if the stated frame size is exceeded.
+ // This is only returned if SingleSegment is specified on the frame.
+ ErrFrameSizeExceeded = errors.New("frame size exceeded")
+
+ // ErrCRCMismatch is returned if a CRC check fails.
+ ErrCRCMismatch = errors.New("CRC check failed")
+
+ // ErrDecoderClosed will be returned if the Decoder was used after
+ // Close has been called.
+ ErrDecoderClosed = errors.New("decoder used after Close")
+
+ // ErrDecoderNilInput is returned when a nil Reader was provided
+ // and an operation other than Reset/DecodeAll/Close was attempted.
+ ErrDecoderNilInput = errors.New("nil input provided as reader")
+)
+
+func println(a ...interface{}) { // shadows the predeclared println within this package
+ if debug { // forwards to log.Println only when debug is true
+ log.Println(a...)
+ }
+}
+
+func printf(format string, a ...interface{}) { // debug-only wrapper around log.Printf
+ if debug { // does nothing unless debug is true
+ log.Printf(format, a...)
+ }
+}
+
+// matchLenFast counts matching leading bytes of a and b in 8-byte steps; the last up to 7 bytes of a are not examined.
+func matchLenFast(a, b []byte) int {
+	limit := len(a) & (math.MaxInt32 - 7) // clears the low 3 bits: a multiple of 8
+	for off := 0; off < limit; off += 8 {
+		if x := load64(a, off) ^ load64(b, off); x != 0 {
+			return off + (bits.TrailingZeros64(x) >> 3) // bit index / 8 = first differing byte
+		}
+	}
+	return limit
+}
+
+// matchLen returns the length of the common prefix of a and b.
+// a must be the shortest of the two.
+// A result equal to len(a) means every byte of a matched.
+func matchLen(a, b []byte) int {
+	b = b[:len(a)]
+	for off := 0; off+8 <= len(a); off += 8 {
+		if x := load64(a, off) ^ load64(b, off); x != 0 {
+			return off + bits.TrailingZeros64(x)>>3
+		}
+	}
+
+	done := len(a) &^ 7 // bytes already verified by the 8-byte loop
+	a, b = a[done:], b[done:]
+	// Compare the remaining tail (at most 7 bytes) one byte at a time.
+	for i, c := range a {
+		if c != b[i] {
+			return done + i
+		}
+	}
+	return done + len(a)
+}
+
+func load3232(b []byte, i int32) uint32 { // little-endian uint32 read from b[i:], int32 index
+ return binary.LittleEndian.Uint32(b[i:])
+}
+
+func load6432(b []byte, i int32) uint64 { // little-endian uint64 read from b[i:], int32 index
+ return binary.LittleEndian.Uint64(b[i:])
+}
+
+func load64(b []byte, i int) uint64 { // little-endian uint64 read from b[i:], int index
+ return binary.LittleEndian.Uint64(b[i:])
+}
+
+type byter interface { // satisfied by *bytes.Buffer (asserted below)
+ Bytes() []byte
+ Len() int
+}
+
+var _ byter = &bytes.Buffer{} // compile-time check that *bytes.Buffer implements byter