gRPC migration update: refresh vendored github.com/klauspost/compress
Change-Id: Icdd1a824948fa994cd36bd121c962f5ecf74e3cf
diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go
index b9db204..f65eb39 100644
--- a/vendor/github.com/klauspost/compress/fse/bitreader.go
+++ b/vendor/github.com/klauspost/compress/fse/bitreader.go
@@ -6,6 +6,7 @@
package fse
import (
+ "encoding/binary"
"errors"
"io"
)
@@ -34,8 +35,12 @@
}
b.bitsRead = 64
b.value = 0
- b.fill()
- b.fill()
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
b.bitsRead += 8 - uint8(highBits(uint32(v)))
return nil
}
@@ -63,8 +68,9 @@
if b.bitsRead < 32 {
return
}
- // Do single re-slice to avoid bounds checks.
- v := b.in[b.off-4 : b.off]
+ // 2 bounds checks.
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
@@ -77,7 +83,8 @@
return
}
if b.off > 4 {
- v := b.in[b.off-4 : b.off]
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
@@ -91,9 +98,17 @@
}
}
+// fillFastStart assumes the bitReader is empty and there are at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
// finished returns true if all bits have been read from the bit stream.
func (b *bitReader) finished() bool {
- return b.off == 0 && b.bitsRead >= 64
+ return b.bitsRead >= 64 && b.off == 0
}
// close the bitstream and returns an error if out-of-buffer reads occurred.
diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go
index f228a46..abade2d 100644
--- a/vendor/github.com/klauspost/compress/fse/bytereader.go
+++ b/vendor/github.com/klauspost/compress/fse/bytereader.go
@@ -25,19 +25,10 @@
b.off += int(n)
}
-// Int32 returns a little endian int32 starting at current offset.
-func (b byteReader) Int32() int32 {
- b2 := b.b[b.off : b.off+4 : b.off+4]
- v3 := int32(b2[3])
- v2 := int32(b2[2])
- v1 := int32(b2[1])
- v0 := int32(b2[0])
- return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
-}
-
// Uint32 returns a little endian uint32 starting at current offset.
func (b byteReader) Uint32() uint32 {
- b2 := b.b[b.off : b.off+4 : b.off+4]
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
v3 := uint32(b2[3])
v2 := uint32(b2[2])
v1 := uint32(b2[1])
diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go
index b69237c..6f34191 100644
--- a/vendor/github.com/klauspost/compress/fse/compress.go
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@@ -92,7 +92,6 @@
im := int32((nbBitsOut << 16) - first.deltaNbBits)
lu := (im >> nbBitsOut) + first.deltaFindState
c.state = c.stateTable[lu]
- return
}
// encode the output symbol provided and write it to the bitstream.
@@ -301,7 +300,7 @@
out[outP+1] = byte(bitStream >> 8)
outP += (bitCount + 7) / 8
- if uint16(charnum) > s.symbolLen {
+ if charnum > s.symbolLen {
return errors.New("internal error: charnum > s.symbolLen")
}
s.Out = out[:outP]
@@ -331,7 +330,7 @@
func (s *Scratch) allocCtable() {
tableSize := 1 << s.actualTableLog
// get tableSymbol that is big enough.
- if cap(s.ct.tableSymbol) < int(tableSize) {
+ if cap(s.ct.tableSymbol) < tableSize {
s.ct.tableSymbol = make([]byte, tableSize)
}
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
@@ -565,8 +564,8 @@
distributed uint32
total = uint32(s.br.remain())
tableLog = s.actualTableLog
- lowThreshold = uint32(total >> tableLog)
- lowOne = uint32((total * 3) >> (tableLog + 1))
+ lowThreshold = total >> tableLog
+ lowOne = (total * 3) >> (tableLog + 1)
)
for i, cnt := range s.count[:s.symbolLen] {
if cnt == 0 {
@@ -591,7 +590,7 @@
if (total / toDistribute) > lowOne {
// risk of rounding to zero
- lowOne = uint32((total * 3) / (toDistribute * 2))
+ lowOne = (total * 3) / (toDistribute * 2)
for i, cnt := range s.count[:s.symbolLen] {
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
s.norm[i] = 1
diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go
index 413ec3b..926f5f1 100644
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -172,7 +172,7 @@
// allocDtable will allocate decoding tables if they are not big enough.
func (s *Scratch) allocDtable() {
tableSize := 1 << s.actualTableLog
- if cap(s.decTable) < int(tableSize) {
+ if cap(s.decTable) < tableSize {
s.decTable = make([]decSymbol, tableSize)
}
s.decTable = s.decTable[:tableSize]
@@ -340,7 +340,7 @@
func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) {
d.dt = dt
d.br = in
- d.state = uint16(in.getBits(tableLog))
+ d.state = in.getBits(tableLog)
}
// next returns the next symbol and sets the next state.
diff --git a/vendor/github.com/klauspost/compress/fse/fse.go b/vendor/github.com/klauspost/compress/fse/fse.go
index 075357b..535cbad 100644
--- a/vendor/github.com/klauspost/compress/fse/fse.go
+++ b/vendor/github.com/klauspost/compress/fse/fse.go
@@ -44,18 +44,14 @@
// Scratch provides temporary storage for compression and decompression.
type Scratch struct {
// Private
- count [maxSymbolValue + 1]uint32
- norm [maxSymbolValue + 1]int16
- symbolLen uint16 // Length of active part of the symbol table.
- actualTableLog uint8 // Selected tablelog.
- br byteReader
- bits bitReader
- bw bitWriter
- ct cTable // Compression tables.
- decTable []decSymbol // Decompression table.
- zeroBits bool // no bits has prob > 50%.
- clearCount bool // clear count
- maxCount int // count of the most probable symbol
+ count [maxSymbolValue + 1]uint32
+ norm [maxSymbolValue + 1]int16
+ br byteReader
+ bits bitReader
+ bw bitWriter
+ ct cTable // Compression tables.
+ decTable []decSymbol // Decompression table.
+ maxCount int // count of the most probable symbol
// Per block parameters.
// These can be used to override compression parameters of the block.
@@ -68,17 +64,22 @@
// and allocation will be avoided.
Out []byte
- // MaxSymbolValue will override the maximum symbol value of the next block.
- MaxSymbolValue uint8
-
- // TableLog will attempt to override the tablelog for the next block.
- TableLog uint8
-
// DecompressLimit limits the maximum decoded size acceptable.
// If > 0 decompression will stop when approximately this many bytes
// has been decoded.
// If 0, maximum size will be 2GB.
DecompressLimit int
+
+ symbolLen uint16 // Length of active part of the symbol table.
+ actualTableLog uint8 // Selected tablelog.
+ zeroBits bool // no bits has prob > 50%.
+ clearCount bool // clear count
+
+ // MaxSymbolValue will override the maximum symbol value of the next block.
+ MaxSymbolValue uint8
+
+ // TableLog will attempt to override the tablelog for the next block.
+ TableLog uint8
}
// Histogram allows to populate the histogram and skip that step in the compression,
diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md
index 0a8448c..8b6e5c6 100644
--- a/vendor/github.com/klauspost/compress/huff0/README.md
+++ b/vendor/github.com/klauspost/compress/huff0/README.md
@@ -12,11 +12,11 @@
* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0)
-THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE.
-
## News
- * Mar 2018: First implementation released. Consider this beta software for now.
+This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package.
+
+As a result, most functionality is well tested.
# Usage
@@ -75,6 +75,8 @@
Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X)
or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function.
+For concurrently decompressing content with a fixed table, a stateless [`Decoder`](https://godoc.org/github.com/klauspost/compress/huff0#Decoder) can be requested. It will remain correct as long as the Scratch it was created from is unchanged. The capacity of the provided slice indicates the expected output size.
+
You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back
your input was likely corrupted.
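+
+A minimal sketch of parsing a table and decompressing with the stateless decoder (variable names such as `compressed` and `decompressedSize` are illustrative; error handling is elided):
+
+```go
+// ReadTable parses the table; remain is the compressed stream that follows it.
+s, remain, err := huff0.ReadTable(compressed, nil)
+if err != nil {
+	// corrupt or truncated table
+}
+// The Decoder is stateless and may be shared across goroutines.
+dec := s.Decoder()
+// The capacity of dst signals the expected decompressed size.
+dst := make([]byte, 0, decompressedSize)
+out, err := dec.Decompress1X(dst, remain)
+```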
@@ -84,4 +86,4 @@
# Contributing
Contributions are always welcome. Be aware that adding public functions will require good justification and breaking
-changes will likely not be accepted. If in doubt open an issue before writing the PR.
\ No newline at end of file
+changes will likely not be accepted. If in doubt open an issue before writing the PR.
diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go
index 7d0903c..a4979e8 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitreader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go
@@ -6,6 +6,7 @@
package huff0
import (
+ "encoding/binary"
"errors"
"io"
)
@@ -34,29 +35,16 @@
}
b.bitsRead = 64
b.value = 0
- b.fill()
- b.fill()
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
b.bitsRead += 8 - uint8(highBit32(uint32(v)))
return nil
}
-// getBits will return n bits. n can be 0.
-func (b *bitReader) getBits(n uint8) uint16 {
- if n == 0 || b.bitsRead >= 64 {
- return 0
- }
- return b.getBitsFast(n)
-}
-
-// getBitsFast requires that at least one bit is requested every time.
-// There are no checks if the buffer is filled.
-func (b *bitReader) getBitsFast(n uint8) uint16 {
- const regMask = 64 - 1
- v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
- b.bitsRead += n
- return v
-}
-
// peekBitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) peekBitsFast(n uint8) uint16 {
@@ -71,21 +59,36 @@
if b.bitsRead < 32 {
return
}
- // Do single re-slice to avoid bounds checks.
+
+ // 2 bounds checks.
v := b.in[b.off-4 : b.off]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
+func (b *bitReader) advance(n uint8) {
+ b.bitsRead += n
+}
+
+// fillFastStart assumes the bitReader is empty and there are at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
// fill() will make sure at least 32 bits are available.
func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off > 4 {
- v := b.in[b.off-4 : b.off]
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
@@ -113,3 +116,214 @@
}
return nil
}
+
+// bitReaderBytes reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderBytes struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReaderBytes) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
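+ // For example, a last byte of 0b1000_0000 means only the marker bit itself is skipped.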
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.advance(8 - uint8(highBit32(uint32(v))))
+ return nil
+}
+
+// peekByteFast returns the next byte without advancing.
+// There are no checks if the buffer is filled.
+func (b *bitReaderBytes) peekByteFast() uint8 {
+ got := uint8(b.value >> 56)
+ return got
+}
+
+func (b *bitReaderBytes) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderBytes) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart assumes the bitReaderBytes is empty and there are at least 8 bytes to read.
+func (b *bitReaderBytes) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReaderBytes) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << (b.bitsRead - 32)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value |= uint64(b.in[b.off-1]) << (b.bitsRead - 8)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReaderBytes) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReaderBytes) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+
+// bitReaderShifted reads a bitstream in reverse.
+// The last set bit indicates the start of the stream and is used
+// for aligning the input.
+type bitReaderShifted struct {
+ in []byte
+ off uint // next byte to read is at in[off - 1]
+ value uint64
+ bitsRead uint8
+}
+
+// init initializes and resets the bit reader.
+func (b *bitReaderShifted) init(in []byte) error {
+ if len(in) < 1 {
+ return errors.New("corrupt stream: too short")
+ }
+ b.in = in
+ b.off = uint(len(in))
+ // The highest bit of the last byte indicates where to start
+ v := in[len(in)-1]
+ if v == 0 {
+ return errors.New("corrupt stream, did not find end of stream")
+ }
+ b.bitsRead = 64
+ b.value = 0
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
+ b.advance(8 - uint8(highBit32(uint32(v))))
+ return nil
+}
+
+// peekBitsFast requires that at least one bit is requested every time.
+// There are no checks if the buffer is filled.
+func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
+ return uint16(b.value >> ((64 - n) & 63))
+}
+
+func (b *bitReaderShifted) advance(n uint8) {
+ b.bitsRead += n
+ b.value <<= n & 63
+}
+
+// fillFast() will make sure at least 32 bits are available.
+// There must be at least 4 bytes available.
+func (b *bitReaderShifted) fillFast() {
+ if b.bitsRead < 32 {
+ return
+ }
+
+ // 2 bounds checks.
+ v := b.in[b.off-4 : b.off]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+}
+
+// fillFastStart assumes the bitReaderShifted is empty and there are at least 8 bytes to read.
+func (b *bitReaderShifted) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
+// fill() will make sure at least 32 bits are available.
+func (b *bitReaderShifted) fill() {
+ if b.bitsRead < 32 {
+ return
+ }
+ if b.off > 4 {
+ v := b.in[b.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ b.value |= uint64(low) << ((b.bitsRead - 32) & 63)
+ b.bitsRead -= 32
+ b.off -= 4
+ return
+ }
+ for b.off > 0 {
+ b.value |= uint64(b.in[b.off-1]) << ((b.bitsRead - 8) & 63)
+ b.bitsRead -= 8
+ b.off--
+ }
+}
+
+// finished returns true if all bits have been read from the bit stream.
+func (b *bitReaderShifted) finished() bool {
+ return b.off == 0 && b.bitsRead >= 64
+}
+
+// close the bitstream and returns an error if out-of-buffer reads occurred.
+func (b *bitReaderShifted) close() error {
+ // Release reference.
+ b.in = nil
+ if b.bitsRead > 64 {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
index bda4021..6bce4e8 100644
--- a/vendor/github.com/klauspost/compress/huff0/bitwriter.go
+++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go
@@ -43,6 +43,11 @@
func (b *bitWriter) encSymbol(ct cTable, symbol byte) {
enc := ct[symbol]
b.bitContainer |= uint64(enc.val) << (b.nBits & 63)
+ // Sanity check, disabled by default; flip the condition to true when debugging.
+ if false {
+ if enc.nBits == 0 {
+ panic("nbits 0")
+ }
+ }
b.nBits += enc.nBits
}
@@ -54,6 +59,14 @@
sh := b.nBits & 63
combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63))
b.bitContainer |= combined << sh
+ // Sanity check, disabled by default.
+ if false {
+ if encA.nBits == 0 {
+ panic("nbitsA 0")
+ }
+ if encB.nBits == 0 {
+ panic("nbitsB 0")
+ }
+ }
b.nBits += encA.nBits + encB.nBits
}
diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go
index 0843cb0..0823c92 100644
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@@ -77,8 +77,11 @@
// Each symbol present maximum once or too well distributed.
return nil, false, ErrIncompressible
}
-
- if s.Reuse == ReusePolicyPrefer && canReuse {
+ if s.Reuse == ReusePolicyMust && !canReuse {
+ // We must reuse, but we can't.
+ return nil, false, ErrIncompressible
+ }
+ if (s.Reuse == ReusePolicyPrefer || s.Reuse == ReusePolicyMust) && canReuse {
keepTable := s.cTable
keepTL := s.actualTableLog
s.cTable = s.prevTable
@@ -90,6 +93,9 @@
s.OutData = s.Out
return s.Out, true, nil
}
+ if s.Reuse == ReusePolicyMust {
+ return nil, false, ErrIncompressible
+ }
// Do not attempt to re-use later.
s.prevTable = s.prevTable[:0]
}
@@ -397,7 +403,7 @@
var startNode = int16(s.symbolLen)
nonNullRank := s.symbolLen - 1
- nodeNb := int16(startNode)
+ nodeNb := startNode
huffNode := s.nodes[1 : huffNodesLen+1]
// This overlays the slice above, but allows "-1" index lookups.
@@ -530,7 +536,6 @@
}
nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
}
- return
}
func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
@@ -574,7 +579,7 @@
// Get pos of last (smallest) symbol per rank
{
- currentNbBits := uint8(maxNbBits)
+ currentNbBits := maxNbBits
for pos := int(n); pos >= 0; pos-- {
if huffNode[pos].nbBits >= currentNbBits {
continue
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 97ae66a..41703bb 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -25,11 +25,14 @@
len uint8
}
+// Uses special code for all tables that are <= 8 bits.
+const use8BitTables = true
+
// ReadTable will read a table from the input.
// The size of the input may be larger than the table definition.
// Any content remaining after the table definition will be returned.
// If no Scratch is provided a new one is allocated.
-// The returned Scratch can be used for decoding input using this table.
+// The returned Scratch can be used for encoding or decoding input using this table.
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
s, err = s.prepare(in)
if err != nil {
@@ -55,8 +58,8 @@
s.symbolLen = uint16(oSize)
in = in[iSize:]
} else {
- if len(in) <= int(iSize) {
- return s, nil, errors.New("input too small for table")
+ if len(in) < int(iSize) {
+ return s, nil, fmt.Errorf("input too small for table, want %d bytes, have %d", iSize, len(in))
}
// FSE compressed weights
s.fse.DecompressLimit = 255
@@ -83,6 +86,7 @@
}
v2 := v & 15
rankStats[v2]++
+ // (1 << (v2-1)) is slower since the compiler cannot prove that v2 isn't 0.
weightTotal += (1 << v2) >> 1
}
if weightTotal == 0 {
@@ -134,20 +138,40 @@
if len(s.dt.single) != tSize {
s.dt.single = make([]dEntrySingle, tSize)
}
+ cTable := s.prevTable
+ if cap(cTable) < maxSymbolValue+1 {
+ cTable = make([]cTableEntry, 0, maxSymbolValue+1)
+ }
+ cTable = cTable[:maxSymbolValue+1]
+ s.prevTable = cTable[:s.symbolLen]
+ s.prevTableLog = s.actualTableLog
+
for n, w := range s.huffWeight[:s.symbolLen] {
if w == 0 {
+ cTable[n] = cTableEntry{
+ val: 0,
+ nBits: 0,
+ }
continue
}
length := (uint32(1) << w) >> 1
d := dEntrySingle{
entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8),
}
- single := s.dt.single[rankStats[w] : rankStats[w]+length]
+
+ rank := &rankStats[w]
+ cTable[n] = cTableEntry{
+ val: uint16(*rank >> (w - 1)),
+ nBits: uint8(d.entry),
+ }
+
+ single := s.dt.single[*rank : *rank+length]
for i := range single {
single[i] = d
}
- rankStats[w] += length
+ *rank += length
}
+
return s, in, nil
}
@@ -155,69 +179,14 @@
// The length of the supplied input must match the end of a block exactly.
// Before this is called, the table must be initialized with ReadTable unless
// the encoder re-used the table.
+// Deprecated: Use the stateless Decoder() to get a concurrent version.
func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) {
- if len(s.dt.single) == 0 {
- return nil, errors.New("no table loaded")
+ if cap(s.Out) < s.MaxDecodedSize {
+ s.Out = make([]byte, s.MaxDecodedSize)
}
- var br bitReader
- err = br.init(in)
- if err != nil {
- return nil, err
- }
- s.Out = s.Out[:0]
-
- decode := func() byte {
- val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
- v := s.dt.single[val]
- br.bitsRead += uint8(v.entry)
- return uint8(v.entry >> 8)
- }
- hasDec := func(v dEntrySingle) byte {
- br.bitsRead += uint8(v.entry)
- return uint8(v.entry >> 8)
- }
-
- // Avoid bounds check by always having full sized table.
- const tlSize = 1 << tableLogMax
- const tlMask = tlSize - 1
- dt := s.dt.single[:tlSize]
-
- // Use temp table to avoid bound checks/append penalty.
- var tmp = s.huffWeight[:256]
- var off uint8
-
- for br.off >= 8 {
- br.fillFast()
- tmp[off+0] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
- tmp[off+1] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
- br.fillFast()
- tmp[off+2] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
- tmp[off+3] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask])
- off += 4
- if off == 0 {
- if len(s.Out)+256 > s.MaxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
- }
- s.Out = append(s.Out, tmp...)
- }
- }
-
- if len(s.Out)+int(off) > s.MaxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
- }
- s.Out = append(s.Out, tmp[:off]...)
-
- for !br.finished() {
- br.fill()
- if len(s.Out) >= s.MaxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
- }
- s.Out = append(s.Out, decode())
- }
- return s.Out, br.close()
+ s.Out = s.Out[:0:s.MaxDecodedSize]
+ s.Out, err = s.Decoder().Decompress1X(s.Out, in)
+ return s.Out, err
}
// Decompress4X will decompress a 4X encoded stream.
@@ -225,123 +194,402 @@
// the encoder re-used the table.
// The length of the supplied input must match the end of a block exactly.
// The destination size of the uncompressed data must be known and provided.
+// Deprecated: Use the stateless Decoder() to get a concurrent version.
func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) {
- if len(s.dt.single) == 0 {
- return nil, errors.New("no table loaded")
- }
- if len(in) < 6+(4*1) {
- return nil, errors.New("input too small")
- }
if dstSize > s.MaxDecodedSize {
return nil, ErrMaxDecodedSizeExceeded
}
- // TODO: We do not detect when we overrun a buffer, except if the last one does.
+ if cap(s.Out) < dstSize {
+ s.Out = make([]byte, s.MaxDecodedSize)
+ }
+ s.Out = s.Out[:0:dstSize]
+ s.Out, err = s.Decoder().Decompress4X(s.Out, in)
+ return s.Out, err
+}
- var br [4]bitReader
+// Decoder will return a stateless decoder that can be used by multiple
+// decompressors concurrently.
+// Before this is called, the table must be initialized with ReadTable.
+// The Decoder is still linked to the scratch buffer, so that buffer cannot be reused.
+// However, it is safe to discard the scratch.
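+//
+// A hedged usage sketch (block and blockSize are placeholders; error handling elided):
+//
+//	dec := s.Decoder()
+//	dst := make([]byte, 0, blockSize) // cap of dst signals the expected output size
+//	out, err := dec.Decompress1X(dst, block)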
+func (s *Scratch) Decoder() *Decoder {
+ return &Decoder{
+ dt: s.dt,
+ actualTableLog: s.actualTableLog,
+ }
+}
+
+// Decoder provides stateless decoding.
+type Decoder struct {
+ dt dTable
+ actualTableLog uint8
+}
+
+// Decompress1X will decompress a 1X encoded stream.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
+ if len(d.dt.single) == 0 {
+ return nil, errors.New("no table loaded")
+ }
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress1X8Bit(dst, src)
+ }
+ var br bitReaderShifted
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ const tlSize = 1 << tableLogMax
+ const tlMask = tlSize - 1
+ dt := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ for br.off >= 8 {
+ br.fillFast()
+ v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ // Refill
+ br.fillFast()
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br.off < 8, so the remaining bit count fits in a uint8.
+ bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
+ for bitsLeft > 0 {
+ br.fill()
+ // Manually inlined refill, kept disabled; br.fill() above already covers it.
+ if false && br.bitsRead >= 32 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= nBits
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8Bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress1X8BitExactly(dst, src)
+ }
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ shift := (8 - d.actualTableLog) & 7
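+ // Shifting the peeked byte right by this amount keeps the top actualTableLog bits, which index the table.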
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br.off < 4, so the remaining bit count fits in an int8.
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 {
+ if br.bitsRead >= 64-8 {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// decompress1X8BitExactly will decompress a 1X encoded stream with tablelog == 8.
+// The cap of the output buffer will be the maximum decompressed size.
+// The length of the supplied input must match the end of a block exactly.
+func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
+ var br bitReaderBytes
+ err := br.init(src)
+ if err != nil {
+ return dst, err
+ }
+ maxDecodedSize := cap(dst)
+ dst = dst[:0]
+
+ // Avoid bounds check by always having full sized table.
+ dt := d.dt.single[:256]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+
+ const shift = 0
+
+ //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[br.peekByteFast()>>shift]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+
+ if len(dst)+int(off) > maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:off]...)
+
+ // br.off < 4, so the remaining bit count fits in an int8.
+ bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ for bitsLeft > 0 {
+ if br.bitsRead >= 64-8 {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ if len(dst) >= maxDecodedSize {
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ v := dt[br.peekByteFast()>>shift]
+ nBits := uint8(v.entry)
+ br.advance(nBits)
+ bitsLeft -= int8(nBits)
+ dst = append(dst, uint8(v.entry>>8))
+ }
+ return dst, br.close()
+}
+
+// Decompress4X will decompress a 4X encoded stream.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
+ if len(d.dt.single) == 0 {
+ return nil, errors.New("no table loaded")
+ }
+ if len(src) < 6+(4*1) {
+ return nil, errors.New("input too small")
+ }
+ if use8BitTables && d.actualTableLog <= 8 {
+ return d.decompress4X8bit(dst, src)
+ }
+
+ var br [4]bitReaderShifted
start := 6
for i := 0; i < 3; i++ {
- length := int(in[i*2]) | (int(in[i*2+1]) << 8)
- if start+length >= len(in) {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
return nil, errors.New("truncated input (or invalid offset)")
}
- err = br[i].init(in[start : start+length])
+ err := br[i].init(src[start : start+length])
if err != nil {
return nil, err
}
start += length
}
- err = br[3].init(in[start:])
+ err := br[3].init(src[start:])
if err != nil {
return nil, err
}
- // Prepare output
- if cap(s.Out) < dstSize {
- s.Out = make([]byte, 0, dstSize)
- }
- s.Out = s.Out[:dstSize]
// destination, offset to match first output
- dstOut := s.Out
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
dstEvery := (dstSize + 3) / 4
const tlSize = 1 << tableLogMax
const tlMask = tlSize - 1
- single := s.dt.single[:tlSize]
-
- decode := func(br *bitReader) byte {
- val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */
- v := single[val&tlMask]
- br.bitsRead += uint8(v.entry)
- return uint8(v.entry >> 8)
- }
+ single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var tmp = s.huffWeight[:256]
+ var buf [256]byte
var off uint8
var decoded int
// Decode 2 values from each decoder/loop.
const bufoff = 256 / 4
-bigloop:
for {
- for i := range br {
- br := &br[i]
- if br.off < 4 {
- break bigloop
- }
- br.fillFast()
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
}
{
const stream = 0
- val := br[stream].peekBitsFast(s.actualTableLog)
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
- val2 := br[stream].peekBitsFast(s.actualTableLog)
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
v2 := single[val2&tlMask]
- tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- tmp[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
- }
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
- {
- const stream = 1
- val := br[stream].peekBitsFast(s.actualTableLog)
- v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
- val2 := br[stream].peekBitsFast(s.actualTableLog)
- v2 := single[val2&tlMask]
- tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- tmp[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
{
const stream = 2
- val := br[stream].peekBitsFast(s.actualTableLog)
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream] = uint8(v.entry >> 8)
- val2 := br[stream].peekBitsFast(s.actualTableLog)
+ val2 := br[stream2].peekBitsFast(d.actualTableLog)
v2 := single[val2&tlMask]
- tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- tmp[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
- }
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
- {
- const stream = 3
- val := br[stream].peekBitsFast(s.actualTableLog)
- v := single[val&tlMask]
- br[stream].bitsRead += uint8(v.entry)
+ val = br[stream].peekBitsFast(d.actualTableLog)
+ v = single[val&tlMask]
+ br[stream].advance(uint8(v.entry))
+ buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
- val2 := br[stream].peekBitsFast(s.actualTableLog)
- v2 := single[val2&tlMask]
- tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8)
- tmp[off+bufoff*stream] = uint8(v.entry >> 8)
- br[stream].bitsRead += uint8(v2.entry)
+ val2 = br[stream2].peekBitsFast(d.actualTableLog)
+ v2 = single[val2&tlMask]
+ br[stream2].advance(uint8(v2.entry))
+ buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
off += 2
@@ -350,42 +598,67 @@
if bufoff > dstEvery {
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(dstOut, tmp[:bufoff])
- copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2])
- copy(dstOut[dstEvery*2:], tmp[bufoff*2:bufoff*3])
- copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4])
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
off = 0
- dstOut = dstOut[bufoff:]
+ out = out[bufoff:]
decoded += 256
// There must at least be 3 buffers left.
- if len(dstOut) < dstEvery*3 {
+ if len(out) < dstEvery*3 {
return nil, errors.New("corruption detected: stream overrun 2")
}
}
}
if off > 0 {
ioff := int(off)
- if len(dstOut) < dstEvery*3+ioff {
+ if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
- copy(dstOut, tmp[:off])
- copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2])
- copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3])
- copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4])
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
decoded += int(off) * 4
- dstOut = dstOut[off:]
+ out = out[off:]
}
// Decode remaining.
for i := range br {
offset := dstEvery * i
br := &br[i]
- for !br.finished() {
+ bitsLeft := br.off*8 + uint(64-br.bitsRead)
+ for bitsLeft > 0 {
br.fill()
- if offset >= len(dstOut) {
+ // Manually inlined refill, kept disabled; br.fill() above already covers it.
+ if false && br.bitsRead >= 32 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value = (br.value << 32) | uint64(low)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value = (br.value << 8) | uint64(br.in[br.off-1])
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
return nil, errors.New("corruption detected: stream overrun 4")
}
- dstOut[offset] = decode(br)
+
+ // Read value and increment offset.
+ val := br.peekBitsFast(d.actualTableLog)
+ v := single[val&tlMask].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= uint(nBits)
+ out[offset] = uint8(v >> 8)
offset++
}
decoded += offset - dstEvery*i
@@ -397,7 +670,426 @@
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
- return s.Out, nil
+ return dst, nil
+}
+
+// decompress4X8bit will decompress a 4X encoded stream with tablelog <= 8.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
+ if d.actualTableLog == 8 {
+ return d.decompress4X8bitExactly(dst, src)
+ }
+
+ var br [4]bitReaderBytes
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ shift := (8 - d.actualTableLog) & 7
+
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
+}
+
+// decompress4X8bitExactly will decompress a 4X encoded stream with tablelog == 8.
+// The length of the supplied input must match the end of a block exactly.
+// The *capacity* of the dst slice must match the destination size of
+// the uncompressed data exactly.
+func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
+ var br [4]bitReaderBytes
+ start := 6
+ for i := 0; i < 3; i++ {
+ length := int(src[i*2]) | (int(src[i*2+1]) << 8)
+ if start+length >= len(src) {
+ return nil, errors.New("truncated input (or invalid offset)")
+ }
+ err := br[i].init(src[start : start+length])
+ if err != nil {
+ return nil, err
+ }
+ start += length
+ }
+ err := br[3].init(src[start:])
+ if err != nil {
+ return nil, err
+ }
+
+ // destination, offset to match first output
+ dstSize := cap(dst)
+ dst = dst[:dstSize]
+ out := dst
+ dstEvery := (dstSize + 3) / 4
+
+ const shift = 0
+ const tlSize = 1 << 8
+ const tlMask = tlSize - 1
+ single := d.dt.single[:tlSize]
+
+ // Use temp table to avoid bound checks/append penalty.
+ var buf [256]byte
+ var off uint8
+ var decoded int
+
+ // Decode 4 values from each decoder/loop.
+ const bufoff = 256 / 4
+ for {
+ if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
+ break
+ }
+
+ {
+ // Interleave 2 decodes.
+ const stream = 0
+ const stream2 = 1
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ {
+ const stream = 2
+ const stream2 = 3
+ br[stream].fillFast()
+ br[stream2].fillFast()
+
+ v := single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 := single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+1] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+2] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+
+ v = single[br[stream].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream+3] = uint8(v >> 8)
+ br[stream].advance(uint8(v))
+
+ v2 = single[br[stream2].peekByteFast()>>shift].entry
+ buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
+ br[stream2].advance(uint8(v2))
+ }
+
+ off += 4
+
+ if off == bufoff {
+ if bufoff > dstEvery {
+ return nil, errors.New("corruption detected: stream overrun 1")
+ }
+ copy(out, buf[:bufoff])
+ copy(out[dstEvery:], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
+ off = 0
+ out = out[bufoff:]
+ decoded += 256
+ // There must at least be 3 buffers left.
+ if len(out) < dstEvery*3 {
+ return nil, errors.New("corruption detected: stream overrun 2")
+ }
+ }
+ }
+ if off > 0 {
+ ioff := int(off)
+ if len(out) < dstEvery*3+ioff {
+ return nil, errors.New("corruption detected: stream overrun 3")
+ }
+ copy(out, buf[:off])
+ copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
+ copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
+ copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ decoded += int(off) * 4
+ out = out[off:]
+ }
+
+ // Decode remaining.
+ for i := range br {
+ offset := dstEvery * i
+ br := &br[i]
+ bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ for bitsLeft > 0 {
+ if br.finished() {
+ return nil, io.ErrUnexpectedEOF
+ }
+ if br.bitsRead >= 56 {
+ if br.off >= 4 {
+ v := br.in[br.off-4:]
+ v = v[:4]
+ low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
+ br.value |= uint64(low) << (br.bitsRead - 32)
+ br.bitsRead -= 32
+ br.off -= 4
+ } else {
+ for br.off > 0 {
+ br.value |= uint64(br.in[br.off-1]) << (br.bitsRead - 8)
+ br.bitsRead -= 8
+ br.off--
+ }
+ }
+ }
+ // end inline...
+ if offset >= len(out) {
+ return nil, errors.New("corruption detected: stream overrun 4")
+ }
+
+ // Read value and increment offset.
+ v := single[br.peekByteFast()>>shift].entry
+ nBits := uint8(v)
+ br.advance(nBits)
+ bitsLeft -= int(nBits)
+ out[offset] = uint8(v >> 8)
+ offset++
+ }
+ decoded += offset - dstEvery*i
+ err = br.close()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if dstSize != decoded {
+ return nil, errors.New("corruption detected: short output block")
+ }
+ return dst, nil
}
// matches will compare a decoding table to a coding table.
diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go
index 53249df..7ec2022 100644
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@@ -55,6 +55,9 @@
// ReusePolicyNone will disable re-use of tables.
// This is slightly faster than ReusePolicyAllow but may produce larger output.
ReusePolicyNone
+
+ // ReusePolicyMust only allows reuse; if the previous table cannot be used,
+ // or does not produce smaller output, ErrIncompressible is returned.
+ ReusePolicyMust
)
type Scratch struct {
@@ -79,6 +82,13 @@
// Slice of the returned data.
OutData []byte
+ // MaxDecodedSize will set the maximum allowed output size.
+ // This value will automatically be set to BlockSizeMax if not set.
+ // Decoders will return ErrMaxDecodedSizeExceeded if this limit is exceeded.
+ MaxDecodedSize int
+
+ br byteReader
+
// MaxSymbolValue will override the maximum symbol value of the next block.
MaxSymbolValue uint8
@@ -95,12 +105,6 @@
// If WantLogLess == 0 any improvement will do.
WantLogLess uint8
- // MaxDecodedSize will set the maximum allowed output size.
- // This value will automatically be set to BlockSizeMax if not set.
- // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
- MaxDecodedSize int
-
- br byteReader
symbolLen uint16 // Length of active part of the symbol table.
maxCount int // count of the most probable symbol
clearCount bool // clear count
@@ -115,6 +119,16 @@
huffWeight [maxSymbolValue + 1]byte
}
+// TransferCTable will copy the previously used compression table from src.
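+//
+// A hedged sketch: carry an encoder's table into a fresh Scratch so that
+// ReusePolicyMust can encode against it (oldScratch is a placeholder):
+//
+//	var fresh Scratch
+//	fresh.Reuse = ReusePolicyMust
+//	fresh.TransferCTable(oldScratch)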
+func (s *Scratch) TransferCTable(src *Scratch) {
+ if cap(s.prevTable) < len(src.prevTable) {
+ s.prevTable = make(cTable, 0, maxSymbolValue+1)
+ }
+ s.prevTable = s.prevTable[:len(src.prevTable)]
+ copy(s.prevTable, src.prevTable)
+ s.prevTableLog = src.prevTableLog
+}
+
func (s *Scratch) prepare(in []byte) (*Scratch, error) {
if len(in) > BlockSizeMax {
return nil, ErrTooBig
diff --git a/vendor/github.com/klauspost/compress/snappy/.gitignore b/vendor/github.com/klauspost/compress/snappy/.gitignore
deleted file mode 100644
index 042091d..0000000
--- a/vendor/github.com/klauspost/compress/snappy/.gitignore
+++ /dev/null
@@ -1,16 +0,0 @@
-cmd/snappytool/snappytool
-testdata/bench
-
-# These explicitly listed benchmark data files are for an obsolete version of
-# snappy_test.go.
-testdata/alice29.txt
-testdata/asyoulik.txt
-testdata/fireworks.jpeg
-testdata/geo.protodata
-testdata/html
-testdata/html_x_4
-testdata/kppkn.gtb
-testdata/lcet10.txt
-testdata/paper-100k.pdf
-testdata/plrabn12.txt
-testdata/urls.10K
diff --git a/vendor/github.com/klauspost/compress/snappy/AUTHORS b/vendor/github.com/klauspost/compress/snappy/AUTHORS
deleted file mode 100644
index bcfa195..0000000
--- a/vendor/github.com/klauspost/compress/snappy/AUTHORS
+++ /dev/null
@@ -1,15 +0,0 @@
-# This is the official list of Snappy-Go authors for copyright purposes.
-# This file is distinct from the CONTRIBUTORS files.
-# See the latter for an explanation.
-
-# Names should be added to this file as
-# Name or Organization <email address>
-# The email address is not required for organizations.
-
-# Please keep the list sorted.
-
-Damian Gryski <dgryski@gmail.com>
-Google Inc.
-Jan Mercl <0xjnml@gmail.com>
-Rodolfo Carvalho <rhcarvalho@gmail.com>
-Sebastien Binet <seb.binet@gmail.com>
diff --git a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS
deleted file mode 100644
index 931ae31..0000000
--- a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS
+++ /dev/null
@@ -1,37 +0,0 @@
-# This is the official list of people who can contribute
-# (and typically have contributed) code to the Snappy-Go repository.
-# The AUTHORS file lists the copyright holders; this file
-# lists people. For example, Google employees are listed here
-# but not in AUTHORS, because Google holds the copyright.
-#
-# The submission process automatically checks to make sure
-# that people submitting code are listed in this file (by email address).
-#
-# Names should be added to this file only after verifying that
-# the individual or the individual's organization has agreed to
-# the appropriate Contributor License Agreement, found here:
-#
-# http://code.google.com/legal/individual-cla-v1.0.html
-# http://code.google.com/legal/corporate-cla-v1.0.html
-#
-# The agreement for individuals can be filled out on the web.
-#
-# When adding J Random Contributor's name to this file,
-# either J's name or J's organization's name should be
-# added to the AUTHORS file, depending on whether the
-# individual or corporate CLA was used.
-
-# Names should be added to this file like so:
-# Name <email address>
-
-# Please keep the list sorted.
-
-Damian Gryski <dgryski@gmail.com>
-Jan Mercl <0xjnml@gmail.com>
-Kai Backman <kaib@golang.org>
-Marc-Antoine Ruel <maruel@chromium.org>
-Nigel Tao <nigeltao@golang.org>
-Rob Pike <r@golang.org>
-Rodolfo Carvalho <rhcarvalho@gmail.com>
-Russ Cox <rsc@golang.org>
-Sebastien Binet <seb.binet@gmail.com>
diff --git a/vendor/github.com/klauspost/compress/snappy/LICENSE b/vendor/github.com/klauspost/compress/snappy/LICENSE
deleted file mode 100644
index 6050c10..0000000
--- a/vendor/github.com/klauspost/compress/snappy/LICENSE
+++ /dev/null
@@ -1,27 +0,0 @@
-Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/klauspost/compress/snappy/README b/vendor/github.com/klauspost/compress/snappy/README
deleted file mode 100644
index cea1287..0000000
--- a/vendor/github.com/klauspost/compress/snappy/README
+++ /dev/null
@@ -1,107 +0,0 @@
-The Snappy compression format in the Go programming language.
-
-To download and install from source:
-$ go get github.com/golang/snappy
-
-Unless otherwise noted, the Snappy-Go source files are distributed
-under the BSD-style license found in the LICENSE file.
-
-
-
-Benchmarks.
-
-The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
-or so files, the same set used by the C++ Snappy code (github.com/google/snappy
-and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
-3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
-
-"go test -test.bench=."
-
-_UFlat0-8 2.19GB/s ± 0% html
-_UFlat1-8 1.41GB/s ± 0% urls
-_UFlat2-8 23.5GB/s ± 2% jpg
-_UFlat3-8 1.91GB/s ± 0% jpg_200
-_UFlat4-8 14.0GB/s ± 1% pdf
-_UFlat5-8 1.97GB/s ± 0% html4
-_UFlat6-8 814MB/s ± 0% txt1
-_UFlat7-8 785MB/s ± 0% txt2
-_UFlat8-8 857MB/s ± 0% txt3
-_UFlat9-8 719MB/s ± 1% txt4
-_UFlat10-8 2.84GB/s ± 0% pb
-_UFlat11-8 1.05GB/s ± 0% gaviota
-
-_ZFlat0-8 1.04GB/s ± 0% html
-_ZFlat1-8 534MB/s ± 0% urls
-_ZFlat2-8 15.7GB/s ± 1% jpg
-_ZFlat3-8 740MB/s ± 3% jpg_200
-_ZFlat4-8 9.20GB/s ± 1% pdf
-_ZFlat5-8 991MB/s ± 0% html4
-_ZFlat6-8 379MB/s ± 0% txt1
-_ZFlat7-8 352MB/s ± 0% txt2
-_ZFlat8-8 396MB/s ± 1% txt3
-_ZFlat9-8 327MB/s ± 1% txt4
-_ZFlat10-8 1.33GB/s ± 1% pb
-_ZFlat11-8 605MB/s ± 1% gaviota
-
-
-
-"go test -test.bench=. -tags=noasm"
-
-_UFlat0-8 621MB/s ± 2% html
-_UFlat1-8 494MB/s ± 1% urls
-_UFlat2-8 23.2GB/s ± 1% jpg
-_UFlat3-8 1.12GB/s ± 1% jpg_200
-_UFlat4-8 4.35GB/s ± 1% pdf
-_UFlat5-8 609MB/s ± 0% html4
-_UFlat6-8 296MB/s ± 0% txt1
-_UFlat7-8 288MB/s ± 0% txt2
-_UFlat8-8 309MB/s ± 1% txt3
-_UFlat9-8 280MB/s ± 1% txt4
-_UFlat10-8 753MB/s ± 0% pb
-_UFlat11-8 400MB/s ± 0% gaviota
-
-_ZFlat0-8 409MB/s ± 1% html
-_ZFlat1-8 250MB/s ± 1% urls
-_ZFlat2-8 12.3GB/s ± 1% jpg
-_ZFlat3-8 132MB/s ± 0% jpg_200
-_ZFlat4-8 2.92GB/s ± 0% pdf
-_ZFlat5-8 405MB/s ± 1% html4
-_ZFlat6-8 179MB/s ± 1% txt1
-_ZFlat7-8 170MB/s ± 1% txt2
-_ZFlat8-8 189MB/s ± 1% txt3
-_ZFlat9-8 164MB/s ± 1% txt4
-_ZFlat10-8 479MB/s ± 1% pb
-_ZFlat11-8 270MB/s ± 1% gaviota
-
-
-
-For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
-are the numbers from C++ Snappy's own benchmark suite, run via:
-
-make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
-
-BM_UFlat/0 2.4GB/s html
-BM_UFlat/1 1.4GB/s urls
-BM_UFlat/2 21.8GB/s jpg
-BM_UFlat/3 1.5GB/s jpg_200
-BM_UFlat/4 13.3GB/s pdf
-BM_UFlat/5 2.1GB/s html4
-BM_UFlat/6 1.0GB/s txt1
-BM_UFlat/7 959.4MB/s txt2
-BM_UFlat/8 1.0GB/s txt3
-BM_UFlat/9 864.5MB/s txt4
-BM_UFlat/10 2.9GB/s pb
-BM_UFlat/11 1.2GB/s gaviota
-
-BM_ZFlat/0 944.3MB/s html (22.31 %)
-BM_ZFlat/1 501.6MB/s urls (47.78 %)
-BM_ZFlat/2 14.3GB/s jpg (99.95 %)
-BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %)
-BM_ZFlat/4 8.3GB/s pdf (83.30 %)
-BM_ZFlat/5 903.5MB/s html4 (22.52 %)
-BM_ZFlat/6 336.0MB/s txt1 (57.88 %)
-BM_ZFlat/7 312.3MB/s txt2 (61.91 %)
-BM_ZFlat/8 353.1MB/s txt3 (54.99 %)
-BM_ZFlat/9 289.9MB/s txt4 (66.26 %)
-BM_ZFlat/10 1.2GB/s pb (19.68 %)
-BM_ZFlat/11 527.4MB/s gaviota (37.72 %)
diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/klauspost/compress/snappy/decode.go
deleted file mode 100644
index 72efb03..0000000
--- a/vendor/github.com/klauspost/compress/snappy/decode.go
+++ /dev/null
@@ -1,237 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package snappy
-
-import (
- "encoding/binary"
- "errors"
- "io"
-)
-
-var (
- // ErrCorrupt reports that the input is invalid.
- ErrCorrupt = errors.New("snappy: corrupt input")
- // ErrTooLarge reports that the uncompressed length is too large.
- ErrTooLarge = errors.New("snappy: decoded block is too large")
- // ErrUnsupported reports that the input isn't supported.
- ErrUnsupported = errors.New("snappy: unsupported input")
-
- errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
-)
-
-// DecodedLen returns the length of the decoded block.
-func DecodedLen(src []byte) (int, error) {
- v, _, err := decodedLen(src)
- return v, err
-}
-
-// decodedLen returns the length of the decoded block and the number of bytes
-// that the length header occupied.
-func decodedLen(src []byte) (blockLen, headerLen int, err error) {
- v, n := binary.Uvarint(src)
- if n <= 0 || v > 0xffffffff {
- return 0, 0, ErrCorrupt
- }
-
- const wordSize = 32 << (^uint(0) >> 32 & 1)
- if wordSize == 32 && v > 0x7fffffff {
- return 0, 0, ErrTooLarge
- }
- return int(v), n, nil
-}
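
The length header parsed above is a plain encoding/binary uvarint; a minimal
standard-library sketch of producing and consuming it (illustrative code, not
part of the deleted file):

    package main

    import (
        "encoding/binary"
        "fmt"
    )

    func main() {
        // The snappy block format begins with the decompressed length
        // as a base-128 varint, which decodedLen reads via Uvarint.
        header := make([]byte, binary.MaxVarintLen64)
        n := binary.PutUvarint(header, 65536) // a 64 KiB block
        v, m := binary.Uvarint(header[:n])
        fmt.Println(v, m, header[:n]) // 65536 3 [128 128 4]
    }
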
-
-const (
- decodeErrCodeCorrupt = 1
- decodeErrCodeUnsupportedLiteralLength = 2
-)
-
-// Decode returns the decoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire decoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-func Decode(dst, src []byte) ([]byte, error) {
- dLen, s, err := decodedLen(src)
- if err != nil {
- return nil, err
- }
- if dLen <= len(dst) {
- dst = dst[:dLen]
- } else {
- dst = make([]byte, dLen)
- }
- switch decode(dst, src[s:]) {
- case 0:
- return dst, nil
- case decodeErrCodeUnsupportedLiteralLength:
- return nil, errUnsupportedLiteralLength
- }
- return nil, ErrCorrupt
-}
-
-// NewReader returns a new Reader that decompresses from r, using the framing
-// format described at
-// https://github.com/google/snappy/blob/master/framing_format.txt
-func NewReader(r io.Reader) *Reader {
- return &Reader{
- r: r,
- decoded: make([]byte, maxBlockSize),
- buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
- }
-}
-
-// Reader is an io.Reader that can read Snappy-compressed bytes.
-type Reader struct {
- r io.Reader
- err error
- decoded []byte
- buf []byte
- // decoded[i:j] contains decoded bytes that have not yet been passed on.
- i, j int
- readHeader bool
-}
-
-// Reset discards any buffered data, resets all state, and switches the Snappy
-// reader to read from r. This permits reusing a Reader rather than allocating
-// a new one.
-func (r *Reader) Reset(reader io.Reader) {
- r.r = reader
- r.err = nil
- r.i = 0
- r.j = 0
- r.readHeader = false
-}
-
-func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
- if _, r.err = io.ReadFull(r.r, p); r.err != nil {
- if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
- r.err = ErrCorrupt
- }
- return false
- }
- return true
-}
-
-// Read satisfies the io.Reader interface.
-func (r *Reader) Read(p []byte) (int, error) {
- if r.err != nil {
- return 0, r.err
- }
- for {
- if r.i < r.j {
- n := copy(p, r.decoded[r.i:r.j])
- r.i += n
- return n, nil
- }
- if !r.readFull(r.buf[:4], true) {
- return 0, r.err
- }
- chunkType := r.buf[0]
- if !r.readHeader {
- if chunkType != chunkTypeStreamIdentifier {
- r.err = ErrCorrupt
- return 0, r.err
- }
- r.readHeader = true
- }
- chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
- if chunkLen > len(r.buf) {
- r.err = ErrUnsupported
- return 0, r.err
- }
-
- // The chunk types are specified at
- // https://github.com/google/snappy/blob/master/framing_format.txt
- switch chunkType {
- case chunkTypeCompressedData:
- // Section 4.2. Compressed data (chunk type 0x00).
- if chunkLen < checksumSize {
- r.err = ErrCorrupt
- return 0, r.err
- }
- buf := r.buf[:chunkLen]
- if !r.readFull(buf, false) {
- return 0, r.err
- }
- checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
- buf = buf[checksumSize:]
-
- n, err := DecodedLen(buf)
- if err != nil {
- r.err = err
- return 0, r.err
- }
- if n > len(r.decoded) {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if _, err := Decode(r.decoded, buf); err != nil {
- r.err = err
- return 0, r.err
- }
- if crc(r.decoded[:n]) != checksum {
- r.err = ErrCorrupt
- return 0, r.err
- }
- r.i, r.j = 0, n
- continue
-
- case chunkTypeUncompressedData:
- // Section 4.3. Uncompressed data (chunk type 0x01).
- if chunkLen < checksumSize {
- r.err = ErrCorrupt
- return 0, r.err
- }
- buf := r.buf[:checksumSize]
- if !r.readFull(buf, false) {
- return 0, r.err
- }
- checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
- // Read directly into r.decoded instead of via r.buf.
- n := chunkLen - checksumSize
- if n > len(r.decoded) {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if !r.readFull(r.decoded[:n], false) {
- return 0, r.err
- }
- if crc(r.decoded[:n]) != checksum {
- r.err = ErrCorrupt
- return 0, r.err
- }
- r.i, r.j = 0, n
- continue
-
- case chunkTypeStreamIdentifier:
- // Section 4.1. Stream identifier (chunk type 0xff).
- if chunkLen != len(magicBody) {
- r.err = ErrCorrupt
- return 0, r.err
- }
- if !r.readFull(r.buf[:len(magicBody)], false) {
- return 0, r.err
- }
- for i := 0; i < len(magicBody); i++ {
- if r.buf[i] != magicBody[i] {
- r.err = ErrCorrupt
- return 0, r.err
- }
- }
- continue
- }
-
- if chunkType <= 0x7f {
- // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
- r.err = ErrUnsupported
- return 0, r.err
- }
- // Section 4.4 Padding (chunk type 0xfe).
- // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
- if !r.readFull(r.buf[:chunkLen], false) {
- return 0, r.err
- }
- }
-}
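
The block-level API deleted in this file survives unchanged in upstream
github.com/golang/snappy (the package the README above points at); a minimal
round-trip sketch against that import path, offered as an assumption about
where callers migrate rather than anything stated by this change:

    package main

    import (
        "fmt"

        "github.com/golang/snappy"
    )

    func main() {
        src := []byte("hello, hello, hello, snappy")
        enc := snappy.Encode(nil, src) // uvarint length header + tag stream

        // DecodedLen reads only the header, so callers can size dst
        // before paying for the full decode.
        n, err := snappy.DecodedLen(enc)
        if err != nil {
            panic(err)
        }
        dec, err := snappy.Decode(make([]byte, n), enc)
        if err != nil {
            panic(err)
        }
        fmt.Printf("%d -> %d -> %q\n", len(src), len(enc), dec)
    }
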
diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go b/vendor/github.com/klauspost/compress/snappy/decode_amd64.go
deleted file mode 100644
index fcd192b..0000000
--- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-package snappy
-
-// decode has the same semantics as in decode_other.go.
-//
-//go:noescape
-func decode(dst, src []byte) int
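
The build constraints here are the exact complement of the
// +build !amd64 appengine !gc noasm line in decode_other.go below (spaces OR,
separate lines AND), so exactly one definition of decode is compiled into any
given build.
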
diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s b/vendor/github.com/klauspost/compress/snappy/decode_amd64.s
deleted file mode 100644
index 1c66e37..0000000
--- a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s
+++ /dev/null
@@ -1,482 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-// - AX scratch
-// - BX scratch
-// - CX length or x
-// - DX offset
-// - SI &src[s]
-// - DI &dst[d]
-// + R8 dst_base
-// + R9 dst_len
-// + R10 dst_base + dst_len
-// + R11 src_base
-// + R12 src_len
-// + R13 src_base + src_len
-// - R14 used by doCopy
-// - R15 used by doCopy
-//
-// The registers R8-R13 (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
-// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
-TEXT ·decode(SB), NOSPLIT, $48-56
- // Initialize SI, DI and R8-R13.
- MOVQ dst_base+0(FP), R8
- MOVQ dst_len+8(FP), R9
- MOVQ R8, DI
- MOVQ R8, R10
- ADDQ R9, R10
- MOVQ src_base+24(FP), R11
- MOVQ src_len+32(FP), R12
- MOVQ R11, SI
- MOVQ R11, R13
- ADDQ R12, R13
-
-loop:
- // for s < len(src)
- CMPQ SI, R13
- JEQ end
-
- // CX = uint32(src[s])
- //
- // switch src[s] & 0x03
- MOVBLZX (SI), CX
- MOVL CX, BX
- ANDL $3, BX
- CMPL BX, $1
- JAE tagCopy
-
- // ----------------------------------------
- // The code below handles literal tags.
-
- // case tagLiteral:
- // x := uint32(src[s] >> 2)
- // switch
- SHRL $2, CX
- CMPL CX, $60
- JAE tagLit60Plus
-
- // case x < 60:
- // s++
- INCQ SI
-
-doLit:
- // This is the end of the inner "switch", when we have a literal tag.
- //
- // We assume that CX == x and x fits in a uint32, where x is the variable
- // used in the pure Go decode_other.go code.
-
- // length = int(x) + 1
- //
- // Unlike the pure Go code, we don't need to check if length <= 0 because
- // CX can hold 64 bits, so the increment cannot overflow.
- INCQ CX
-
- // Prepare to check if copying length bytes will run past the end of dst or
- // src.
- //
- // AX = len(dst) - d
- // BX = len(src) - s
- MOVQ R10, AX
- SUBQ DI, AX
- MOVQ R13, BX
- SUBQ SI, BX
-
- // !!! Try a faster technique for short (16 or fewer bytes) copies.
- //
- // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
- // goto callMemmove // Fall back on calling runtime·memmove.
- // }
- //
- // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
- // against 21 instead of 16, because it cannot assume that all of its input
- // is contiguous in memory and so it needs to leave enough source bytes to
- // read the next tag without refilling buffers, but Go's Decode assumes
- // contiguousness (the src argument is a []byte).
- CMPQ CX, $16
- JGT callMemmove
- CMPQ AX, $16
- JLT callMemmove
- CMPQ BX, $16
- JLT callMemmove
-
- // !!! Implement the copy from src to dst as a 16-byte load and store.
- // (Decode's documentation says that dst and src must not overlap.)
- //
- // This always copies 16 bytes, instead of only length bytes, but that's
- // OK. If the input is a valid Snappy encoding then subsequent iterations
- // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
- // non-nil error), so the overrun will be ignored.
- //
- // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
- // 16-byte loads and stores. This technique probably wouldn't be as
- // effective on architectures that are fussier about alignment.
- MOVOU 0(SI), X0
- MOVOU X0, 0(DI)
-
- // d += length
- // s += length
- ADDQ CX, DI
- ADDQ CX, SI
- JMP loop
-
-callMemmove:
- // if length > len(dst)-d || length > len(src)-s { etc }
- CMPQ CX, AX
- JGT errCorrupt
- CMPQ CX, BX
- JGT errCorrupt
-
- // copy(dst[d:], src[s:s+length])
- //
- // This means calling runtime·memmove(&dst[d], &src[s], length), so we push
- // DI, SI and CX as arguments. Coincidentally, we also need to spill those
- // three registers to the stack, to save local variables across the CALL.
- MOVQ DI, 0(SP)
- MOVQ SI, 8(SP)
- MOVQ CX, 16(SP)
- MOVQ DI, 24(SP)
- MOVQ SI, 32(SP)
- MOVQ CX, 40(SP)
- CALL runtime·memmove(SB)
-
- // Restore local variables: unspill registers from the stack and
- // re-calculate R8-R13.
- MOVQ 24(SP), DI
- MOVQ 32(SP), SI
- MOVQ 40(SP), CX
- MOVQ dst_base+0(FP), R8
- MOVQ dst_len+8(FP), R9
- MOVQ R8, R10
- ADDQ R9, R10
- MOVQ src_base+24(FP), R11
- MOVQ src_len+32(FP), R12
- MOVQ R11, R13
- ADDQ R12, R13
-
- // d += length
- // s += length
- ADDQ CX, DI
- ADDQ CX, SI
- JMP loop
-
-tagLit60Plus:
- // !!! This fragment does the
- //
- // s += x - 58; if uint(s) > uint(len(src)) { etc }
- //
- // checks. In the asm version, we code it once instead of once per switch case.
- ADDQ CX, SI
- SUBQ $58, SI
- CMPQ SI, R13
- JA errCorrupt
-
- // case x == 60:
- CMPL CX, $61
- JEQ tagLit61
- JA tagLit62Plus
-
- // x = uint32(src[s-1])
- MOVBLZX -1(SI), CX
- JMP doLit
-
-tagLit61:
- // case x == 61:
- // x = uint32(src[s-2]) | uint32(src[s-1])<<8
- MOVWLZX -2(SI), CX
- JMP doLit
-
-tagLit62Plus:
- CMPL CX, $62
- JA tagLit63
-
- // case x == 62:
- // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- MOVWLZX -3(SI), CX
- MOVBLZX -1(SI), BX
- SHLL $16, BX
- ORL BX, CX
- JMP doLit
-
-tagLit63:
- // case x == 63:
- // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- MOVL -4(SI), CX
- JMP doLit
-
-// The code above handles literal tags.
-// ----------------------------------------
-// The code below handles copy tags.
-
-tagCopy4:
- // case tagCopy4:
- // s += 5
- ADDQ $5, SI
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ SI, R13
- JA errCorrupt
-
- // length = 1 + int(src[s-5])>>2
- SHRQ $2, CX
- INCQ CX
-
- // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
- MOVLQZX -4(SI), DX
- JMP doCopy
-
-tagCopy2:
- // case tagCopy2:
- // s += 3
- ADDQ $3, SI
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ SI, R13
- JA errCorrupt
-
- // length = 1 + int(src[s-3])>>2
- SHRQ $2, CX
- INCQ CX
-
- // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
- MOVWQZX -2(SI), DX
- JMP doCopy
-
-tagCopy:
- // We have a copy tag. We assume that:
- // - BX == src[s] & 0x03
- // - CX == src[s]
- CMPQ BX, $2
- JEQ tagCopy2
- JA tagCopy4
-
- // case tagCopy1:
- // s += 2
- ADDQ $2, SI
-
- // if uint(s) > uint(len(src)) { etc }
- CMPQ SI, R13
- JA errCorrupt
-
- // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
- MOVQ CX, DX
- ANDQ $0xe0, DX
- SHLQ $3, DX
- MOVBQZX -1(SI), BX
- ORQ BX, DX
-
- // length = 4 + int(src[s-2])>>2&0x7
- SHRQ $2, CX
- ANDQ $7, CX
- ADDQ $4, CX
-
-doCopy:
- // This is the end of the outer "switch", when we have a copy tag.
- //
- // We assume that:
- // - CX == length && CX > 0
- // - DX == offset
-
- // if offset <= 0 { etc }
- CMPQ DX, $0
- JLE errCorrupt
-
- // if d < offset { etc }
- MOVQ DI, BX
- SUBQ R8, BX
- CMPQ BX, DX
- JLT errCorrupt
-
- // if length > len(dst)-d { etc }
- MOVQ R10, BX
- SUBQ DI, BX
- CMPQ CX, BX
- JGT errCorrupt
-
- // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
- //
- // Set:
- // - R14 = len(dst)-d
- // - R15 = &dst[d-offset]
- MOVQ R10, R14
- SUBQ DI, R14
- MOVQ DI, R15
- SUBQ DX, R15
-
- // !!! Try a faster technique for short (16 or fewer bytes) forward copies.
- //
- // First, try using two 8-byte load/stores, similar to the doLit technique
- // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
- // still OK if offset >= 8. Note that this has to be two 8-byte load/stores
- // and not one 16-byte load/store, and the first store has to be before the
- // second load, due to the overlap if offset is in the range [8, 16).
- //
- // if length > 16 || offset < 8 || len(dst)-d < 16 {
- // goto slowForwardCopy
- // }
- // copy 16 bytes
- // d += length
- CMPQ CX, $16
- JGT slowForwardCopy
- CMPQ DX, $8
- JLT slowForwardCopy
- CMPQ R14, $16
- JLT slowForwardCopy
- MOVQ 0(R15), AX
- MOVQ AX, 0(DI)
- MOVQ 8(R15), BX
- MOVQ BX, 8(DI)
- ADDQ CX, DI
- JMP loop
-
-slowForwardCopy:
- // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
- // can still try 8-byte load stores, provided we can overrun up to 10 extra
- // bytes. As above, the overrun will be fixed up by subsequent iterations
- // of the outermost loop.
- //
- // The C++ snappy code calls this technique IncrementalCopyFastPath. Its
- // commentary says:
- //
- // ----
- //
- // The main part of this loop is a simple copy of eight bytes at a time
- // until we've copied (at least) the requested amount of bytes. However,
- // if d and d-offset are less than eight bytes apart (indicating a
- // repeating pattern of length < 8), we first need to expand the pattern in
- // order to get the correct results. For instance, if the buffer looks like
- // this, with the eight-byte <d-offset> and <d> patterns marked as
- // intervals:
- //
- // abxxxxxxxxxxxx
- // [------] d-offset
- // [------] d
- //
- // a single eight-byte copy from <d-offset> to <d> will repeat the pattern
- // once, after which we can move <d> two bytes without moving <d-offset>:
- //
- // ababxxxxxxxxxx
- // [------] d-offset
- // [------] d
- //
- // and repeat the exercise until the two no longer overlap.
- //
- // This allows us to do very well in the special case of one single byte
- // repeated many times, without taking a big hit for more general cases.
- //
- // The worst case of extra writing past the end of the match occurs when
- // offset == 1 and length == 1; the last copy will read from byte positions
- // [0..7] and write to [4..11], whereas it was only supposed to write to
- // position 1. Thus, ten excess bytes.
- //
- // ----
- //
- // That "10 byte overrun" worst case is confirmed by Go's
- // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
- // and finishSlowForwardCopy algorithm.
- //
- // if length > len(dst)-d-10 {
- // goto verySlowForwardCopy
- // }
- SUBQ $10, R14
- CMPQ CX, R14
- JGT verySlowForwardCopy
-
-makeOffsetAtLeast8:
- // !!! As above, expand the pattern so that offset >= 8 and we can use
- // 8-byte load/stores.
- //
- // for offset < 8 {
- // copy 8 bytes from dst[d-offset:] to dst[d:]
- // length -= offset
- // d += offset
- // offset += offset
- // // The two previous lines together means that d-offset, and therefore
- // // R15, is unchanged.
- // }
- CMPQ DX, $8
- JGE fixUpSlowForwardCopy
- MOVQ (R15), BX
- MOVQ BX, (DI)
- SUBQ DX, CX
- ADDQ DX, DI
- ADDQ DX, DX
- JMP makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
- // !!! Add length (which might be negative now) to d (implied by DI being
- // &dst[d]) so that d ends up at the right place when we jump back to the
- // top of the loop. Before we do that, though, we save DI to AX so that, if
- // length is positive, copying the remaining length bytes will write to the
- // right place.
- MOVQ DI, AX
- ADDQ CX, DI
-
-finishSlowForwardCopy:
- // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
- // length means that we overrun, but as above, that will be fixed up by
- // subsequent iterations of the outermost loop.
- CMPQ CX, $0
- JLE loop
- MOVQ (R15), BX
- MOVQ BX, (AX)
- ADDQ $8, R15
- ADDQ $8, AX
- SUBQ $8, CX
- JMP finishSlowForwardCopy
-
-verySlowForwardCopy:
- // verySlowForwardCopy is a simple implementation of forward copy. In C
- // parlance, this is a do/while loop instead of a while loop, since we know
- // that length > 0. In Go syntax:
- //
- // for {
- // dst[d] = dst[d - offset]
- // d++
- // length--
- // if length == 0 {
- // break
- // }
- // }
- MOVB (R15), BX
- MOVB BX, (DI)
- INCQ R15
- INCQ DI
- DECQ CX
- JNZ verySlowForwardCopy
- JMP loop
-
-// The code above handles copy tags.
-// ----------------------------------------
-
-end:
- // This is the end of the "for s < len(src)".
- //
- // if d != len(dst) { etc }
- CMPQ DI, R10
- JNE errCorrupt
-
- // return 0
- MOVQ $0, ret+48(FP)
- RET
-
-errCorrupt:
- // return decodeErrCodeCorrupt
- MOVQ $1, ret+48(FP)
- RET
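
The trickiest part of this file is the overlapping forward copy. A rough Go
rendering of the pattern-expansion idea the comments above describe (an
illustrative sketch, not the deleted assembly, and without its deliberate
8-byte overruns):

    // forwardCopy copies length bytes inside dst from position d-offset
    // to position d, running strictly forwards so an overlapping source
    // repeats its pattern.
    func forwardCopy(dst []byte, d, offset, length int) {
        // Double the pattern until source and destination are at least
        // 8 bytes apart (the makeOffsetAtLeast8 loop above); d-offset
        // stays fixed while the usable offset doubles each pass.
        for offset < 8 && length > offset {
            copy(dst[d:d+offset], dst[d-offset:d])
            d, length, offset = d+offset, length-offset, offset*2
        }
        // A byte-at-a-time loop handles whatever overlap remains
        // (verySlowForwardCopy above).
        for i := 0; i < length; i++ {
            dst[d+i] = dst[d+i-offset]
        }
    }
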
diff --git a/vendor/github.com/klauspost/compress/snappy/decode_other.go b/vendor/github.com/klauspost/compress/snappy/decode_other.go
deleted file mode 100644
index 94a96c5..0000000
--- a/vendor/github.com/klauspost/compress/snappy/decode_other.go
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !amd64 appengine !gc noasm
-
-package snappy
-
-// decode writes the decoding of src to dst. It assumes that the varint-encoded
-// length of the decompressed bytes has already been read, and that len(dst)
-// equals that length.
-//
-// It returns 0 on success or a decodeErrCodeXxx error code on failure.
-func decode(dst, src []byte) int {
- var d, s, offset, length int
- for s < len(src) {
- switch src[s] & 0x03 {
- case tagLiteral:
- x := uint32(src[s] >> 2)
- switch {
- case x < 60:
- s++
- case x == 60:
- s += 2
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-1])
- case x == 61:
- s += 3
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-2]) | uint32(src[s-1])<<8
- case x == 62:
- s += 4
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
- case x == 63:
- s += 5
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
- }
- length = int(x) + 1
- if length <= 0 {
- return decodeErrCodeUnsupportedLiteralLength
- }
- if length > len(dst)-d || length > len(src)-s {
- return decodeErrCodeCorrupt
- }
- copy(dst[d:], src[s:s+length])
- d += length
- s += length
- continue
-
- case tagCopy1:
- s += 2
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = 4 + int(src[s-2])>>2&0x7
- offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-
- case tagCopy2:
- s += 3
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = 1 + int(src[s-3])>>2
- offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-
- case tagCopy4:
- s += 5
- if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
- return decodeErrCodeCorrupt
- }
- length = 1 + int(src[s-5])>>2
- offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
- }
-
- if offset <= 0 || d < offset || length > len(dst)-d {
- return decodeErrCodeCorrupt
- }
- // Copy from an earlier sub-slice of dst to a later sub-slice.
- // If no overlap, use the built-in copy:
- if offset > length {
- copy(dst[d:d+length], dst[d-offset:])
- d += length
- continue
- }
-
- // Unlike the built-in copy function, this byte-by-byte copy always runs
- // forwards, even if the slices overlap. Conceptually, this is:
- //
- // d += forwardCopy(dst[d:d+length], dst[d-offset:])
- //
- // We align the slices into a and b and show the compiler they are the same size.
- // This allows the loop to run without bounds checks.
- a := dst[d : d+length]
- b := dst[d-offset:]
- b = b[:len(a)]
- for i := range a {
- a[i] = b[i]
- }
- d += length
- }
- if d != len(dst) {
- return decodeErrCodeCorrupt
- }
- return 0
-}
diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/klauspost/compress/snappy/encode.go
deleted file mode 100644
index 8d393e9..0000000
--- a/vendor/github.com/klauspost/compress/snappy/encode.go
+++ /dev/null
@@ -1,285 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package snappy
-
-import (
- "encoding/binary"
- "errors"
- "io"
-)
-
-// Encode returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-func Encode(dst, src []byte) []byte {
- if n := MaxEncodedLen(len(src)); n < 0 {
- panic(ErrTooLarge)
- } else if len(dst) < n {
- dst = make([]byte, n)
- }
-
- // The block starts with the varint-encoded length of the decompressed bytes.
- d := binary.PutUvarint(dst, uint64(len(src)))
-
- for len(src) > 0 {
- p := src
- src = nil
- if len(p) > maxBlockSize {
- p, src = p[:maxBlockSize], p[maxBlockSize:]
- }
- if len(p) < minNonLiteralBlockSize {
- d += emitLiteral(dst[d:], p)
- } else {
- d += encodeBlock(dst[d:], p)
- }
- }
- return dst[:d]
-}
-
-// inputMargin is the minimum number of extra input bytes to keep, inside
-// encodeBlock's inner loop. On some architectures, this margin lets us
-// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
-// literals can be implemented as a single load to and store from a 16-byte
-// register. That literal's actual length can be as short as 1 byte, so this
-// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
-// the encoding loop will fix up the copy overrun, and this inputMargin ensures
-// that we don't overrun the dst and src buffers.
-const inputMargin = 16 - 1
-
-// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
-// could be encoded with a copy tag. This is the minimum with respect to the
-// algorithm used by encodeBlock, not a minimum enforced by the file format.
-//
-// The encoded output must start with at least a 1 byte literal, as there are
-// no previous bytes to copy. A minimal (1 byte) copy after that, generated
-// from an emitCopy call in encodeBlock's main loop, would require at least
-// another inputMargin bytes, for the reason above: we want any emitLiteral
-// calls inside encodeBlock's main loop to use the fast path if possible, which
-// requires being able to overrun by inputMargin bytes. Thus,
-// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
-//
-// The C++ code doesn't use this exact threshold, but it could, as discussed at
-// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
-// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
-// optimization. It should not affect the encoded form. This is tested by
-// TestSameEncodingAsCppShortCopies.
-const minNonLiteralBlockSize = 1 + 1 + inputMargin
-
-// MaxEncodedLen returns the maximum length of a snappy block, given its
-// uncompressed length.
-//
-// It will return a negative value if srcLen is too large to encode.
-func MaxEncodedLen(srcLen int) int {
- n := uint64(srcLen)
- if n > 0xffffffff {
- return -1
- }
- // Compressed data can be defined as:
- // compressed := item* literal*
- // item := literal* copy
- //
- // The trailing literal sequence has a space blowup of at most 62/60
- // since a literal of length 60 needs one tag byte + one extra byte
- // for length information.
- //
- // Item blowup is trickier to measure. Suppose the "copy" op copies
- // 4 bytes of data. Because of a special check in the encoding code,
- // we produce a 4-byte copy only if the offset is < 65536. Therefore
- // the copy op takes 3 bytes to encode, and this type of item leads
- // to at most the 62/60 blowup for representing literals.
- //
- // Suppose the "copy" op copies 5 bytes of data. If the offset is big
- // enough, it will take 5 bytes to encode the copy op. Therefore the
- // worst case here is a one-byte literal followed by a five-byte copy.
- // That is, 6 bytes of input turn into 7 bytes of "compressed" data.
- //
- // This last factor dominates the blowup, so the final estimate is:
- n = 32 + n + n/6
- if n > 0xffffffff {
- return -1
- }
- return int(n)
-}
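
Plugging the 64 KiB maxBlockSize into that estimate gives
32 + 65536 + 65536/6 = 32 + 65536 + 10922 = 76490 bytes, which appears to be
the value behind the maxEncodedLenOfMaxBlockSize constant that NewReader uses
(together with checksumSize) to size its chunk buffer.
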
-
-var errClosed = errors.New("snappy: Writer is closed")
-
-// NewWriter returns a new Writer that compresses to w.
-//
-// The Writer returned does not buffer writes. There is no need to Flush or
-// Close such a Writer.
-//
-// Deprecated: the Writer returned is not suitable for many small writes, only
-// for few large writes. Use NewBufferedWriter instead, which is efficient
-// regardless of the frequency and shape of the writes, and remember to Close
-// that Writer when done.
-func NewWriter(w io.Writer) *Writer {
- return &Writer{
- w: w,
- obuf: make([]byte, obufLen),
- }
-}
-
-// NewBufferedWriter returns a new Writer that compresses to w, using the
-// framing format described at
-// https://github.com/google/snappy/blob/master/framing_format.txt
-//
-// The Writer returned buffers writes. Users must call Close to guarantee all
-// data has been forwarded to the underlying io.Writer. They may also call
-// Flush zero or more times before calling Close.
-func NewBufferedWriter(w io.Writer) *Writer {
- return &Writer{
- w: w,
- ibuf: make([]byte, 0, maxBlockSize),
- obuf: make([]byte, obufLen),
- }
-}
-
-// Writer is an io.Writer that can write Snappy-compressed bytes.
-type Writer struct {
- w io.Writer
- err error
-
- // ibuf is a buffer for the incoming (uncompressed) bytes.
- //
- // Its use is optional. For backwards compatibility, Writers created by the
- // NewWriter function have ibuf == nil, do not buffer incoming bytes, and
- // therefore do not need to be Flush'ed or Close'd.
- ibuf []byte
-
- // obuf is a buffer for the outgoing (compressed) bytes.
- obuf []byte
-
- // wroteStreamHeader is whether we have written the stream header.
- wroteStreamHeader bool
-}
-
-// Reset discards the writer's state and switches the Snappy writer to write to
-// w. This permits reusing a Writer rather than allocating a new one.
-func (w *Writer) Reset(writer io.Writer) {
- w.w = writer
- w.err = nil
- if w.ibuf != nil {
- w.ibuf = w.ibuf[:0]
- }
- w.wroteStreamHeader = false
-}
-
-// Write satisfies the io.Writer interface.
-func (w *Writer) Write(p []byte) (nRet int, errRet error) {
- if w.ibuf == nil {
- // Do not buffer incoming bytes. This does not perform or compress well
- // if the caller of Writer.Write writes many small slices. This
- // behavior is therefore deprecated, but still supported for backwards
- // compatibility with code that doesn't explicitly Flush or Close.
- return w.write(p)
- }
-
- // The remainder of this method is based on bufio.Writer.Write from the
- // standard library.
-
- for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
- var n int
- if len(w.ibuf) == 0 {
- // Large write, empty buffer.
- // Write directly from p to avoid copy.
- n, _ = w.write(p)
- } else {
- n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
- w.ibuf = w.ibuf[:len(w.ibuf)+n]
- w.Flush()
- }
- nRet += n
- p = p[n:]
- }
- if w.err != nil {
- return nRet, w.err
- }
- n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
- w.ibuf = w.ibuf[:len(w.ibuf)+n]
- nRet += n
- return nRet, nil
-}
-
-func (w *Writer) write(p []byte) (nRet int, errRet error) {
- if w.err != nil {
- return 0, w.err
- }
- for len(p) > 0 {
- obufStart := len(magicChunk)
- if !w.wroteStreamHeader {
- w.wroteStreamHeader = true
- copy(w.obuf, magicChunk)
- obufStart = 0
- }
-
- var uncompressed []byte
- if len(p) > maxBlockSize {
- uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
- } else {
- uncompressed, p = p, nil
- }
- checksum := crc(uncompressed)
-
- // Compress the buffer, discarding the result if the improvement
- // isn't at least 12.5%.
- compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
- chunkType := uint8(chunkTypeCompressedData)
- chunkLen := 4 + len(compressed)
- obufEnd := obufHeaderLen + len(compressed)
- if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
- chunkType = chunkTypeUncompressedData
- chunkLen = 4 + len(uncompressed)
- obufEnd = obufHeaderLen
- }
-
- // Fill in the per-chunk header that comes before the body.
- w.obuf[len(magicChunk)+0] = chunkType
- w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
- w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
- w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
- w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
- w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
- w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
- w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
-
- if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
- w.err = err
- return nRet, err
- }
- if chunkType == chunkTypeUncompressedData {
- if _, err := w.w.Write(uncompressed); err != nil {
- w.err = err
- return nRet, err
- }
- }
- nRet += len(uncompressed)
- }
- return nRet, nil
-}
-
-// Flush flushes the Writer to its underlying io.Writer.
-func (w *Writer) Flush() error {
- if w.err != nil {
- return w.err
- }
- if len(w.ibuf) == 0 {
- return nil
- }
- w.write(w.ibuf)
- w.ibuf = w.ibuf[:0]
- return w.err
-}
-
-// Close calls Flush and then closes the Writer.
-func (w *Writer) Close() error {
- w.Flush()
- ret := w.err
- if w.err == nil {
- w.err = errClosed
- }
- return ret
-}
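
The framed Writer deleted here likewise lives on upstream. A short usage
sketch of the buffered pair, again assuming the github.com/golang/snappy
import path:

    package main

    import (
        "bytes"
        "fmt"
        "io"

        "github.com/golang/snappy"
    )

    func main() {
        var framed bytes.Buffer

        // NewBufferedWriter accumulates up to one block per chunk, so
        // Close (or Flush) is required to emit the final chunk.
        w := snappy.NewBufferedWriter(&framed)
        if _, err := w.Write([]byte("stream of snappy-framed data")); err != nil {
            panic(err)
        }
        if err := w.Close(); err != nil {
            panic(err)
        }

        // The Reader verifies the stream identifier and the per-chunk
        // CRCs while decoding.
        out, err := io.ReadAll(snappy.NewReader(&framed))
        if err != nil {
            panic(err)
        }
        fmt.Printf("%q\n", out)
    }
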
diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go b/vendor/github.com/klauspost/compress/snappy/encode_amd64.go
deleted file mode 100644
index 150d91b..0000000
--- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-package snappy
-
-// emitLiteral has the same semantics as in encode_other.go.
-//
-//go:noescape
-func emitLiteral(dst, lit []byte) int
-
-// emitCopy has the same semantics as in encode_other.go.
-//
-//go:noescape
-func emitCopy(dst []byte, offset, length int) int
-
-// extendMatch has the same semantics as in encode_other.go.
-//
-//go:noescape
-func extendMatch(src []byte, i, j int) int
-
-// encodeBlock has the same semantics as in encode_other.go.
-//
-//go:noescape
-func encodeBlock(dst, src []byte) (d int)
diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s b/vendor/github.com/klauspost/compress/snappy/encode_amd64.s
deleted file mode 100644
index adfd979..0000000
--- a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s
+++ /dev/null
@@ -1,730 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
-// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
-// https://github.com/golang/snappy/issues/29
-//
-// As a workaround, the package was built with a known good assembler, and
-// those instructions were disassembled by "objdump -d" to yield the
-// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
-// style comments, in AT&T asm syntax. Note that rsp here is a physical
-// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
-// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
-// fine on Go 1.6.
-
-// The asm code generally follows the pure Go code in encode_other.go, except
-// where marked with a "!!!".
-
-// ----------------------------------------------------------------------------
-
-// func emitLiteral(dst, lit []byte) int
-//
-// All local variables fit into registers. The register allocation:
-// - AX len(lit)
-// - BX n
-// - DX return value
-// - DI &dst[i]
-// - R10 &lit[0]
-//
-// The 24 bytes of stack space is to call runtime·memmove.
-//
-// The unusual register allocation of local variables, such as R10 for the
-// source pointer, matches the allocation used at the call site in encodeBlock,
-// which makes it easier to manually inline this function.
-TEXT ·emitLiteral(SB), NOSPLIT, $24-56
- MOVQ dst_base+0(FP), DI
- MOVQ lit_base+24(FP), R10
- MOVQ lit_len+32(FP), AX
- MOVQ AX, DX
- MOVL AX, BX
- SUBL $1, BX
-
- CMPL BX, $60
- JLT oneByte
- CMPL BX, $256
- JLT twoBytes
-
-threeBytes:
- MOVB $0xf4, 0(DI)
- MOVW BX, 1(DI)
- ADDQ $3, DI
- ADDQ $3, DX
- JMP memmove
-
-twoBytes:
- MOVB $0xf0, 0(DI)
- MOVB BX, 1(DI)
- ADDQ $2, DI
- ADDQ $2, DX
- JMP memmove
-
-oneByte:
- SHLB $2, BX
- MOVB BX, 0(DI)
- ADDQ $1, DI
- ADDQ $1, DX
-
-memmove:
- MOVQ DX, ret+48(FP)
-
- // copy(dst[i:], lit)
- //
- // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
- // DI, R10 and AX as arguments.
- MOVQ DI, 0(SP)
- MOVQ R10, 8(SP)
- MOVQ AX, 16(SP)
- CALL runtime·memmove(SB)
- RET
-
-// ----------------------------------------------------------------------------
-
-// func emitCopy(dst []byte, offset, length int) int
-//
-// All local variables fit into registers. The register allocation:
-// - AX length
-// - SI &dst[0]
-// - DI &dst[i]
-// - R11 offset
-//
-// The unusual register allocation of local variables, such as R11 for the
-// offset, matches the allocation used at the call site in encodeBlock, which
-// makes it easier to manually inline this function.
-TEXT ·emitCopy(SB), NOSPLIT, $0-48
- MOVQ dst_base+0(FP), DI
- MOVQ DI, SI
- MOVQ offset+24(FP), R11
- MOVQ length+32(FP), AX
-
-loop0:
- // for length >= 68 { etc }
- CMPL AX, $68
- JLT step1
-
- // Emit a length 64 copy, encoded as 3 bytes.
- MOVB $0xfe, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $64, AX
- JMP loop0
-
-step1:
- // if length > 64 { etc }
- CMPL AX, $64
- JLE step2
-
- // Emit a length 60 copy, encoded as 3 bytes.
- MOVB $0xee, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $60, AX
-
-step2:
- // if length >= 12 || offset >= 2048 { goto step3 }
- CMPL AX, $12
- JGE step3
- CMPL R11, $2048
- JGE step3
-
- // Emit the remaining copy, encoded as 2 bytes.
- MOVB R11, 1(DI)
- SHRL $8, R11
- SHLB $5, R11
- SUBB $4, AX
- SHLB $2, AX
- ORB AX, R11
- ORB $1, R11
- MOVB R11, 0(DI)
- ADDQ $2, DI
-
- // Return the number of bytes written.
- SUBQ SI, DI
- MOVQ DI, ret+40(FP)
- RET
-
-step3:
- // Emit the remaining copy, encoded as 3 bytes.
- SUBL $1, AX
- SHLB $2, AX
- ORB $2, AX
- MOVB AX, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
-
- // Return the number of bytes written.
- SUBQ SI, DI
- MOVQ DI, ret+40(FP)
- RET
-
-// ----------------------------------------------------------------------------
-
-// func extendMatch(src []byte, i, j int) int
-//
-// All local variables fit into registers. The register allocation:
-// - DX &src[0]
-// - SI &src[j]
-// - R13 &src[len(src) - 8]
-// - R14 &src[len(src)]
-// - R15 &src[i]
-//
-// The unusual register allocation of local variables, such as R15 for a source
-// pointer, matches the allocation used at the call site in encodeBlock, which
-// makes it easier to manually inline this function.
-TEXT ·extendMatch(SB), NOSPLIT, $0-48
- MOVQ src_base+0(FP), DX
- MOVQ src_len+8(FP), R14
- MOVQ i+24(FP), R15
- MOVQ j+32(FP), SI
- ADDQ DX, R14
- ADDQ DX, R15
- ADDQ DX, SI
- MOVQ R14, R13
- SUBQ $8, R13
-
-cmp8:
- // As long as we are 8 or more bytes before the end of src, we can load and
- // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
- CMPQ SI, R13
- JA cmp1
- MOVQ (R15), AX
- MOVQ (SI), BX
- CMPQ AX, BX
- JNE bsf
- ADDQ $8, R15
- ADDQ $8, SI
- JMP cmp8
-
-bsf:
- // If those 8 bytes were not equal, XOR the two 8 byte values, and return
- // the index of the first byte that differs. The BSF instruction finds the
- // least significant 1 bit, the amd64 architecture is little-endian, and
- // the shift by 3 converts a bit index to a byte index.
- XORQ AX, BX
- BSFQ BX, BX
- SHRQ $3, BX
- ADDQ BX, SI
-
- // Convert from &src[ret] to ret.
- SUBQ DX, SI
- MOVQ SI, ret+40(FP)
- RET
-
-cmp1:
- // In src's tail, compare 1 byte at a time.
- CMPQ SI, R14
- JAE extendMatchEnd
- MOVB (R15), AX
- MOVB (SI), BX
- CMPB AX, BX
- JNE extendMatchEnd
- ADDQ $1, R15
- ADDQ $1, SI
- JMP cmp1
-
-extendMatchEnd:
- // Convert from &src[ret] to ret.
- SUBQ DX, SI
- MOVQ SI, ret+40(FP)
- RET
-
-// ----------------------------------------------------------------------------
-
-// func encodeBlock(dst, src []byte) (d int)
-//
-// All local variables fit into registers, other than "var table". The register
-// allocation:
-// - AX . .
-// - BX . .
-// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
-// - DX 64 &src[0], tableSize
-// - SI 72 &src[s]
-// - DI 80 &dst[d]
-// - R9 88 sLimit
-// - R10 . &src[nextEmit]
-// - R11 96 prevHash, currHash, nextHash, offset
-// - R12 104 &src[base], skip
-// - R13 . &src[nextS], &src[len(src) - 8]
-// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
-// - R15 112 candidate
-//
-// The second column (56, 64, etc) is the stack offset to spill the registers
-// when calling other functions. We could pack this slightly tighter, but it's
-// simpler to have a dedicated spill map independent of the function called.
-//
-// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
-// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
-// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
-TEXT ·encodeBlock(SB), 0, $32888-56
- MOVQ dst_base+0(FP), DI
- MOVQ src_base+24(FP), SI
- MOVQ src_len+32(FP), R14
-
- // shift, tableSize := uint32(32-8), 1<<8
- MOVQ $24, CX
- MOVQ $256, DX
-
-calcShift:
- // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
- // shift--
- // }
- CMPQ DX, $16384
- JGE varTable
- CMPQ DX, R14
- JGE varTable
- SUBQ $1, CX
- SHLQ $1, DX
- JMP calcShift
-
-varTable:
- // var table [maxTableSize]uint16
- //
- // In the asm code, unlike the Go code, we can zero-initialize only the
- // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
- // writes 16 bytes, so we can do only tableSize/8 writes instead of the
- // 2048 writes that would zero-initialize all of table's 32768 bytes.
- SHRQ $3, DX
- LEAQ table-32768(SP), BX
- PXOR X0, X0
-
-memclr:
- MOVOU X0, 0(BX)
- ADDQ $16, BX
- SUBQ $1, DX
- JNZ memclr
-
- // !!! DX = &src[0]
- MOVQ SI, DX
-
- // sLimit := len(src) - inputMargin
- MOVQ R14, R9
- SUBQ $15, R9
-
- // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
- // change for the rest of the function.
- MOVQ CX, 56(SP)
- MOVQ DX, 64(SP)
- MOVQ R9, 88(SP)
-
- // nextEmit := 0
- MOVQ DX, R10
-
- // s := 1
- ADDQ $1, SI
-
- // nextHash := hash(load32(src, s), shift)
- MOVL 0(SI), R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
-
-outer:
- // for { etc }
-
- // skip := 32
- MOVQ $32, R12
-
- // nextS := s
- MOVQ SI, R13
-
- // candidate := 0
- MOVQ $0, R15
-
-inner0:
- // for { etc }
-
- // s := nextS
- MOVQ R13, SI
-
- // bytesBetweenHashLookups := skip >> 5
- MOVQ R12, R14
- SHRQ $5, R14
-
- // nextS = s + bytesBetweenHashLookups
- ADDQ R14, R13
-
- // skip += bytesBetweenHashLookups
- ADDQ R14, R12
-
- // if nextS > sLimit { goto emitRemainder }
- MOVQ R13, AX
- SUBQ DX, AX
- CMPQ AX, R9
- JA emitRemainder
-
- // candidate = int(table[nextHash])
- // XXX: MOVWQZX table-32768(SP)(R11*2), R15
- // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
- BYTE $0x4e
- BYTE $0x0f
- BYTE $0xb7
- BYTE $0x7c
- BYTE $0x5c
- BYTE $0x78
-
- // table[nextHash] = uint16(s)
- MOVQ SI, AX
- SUBQ DX, AX
-
- // XXX: MOVW AX, table-32768(SP)(R11*2)
- // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
- BYTE $0x66
- BYTE $0x42
- BYTE $0x89
- BYTE $0x44
- BYTE $0x5c
- BYTE $0x78
-
- // nextHash = hash(load32(src, nextS), shift)
- MOVL 0(R13), R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
-
- // if load32(src, s) != load32(src, candidate) { continue } break
- MOVL 0(SI), AX
- MOVL (DX)(R15*1), BX
- CMPL AX, BX
- JNE inner0
-
-fourByteMatch:
- // As per the encode_other.go code:
- //
- // A 4-byte match has been found. We'll later see etc.
-
- // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
- // on inputMargin in encode.go.
- MOVQ SI, AX
- SUBQ R10, AX
- CMPQ AX, $16
- JLE emitLiteralFastPath
-
- // ----------------------------------------
- // Begin inline of the emitLiteral call.
- //
- // d += emitLiteral(dst[d:], src[nextEmit:s])
-
- MOVL AX, BX
- SUBL $1, BX
-
- CMPL BX, $60
- JLT inlineEmitLiteralOneByte
- CMPL BX, $256
- JLT inlineEmitLiteralTwoBytes
-
-inlineEmitLiteralThreeBytes:
- MOVB $0xf4, 0(DI)
- MOVW BX, 1(DI)
- ADDQ $3, DI
- JMP inlineEmitLiteralMemmove
-
-inlineEmitLiteralTwoBytes:
- MOVB $0xf0, 0(DI)
- MOVB BX, 1(DI)
- ADDQ $2, DI
- JMP inlineEmitLiteralMemmove
-
-inlineEmitLiteralOneByte:
- SHLB $2, BX
- MOVB BX, 0(DI)
- ADDQ $1, DI
-
-inlineEmitLiteralMemmove:
- // Spill local variables (registers) onto the stack; call; unspill.
- //
- // copy(dst[i:], lit)
- //
- // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
- // DI, R10 and AX as arguments.
- MOVQ DI, 0(SP)
- MOVQ R10, 8(SP)
- MOVQ AX, 16(SP)
- ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
- MOVQ SI, 72(SP)
- MOVQ DI, 80(SP)
- MOVQ R15, 112(SP)
- CALL runtime·memmove(SB)
- MOVQ 56(SP), CX
- MOVQ 64(SP), DX
- MOVQ 72(SP), SI
- MOVQ 80(SP), DI
- MOVQ 88(SP), R9
- MOVQ 112(SP), R15
- JMP inner1
-
-inlineEmitLiteralEnd:
- // End inline of the emitLiteral call.
- // ----------------------------------------
-
-emitLiteralFastPath:
- // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
- MOVB AX, BX
- SUBB $1, BX
- SHLB $2, BX
- MOVB BX, (DI)
- ADDQ $1, DI
-
- // !!! Implement the copy from lit to dst as a 16-byte load and store.
- // (Encode's documentation says that dst and src must not overlap.)
- //
- // This always copies 16 bytes, instead of only len(lit) bytes, but that's
- // OK. Subsequent iterations will fix up the overrun.
- //
- // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
- // 16-byte loads and stores. This technique probably wouldn't be as
- // effective on architectures that are fussier about alignment.
- MOVOU 0(R10), X0
- MOVOU X0, 0(DI)
- ADDQ AX, DI
-
-inner1:
- // for { etc }
-
- // base := s
- MOVQ SI, R12
-
- // !!! offset := base - candidate
- MOVQ R12, R11
- SUBQ R15, R11
- SUBQ DX, R11
-
- // ----------------------------------------
- // Begin inline of the extendMatch call.
- //
- // s = extendMatch(src, candidate+4, s+4)
-
- // !!! R14 = &src[len(src)]
- MOVQ src_len+32(FP), R14
- ADDQ DX, R14
-
- // !!! R13 = &src[len(src) - 8]
- MOVQ R14, R13
- SUBQ $8, R13
-
- // !!! R15 = &src[candidate + 4]
- ADDQ $4, R15
- ADDQ DX, R15
-
- // !!! s += 4
- ADDQ $4, SI
-
-inlineExtendMatchCmp8:
- // As long as we are 8 or more bytes before the end of src, we can load and
- // compare 8 bytes at a time. If those 8 bytes are equal, repeat.
- CMPQ SI, R13
- JA inlineExtendMatchCmp1
- MOVQ (R15), AX
- MOVQ (SI), BX
- CMPQ AX, BX
- JNE inlineExtendMatchBSF
- ADDQ $8, R15
- ADDQ $8, SI
- JMP inlineExtendMatchCmp8
-
-inlineExtendMatchBSF:
- // If those 8 bytes were not equal, XOR the two 8 byte values, and return
- // the index of the first byte that differs. The BSF instruction finds the
- // least significant 1 bit, the amd64 architecture is little-endian, and
- // the shift by 3 converts a bit index to a byte index.
- XORQ AX, BX
- BSFQ BX, BX
- SHRQ $3, BX
- ADDQ BX, SI
- JMP inlineExtendMatchEnd
-
-inlineExtendMatchCmp1:
- // In src's tail, compare 1 byte at a time.
- CMPQ SI, R14
- JAE inlineExtendMatchEnd
- MOVB (R15), AX
- MOVB (SI), BX
- CMPB AX, BX
- JNE inlineExtendMatchEnd
- ADDQ $1, R15
- ADDQ $1, SI
- JMP inlineExtendMatchCmp1
-
-inlineExtendMatchEnd:
- // End inline of the extendMatch call.
- // ----------------------------------------
-
- // ----------------------------------------
- // Begin inline of the emitCopy call.
- //
- // d += emitCopy(dst[d:], base-candidate, s-base)
-
- // !!! length := s - base
- MOVQ SI, AX
- SUBQ R12, AX
-
-inlineEmitCopyLoop0:
- // for length >= 68 { etc }
- CMPL AX, $68
- JLT inlineEmitCopyStep1
-
- // Emit a length 64 copy, encoded as 3 bytes.
- MOVB $0xfe, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $64, AX
- JMP inlineEmitCopyLoop0
-
-inlineEmitCopyStep1:
- // if length > 64 { etc }
- CMPL AX, $64
- JLE inlineEmitCopyStep2
-
- // Emit a length 60 copy, encoded as 3 bytes.
- MOVB $0xee, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
- SUBL $60, AX
-
-inlineEmitCopyStep2:
- // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
- CMPL AX, $12
- JGE inlineEmitCopyStep3
- CMPL R11, $2048
- JGE inlineEmitCopyStep3
-
- // Emit the remaining copy, encoded as 2 bytes.
- MOVB R11, 1(DI)
- SHRL $8, R11
- SHLB $5, R11
- SUBB $4, AX
- SHLB $2, AX
- ORB AX, R11
- ORB $1, R11
- MOVB R11, 0(DI)
- ADDQ $2, DI
- JMP inlineEmitCopyEnd
-
-inlineEmitCopyStep3:
- // Emit the remaining copy, encoded as 3 bytes.
- SUBL $1, AX
- SHLB $2, AX
- ORB $2, AX
- MOVB AX, 0(DI)
- MOVW R11, 1(DI)
- ADDQ $3, DI
-
-inlineEmitCopyEnd:
- // End inline of the emitCopy call.
- // ----------------------------------------
-
- // nextEmit = s
- MOVQ SI, R10
-
- // if s >= sLimit { goto emitRemainder }
- MOVQ SI, AX
- SUBQ DX, AX
- CMPQ AX, R9
- JAE emitRemainder
-
- // As per the encode_other.go code:
- //
- // We could immediately etc.
-
- // x := load64(src, s-1)
- MOVQ -1(SI), R14
-
- // prevHash := hash(uint32(x>>0), shift)
- MOVL R14, R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
-
- // table[prevHash] = uint16(s-1)
- MOVQ SI, AX
- SUBQ DX, AX
- SUBQ $1, AX
-
- // XXX: MOVW AX, table-32768(SP)(R11*2)
- // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
- BYTE $0x66
- BYTE $0x42
- BYTE $0x89
- BYTE $0x44
- BYTE $0x5c
- BYTE $0x78
-
- // currHash := hash(uint32(x>>8), shift)
- SHRQ $8, R14
- MOVL R14, R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
-
- // candidate = int(table[currHash])
- // XXX: MOVWQZX table-32768(SP)(R11*2), R15
- // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
- BYTE $0x4e
- BYTE $0x0f
- BYTE $0xb7
- BYTE $0x7c
- BYTE $0x5c
- BYTE $0x78
-
- // table[currHash] = uint16(s)
- ADDQ $1, AX
-
- // XXX: MOVW AX, table-32768(SP)(R11*2)
- // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
- BYTE $0x66
- BYTE $0x42
- BYTE $0x89
- BYTE $0x44
- BYTE $0x5c
- BYTE $0x78
-
- // if uint32(x>>8) == load32(src, candidate) { continue }
- MOVL (DX)(R15*1), BX
- CMPL R14, BX
- JEQ inner1
-
- // nextHash = hash(uint32(x>>16), shift)
- SHRQ $8, R14
- MOVL R14, R11
- IMULL $0x1e35a7bd, R11
- SHRL CX, R11
-
- // s++
- ADDQ $1, SI
-
- // break out of the inner1 for loop, i.e. continue the outer loop.
- JMP outer
-
-emitRemainder:
- // if nextEmit < len(src) { etc }
- MOVQ src_len+32(FP), AX
- ADDQ DX, AX
- CMPQ R10, AX
- JEQ encodeBlockEnd
-
- // d += emitLiteral(dst[d:], src[nextEmit:])
- //
- // Push args.
- MOVQ DI, 0(SP)
- MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
- MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
- MOVQ R10, 24(SP)
- SUBQ R10, AX
- MOVQ AX, 32(SP)
- MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
-
- // Spill local variables (registers) onto the stack; call; unspill.
- MOVQ DI, 80(SP)
- CALL ·emitLiteral(SB)
- MOVQ 80(SP), DI
-
- // Finish the "d +=" part of "d += emitLiteral(etc)".
- ADDQ 48(SP), DI
-
-encodeBlockEnd:
- MOVQ dst_base+0(FP), AX
- SUBQ AX, DI
- MOVQ DI, d+48(FP)
- RET
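
The magic multiplier 0x1e35a7bd that appears throughout this file is, as far
as I can tell, the same multiplicative hash used by C++ snappy; right-shifting
by 32 minus log2(tableSize) keeps the top bits of the product, which a
multiplicative hash mixes best.
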
diff --git a/vendor/github.com/klauspost/compress/snappy/encode_other.go b/vendor/github.com/klauspost/compress/snappy/encode_other.go
deleted file mode 100644
index dbcae90..0000000
--- a/vendor/github.com/klauspost/compress/snappy/encode_other.go
+++ /dev/null
@@ -1,238 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !amd64 appengine !gc noasm
-
-package snappy
-
-func load32(b []byte, i int) uint32 {
- b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
-}
-
-func load64(b []byte, i int) uint64 {
- b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
-}
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= len(lit) && len(lit) <= 65536
-func emitLiteral(dst, lit []byte) int {
- i, n := 0, uint(len(lit)-1)
- switch {
- case n < 60:
- dst[0] = uint8(n)<<2 | tagLiteral
- i = 1
- case n < 1<<8:
- dst[0] = 60<<2 | tagLiteral
- dst[1] = uint8(n)
- i = 2
- default:
- dst[0] = 61<<2 | tagLiteral
- dst[1] = uint8(n)
- dst[2] = uint8(n >> 8)
- i = 3
- }
- return i + copy(dst[i:], lit)
-}
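
For instance, a 100-byte literal has n = 99 and takes the n < 1<<8 branch: the
tag byte is 60<<2 | tagLiteral = 0xf0, the second byte is 99, and the 100
literal bytes follow. This is the same 0xf0 constant visible in the twoBytes
path of the assembly version above.
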
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-// dst is long enough to hold the encoded bytes
-// 1 <= offset && offset <= 65535
-// 4 <= length && length <= 65535
-func emitCopy(dst []byte, offset, length int) int {
- i := 0
- // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
- // threshold for this loop is a little higher (at 68 = 64 + 4), and the
- // length emitted down below is a little lower (at 60 = 64 - 4), because
- // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
- // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
- // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
- // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
- // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
- // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
- for length >= 68 {
- // Emit a length 64 copy, encoded as 3 bytes.
- dst[i+0] = 63<<2 | tagCopy2
- dst[i+1] = uint8(offset)
- dst[i+2] = uint8(offset >> 8)
- i += 3
- length -= 64
- }
- if length > 64 {
- // Emit a length 60 copy, encoded as 3 bytes.
- dst[i+0] = 59<<2 | tagCopy2
- dst[i+1] = uint8(offset)
- dst[i+2] = uint8(offset >> 8)
- i += 3
- length -= 60
- }
- if length >= 12 || offset >= 2048 {
- // Emit the remaining copy, encoded as 3 bytes.
- dst[i+0] = uint8(length-1)<<2 | tagCopy2
- dst[i+1] = uint8(offset)
- dst[i+2] = uint8(offset >> 8)
- return i + 3
- }
- // Emit the remaining copy, encoded as 2 bytes.
- dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
- dst[i+1] = uint8(offset)
- return i + 2
-}
-
-// extendMatch returns the largest k such that k <= len(src) and that
-// src[i:i+k-j] and src[j:k] have the same contents.
-//
-// It assumes that:
-// 0 <= i && i < j && j <= len(src)
-func extendMatch(src []byte, i, j int) int {
- for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
- }
- return j
-}
-
-func hash(u, shift uint32) uint32 {
- return (u * 0x1e35a7bd) >> shift
-}
-
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-// len(dst) >= MaxEncodedLen(len(src)) &&
-// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlock(dst, src []byte) (d int) {
- // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
- // The table element type is uint16, as s < sLimit and sLimit < len(src)
- // and len(src) <= maxBlockSize and maxBlockSize == 65536.
- const (
- maxTableSize = 1 << 14
- // tableMask is redundant, but helps the compiler eliminate bounds
- // checks.
- tableMask = maxTableSize - 1
- )
- shift := uint32(32 - 8)
- for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
- shift--
- }
- // In Go, all array elements are zero-initialized, so there is no advantage
- // to a smaller tableSize per se. However, it matches the C++ algorithm,
- // and in the asm versions of this code, we can get away with zeroing only
- // the first tableSize elements.
- var table [maxTableSize]uint16
-
- // sLimit is when to stop looking for offset/length copies. The inputMargin
- // lets us use a fast path for emitLiteral in the main loop, while we are
- // looking for copies.
- sLimit := len(src) - inputMargin
-
- // nextEmit is where in src the next emitLiteral should start from.
- nextEmit := 0
-
- // The encoded form must start with a literal, as there are no previous
- // bytes to copy, so we start looking for hash matches at s == 1.
- s := 1
- nextHash := hash(load32(src, s), shift)
-
- for {
- // Copied from the C++ snappy implementation:
- //
- // Heuristic match skipping: If 32 bytes are scanned with no matches
- // found, start looking only at every other byte. If 32 more bytes are
- // scanned (or skipped), look at every third byte, etc.. When a match
- // is found, immediately go back to looking at every byte. This is a
- // small loss (~5% performance, ~0.1% density) for compressible data
- // due to more bookkeeping, but for non-compressible data (such as
- // JPEG) it's a huge win since the compressor quickly "realizes" the
- // data is incompressible and doesn't bother looking for matches
- // everywhere.
- //
- // The "skip" variable keeps track of how many bytes there are since
- // the last match; dividing it by 32 (ie. right-shifting by five) gives
- // the number of bytes to move ahead for each iteration.
- skip := 32
-
- nextS := s
- candidate := 0
- for {
- s = nextS
- bytesBetweenHashLookups := skip >> 5
- nextS = s + bytesBetweenHashLookups
- skip += bytesBetweenHashLookups
- if nextS > sLimit {
- goto emitRemainder
- }
- candidate = int(table[nextHash&tableMask])
- table[nextHash&tableMask] = uint16(s)
- nextHash = hash(load32(src, nextS), shift)
- if load32(src, s) == load32(src, candidate) {
- break
- }
- }
-
- // A 4-byte match has been found. We'll later see if more than 4 bytes
- // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
- // them as literal bytes.
- d += emitLiteral(dst[d:], src[nextEmit:s])
-
- // Call emitCopy, and then see if another emitCopy could be our next
- // move. Repeat until we find no match for the input immediately after
- // what was consumed by the last emitCopy call.
- //
- // If we exit this loop normally then we need to call emitLiteral next,
- // though we don't yet know how big the literal will be. We handle that
- // by proceeding to the next iteration of the main loop. We also can
- // exit this loop via goto if we get close to exhausting the input.
- for {
- // Invariant: we have a 4-byte match at s, and no need to emit any
- // literal bytes prior to s.
- base := s
-
- // Extend the 4-byte match as long as possible.
- //
- // This is an inlined version of:
- // s = extendMatch(src, candidate+4, s+4)
- s += 4
- for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
- }
-
- d += emitCopy(dst[d:], base-candidate, s-base)
- nextEmit = s
- if s >= sLimit {
- goto emitRemainder
- }
-
- // We could immediately start working at s now, but to improve
- // compression we first update the hash table at s-1 and at s. If
- // another emitCopy is not our next move, also calculate nextHash
- // at s+1. At least on GOARCH=amd64, these three hash calculations
- // are faster as one load64 call (with some shifts) instead of
- // three load32 calls.
- x := load64(src, s-1)
- prevHash := hash(uint32(x>>0), shift)
- table[prevHash&tableMask] = uint16(s - 1)
- currHash := hash(uint32(x>>8), shift)
- candidate = int(table[currHash&tableMask])
- table[currHash&tableMask] = uint16(s)
- if uint32(x>>8) != load32(src, candidate) {
- nextHash = hash(uint32(x>>16), shift)
- s++
- break
- }
- }
- }
-
-emitRemainder:
- if nextEmit < len(src) {
- d += emitLiteral(dst[d:], src[nextEmit:])
- }
- return d
-}
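The long comment in the deleted emitCopy above carries the size arithmetic behind the 68/60 thresholds. A minimal standalone sketch of that arithmetic (not part of the patch; it assumes an offset below 2048 so short remainders fit a tagCopy1):

```Go
package main

import "fmt"

// copyCost is the encoded size in bytes of one copy op of the given
// length, assuming offset < 2048: lengths 4..11 fit a 2-byte tagCopy1,
// anything else needs a 3-byte tagCopy2.
func copyCost(length int) int {
	if length >= 4 && length < 12 {
		return 2 // tagCopy1
	}
	return 3 // tagCopy2; the only option for length < 4 or length >= 12
}

func main() {
	// The two splits of a length-67 copy discussed in the comment:
	fmt.Println("60+7:", copyCost(60)+copyCost(7), "bytes") // 5 bytes
	fmt.Println("64+3:", copyCost(64)+copyCost(3), "bytes") // 6 bytes
}
```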
diff --git a/vendor/github.com/klauspost/compress/snappy/runbench.cmd b/vendor/github.com/klauspost/compress/snappy/runbench.cmd
deleted file mode 100644
index d24eb4b..0000000
--- a/vendor/github.com/klauspost/compress/snappy/runbench.cmd
+++ /dev/null
@@ -1,2 +0,0 @@
-del old.txt
-go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt
diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/klauspost/compress/snappy/snappy.go
deleted file mode 100644
index 74a3668..0000000
--- a/vendor/github.com/klauspost/compress/snappy/snappy.go
+++ /dev/null
@@ -1,98 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package snappy implements the Snappy compression format. It aims for very
-// high speeds and reasonable compression.
-//
-// There are actually two Snappy formats: block and stream. They are related,
-// but different: trying to decompress block-compressed data as a Snappy stream
-// will fail, and vice versa. The block format is the Decode and Encode
-// functions and the stream format is the Reader and Writer types.
-//
-// The block format, the more common case, is used when the complete size (the
-// number of bytes) of the original data is known upfront, at the time
-// compression starts. The stream format, also known as the framing format, is
-// for when that isn't always true.
-//
-// The canonical, C++ implementation is at https://github.com/google/snappy and
-// it only implements the block format.
-package snappy
-
-import (
- "hash/crc32"
-)
-
-/*
-Each encoded block begins with the varint-encoded length of the decoded data,
-followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
-first byte of each chunk is broken into its 2 least and 6 most significant bits
-called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
-Zero means a literal tag. All other values mean a copy tag.
-
-For literal tags:
- - If m < 60, the next 1 + m bytes are literal bytes.
- - Otherwise, let n be the little-endian unsigned integer denoted by the next
- m - 59 bytes. The next 1 + n bytes after that are literal bytes.
-
-For copy tags, length bytes are copied from offset bytes ago, in the style of
-Lempel-Ziv compression algorithms. In particular:
- - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
- The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
- of the offset. The next byte is bits 0-7 of the offset.
- - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
- The length is 1 + m. The offset is the little-endian unsigned integer
- denoted by the next 2 bytes.
- - For l == 3, this tag is a legacy format that is no longer issued by most
- encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
- [1, 65). The length is 1 + m. The offset is the little-endian unsigned
- integer denoted by the next 4 bytes.
-*/
-const (
- tagLiteral = 0x00
- tagCopy1 = 0x01
- tagCopy2 = 0x02
- tagCopy4 = 0x03
-)
-
-const (
- checksumSize = 4
- chunkHeaderSize = 4
- magicChunk = "\xff\x06\x00\x00" + magicBody
- magicBody = "sNaPpY"
-
- // maxBlockSize is the maximum size of the input to encodeBlock. It is not
- // part of the wire format per se, but some parts of the encoder assume
- // that an offset fits into a uint16.
- //
- // Also, for the framing format (Writer type instead of Encode function),
- // https://github.com/google/snappy/blob/master/framing_format.txt says
- // that "the uncompressed data in a chunk must be no longer than 65536
- // bytes".
- maxBlockSize = 65536
-
- // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
- // hard coded to be a const instead of a variable, so that obufLen can also
- // be a const. Their equivalence is confirmed by
- // TestMaxEncodedLenOfMaxBlockSize.
- maxEncodedLenOfMaxBlockSize = 76490
-
- obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
- obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize
-)
-
-const (
- chunkTypeCompressedData = 0x00
- chunkTypeUncompressedData = 0x01
- chunkTypePadding = 0xfe
- chunkTypeStreamIdentifier = 0xff
-)
-
-var crcTable = crc32.MakeTable(crc32.Castagnoli)
-
-// crc implements the checksum specified in section 3 of
-// https://github.com/google/snappy/blob/master/framing_format.txt
-func crc(b []byte) uint32 {
- c := crc32.Update(0, crcTable, b)
- return uint32(c>>15|c<<17) + 0xa282ead8
-}
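The format comment in the deleted snappy.go describes how a chunk's first byte splits into the tag l and the payload m. A hedged sketch of that split (illustration only, not from the patch; the tag constants are copied from the deleted file):

```Go
package main

import "fmt"

const (
	tagLiteral = 0x00
	tagCopy2   = 0x02
)

// chunkTag splits a chunk's first byte into the fields described in the
// deleted format comment: l is the 2 low bits (the chunk tag), m the 6
// high bits.
func chunkTag(first byte) (l, m uint8) {
	return first & 0x03, first >> 2
}

func main() {
	// A literal tag with m = 59: the next 1+59 bytes are literal bytes.
	l, m := chunkTag(59<<2 | tagLiteral)
	fmt.Println("tag:", l, "m:", m) // tag: 0 m: 59

	// A tagCopy2 with m = 63 encodes a copy of length 1+63 = 64.
	l, m = chunkTag(63<<2 | tagCopy2)
	fmt.Println("tag:", l, "length:", m+1) // tag: 2 length: 64
}
```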
diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md
index bc977a3..e7d7eb0 100644
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@@ -5,11 +5,8 @@
A high performance compression algorithm is implemented. For now focused on speed.
This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content.
-Note that custom dictionaries are not supported yet, so if your code relies on that,
-you cannot use the package as-is.
This package is pure Go and without use of "unsafe".
-If a significant speedup can be achieved using "unsafe", it may be added as an option later.
The `zstd` package is provided as open source software using a Go standard license.
@@ -27,22 +24,21 @@
### Status:
STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively
-used by several projects. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
-kindly supplied by [fuzzit.dev](https://fuzzit.dev/).
+used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates.
There may still be specific combinations of data types/size/settings that could lead to edge cases,
so as always, testing is recommended.
For now, a high speed (fastest) and medium-fast (default) compressor has been implemented.
-The "Fastest" compression ratio is roughly equivalent to zstd level 1.
-The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
+* The "Fastest" compression ratio is roughly equivalent to zstd level 1.
+* The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
+* The "Better" compression ratio is roughly equivalent to zstd level 7.
+* The "Best" compression ratio is roughly equivalent to zstd level 11.
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
-Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
-
### Usage
@@ -57,11 +53,11 @@
```Go
// Compress input to output.
func Compress(in io.Reader, out io.Writer) error {
- w, err := NewWriter(output)
+ enc, err := zstd.NewWriter(out)
if err != nil {
return err
}
- _, err := io.Copy(w, input)
+ _, err = io.Copy(enc, in)
if err != nil {
enc.Close()
return err
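For reference, the fixed helper reads as follows in full; the closing lines are assumed from the surrounding README text, which this hunk does not show:

```Go
// Assumed completion of the example above; only the lines visible in the
// hunk are confirmed by this diff.
func Compress(in io.Reader, out io.Writer) error {
	enc, err := zstd.NewWriter(out)
	if err != nil {
		return err
	}
	_, err = io.Copy(enc, in)
	if err != nil {
		enc.Close()
		return err
	}
	return enc.Close()
}
```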
@@ -142,117 +138,110 @@
I have collected some speed examples to compare speed and compression against other compressors.
* `file` is the input file.
-* `out` is the compressor used. `zskp` is this package. `gzstd` is gzip standard library. `zstd` is the Datadog cgo library.
-* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default".
+* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip from the standard library and from this package.
+* `level` is the compression level used. For `zskp`, level 1 is "fastest", level 2 is "default", level 3 is "better", and level 4 is "best".
* `insize`/`outsize` is the input/output size.
* `millis` is the number of milliseconds used for compression.
* `mb/s` is megabytes (2^20 bytes) per second.
```
-The test data for the Large Text Compression Benchmark is the first
-10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
-http://mattmahoney.net/dc/textdata.html
+Silesia Corpus:
+http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
-file out level insize outsize millis mb/s
-enwik9 zskp 1 1000000000 343833033 5840 163.30
-enwik9 zskp 2 1000000000 317822183 8449 112.87
-enwik9 gzstd 1 1000000000 382578136 13627 69.98
-enwik9 gzstd 3 1000000000 349139651 22344 42.68
-enwik9 zstd 1 1000000000 357416379 4838 197.12
-enwik9 zstd 3 1000000000 313734522 7556 126.21
+This package:
+file out level insize outsize millis mb/s
+silesia.tar zskp 1 211947520 73101992 643 313.87
+silesia.tar zskp 2 211947520 67504318 969 208.38
+silesia.tar zskp 3 211947520 64595893 2007 100.68
+silesia.tar zskp 4 211947520 60995370 7691 26.28
+
+cgo zstd:
+silesia.tar zstd 1 211947520 73605392 543 371.56
+silesia.tar zstd 3 211947520 66793289 864 233.68
+silesia.tar zstd 6 211947520 62916450 1913 105.66
+silesia.tar zstd 9 211947520 60212393 5063 39.92
+
+gzip, stdlib/this package:
+silesia.tar gzstd 1 211947520 80007735 1654 122.21
+silesia.tar gzkp 1 211947520 80369488 1168 173.06
GOB stream of binary data. Highly compressible.
https://files.klauspost.com/compress/gob-stream.7z
-file out level insize outsize millis mb/s
-gob-stream zskp 1 1911399616 234981983 5100 357.42
-gob-stream zskp 2 1911399616 208674003 6698 272.15
-gob-stream gzstd 1 1911399616 357382641 14727 123.78
-gob-stream gzstd 3 1911399616 327835097 17005 107.19
-gob-stream zstd 1 1911399616 250787165 4075 447.22
-gob-stream zstd 3 1911399616 208191888 5511 330.77
+file out level insize outsize millis mb/s
+gob-stream zskp 1 1911399616 235022249 3088 590.30
+gob-stream zskp 2 1911399616 205669791 3786 481.34
+gob-stream zskp 3 1911399616 175034659 9636 189.17
+gob-stream zskp 4 1911399616 167273881 29337 62.13
+gob-stream zstd 1 1911399616 249810424 2637 691.26
+gob-stream zstd 3 1911399616 208192146 3490 522.31
+gob-stream zstd 6 1911399616 193632038 6687 272.56
+gob-stream zstd 9 1911399616 177620386 16175 112.70
+gob-stream gzstd 1 1911399616 357382641 10251 177.82
+gob-stream gzkp 1 1911399616 362156523 5695 320.08
-Highly compressible JSON file. Similar to logs in a lot of ways.
-https://files.klauspost.com/compress/adresser.001.gz
+The test data for the Large Text Compression Benchmark is the first
+10^9 bytes of the English Wikipedia dump on Mar. 3, 2006.
+http://mattmahoney.net/dc/textdata.html
-file out level insize outsize millis mb/s
-adresser.001 zskp 1 1073741824 18510122 1477 692.83
-adresser.001 zskp 2 1073741824 19831697 1705 600.59
-adresser.001 gzstd 1 1073741824 47755503 3079 332.47
-adresser.001 gzstd 3 1073741824 40052381 3051 335.63
-adresser.001 zstd 1 1073741824 16135896 994 1030.18
-adresser.001 zstd 3 1073741824 17794465 905 1131.49
+file out level insize outsize millis mb/s
+enwik9 zskp 1 1000000000 343848582 3609 264.18
+enwik9 zskp 2 1000000000 317276632 5746 165.97
+enwik9 zskp 3 1000000000 292243069 12162 78.41
+enwik9 zskp 4 1000000000 275241169 36430 26.18
+enwik9 zstd 1 1000000000 358072021 3110 306.65
+enwik9 zstd 3 1000000000 313734672 4784 199.35
+enwik9 zstd 6 1000000000 295138875 10290 92.68
+enwik9 zstd 9 1000000000 278348700 28549 33.40
+enwik9 gzstd 1 1000000000 382578136 9604 99.30
+enwik9 gzkp 1 1000000000 383825945 6544 145.73
+
+Highly compressible JSON file.
+https://files.klauspost.com/compress/github-june-2days-2019.json.zst
+
+file out level insize outsize millis mb/s
+github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
+github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
+github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
+github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
+github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
+github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
+github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
+github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
+github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
+github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
VM Image, Linux mint with a few installed applications:
https://files.klauspost.com/compress/rawstudio-mint14.7z
-file out level insize outsize millis mb/s
-rawstudio-mint14.tar zskp 1 8558382592 3648168838 33398 244.38
-rawstudio-mint14.tar zskp 2 8558382592 3376721436 50962 160.16
-rawstudio-mint14.tar gzstd 1 8558382592 3926257486 84712 96.35
-rawstudio-mint14.tar gzstd 3 8558382592 3740711978 176344 46.28
-rawstudio-mint14.tar zstd 1 8558382592 3607859742 27903 292.51
-rawstudio-mint14.tar zstd 3 8558382592 3341710879 46700 174.77
+file out level insize outsize millis mb/s
+rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
+rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
+rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
+rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
+rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
+rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
+rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
+rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
+rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
+rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
+CSV data:
+https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
-The test data is designed to test archivers in realistic backup scenarios.
-http://mattmahoney.net/dc/10gb.html
-
-file out level insize outsize millis mb/s
-10gb.tar zskp 1 10065157632 4883149814 45715 209.97
-10gb.tar zskp 2 10065157632 4638110010 60970 157.44
-10gb.tar gzstd 1 10065157632 5198296126 97769 98.18
-10gb.tar gzstd 3 10065157632 4932665487 313427 30.63
-10gb.tar zstd 1 10065157632 4940796535 40391 237.65
-10gb.tar zstd 3 10065157632 4638618579 52911 181.42
-
-Silesia Corpus:
-http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip
-
-file out level insize outsize millis mb/s
-silesia.tar zskp 1 211947520 73025800 1108 182.26
-silesia.tar zskp 2 211947520 67674684 1599 126.41
-silesia.tar gzstd 1 211947520 80007735 2515 80.37
-silesia.tar gzstd 3 211947520 73133380 4259 47.45
-silesia.tar zstd 1 211947520 73513991 933 216.64
-silesia.tar zstd 3 211947520 66793301 1377 146.79
+file out level insize outsize millis mb/s
+nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
+nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
+nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
+nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
+nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
+nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
+nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
+nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
+nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
+nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
```
-### Converters
-
-As part of the development process a *Snappy* -> *Zstandard* converter was also built.
-
-This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream.
-Note that a single block is not framed.
-
-Conversion is done by converting the stream directly from Snappy without intermediate full decoding.
-Therefore the compression ratio is much less than what can be done by a full decompression
-and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without
-any errors being generated.
-No CRC value is being generated and not all CRC values of the Snappy stream are checked.
-However, it provides really fast re-compression of Snappy streams.
-
-
-```
-BenchmarkSnappy_ConvertSilesia-8 1 1156001600 ns/op 183.35 MB/s
-Snappy len 103008711 -> zstd len 82687318
-
-BenchmarkSnappy_Enwik9-8 1 6472998400 ns/op 154.49 MB/s
-Snappy len 508028601 -> zstd len 390921079
-```
-
-
-```Go
- s := zstd.SnappyConverter{}
- n, err = s.Convert(input, output)
- if err != nil {
- fmt.Println("Re-compressed stream to", n, "bytes")
- }
-```
-
-The converter `s` can be reused to avoid allocations, even after errors.
-
-
## Decompressor
Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
@@ -273,14 +262,14 @@
import "github.com/klauspost/compress/zstd"
func Decompress(in io.Reader, out io.Writer) error {
- d, err := zstd.NewReader(input)
+ d, err := zstd.NewReader(in)
if err != nil {
return err
}
defer d.Close()
// Copy content...
- _, err := io.Copy(out, d)
+ _, err = io.Copy(out, d)
return err
}
```
@@ -309,6 +298,35 @@
It will only allow a certain number of concurrent operations to run.
To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder.
+### Dictionaries
+
+Data compressed with [dictionaries](https://github.com/facebook/zstd#the-case-for-small-data-compression) can be decompressed.
+
+Dictionaries are added individually to Decoders.
+Dictionaries are generated by the `zstd --train` command and contain an initial state for the decoder.
+To add a dictionary use the `WithDecoderDicts(dicts ...[]byte)` option with the dictionary data.
+Several dictionaries can be added at once.
+
+The dictionary will be used automatically for the data that specifies it.
+A re-used Decoder will still contain the dictionaries registered.
+
+When registering multiple dictionaries with the same ID, the last one will be used.
+
+It is possible to use dictionaries when compressing data.
+
+To enable a dictionary use `WithEncoderDict(dict []byte)`. Here only one dictionary will be used
+and it will likely be applied even if it doesn't improve compression.
+
+The same dictionary must be used to decompress the content.
+
+For any real gains, the dictionary should be built with similar data.
+If an unsuitable dictionary is used the output may be slightly larger than using no dictionary.
+Use the [zstd commandline tool](https://github.com/facebook/zstd/releases) to build a dictionary from sample data.
+For information see [zstd dictionary information](https://github.com/facebook/zstd#the-case-for-small-data-compression).
+
+For now there is a fixed startup performance penalty for compressing content with dictionaries.
+This will likely be improved over time, so be sure to test performance when implementing.
+
### Allocation-less operation
The decoder has been designed to operate without allocations after a warmup.
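The new Dictionaries section above names `WithDecoderDicts` and `WithEncoderDict`. A minimal sketch wiring both, assuming `dictBytes` holds output from `zstd --train` (the helper name and error handling are illustrative):

```Go
package zstddict

import (
	"io"

	"github.com/klauspost/compress/zstd"
)

// newDictCodec wires one dictionary into both a decoder and an encoder,
// per the Dictionaries section above. dictBytes is assumed to be a
// dictionary produced by `zstd --train`.
func newDictCodec(out io.Writer, dictBytes []byte) (*zstd.Decoder, *zstd.Encoder, error) {
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dictBytes))
	if err != nil {
		return nil, nil, err
	}
	enc, err := zstd.NewWriter(out, zstd.WithEncoderDict(dictBytes))
	if err != nil {
		dec.Close()
		return nil, nil, err
	}
	return dec, enc, nil
}
```

The same dictionary bytes must be registered on both sides; frames that reference an unregistered ID fail to decode.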
@@ -350,36 +368,42 @@
The first two are streaming decodes and the last are smaller inputs.
```
-BenchmarkDecoderSilesia-8 20 642550210 ns/op 329.85 MB/s 3101 B/op 8 allocs/op
-BenchmarkDecoderSilesiaCgo-8 100 384930000 ns/op 550.61 MB/s 451878 B/op 9713 allocs/op
+BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op
+BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op
-BenchmarkDecoderEnwik9-2 10 3146000080 ns/op 317.86 MB/s 2649 B/op 9 allocs/op
-BenchmarkDecoderEnwik9Cgo-2 20 1905900000 ns/op 524.69 MB/s 1125120 B/op 45785 allocs/op
+BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op
+BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op
-BenchmarkDecoder_DecodeAll/z000000.zst-8 200 7049994 ns/op 138.26 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000001.zst-8 100000 19560 ns/op 97.49 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000002.zst-8 5000 297599 ns/op 236.99 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000003.zst-8 2000 725502 ns/op 141.17 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000004.zst-8 200000 9314 ns/op 54.54 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000005.zst-8 10000 137500 ns/op 104.72 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000006.zst-8 500 2316009 ns/op 206.06 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000007.zst-8 20000 64499 ns/op 344.90 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000008.zst-8 50000 24900 ns/op 219.56 MB/s 40 B/op 2 allocs/op
-BenchmarkDecoder_DecodeAll/z000009.zst-8 1000 2348999 ns/op 154.01 MB/s 40 B/op 2 allocs/op
+Concurrent performance:
-BenchmarkDecoder_DecodeAllCgo/z000000.zst-8 500 4268005 ns/op 228.38 MB/s 1228849 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000001.zst-8 100000 15250 ns/op 125.05 MB/s 2096 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000002.zst-8 10000 147399 ns/op 478.49 MB/s 73776 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000003.zst-8 5000 320798 ns/op 319.27 MB/s 139312 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000004.zst-8 200000 10004 ns/op 50.77 MB/s 560 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000005.zst-8 20000 73599 ns/op 195.64 MB/s 19120 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000006.zst-8 1000 1119003 ns/op 426.48 MB/s 557104 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000007.zst-8 20000 103450 ns/op 215.04 MB/s 71296 B/op 9 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000008.zst-8 100000 20130 ns/op 271.58 MB/s 6192 B/op 3 allocs/op
-BenchmarkDecoder_DecodeAllCgo/z000009.zst-8 2000 1123500 ns/op 322.00 MB/s 368688 B/op 3 allocs/op
+BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op
+
+BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op
+BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op
```
-This reflects the performance around May 2019, but this may be out of date.
+This reflects the performance around May 2020, but this may be out of date.
# Contributions
diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go
index 15d79d4..8544585 100644
--- a/vendor/github.com/klauspost/compress/zstd/bitreader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go
@@ -5,6 +5,7 @@
package zstd
import (
+ "encoding/binary"
"errors"
"io"
"math/bits"
@@ -34,8 +35,12 @@
}
b.bitsRead = 64
b.value = 0
- b.fill()
- b.fill()
+ if len(in) >= 8 {
+ b.fillFastStart()
+ } else {
+ b.fill()
+ b.fill()
+ }
b.bitsRead += 8 - uint8(highBits(uint32(v)))
return nil
}
@@ -63,21 +68,31 @@
if b.bitsRead < 32 {
return
}
- // Do single re-slice to avoid bounds checks.
- v := b.in[b.off-4 : b.off]
+ // 2 bounds checks.
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
b.off -= 4
}
+// fillFastStart() assumes the bitreader is empty and there are at least 8 bytes to read.
+func (b *bitReader) fillFastStart() {
+ // Do single re-slice to avoid bounds checks.
+ b.value = binary.LittleEndian.Uint64(b.in[b.off-8:])
+ b.bitsRead = 0
+ b.off -= 8
+}
+
// fill() will make sure at least 32 bits are available.
func (b *bitReader) fill() {
if b.bitsRead < 32 {
return
}
if b.off >= 4 {
- v := b.in[b.off-4 : b.off]
+ v := b.in[b.off-4:]
+ v = v[:4]
low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
b.value = (b.value << 32) | uint64(low)
b.bitsRead -= 32
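The start-up change above swaps two 32-bit fills for a single 64-bit load when at least 8 bytes remain. A standalone check that both paths produce the same value, using the same binary.LittleEndian helper the patch imports:

```Go
package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	buf := []byte{1, 2, 3, 4, 5, 6, 7, 8}
	off := len(buf)

	// Old start-up path: two 32-bit fills, each shifting the value up.
	var v uint64
	v = (v << 32) | uint64(binary.LittleEndian.Uint32(buf[off-4:off]))
	off -= 4
	v = (v << 32) | uint64(binary.LittleEndian.Uint32(buf[off-4:off]))

	// New start-up path: one 64-bit little-endian load, as fillFastStart does.
	fast := binary.LittleEndian.Uint64(buf[len(buf)-8:])

	fmt.Printf("%#x == %#x: %v\n", v, fast, v == fast) // identical
}
```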
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index ed670bc..b51d922 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -75,21 +75,29 @@
// Window size of the block.
WindowSize uint64
- Type blockType
- RLESize uint32
+
+ history chan *history
+ input chan struct{}
+ result chan decodeOutput
+ sequenceBuf []seq
+ err error
+ decWG sync.WaitGroup
+
+ // Frame to use for singlethreaded decoding.
+ // Should not be used by the decoder itself since parent may be another frame.
+ localFrame *frameDec
+
+ // Block is RLE, this is the size.
+ RLESize uint32
+ tmp [4]byte
+
+ Type blockType
// Is this the last block of a frame?
Last bool
// Use less memory
- lowMem bool
- history chan *history
- input chan struct{}
- result chan decodeOutput
- sequenceBuf []seq
- tmp [4]byte
- err error
- decWG sync.WaitGroup
+ lowMem bool
}
func (b *blockDec) String() string {
@@ -127,25 +135,37 @@
b.Type = blockType((bh >> 1) & 3)
// find size.
cSize := int(bh >> 3)
+ maxSize := maxBlockSize
switch b.Type {
case blockTypeReserved:
return ErrReservedBlockType
case blockTypeRLE:
b.RLESize = uint32(cSize)
+ if b.lowMem {
+ maxSize = cSize
+ }
cSize = 1
case blockTypeCompressed:
if debug {
println("Data size on stream:", cSize)
}
b.RLESize = 0
+ maxSize = maxCompressedBlockSize
+ if windowSize < maxCompressedBlockSize && b.lowMem {
+ maxSize = int(windowSize)
+ }
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
if debug {
printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
}
return ErrCompressedSizeTooBig
}
- default:
+ case blockTypeRaw:
b.RLESize = 0
+ // We do not need a destination for raw blocks.
+ maxSize = -1
+ default:
+ panic("Invalid block type")
}
// Read block data.
@@ -156,8 +176,8 @@
b.dataStorage = make([]byte, 0, maxBlockSize)
}
}
- if cap(b.dst) <= maxBlockSize {
- b.dst = make([]byte, 0, maxBlockSize+1)
+ if cap(b.dst) <= maxSize {
+ b.dst = make([]byte, 0, maxSize+1)
}
var err error
b.data, err = br.readBig(cSize, b.dataStorage)
@@ -445,26 +465,22 @@
if huff == nil {
huff = &huff0.Scratch{}
}
- huff.Out = b.literalBuf[:0]
huff, literals, err = huff0.ReadTable(literals, huff)
if err != nil {
println("reading huffman table:", err)
return err
}
// Use our out buffer.
- huff.Out = b.literalBuf[:0]
- huff.MaxDecodedSize = litRegenSize
if fourStreams {
- literals, err = huff.Decompress4X(literals, litRegenSize)
+ literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
- literals, err = huff.Decompress1X(literals)
+ literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
}
if err != nil {
println("decoding compressed literals:", err)
return err
}
// Make sure we don't leak our literals buffer
- huff.Out = nil
if len(literals) != litRegenSize {
return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
@@ -597,7 +613,7 @@
// Decode treeless literal block.
if litType == literalsBlockTreeless {
// TODO: We could send the history early WITHOUT the stream history.
- // This would allow decoding treeless literials before the byte history is available.
+ // This would allow decoding treeless literals before the byte history is available.
// Silesia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
// So not much obvious gain here.
@@ -615,15 +631,12 @@
var err error
// Use our out buffer.
huff = hist.huffTree
- huff.Out = b.literalBuf[:0]
- huff.MaxDecodedSize = litRegenSize
if fourStreams {
- literals, err = huff.Decompress4X(literals, litRegenSize)
+ literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
- literals, err = huff.Decompress1X(literals)
+ literals, err = huff.Decoder().Decompress1X(b.literalBuf[:0:litRegenSize], literals)
}
// Make sure we don't leak our literals buffer
- huff.Out = nil
if err != nil {
println("decompressing literals:", err)
return err
@@ -633,12 +646,13 @@
}
} else {
if hist.huffTree != nil && huff != nil {
- huffDecoderPool.Put(hist.huffTree)
+ if hist.dict == nil || hist.dict.litEnc != hist.huffTree {
+ huffDecoderPool.Put(hist.huffTree)
+ }
hist.huffTree = nil
}
}
if huff != nil {
- huff.Out = nil
hist.huffTree = huff
}
if debug {
@@ -671,12 +685,21 @@
// If only recent offsets were not transferred, this would be an obvious win.
// Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded.
+ hbytes := hist.b
+ if len(hbytes) > hist.windowSize {
+ hbytes = hbytes[len(hbytes)-hist.windowSize:]
+ // We do not need history any more.
+ if hist.dict != nil {
+ hist.dict.content = nil
+ }
+ }
+
if err := seqs.initialize(br, hist, literals, b.dst); err != nil {
println("initializing sequences:", err)
return err
}
- err = seqs.decode(nSeqs, br, hist.b)
+ err = seqs.decode(nSeqs, br, hbytes)
if err != nil {
return err
}
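The last hunk above trims the history passed to sequence decoding to the window size, since matches can never reach further back. A trivial sketch of the trim with assumed sizes:

```Go
package main

import "fmt"

func main() {
	hbytes := make([]byte, 1<<20) // 1 MB of accumulated history (assumed)
	windowSize := 1 << 17         // 128 KB window (assumed)

	// Keep only the most recent windowSize bytes, as the hunk does.
	if len(hbytes) > windowSize {
		hbytes = hbytes[len(hbytes)-windowSize:]
	}
	fmt.Println(len(hbytes)) // 131072
}
```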
diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go
index 507757d..e1be092 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@@ -14,35 +14,52 @@
)
type blockEnc struct {
- size int
- literals []byte
- sequences []seq
- coders seqCoders
- litEnc *huff0.Scratch
- wr bitWriter
+ size int
+ literals []byte
+ sequences []seq
+ coders seqCoders
+ litEnc *huff0.Scratch
+ dictLitEnc *huff0.Scratch
+ wr bitWriter
- extraLits int
- last bool
-
+ extraLits int
output []byte
recentOffsets [3]uint32
prevRecentOffsets [3]uint32
+
+ last bool
+ lowMem bool
}
// init should be used once the block has been created.
// If called more than once, the effect is the same as calling reset.
func (b *blockEnc) init() {
- if cap(b.literals) < maxCompressedLiteralSize {
- b.literals = make([]byte, 0, maxCompressedLiteralSize)
+ if b.lowMem {
+ // 1K literals
+ if cap(b.literals) < 1<<10 {
+ b.literals = make([]byte, 0, 1<<10)
+ }
+ const defSeqs = 20
+ if cap(b.sequences) < defSeqs {
+ b.sequences = make([]seq, 0, defSeqs)
+ }
+ // 1K
+ if cap(b.output) < 1<<10 {
+ b.output = make([]byte, 0, 1<<10)
+ }
+ } else {
+ if cap(b.literals) < maxCompressedBlockSize {
+ b.literals = make([]byte, 0, maxCompressedBlockSize)
+ }
+ const defSeqs = 200
+ if cap(b.sequences) < defSeqs {
+ b.sequences = make([]seq, 0, defSeqs)
+ }
+ if cap(b.output) < maxCompressedBlockSize {
+ b.output = make([]byte, 0, maxCompressedBlockSize)
+ }
}
- const defSeqs = 200
- b.literals = b.literals[:0]
- if cap(b.sequences) < defSeqs {
- b.sequences = make([]seq, 0, defSeqs)
- }
- if cap(b.output) < maxCompressedBlockSize {
- b.output = make([]byte, 0, maxCompressedBlockSize)
- }
+
if b.coders.mlEnc == nil {
b.coders.mlEnc = &fseEncoder{}
b.coders.mlPrev = &fseEncoder{}
@@ -75,6 +92,7 @@
if prev != nil {
b.recentOffsets = prev.prevRecentOffsets
}
+ b.dictLitEnc = nil
}
// reset will reset the block for a new encode, but in the same stream,
@@ -295,7 +313,7 @@
b.output = bh.appendTo(b.output[:0])
b.output = append(b.output, a...)
if debug {
- println("Adding RAW block, length", len(a))
+ println("Adding RAW block, length", len(a), "last:", b.last)
}
}
@@ -308,25 +326,25 @@
dst = bh.appendTo(dst)
dst = append(dst, src...)
if debug {
- println("Adding RAW block, length", len(src))
+ println("Adding RAW block, length", len(src), "last:", b.last)
}
return dst
}
// encodeLits can be used if the block is only litLen.
-func (b *blockEnc) encodeLits(raw bool) error {
+func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
var bh blockHeader
bh.setLast(b.last)
- bh.setSize(uint32(len(b.literals)))
+ bh.setSize(uint32(len(lits)))
// Don't compress extremely small blocks
- if len(b.literals) < 32 || raw {
+ if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
if debug {
- println("Adding RAW block, length", len(b.literals))
+ println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
b.output = bh.appendTo(b.output)
- b.output = append(b.output, b.literals...)
+ b.output = append(b.output, lits...)
return nil
}
@@ -335,13 +353,18 @@
reUsed, single bool
err error
)
- if len(b.literals) >= 1024 {
+ if b.dictLitEnc != nil {
+ b.litEnc.TransferCTable(b.dictLitEnc)
+ b.litEnc.Reuse = huff0.ReusePolicyAllow
+ b.dictLitEnc = nil
+ }
+ if len(lits) >= 1024 {
// Use 4 Streams.
- out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
- } else if len(b.literals) > 32 {
+ out, reUsed, err = huff0.Compress4X(lits, b.litEnc)
+ } else if len(lits) > 32 {
// Use 1 stream
single = true
- out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc)
+ out, reUsed, err = huff0.Compress1X(lits, b.litEnc)
} else {
err = huff0.ErrIncompressible
}
@@ -349,23 +372,23 @@
switch err {
case huff0.ErrIncompressible:
if debug {
- println("Adding RAW block, length", len(b.literals))
+ println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
b.output = bh.appendTo(b.output)
- b.output = append(b.output, b.literals...)
+ b.output = append(b.output, lits...)
return nil
case huff0.ErrUseRLE:
if debug {
- println("Adding RLE block, length", len(b.literals))
+ println("Adding RLE block, length", len(lits))
}
bh.setType(blockTypeRLE)
b.output = bh.appendTo(b.output)
- b.output = append(b.output, b.literals[0])
+ b.output = append(b.output, lits[0])
return nil
+ case nil:
default:
return err
- case nil:
}
// Compressed...
// Now, allow reuse
@@ -384,7 +407,7 @@
lh.setType(literalsBlockCompressed)
}
// Set sizes
- lh.setSizes(len(out), len(b.literals), single)
+ lh.setSizes(len(out), len(lits), single)
bh.setSize(uint32(len(out) + lh.size() + 1))
// Write block headers.
@@ -444,13 +467,19 @@
}
// encode will encode the block and append the output in b.output.
-func (b *blockEnc) encode(raw bool) error {
+// Previous offset codes must be pushed if more blocks are expected.
+func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if len(b.sequences) == 0 {
- return b.encodeLits(raw)
+ return b.encodeLits(b.literals, rawAllLits)
}
- // We want some difference
- if len(b.literals) > (b.size - (b.size >> 5)) {
- return errIncompressible
+ // We want some difference to at least account for the headers.
+ saved := b.size - len(b.literals) - (b.size >> 5)
+ if saved < 16 {
+ if org == nil {
+ return errIncompressible
+ }
+ b.popOffsets()
+ return b.encodeLits(org, rawAllLits)
}
var bh blockHeader
@@ -466,6 +495,11 @@
reUsed, single bool
err error
)
+ if b.dictLitEnc != nil {
+ b.litEnc.TransferCTable(b.dictLitEnc)
+ b.litEnc.Reuse = huff0.ReusePolicyAllow
+ b.dictLitEnc = nil
+ }
if len(b.literals) >= 1024 && !raw {
// Use 4 Streams.
out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc)
@@ -494,11 +528,6 @@
if debug {
println("Adding literals RLE")
}
- default:
- if debug {
- println("Adding literals ERROR:", err)
- }
- return err
case nil:
// Compressed litLen...
if reUsed {
@@ -529,6 +558,11 @@
if debug {
println("Adding literals compressed")
}
+ default:
+ if debug {
+ println("Adding literals ERROR:", err)
+ }
+ return err
}
// Sequence compression
@@ -806,7 +840,7 @@
mlH[v]++
if v > mlMax {
mlMax = v
- if debug && mlMax > maxMatchLengthSymbol {
+ if debugAsserts && mlMax > maxMatchLengthSymbol {
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
}
}
@@ -821,13 +855,13 @@
}
return int(max)
}
- if mlMax > maxMatchLengthSymbol {
+ if debugAsserts && mlMax > maxMatchLengthSymbol {
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax))
}
- if ofMax > maxOffsetBits {
+ if debugAsserts && ofMax > maxOffsetBits {
panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax))
}
- if llMax > maxLiteralLengthSymbol {
+ if debugAsserts && llMax > maxLiteralLengthSymbol {
panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax))
}
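The encode change above replaces the old literal-ratio test with an explicit savings threshold: sequences must save at least 16 bytes beyond a 1/32 margin of the block size, otherwise the block falls back to literals. A sketch of that predicate (names are illustrative):

```Go
package main

import "fmt"

// worthCompressing mirrors the threshold in the hunk above: the sequence
// encoding must save at least 16 bytes after a blockSize/32 margin.
func worthCompressing(blockSize, litLen int) bool {
	return blockSize-litLen-blockSize>>5 >= 16
}

func main() {
	fmt.Println(worthCompressing(4096, 4000)) // false: almost all literals
	fmt.Println(worthCompressing(4096, 2048)) // true
}
```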
diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
index 07321ac..658ef78 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@@ -30,7 +30,7 @@
type byteBuf []byte
func (b *byteBuf) readSmall(n int) []byte {
- if debug && n > 8 {
+ if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
bb := *b
@@ -82,7 +82,7 @@
}
func (r *readerWrapper) readSmall(n int) []byte {
- if debug && n > 8 {
+ if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
n2, err := io.ReadFull(r.r, r.tmp[:n])
diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go
index dc4378b..2c4fca1 100644
--- a/vendor/github.com/klauspost/compress/zstd/bytereader.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go
@@ -31,7 +31,8 @@
// Int32 returns a little endian int32 starting at current offset.
func (b byteReader) Int32() int32 {
- b2 := b.b[b.off : b.off+4 : b.off+4]
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
v3 := int32(b2[3])
v2 := int32(b2[2])
v1 := int32(b2[1])
@@ -55,7 +56,20 @@
}
return v
}
- b2 := b.b[b.off : b.off+4 : b.off+4]
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
+ v3 := uint32(b2[3])
+ v2 := uint32(b2[2])
+ v1 := uint32(b2[1])
+ v0 := uint32(b2[0])
+ return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24)
+}
+
+// Uint32NC returns a little endian uint32 starting at current offset.
+// The caller must be sure that there are at least 4 bytes left.
+func (b byteReader) Uint32NC() uint32 {
+ b2 := b.b[b.off:]
+ b2 = b2[:4]
v3 := uint32(b2[3])
v2 := uint32(b2[2])
v1 := uint32(b2[1])
diff --git a/vendor/github.com/klauspost/compress/zstd/decodeheader.go b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
new file mode 100644
index 0000000..69736e8
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@@ -0,0 +1,202 @@
+// Copyright 2020+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "bytes"
+ "errors"
+ "io"
+)
+
+// HeaderMaxSize is the maximum size of a Frame and Block Header.
+// If less is sent to Header.Decode it *may* still contain enough information.
+const HeaderMaxSize = 14 + 3
+
+// Header contains information about the first frame and block within that.
+type Header struct {
+ // WindowSize is the window of data to keep while decoding.
+ // Will only be set if HasFCS is false.
+ WindowSize uint64
+
+ // Frame content size.
+ // Expected size of the entire frame.
+ FrameContentSize uint64
+
+ // Dictionary ID.
+ // If 0, no dictionary.
+ DictionaryID uint32
+
+ // First block information.
+ FirstBlock struct {
+ // OK will be set if first block could be decoded.
+ OK bool
+
+ // Is this the last block of a frame?
+ Last bool
+
+ // Is the data compressed?
+ // If true CompressedSize will be populated.
+ // Unfortunately DecompressedSize cannot be determined
+ // without decoding the blocks.
+ Compressed bool
+
+ // DecompressedSize is the expected decompressed size of the block.
+ // Will be 0 if it cannot be determined.
+ DecompressedSize int
+
+ // CompressedSize of the data in the block.
+ // Does not include the block header.
+ // Will be equal to DecompressedSize if not Compressed.
+ CompressedSize int
+ }
+
+ // Skippable will be true if the frame is meant to be skipped.
+ // No other information will be populated.
+ Skippable bool
+
+ // If set there is a checksum present for the block content.
+ HasCheckSum bool
+
+ // If this is true FrameContentSize will have a valid value
+ HasFCS bool
+
+ SingleSegment bool
+}
+
+// Decode the header from the beginning of the stream.
+// This will decode the frame header and the first block header if enough bytes are provided.
+// It is recommended to provide at least HeaderMaxSize bytes.
+// If the frame header cannot be read an error will be returned.
+// If there isn't enough input, io.ErrUnexpectedEOF is returned.
+// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
+func (h *Header) Decode(in []byte) error {
+ if len(in) < 4 {
+ return io.ErrUnexpectedEOF
+ }
+ b, in := in[:4], in[4:]
+ if !bytes.Equal(b, frameMagic) {
+ if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
+ return ErrMagicMismatch
+ }
+ *h = Header{Skippable: true}
+ return nil
+ }
+ if len(in) < 1 {
+ return io.ErrUnexpectedEOF
+ }
+
+ // Clear output
+ *h = Header{}
+ fhd, in := in[0], in[1:]
+ h.SingleSegment = fhd&(1<<5) != 0
+ h.HasCheckSum = fhd&(1<<2) != 0
+
+ if fhd&(1<<3) != 0 {
+ return errors.New("reserved bit set on frame header")
+ }
+
+ // Read Window_Descriptor
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
+ if !h.SingleSegment {
+ if len(in) < 1 {
+ return io.ErrUnexpectedEOF
+ }
+ var wd byte
+ wd, in = in[0], in[1:]
+ windowLog := 10 + (wd >> 3)
+ windowBase := uint64(1) << windowLog
+ windowAdd := (windowBase / 8) * uint64(wd&0x7)
+ h.WindowSize = windowBase + windowAdd
+ }
+
+ // Read Dictionary_ID
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
+ if size := fhd & 3; size != 0 {
+ if size == 3 {
+ size = 4
+ }
+ if len(in) < int(size) {
+ return io.ErrUnexpectedEOF
+ }
+ b, in = in[:size], in[size:]
+ if b == nil {
+ return io.ErrUnexpectedEOF
+ }
+ switch size {
+ case 1:
+ h.DictionaryID = uint32(b[0])
+ case 2:
+ h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
+ case 4:
+ h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ }
+ }
+
+ // Read Frame_Content_Size
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
+ var fcsSize int
+ v := fhd >> 6
+ switch v {
+ case 0:
+ if h.SingleSegment {
+ fcsSize = 1
+ }
+ default:
+ fcsSize = 1 << v
+ }
+
+ if fcsSize > 0 {
+ h.HasFCS = true
+ if len(in) < fcsSize {
+ return io.ErrUnexpectedEOF
+ }
+ b, in = in[:fcsSize], in[fcsSize:]
+ if b == nil {
+ return io.ErrUnexpectedEOF
+ }
+ switch fcsSize {
+ case 1:
+ h.FrameContentSize = uint64(b[0])
+ case 2:
+ // When FCS_Field_Size is 2, the offset of 256 is added.
+ h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
+ case 4:
+ h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
+ case 8:
+ d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
+ h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
+ }
+ }
+
+ // Frame header done; we will not fail from now on.
+ if len(in) < 3 {
+ return nil
+ }
+ tmp := in[:3]
+ bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
+ h.FirstBlock.Last = bh&1 != 0
+ blockType := blockType((bh >> 1) & 3)
+ // find size.
+ cSize := int(bh >> 3)
+ switch blockType {
+ case blockTypeReserved:
+ return nil
+ case blockTypeRLE:
+ h.FirstBlock.Compressed = true
+ h.FirstBlock.DecompressedSize = cSize
+ h.FirstBlock.CompressedSize = 1
+ case blockTypeCompressed:
+ h.FirstBlock.Compressed = true
+ h.FirstBlock.CompressedSize = cSize
+ case blockTypeRaw:
+ h.FirstBlock.DecompressedSize = cSize
+ h.FirstBlock.CompressedSize = cSize
+ default:
+ panic("Invalid block type")
+ }
+
+ h.FirstBlock.OK = true
+ return nil
+}
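A hedged usage sketch for the new Header type added above; it builds a tiny frame with EncodeAll so the example is self-contained:

```Go
package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"
)

func main() {
	// Build a small frame so there is something to inspect.
	enc, _ := zstd.NewWriter(nil)
	frame := enc.EncodeAll([]byte("hello gopher"), nil)
	enc.Close()

	// Inspect the frame without decompressing it.
	var h zstd.Header
	if err := h.Decode(frame); err != nil {
		fmt.Println("header decode failed:", err)
		return
	}
	fmt.Println("dict ID:", h.DictionaryID, "first block OK:", h.FirstBlock.OK)
}
```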
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go
index 35a3cda..f593e46 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@@ -5,7 +5,6 @@
package zstd
import (
- "bytes"
"errors"
"io"
"sync"
@@ -23,17 +22,15 @@
// Unreferenced decoders, ready for use.
decoders chan *blockDec
- // Unreferenced decoders, ready for use.
- frames chan *frameDec
-
// Streams ready to be decoded.
stream chan decodeStream
// Current read position used for Reader functionality.
current decoderState
- // Custom dictionaries
- dicts map[uint32]struct{}
+ // Custom dictionaries.
+ // Always uses copies.
+ dicts map[uint32]dict
// streamWg is the waitgroup for all streams
streamWg sync.WaitGroup
@@ -66,7 +63,7 @@
// A Decoder can be used in two modes:
//
// 1) As a stream, or
-// 2) For stateless decoding using DecodeAll or DecodeBuffer.
+// 2) For stateless decoding using DecodeAll.
//
// Only a single stream can be decoded concurrently, but the same decoder
// can run multiple concurrent stateless decodes. It is even possible to
@@ -87,12 +84,23 @@
d.current.output = make(chan decodeOutput, d.o.concurrent)
d.current.flushed = true
+ if r == nil {
+ d.current.err = ErrDecoderNilInput
+ }
+
+ // Transfer option dicts.
+ d.dicts = make(map[uint32]dict, len(d.o.dicts))
+ for _, dc := range d.o.dicts {
+ d.dicts[dc.id] = dc
+ }
+ d.o.dicts = nil
+
// Create decoders
d.decoders = make(chan *blockDec, d.o.concurrent)
- d.frames = make(chan *frameDec, d.o.concurrent)
for i := 0; i < d.o.concurrent; i++ {
- d.frames <- newFrameDec(d.o)
- d.decoders <- newBlockDec(d.o.lowMem)
+ dec := newBlockDec(d.o.lowMem)
+ dec.localFrame = newFrameDec(d.o)
+ d.decoders <- dec
}
if r == nil {
@@ -106,7 +114,7 @@
// When the stream is done, io.EOF will be returned.
func (d *Decoder) Read(p []byte) (int, error) {
if d.stream == nil {
- return 0, errors.New("no input has been initialized")
+ return 0, ErrDecoderNilInput
}
var n int
for {
@@ -147,12 +155,20 @@
// Reset will reset the decoder with the supplied stream after the current one has finished processing.
// Note that this functionality cannot be used after Close has been called.
+// Reset can be called with a nil reader to release references to the previous reader.
+// After being called with a nil reader, no operations other than Reset, DecodeAll or Close
+// should be used.
func (d *Decoder) Reset(r io.Reader) error {
if d.current.err == ErrDecoderClosed {
return d.current.err
}
+
+ d.drainOutput()
+
if r == nil {
- return errors.New("nil Reader sent as input")
+ d.current.err = ErrDecoderNilInput
+ d.current.flushed = true
+ return nil
}
if d.stream == nil {
@@ -161,15 +177,19 @@
go d.startStreamDecoder(d.stream)
}
- d.drainOutput()
-
// If bytes buffer and < 1MB, do sync decoding anyway.
- if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 {
+ if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
+ bb2 := bb
if debug {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
- b := bb.Bytes()
- dst, err := d.DecodeAll(b, nil)
+ b := bb2.Bytes()
+ var dst []byte
+ if cap(d.current.b) > 0 {
+ dst = d.current.b
+ }
+
+ dst, err := d.DecodeAll(b, dst[:0])
if err == nil {
err = io.EOF
}
@@ -177,7 +197,7 @@
d.current.err = err
d.current.flushed = true
if debug {
- println("sync decode to ", len(dst), "bytes, err:", err)
+ println("sync decode to", len(dst), "bytes, err:", err)
}
return nil
}
@@ -216,20 +236,17 @@
println("current already flushed")
return
}
- for {
- select {
- case v := <-d.current.output:
- if v.d != nil {
- if debug {
- printf("re-adding decoder %p", v.d)
- }
- d.decoders <- v.d
+ for v := range d.current.output {
+ if v.d != nil {
+ if debug {
+ printf("re-adding decoder %p", v.d)
}
- if v.err == errEndOfStream {
- println("current flushed")
- d.current.flushed = true
- return
- }
+ d.decoders <- v.d
+ }
+ if v.err == errEndOfStream {
+ println("current flushed")
+ d.current.flushed = true
+ return
}
}
}
@@ -239,7 +256,7 @@
// Any error encountered during the write is also returned.
func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
if d.stream == nil {
- return 0, errors.New("no input has been initialized")
+ return 0, ErrDecoderNilInput
}
var n int64
for {
@@ -277,23 +294,34 @@
}
// Grab a block decoder and frame decoder.
- block, frame := <-d.decoders, <-d.frames
+ block := <-d.decoders
+ frame := block.localFrame
defer func() {
if debug {
printf("re-adding decoder: %p", block)
}
- d.decoders <- block
frame.rawInput = nil
frame.bBuf = nil
- d.frames <- frame
+ d.decoders <- block
}()
frame.bBuf = input
for {
+ frame.history.reset()
err := frame.reset(&frame.bBuf)
if err == io.EOF {
+ if debug {
+ println("frame reset return EOF")
+ }
return dst, nil
}
+ if frame.DictionaryID != nil {
+ dict, ok := d.dicts[*frame.DictionaryID]
+ if !ok {
+ return nil, ErrUnknownDictionary
+ }
+ frame.history.setDict(&dict)
+ }
if err != nil {
return dst, err
}
@@ -302,20 +330,24 @@
}
if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 {
// Never preallocate more than 1 GB up front.
- if uint64(cap(dst)) < frame.FrameContentSize {
+ if cap(dst)-len(dst) < int(frame.FrameContentSize) {
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
copy(dst2, dst)
dst = dst2
}
}
if cap(dst) == 0 {
- // Allocate window size * 2 by default if nothing is provided and we didn't get frame content size.
- size := frame.WindowSize * 2
+ // Allocate len(input) * 2 by default if nothing is provided
+ // and we didn't get frame content size.
+ size := len(input) * 2
// Cap to 1 MB.
if size > 1<<20 {
size = 1 << 20
}
- dst = make([]byte, 0, frame.WindowSize)
+ if uint64(size) > d.o.maxDecodedSize {
+ size = int(d.o.maxDecodedSize)
+ }
+ dst = make([]byte, 0, size)
}
dst, err = frame.runDecoder(dst, block)
@@ -323,6 +355,9 @@
return dst, err
}
if len(frame.bBuf) == 0 {
+ if debug {
+ println("frame dbuf empty")
+ }
break
}
}
@@ -456,10 +491,19 @@
br := readerWrapper{r: stream.r}
decodeStream:
for {
+ frame.history.reset()
err := frame.reset(&br)
if debug && err != nil {
println("Frame decoder returned", err)
}
+ if err == nil && frame.DictionaryID != nil {
+ dict, ok := d.dicts[*frame.DictionaryID]
+ if !ok {
+ err = ErrUnknownDictionary
+ } else {
+ frame.history.setDict(&dict)
+ }
+ }
if err != nil {
stream.output <- decodeOutput{
err: err,
diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
index 2ac9cd2..c0fd058 100644
--- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go
@@ -6,7 +6,6 @@
import (
"errors"
- "fmt"
"runtime"
)
@@ -18,6 +17,7 @@
lowMem bool
concurrent int
maxDecodedSize uint64
+ dicts []dict
}
func (o *decoderOptions) setDefault() {
@@ -42,7 +42,7 @@
func WithDecoderConcurrency(n int) DOption {
return func(o *decoderOptions) error {
if n <= 0 {
- return fmt.Errorf("Concurrency must be at least 1")
+ return errors.New("concurrency must be at least 1")
}
o.concurrent = n
return nil
@@ -60,9 +60,24 @@
return errors.New("WithDecoderMaxMemory must be at least 1")
}
if n > 1<<63 {
- return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63")
+ return errors.New("WithDecoderMaxmemory must be less than 1 << 63")
}
o.maxDecodedSize = n
return nil
}
}
+
+// WithDecoderDicts registers one or more dictionaries for the decoder.
+// If several dictionaries with the same ID are provided, the last one is used.
+func WithDecoderDicts(dicts ...[]byte) DOption {
+ return func(o *decoderOptions) error {
+ for _, b := range dicts {
+ d, err := loadDict(b)
+ if err != nil {
+ return err
+ }
+ o.dicts = append(o.dicts, *d)
+ }
+ return nil
+ }
+}
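A hedged usage sketch of the new option, assuming my.dict is a valid zstd dictionary and data.zst was compressed against it; the file names and error handling are illustrative only:

package main

import (
	"fmt"
	"os"

	"github.com/klauspost/compress/zstd"
)

func main() {
	dictBytes, err := os.ReadFile("my.dict") // assumed: a valid zstd dictionary
	if err != nil {
		panic(err)
	}
	dec, err := zstd.NewReader(nil, zstd.WithDecoderDicts(dictBytes))
	if err != nil {
		panic(err) // a malformed dictionary is rejected at option time
	}
	defer dec.Close()

	frame, err := os.ReadFile("data.zst") // assumed: compressed with that dictionary
	if err != nil {
		panic(err)
	}
	// DecodeAll returns ErrUnknownDictionary if the frame references
	// a dictionary ID that was never registered.
	out, err := dec.DecodeAll(frame, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(len(out), "bytes decoded")
}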
diff --git a/vendor/github.com/klauspost/compress/zstd/dict.go b/vendor/github.com/klauspost/compress/zstd/dict.go
new file mode 100644
index 0000000..fa25a18
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@@ -0,0 +1,122 @@
+package zstd
+
+import (
+ "bytes"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+
+ "github.com/klauspost/compress/huff0"
+)
+
+type dict struct {
+ id uint32
+
+ litEnc *huff0.Scratch
+ llDec, ofDec, mlDec sequenceDec
+ //llEnc, ofEnc, mlEnc []*fseEncoder
+ offsets [3]int
+ content []byte
+}
+
+var dictMagic = [4]byte{0x37, 0xa4, 0x30, 0xec}
+
+// ID returns the dictionary id or 0 if d is nil.
+func (d *dict) ID() uint32 {
+ if d == nil {
+ return 0
+ }
+ return d.id
+}
+
+// DictContentSize returns the dictionary content size or 0 if d is nil.
+func (d *dict) DictContentSize() int {
+ if d == nil {
+ return 0
+ }
+ return len(d.content)
+}
+
+// loadDict loads a dictionary as described in
+// https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
+func loadDict(b []byte) (*dict, error) {
+ // Check static field size.
+ if len(b) <= 8+(3*4) {
+ return nil, io.ErrUnexpectedEOF
+ }
+ d := dict{
+ llDec: sequenceDec{fse: &fseDecoder{}},
+ ofDec: sequenceDec{fse: &fseDecoder{}},
+ mlDec: sequenceDec{fse: &fseDecoder{}},
+ }
+ if !bytes.Equal(b[:4], dictMagic[:]) {
+ return nil, ErrMagicMismatch
+ }
+ d.id = binary.LittleEndian.Uint32(b[4:8])
+ if d.id == 0 {
+ return nil, errors.New("dictionaries cannot have ID 0")
+ }
+
+ // Read literal table
+ var err error
+ d.litEnc, b, err = huff0.ReadTable(b[8:], nil)
+ if err != nil {
+ return nil, err
+ }
+ d.litEnc.Reuse = huff0.ReusePolicyMust
+
+ br := byteReader{
+ b: b,
+ off: 0,
+ }
+ readDec := func(i tableIndex, dec *fseDecoder) error {
+ if err := dec.readNCount(&br, uint16(maxTableSymbol[i])); err != nil {
+ return err
+ }
+ if br.overread() {
+ return io.ErrUnexpectedEOF
+ }
+ err = dec.transform(symbolTableX[i])
+ if err != nil {
+ println("Transform table error:", err)
+ return err
+ }
+ if debug {
+ println("Read table ok", "symbolLen:", dec.symbolLen)
+ }
+ // Set decoders as predefined so they aren't reused.
+ dec.preDefined = true
+ return nil
+ }
+
+ if err := readDec(tableOffsets, d.ofDec.fse); err != nil {
+ return nil, err
+ }
+ if err := readDec(tableMatchLengths, d.mlDec.fse); err != nil {
+ return nil, err
+ }
+ if err := readDec(tableLiteralLengths, d.llDec.fse); err != nil {
+ return nil, err
+ }
+ if br.remain() < 12 {
+ return nil, io.ErrUnexpectedEOF
+ }
+
+ d.offsets[0] = int(br.Uint32())
+ br.advance(4)
+ d.offsets[1] = int(br.Uint32())
+ br.advance(4)
+ d.offsets[2] = int(br.Uint32())
+ br.advance(4)
+ if d.offsets[0] <= 0 || d.offsets[1] <= 0 || d.offsets[2] <= 0 {
+ return nil, errors.New("invalid offset in dictionary")
+ }
+ d.content = make([]byte, br.remain())
+ copy(d.content, br.unread())
+ if d.offsets[0] > len(d.content) || d.offsets[1] > len(d.content) || d.offsets[2] > len(d.content) {
+ return nil, fmt.Errorf("initial offset bigger than dictionary content size %d, offsets: %v", len(d.content), d.offsets)
+ }
+
+ return &d, nil
+}
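The loader above starts with a fixed prefix: the 4-byte magic 0x37a430ec and a 4-byte little-endian dictionary ID. A minimal standalone sketch of just that prefix check (the entropy tables that follow are left to the real loadDict):

package dictsketch // illustrative package name

import (
	"bytes"
	"encoding/binary"
	"errors"
	"io"
)

// parseDictHeader validates only the fixed prefix handled by loadDict:
// the magic bytes, then the little-endian ID. No tables are parsed here.
func parseDictHeader(b []byte) (uint32, error) {
	if len(b) < 8 {
		return 0, io.ErrUnexpectedEOF
	}
	if !bytes.Equal(b[:4], []byte{0x37, 0xa4, 0x30, 0xec}) {
		return 0, errors.New("not a zstd dictionary")
	}
	id := binary.LittleEndian.Uint32(b[4:8])
	if id == 0 {
		return 0, errors.New("dictionaries cannot have ID 0")
	}
	return id, nil
}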
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_base.go b/vendor/github.com/klauspost/compress/zstd/enc_base.go
new file mode 100644
index 0000000..60f2986
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_base.go
@@ -0,0 +1,178 @@
+package zstd
+
+import (
+ "fmt"
+ "math/bits"
+
+ "github.com/klauspost/compress/zstd/internal/xxhash"
+)
+
+const (
+ dictShardBits = 6
+)
+
+type fastBase struct {
+ // cur is the offset at the start of hist
+ cur int32
+ // maximum offset. Should be at least 2x block size.
+ maxMatchOff int32
+ hist []byte
+ crc *xxhash.Digest
+ tmp [8]byte
+ blk *blockEnc
+ lastDictID uint32
+ lowMem bool
+}
+
+// CRC returns the underlying CRC writer.
+func (e *fastBase) CRC() *xxhash.Digest {
+ return e.crc
+}
+
+// AppendCRC will append the CRC to the destination slice and return it.
+func (e *fastBase) AppendCRC(dst []byte) []byte {
+ crc := e.crc.Sum(e.tmp[:0])
+ dst = append(dst, crc[7], crc[6], crc[5], crc[4])
+ return dst
+}
+
+// WindowSize returns the window size of the encoder,
+// or, when size > 0 and smaller, a window just large enough to contain the input.
+func (e *fastBase) WindowSize(size int) int32 {
+ if size > 0 && size < int(e.maxMatchOff) {
+ b := int32(1) << uint(bits.Len(uint(size)))
+ // Keep minimum window.
+ if b < 1024 {
+ b = 1024
+ }
+ return b
+ }
+ return e.maxMatchOff
+}
+
+// Block returns the current block.
+func (e *fastBase) Block() *blockEnc {
+ return e.blk
+}
+
+func (e *fastBase) addBlock(src []byte) int32 {
+ if debugAsserts && e.cur > bufferReset {
+ panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
+ }
+ // check if we have space already
+ if len(e.hist)+len(src) > cap(e.hist) {
+ if cap(e.hist) == 0 {
+ e.ensureHist(len(src))
+ } else {
+ if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
+ panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
+ }
+ // Move down
+ offset := int32(len(e.hist)) - e.maxMatchOff
+ copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
+ e.cur += offset
+ e.hist = e.hist[:e.maxMatchOff]
+ }
+ }
+ s := int32(len(e.hist))
+ e.hist = append(e.hist, src...)
+ return s
+}
+
+// ensureHist will ensure that history can keep at least this many bytes.
+func (e *fastBase) ensureHist(n int) {
+ if cap(e.hist) >= n {
+ return
+ }
+ l := e.maxMatchOff
+ if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
+ l += maxCompressedBlockSize
+ } else {
+ l += e.maxMatchOff
+ }
+ // Make it at least 1MB.
+ if l < 1<<20 && !e.lowMem {
+ l = 1 << 20
+ }
+ // Make it at least the requested size.
+ if l < int32(n) {
+ l = int32(n)
+ }
+ e.hist = make([]byte, 0, l)
+}
+
+// UseBlock will replace the block with the provided one,
+// but transfer recent offsets from the previous.
+func (e *fastBase) UseBlock(enc *blockEnc) {
+ enc.reset(e.blk)
+ e.blk = enc
+}
+
+func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:], src[t:]))
+}
+
+func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
+ if debugAsserts {
+ if s < 0 {
+ err := fmt.Sprintf("s (%d) < 0", s)
+ panic(err)
+ }
+ if t < 0 {
+ err := fmt.Sprintf("t (%d) < 0", t)
+ panic(err)
+ }
+ if s-t > e.maxMatchOff {
+ err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
+ panic(err)
+ }
+ if len(src)-int(s) > maxCompressedBlockSize {
+ panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
+ }
+ }
+
+ // Extend the match to be as long as possible.
+ return int32(matchLen(src[s:], src[t:]))
+}
+
+// Reset the encoding table.
+func (e *fastBase) resetBase(d *dict, singleBlock bool) {
+ if e.blk == nil {
+ e.blk = &blockEnc{lowMem: e.lowMem}
+ e.blk.init()
+ } else {
+ e.blk.reset(nil)
+ }
+ e.blk.initNewEncode()
+ if e.crc == nil {
+ e.crc = xxhash.New()
+ } else {
+ e.crc.Reset()
+ }
+ if d != nil {
+ low := e.lowMem
+ if singleBlock {
+ e.lowMem = true
+ }
+ e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
+ e.lowMem = low
+ }
+
+ // We offset current position so everything will be out of reach.
+ // If above reset line, history will be purged.
+ if e.cur < bufferReset {
+ e.cur += e.maxMatchOff + int32(len(e.hist))
+ }
+ e.hist = e.hist[:0]
+ if d != nil {
+ // Set offsets (currently not used)
+ for i, off := range d.offsets {
+ e.blk.recentOffsets[i] = uint32(off)
+ e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
+ }
+ // Transfer litenc.
+ e.blk.dictLitEnc = d.litEnc
+ e.hist = append(e.hist, d.content...)
+ }
+}
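The addBlock method above slides the window by copying the last maxMatchOff bytes of history down and advancing e.cur, so stale table offsets fall out of the valid range. A toy restatement of that arithmetic, with illustrative names:

// slideWindow models the resize branch of addBlock: keep the last
// window bytes of history and advance the absolute base offset so
// older table entries land outside the reachable range.
func slideWindow(hist []byte, cur, window int32) ([]byte, int32) {
	offset := int32(len(hist)) - window
	copy(hist[:window], hist[offset:])
	return hist[:window], cur + offset
}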
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_best.go b/vendor/github.com/klauspost/compress/zstd/enc_best.go
new file mode 100644
index 0000000..dc1eed5
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@@ -0,0 +1,501 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import (
+ "fmt"
+ "math/bits"
+)
+
+const (
+ bestLongTableBits = 20 // Bits used in the long match table
+ bestLongTableSize = 1 << bestLongTableBits // Size of the table
+
+ // Note: Increasing the short table bits or making the hash shorter
+ // can actually lead to compression degradation since it will 'steal' more from the
+ // long match table and match offsets are quite big.
+ // This greatly depends on the type of input.
+ bestShortTableBits = 16 // Bits used in the short match table
+ bestShortTableSize = 1 << bestShortTableBits // Size of the table
+)
+
+// bestFastEncoder uses 2 tables, one for short matches (4 bytes) and one for long matches.
+// The long match table contains the previous entry with the same hash,
+// effectively making it a "chain" of length 2.
+// When we find a long match we choose between the two values and select the longest.
+// When we find a short match, after checking the long, we check if we can find a long at n+1
+// and that it is longer (lazy matching).
+type bestFastEncoder struct {
+ fastBase
+ table [bestShortTableSize]prevEntry
+ longTable [bestLongTableSize]prevEntry
+ dictTable []prevEntry
+ dictLongTable []prevEntry
+}
+
+// Encode improves compression by evaluating multiple match candidates and keeping the best-scoring one.
+func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (4)
+ inputMargin = 8 + 4
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = prevEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ v2 := e.table[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.table[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ const kSearchStrength = 10
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+ offset3 := int32(blk.recentOffsets[2])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ type match struct {
+ offset int32
+ s int32
+ length int32
+ rep int32
+ }
+ matchAt := func(offset int32, s int32, first uint32, rep int32) match {
+ if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
+ return match{offset: offset, s: s}
+ }
+ return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
+ }
+
+ bestOf := func(a, b match) match {
+ aScore := b.s - a.s + a.length
+ bScore := a.s - b.s + b.length
+ if a.rep < 0 {
+ aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
+ }
+ if b.rep < 0 {
+ bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
+ }
+ if aScore >= bScore {
+ return a
+ }
+ return b
+ }
+ const goodEnough = 100
+
+ nextHashL := hash8(cv, bestLongTableBits)
+ nextHashS := hash4x64(cv, bestShortTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
+ if canRepeat && best.length < goodEnough {
+ best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
+ best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
+ best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
+ if best.length > 0 {
+ best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
+ best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
+ best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
+ }
+ }
+ // Load next and check...
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
+ e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
+
+ // Look far ahead, unless we have a really long match already...
+ if best.length < goodEnough {
+ // No match found, move forward on input, no need to check forward...
+ if best.length < 4 {
+ s += 1 + (s-nextEmit)>>(kSearchStrength-1)
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+
+ s++
+ candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
+ cv = load6432(src, s)
+ cv2 := load6432(src, s+1)
+ candidateL = e.longTable[hash8(cv, bestLongTableBits)]
+ candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
+
+ best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
+ best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
+ best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+
+ // See if we can find a better match by checking where the current best ends.
+ // Use that offset to see if we can find a better full match.
+ if sAt := best.s + best.length; sAt < sLimit {
+ nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
+ candidateEnd := e.longTable[nextHashL]
+ if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
+ bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
+ if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
+ bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+ }
+ best = bestEnd
+ }
+ }
+ }
+
+ // We have a match, we can store the forward value
+ if best.rep > 0 {
+ s = best.s
+ var seq seq
+ seq.matchLen = uint32(best.length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := best.s
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ repIndex := best.offset
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = uint32(best.rep)
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s
+ s = best.s + best.length
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, best.length)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ off := index0 + e.cur
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ h0 := hash8(cv0, bestLongTableBits)
+ h1 := hash4x64(cv0, bestShortTableBits)
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
+ off++
+ index0++
+ }
+ switch best.rep {
+ case 2:
+ offset1, offset2 = offset2, offset1
+ case 3:
+ offset1, offset2, offset3 = offset3, offset1, offset2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+
+ // A 4-byte match has been found. Update recent offsets.
+ // We'll later see if more than 4 bytes.
+ s = best.s
+ t := best.offset
+ offset1, offset2, offset3 = s-t, offset1, offset2
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := best.length
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ // every entry
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ h0 := hash8(cv0, bestLongTableBits)
+ h1 := hash4x64(cv0, bestShortTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
+ index0++
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash4x64(cv, bestShortTableBits)
+ nextHashL := hash8(cv, bestLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ blk.recentOffsets[2] = uint32(offset3)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// EncodeNoHist will encode a block with no history and no following blocks.
+// Most notable difference is that src will not be copied for history and
+// we do not need to check for max match length.
+func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ e.ensureHist(len(src))
+ e.Encode(blk, src)
+}
+
+// Reset will reset the encoder and set a dictionary if not nil.
+func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d == nil {
+ return
+ }
+ // Init or copy dict table
+ if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
+ if len(e.dictTable) != len(e.table) {
+ e.dictTable = make([]prevEntry, len(e.table))
+ }
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ for i := e.maxMatchOff; i < end; i += 4 {
+ const hashLog = bestShortTableBits
+
+ cv := load6432(d.content, i-e.maxMatchOff)
+ nextHash := hash4x64(cv, hashLog) // 0 -> 4
+ nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
+ nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
+ nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
+ e.dictTable[nextHash] = prevEntry{
+ prev: e.dictTable[nextHash].offset,
+ offset: i,
+ }
+ e.dictTable[nextHash1] = prevEntry{
+ prev: e.dictTable[nextHash1].offset,
+ offset: i + 1,
+ }
+ e.dictTable[nextHash2] = prevEntry{
+ prev: e.dictTable[nextHash2].offset,
+ offset: i + 2,
+ }
+ e.dictTable[nextHash3] = prevEntry{
+ prev: e.dictTable[nextHash3].offset,
+ offset: i + 3,
+ }
+ }
+ e.lastDictID = d.id
+ }
+
+ // Init or copy dict table
+ if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
+ if len(e.dictLongTable) != len(e.longTable) {
+ e.dictLongTable = make([]prevEntry, len(e.longTable))
+ }
+ if len(d.content) >= 8 {
+ cv := load6432(d.content, 0)
+ h := hash8(cv, bestLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: e.maxMatchOff,
+ prev: e.dictLongTable[h].offset,
+ }
+
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ off := 8 // First to read
+ for i := e.maxMatchOff + 1; i < end; i++ {
+ cv = cv>>8 | (uint64(d.content[off]) << 56)
+ h := hash8(cv, bestLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: i,
+ prev: e.dictLongTable[h].offset,
+ }
+ off++
+ }
+ }
+ e.lastDictID = d.id
+ }
+ // Reset long table to initial state
+ copy(e.longTable[:], e.dictLongTable)
+
+ e.cur = e.maxMatchOff
+ // Reset short table to initial state
+ copy(e.table[:], e.dictTable)
+}
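A standalone restatement of the bestOf scoring used in Encode above: a candidate earns its length, loses ground for starting later than its rival, and a non-repeat match (rep < 0) pays roughly one point per 8 bits needed to encode its offset. The struct mirrors the encoder's local match type; the example values are made up:

package main

import (
	"fmt"
	"math/bits"
)

// match mirrors the fields bestOf scores; rep < 0 marks a normal
// match that must also pay for encoding its offset.
type match struct {
	offset, s, length, rep int32
}

// score is the per-candidate half of bestOf: length, minus the start
// delta versus the rival, minus ~1 point per 8 offset bits for
// non-repeat matches.
func score(m match, rivalStart int32) int32 {
	sc := rivalStart - m.s + m.length
	if m.rep < 0 {
		sc -= int32(bits.Len32(uint32(m.offset))) / 8
	}
	return sc
}

func main() {
	a := match{offset: 1 << 16, s: 10, length: 12, rep: -1}
	b := match{offset: 0, s: 11, length: 9, rep: 1}
	fmt.Println(score(a, b.s), score(b, a.s)) // 11 8: a wins
}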
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
new file mode 100644
index 0000000..6049542
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -0,0 +1,1235 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+// Based on work by Yann Collet, released under BSD License.
+
+package zstd
+
+import "fmt"
+
+const (
+ betterLongTableBits = 19 // Bits used in the long match table
+ betterLongTableSize = 1 << betterLongTableBits // Size of the table
+
+ // Note: Increasing the short table bits or making the hash shorter
+ // can actually lead to compression degradation since it will 'steal' more from the
+ // long match table and match offsets are quite big.
+ // This greatly depends on the type of input.
+ betterShortTableBits = 13 // Bits used in the short match table
+ betterShortTableSize = 1 << betterShortTableBits // Size of the table
+
+ betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
+ betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
+
+ betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table
+ betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
+)
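Assuming the constants above, the shard bookkeeping works out to 128 short-table shards of 64 entries each, and an entry index maps to its shard by integer division. A small standalone check:

package main

import "fmt"

const (
	shortBits = 13                           // betterShortTableBits
	shardBits = 6                            // dictShardBits
	shardCnt  = 1 << (shortBits - shardBits) // 128 shards
	shardSize = (1 << shortBits) / shardCnt  // 64 entries per shard
)

func main() {
	for _, entry := range []uint32{0, 63, 64, 8191} {
		fmt.Println("entry", entry, "-> shard", entry/shardSize)
	}
}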
+
+type prevEntry struct {
+ offset int32
+ prev int32
+}
+
+// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
+// The long match table contains the previous entry with the same hash,
+// effectively making it a "chain" of length 2.
+// When we find a long match we choose between the two values and select the longest.
+// When we find a short match, after checking the long, we check if we can find a long at n+1
+// and that it is longer (lazy matching).
+type betterFastEncoder struct {
+ fastBase
+ table [betterShortTableSize]tableEntry
+ longTable [betterLongTableSize]prevEntry
+}
+
+type betterFastEncoderDict struct {
+ betterFastEncoder
+ dictTable []tableEntry
+ dictLongTable []prevEntry
+ shortTableShardDirty [betterShortTableShardCnt]bool
+ longTableShardDirty [betterLongTableShardCnt]bool
+ allDirty bool
+}
+
+// Encode improves compression by checking a chained long-match table and matching lazily.
+func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 9
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+ var matched int32
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ off := s + e.cur
+ e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
+ e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s + repOff
+ s += length + repOff
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ const repOff2 = 1
+
+ // We deviate from the reference encoder and also check offset 2.
+ // Still slower and not much better, so disabled.
+ // repIndex = s - offset2 + repOff2
+ if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
+ // Consider history as well.
+ var seq seq
+ length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 2
+ seq.offset = 2
+ if debugSequences {
+ println("repeat sequence 2", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ index0 := s + repOff2
+ s += length + repOff2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ // Swap offsets
+ offset1, offset2 = offset2, offset1
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := candidateL.offset - e.cur
+ coffsetLP := candidateL.prev - e.cur
+
+ // Check if we have a long match.
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetL+8, src) + 8
+ t = coffsetL
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
+ if prevMatch > matched {
+ matched = prevMatch
+ t = coffsetLP
+ }
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ }
+ break
+ }
+
+ // Check if we have a long match on prev.
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetLP+8, src) + 8
+ t = coffsetLP
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ coffsetS := candidateS.offset - e.cur
+
+ // Check if we have a short match.
+ if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ matched = e.matchlen(s+4, coffsetS+4, src) + 4
+
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, betterLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = candidateL.offset - e.cur
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+ }
+
+ // Check prev long...
+ coffsetL = candidateL.prev - e.cur
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match (after short)")
+ }
+ break
+ }
+ }
+ t = coffsetS
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+
+ // Try to find a better match by searching for a long match at the end of the current best match
+ if true && s+matched < sLimit {
+ nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+ cv := load3232(src, s)
+ candidateL := e.longTable[nextHashL]
+ coffsetL := candidateL.offset - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("long match at end-of-match")
+ }
+ }
+ }
+
+ // Check prev long...
+ if true {
+ coffsetL = candidateL.prev - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match at end-of-match")
+ }
+ }
+ }
+ }
+ }
+ // A match has been found. Update recent offsets.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := matched
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ index0 += 2
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// EncodeNoHist will encode a block with no history and no following blocks.
+// Most notable difference is that src will not be copied for history and
+// we do not need to check for max match length.
+func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
+ e.ensureHist(len(src))
+ e.Encode(blk, src)
+}
+
+// Encode improves compression like betterFastEncoder and additionally marks dictionary table shards dirty as they are overwritten.
+func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = prevEntry{}
+ }
+ e.cur = e.maxMatchOff
+ e.allDirty = true
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ v2 := e.longTable[i].prev
+ if v < minOff {
+ v = 0
+ v2 = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ if v2 < minOff {
+ v2 = 0
+ } else {
+ v2 = v2 - e.cur + e.maxMatchOff
+ }
+ }
+ e.longTable[i] = prevEntry{
+ offset: v,
+ prev: v2,
+ }
+ }
+ e.allDirty = true
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 9
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+ var matched int32
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ off := s + e.cur
+ e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+ e.markShortShardDirty(nextHashS)
+
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s + repOff
+ s += length + repOff
+
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ const repOff2 = 1
+
+ // We deviate from the reference encoder and also check offset 2.
+ // Still slower and not much better, so disabled.
+ // repIndex = s - offset2 + repOff2
+ if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
+ // Consider history as well.
+ var seq seq
+ length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 2
+ seq.offset = 2
+ if debugSequences {
+ println("repeat sequence 2", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ index0 := s + repOff2
+ s += length + repOff2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+
+ // Index skipped...
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+ cv = load6432(src, s)
+ // Swap offsets
+ offset1, offset2 = offset2, offset1
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := candidateL.offset - e.cur
+ coffsetLP := candidateL.prev - e.cur
+
+ // Check if we have a long match.
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetL+8, src) + 8
+ t = coffsetL
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
+ if prevMatch > matched {
+ matched = prevMatch
+ t = coffsetLP
+ }
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ }
+ break
+ }
+
+ // Check if we have a long match on prev.
+ if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
+ // Found a long match, at least 8 bytes.
+ matched = e.matchlen(s+8, coffsetLP+8, src) + 8
+ t = coffsetLP
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ coffsetS := candidateS.offset - e.cur
+
+ // Check if we have a short match.
+ if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ matched = e.matchlen(s+4, coffsetS+4, src) + 4
+
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, betterLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = candidateL.offset - e.cur
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
+ e.markLongShardDirty(nextHashL)
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+ }
+
+ // Check prev long...
+ coffsetL = candidateL.prev - e.cur
+ if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
+ // Found a long match, at least 8 bytes.
+ matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
+ if matchedNext > matched {
+ t = coffsetL
+ s += checkAt
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match (after short)")
+ }
+ break
+ }
+ }
+ t = coffsetS
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // Try to find a better match by searching for a long match at the end of the current best match
+ if s+matched < sLimit {
+ nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+ cv := load3232(src, s)
+ candidateL := e.longTable[nextHashL]
+ coffsetL := candidateL.offset - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("long match at end-of-match")
+ }
+ }
+ }
+
+ // Check prev long...
+ if true {
+ coffsetL = candidateL.prev - e.cur - matched
+ if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+ // Found a long match, at least 4 bytes.
+ matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+ if matchedNext > matched {
+ t = coffsetL
+ matched = matchedNext
+ if debugMatches {
+ println("prev long match at end-of-match")
+ }
+ }
+ }
+ }
+ }
+ // A match has been found. Update recent offsets.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the n-byte match as long as possible.
+ l := matched
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) -> s - 1
+ index0 := s - l + 1
+ for index0 < s-1 {
+ cv0 := load6432(src, index0)
+ cv1 := cv0 >> 8
+ h0 := hash8(cv0, betterLongTableBits)
+ off := index0 + e.cur
+ e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
+ e.markLongShardDirty(h0)
+ h1 := hash5(cv1, betterShortTableBits)
+ e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
+ e.markShortShardDirty(h1)
+ index0 += 2
+ }
+
+ cv = load6432(src, s)
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, betterShortTableBits)
+ nextHashL := hash8(cv, betterLongTableBits)
+
+ // We have at least 4 byte match.
+ // No need to check backwards. We come straight from a match
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShortShardDirty(nextHashS)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+}
+
+// Reset will reset the encoder and set a dictionary if not nil.
+func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d != nil {
+ panic("betterFastEncoder: Reset with dict")
+ }
+}
+
+// Reset will reset the encoder and set a dictionary if not nil.
+func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d == nil {
+ return
+ }
+ // Init or copy dict table
+ if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
+ if len(e.dictTable) != len(e.table) {
+ e.dictTable = make([]tableEntry, len(e.table))
+ }
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ for i := e.maxMatchOff; i < end; i += 4 {
+ const hashLog = betterShortTableBits
+
+ cv := load6432(d.content, i-e.maxMatchOff)
+ nextHash := hash5(cv, hashLog) // 0 -> 4
+ nextHash1 := hash5(cv>>8, hashLog) // 1 -> 5
+ nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
+ nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
+ e.dictTable[nextHash] = tableEntry{
+ val: uint32(cv),
+ offset: i,
+ }
+ e.dictTable[nextHash1] = tableEntry{
+ val: uint32(cv >> 8),
+ offset: i + 1,
+ }
+ e.dictTable[nextHash2] = tableEntry{
+ val: uint32(cv >> 16),
+ offset: i + 2,
+ }
+ e.dictTable[nextHash3] = tableEntry{
+ val: uint32(cv >> 24),
+ offset: i + 3,
+ }
+ }
+ e.lastDictID = d.id
+ e.allDirty = true
+ }
+
+ // Init or copy dict table
+ if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
+ if len(e.dictLongTable) != len(e.longTable) {
+ e.dictLongTable = make([]prevEntry, len(e.longTable))
+ }
+ if len(d.content) >= 8 {
+ cv := load6432(d.content, 0)
+ h := hash8(cv, betterLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: e.maxMatchOff,
+ prev: e.dictLongTable[h].offset,
+ }
+
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ off := 8 // First to read
+ for i := e.maxMatchOff + 1; i < end; i++ {
+ cv = cv>>8 | (uint64(d.content[off]) << 56)
+ h := hash8(cv, betterLongTableBits)
+ e.dictLongTable[h] = prevEntry{
+ offset: i,
+ prev: e.dictLongTable[h].offset,
+ }
+ off++
+ }
+ }
+ e.lastDictID = d.id
+ e.allDirty = true
+ }
+
+ // Reset short table to initial state
+ {
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.shortTableShardDirty {
+ if e.shortTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+ const shardCnt = betterShortTableShardCnt
+ const shardSize = betterShortTableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.table[:], e.dictTable)
+ for i := range e.shortTableShardDirty {
+ e.shortTableShardDirty[i] = false
+ }
+ } else {
+ for i := range e.shortTableShardDirty {
+ if !e.shortTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ e.shortTableShardDirty[i] = false
+ }
+ }
+ }
+ // Reset long table to initial state
+ {
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.longTableShardDirty {
+ if e.longTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+ const shardCnt = betterLongTableShardCnt
+ const shardSize = betterLongTableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.longTable[:], e.dictLongTable)
+ for i := range e.longTableShardDirty {
+ e.longTableShardDirty[i] = false
+ }
+ } else {
+ for i := range e.longTableShardDirty {
+ if !e.longTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
+ e.longTableShardDirty[i] = false
+ }
+ }
+ }
+ e.cur = e.maxMatchOff
+ e.allDirty = false
+}
+
+func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
+ e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
+}
+
+func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
+ e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
+}
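The reset logic above restores tables from the dictionary snapshot either wholesale or shard by shard, switching to a single bulk copy once more than two thirds of the shards are dirty. A simplified standalone model of that policy (types and names are illustrative):

// restoreFromDict is a simplified model of the two reset blocks above.
func restoreFromDict(table, snapshot []int32, dirty []bool, shardSize int, allDirty bool) {
	dirtyCnt := 0
	if !allDirty {
		for _, d := range dirty {
			if d {
				dirtyCnt++
			}
		}
	}
	if allDirty || dirtyCnt > len(dirty)*4/6 {
		// Most shards touched: one bulk copy is cheaper.
		copy(table, snapshot)
		for i := range dirty {
			dirty[i] = false
		}
		return
	}
	// Few shards touched: refresh only those.
	for i, d := range dirty {
		if !d {
			continue
		}
		copy(table[i*shardSize:(i+1)*shardSize], snapshot[i*shardSize:(i+1)*shardSize])
		dirty[i] = false
	}
}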
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index ee3b09b..8629d43 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -4,11 +4,16 @@
package zstd
+import "fmt"
+
const (
dFastLongTableBits = 17 // Bits used in the long match table
dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+ dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
+ dLongTableShardSize = dFastLongTableSize / dLongTableShardCnt // Size of an individual shard
+
dFastShortTableBits = tableBits // Bits used in the short match table
dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
@@ -19,6 +24,13 @@
longTable [dFastLongTableSize]tableEntry
}
+type doubleFastEncoderDict struct {
+ fastEncoderDict
+ longTable [dFastLongTableSize]tableEntry
+ dictLongTable []tableEntry
+ longTableShardDirty [dLongTableShardCnt]bool
+}
+
// Encode mimics functionality in zstd_dfast.c
func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
const (
@@ -29,7 +41,7 @@
)
// Protect against e.cur wraparound.
- for e.cur > (1<<30)+e.maxMatchOff {
+ for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -61,6 +73,7 @@
e.longTable[i].offset = v
}
e.cur = e.maxMatchOff
+ break
}
s := e.addBlock(src)
@@ -77,10 +90,7 @@
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 1.
- stepSize := int32(e.o.targetLength)
- if stepSize == 0 {
- stepSize++
- }
+ const stepSize = 1
const kSearchStrength = 8
@@ -110,7 +120,7 @@
canRepeat := len(blk.sequences) > 2
for {
- if debug && canRepeat && offset1 == 0 {
+ if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
@@ -169,55 +179,6 @@
cv = load6432(src, s)
continue
}
- const repOff2 = 1
- // We deviate from the reference encoder and also check offset 2.
- // Slower and not consistently better, so disabled.
- // repIndex = s - offset2 + repOff2
- if false && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff2*8)) {
- // Consider history as well.
- var seq seq
- lenght := 4 + e.matchlen(s+4+repOff2, repIndex+4, src)
-
- seq.matchLen = uint32(lenght - zstdMinMatch)
-
- // We might be able to match backwards.
- // Extend as long as we can.
- start := s + repOff2
- // We end the search early, so we don't risk 0 literals
- // and have to do special offset treatment.
- startLimit := nextEmit + 1
-
- tMin := s - e.maxMatchOff
- if tMin < 0 {
- tMin = 0
- }
- for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
- repIndex--
- start--
- seq.matchLen++
- }
- addLiterals(&seq, start)
-
- // rep 2
- seq.offset = 2
- if debugSequences {
- println("repeat sequence 2", seq, "next s:", s)
- }
- blk.sequences = append(blk.sequences, seq)
- s += lenght + repOff2
- nextEmit = s
- if s >= sLimit {
- if debug {
- println("repeat ended", s, lenght)
-
- }
- break encodeLoop
- }
- cv = load6432(src, s)
- // Swap offsets
- offset1, offset2 = offset2, offset1
- continue
- }
}
// Find the offsets of our two matches.
coffsetL := s - (candidateL.offset - e.cur)
@@ -229,10 +190,10 @@
// Reference encoder checks all 8 bytes, we only check 4,
// but the likelihood of both the first 4 bytes and the hash matching should be enough.
t = candidateL.offset - e.cur
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
@@ -266,13 +227,13 @@
}
t = candidateS.offset - e.cur
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
- if debug && t < 0 {
+ if debugAsserts && t < 0 {
panic("t<0")
}
if debugMatches {
@@ -294,11 +255,11 @@
offset2 = offset1
offset1 = s - t
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && canRepeat && int(offset1) > len(src) {
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
@@ -369,7 +330,7 @@
}
// Store this, since we have it.
- nextHashS := hash5(cv1>>8, dFastShortTableBits)
+ nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// We have at least 4 byte match.
@@ -424,7 +385,7 @@
)
// Protect against e.cur wraparound.
- if e.cur > (1<<30)+e.maxMatchOff {
+ if e.cur >= bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -447,10 +408,7 @@
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 1.
- stepSize := int32(e.o.targetLength)
- if stepSize == 0 {
- stepSize++
- }
+ const stepSize = 1
const kSearchStrength = 8
@@ -545,10 +503,10 @@
// Reference encoder checks all 8 bytes, we only check 4,
// but the likelihood of both the first 4 bytes and the hash matching should be enough.
t = candidateL.offset - e.cur
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d). cur: %d", s, t, e.cur))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
@@ -582,13 +540,13 @@
}
t = candidateS.offset - e.cur
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
- if debug && t < 0 {
+ if debugAsserts && t < 0 {
panic("t<0")
}
if debugMatches {
@@ -610,8 +568,8 @@
offset2 = offset1
offset1 = s - t
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
// Extend the 4-byte match as long as possible.
@@ -723,4 +681,441 @@
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
+ // We do not store history, so we must offset e.cur to avoid false matches for next user.
+ if e.cur < bufferReset {
+ e.cur += int32(len(src))
+ }
+}
+
+// Encode will encode the content, with a dictionary if initialized for it.
+func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ // Input margin is the number of bytes we read (8)
+ // and the maximum we will read ahead (2)
+ inputMargin = 8 + 2
+ minNonLiteralBlockSize = 16
+ )
+
+ // Protect against e.cur wraparound.
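+ // bufferReset is the offset ceiling (defined elsewhere in the package);
+ // rebasing offsets here keeps int32 position arithmetic from overflowing.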
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ for i := range e.longTable[:] {
+ e.longTable[i] = tableEntry{}
+ }
+ e.markAllShardsDirty()
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ for i := range e.longTable[:] {
+ v := e.longTable[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.longTable[i].offset = v
+ }
+ e.markAllShardsDirty()
+ e.cur = e.maxMatchOff
+ break
+ }
+
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
+
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 1.
+ const stepSize = 1
+
+ const kSearchStrength = 8
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
+ if debug {
+ println("recent offsets:", blk.recentOffsets)
+ }
+
+encodeLoop:
+ for {
+ var t int32
+ // We allow the encoder to optionally turn off repeat offsets across blocks
+ canRepeat := len(blk.sequences) > 2
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHashS := hash5(cv, dFastShortTableBits)
+ nextHashL := hash8(cv, dFastLongTableBits)
+ candidateL := e.longTable[nextHashL]
+ candidateS := e.table[nextHashS]
+
+ const repOff = 1
+ repIndex := s - offset1 + repOff
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.longTable[nextHashL] = entry
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = entry
+ e.markShardDirty(nextHashS)
+
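+ // First try a repeat match at s+1 using the most recent offset; a hit
+ // lets us emit a repeat sequence without consulting the hash tables.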
+ if canRepeat {
+ if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
+ // Consider history as well.
+ var seq seq
+ length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + repOff
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += length + repOff
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, lenght)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ }
+ // Find the offsets of our two matches.
+ coffsetL := s - (candidateL.offset - e.cur)
+ coffsetS := s - (candidateS.offset - e.cur)
+
+ // Check if we have a long match.
+ if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+ // Found a long match, likely at least 8 bytes.
+ // Reference encoder checks all 8 bytes, we only check 4,
+ // but the likelihood of both the first 4 bytes and the hash matching should be enough.
+ t = candidateL.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugMatches {
+ println("long match")
+ }
+ break
+ }
+
+ // Check if we have a short match.
+ if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
+ // found a regular match
+ // See if we can find a long match at s+1
+ const checkAt = 1
+ cv := load6432(src, s+checkAt)
+ nextHashL = hash8(cv, dFastLongTableBits)
+ candidateL = e.longTable[nextHashL]
+ coffsetL = s - (candidateL.offset - e.cur) + checkAt
+
+ // We can store it, since we have at least a 4 byte match.
+ e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
+ e.markLongShardDirty(nextHashL)
+ if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
+ // Found a long match, likely at least 8 bytes.
+ // Reference encoder checks all 8 bytes, we only check 4,
+ // but the likelihood of both the first 4 bytes and the hash matching should be enough.
+ t = candidateL.offset - e.cur
+ s += checkAt
+ if debugMatches {
+ println("long match (after short)")
+ }
+ break
+ }
+
+ t = candidateS.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ if debugMatches {
+ println("short match")
+ }
+ break
+ }
+
+ // No match found, move forward in input.
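+ // Skip acceleration: the longer we go without a match, the more bytes
+ // we step per iteration, bounded by kSearchStrength.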
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+
+ // A 4-byte match has been found. Update recent offsets.
+ // We'll later see if it is longer than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the 4-byte match as long as possible.
+ l := e.matchlen(s+4, t+4, src) + 4
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
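+ // Literal offsets are stored +3 because values 1-3 are reserved for
+ // the repeat-offset codes.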
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+
+ // Index match start+1 (long) and start+2 (short)
+ index0 := s - l + 1
+ // Index match end-2 (long) and end-1 (short)
+ index1 := s - 2
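+ // Seeding the tables with interior match positions improves future
+ // match finding at the cost of a few extra stores.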
+
+ cv0 := load6432(src, index0)
+ cv1 := load6432(src, index1)
+ te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
+ te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
+ longHash1 := hash8(cv0, dFastLongTableBits)
+ longHash2 := hash8(cv1, dFastLongTableBits)
+ e.longTable[longHash1] = te0
+ e.longTable[longHash2] = te1
+ e.markLongShardDirty(longHash1)
+ e.markLongShardDirty(longHash2)
+ cv0 >>= 8
+ cv1 >>= 8
+ te0.offset++
+ te1.offset++
+ te0.val = uint32(cv0)
+ te1.val = uint32(cv1)
+ hashVal1 := hash5(cv0, dFastShortTableBits)
+ hashVal2 := hash5(cv1, dFastShortTableBits)
+ e.table[hashVal1] = te0
+ e.markShardDirty(hashVal1)
+ e.table[hashVal2] = te1
+ e.markShardDirty(hashVal2)
+
+ cv = load6432(src, s)
+
+ if !canRepeat {
+ continue
+ }
+
+ // Check offset 2
+ for {
+ o2 := s - offset2
+ if load3232(src, o2) != uint32(cv) {
+ // Do regular search
+ break
+ }
+
+ // Store this, since we have it.
+ nextHashS := hash5(cv, dFastShortTableBits)
+ nextHashL := hash8(cv, dFastLongTableBits)
+
+ // We have at least a 4-byte match.
+ // No need to check backwards: we come straight from a match.
+ l := 4 + e.matchlen(s+4, o2+4, src)
+
+ entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.longTable[nextHashL] = entry
+ e.markLongShardDirty(nextHashL)
+ e.table[nextHashS] = entry
+ e.markShardDirty(nextHashS)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ // Finished
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
+ // If we encoded more than 64K mark all dirty.
+ if len(src) > 64<<10 {
+ e.markAllShardsDirty()
+ }
+}
+
+// Reset will reset the encoder state. Setting a dictionary is not supported and will panic.
+func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
+ e.fastEncoder.Reset(d, singleBlock)
+ if d != nil {
+ panic("doubleFastEncoder: Reset with dict not supported")
+ }
+}
+
+// Reset will reset the encoder and set a dictionary if not nil.
+func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
+ allDirty := e.allDirty
+ e.fastEncoderDict.Reset(d, singleBlock)
+ if d == nil {
+ return
+ }
+
+ // Init or copy dict table
+ if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
+ if len(e.dictLongTable) != len(e.longTable) {
+ e.dictLongTable = make([]tableEntry, len(e.longTable))
+ }
+ if len(d.content) >= 8 {
+ cv := load6432(d.content, 0)
+ e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+ val: uint32(cv),
+ offset: e.maxMatchOff,
+ }
+ end := int32(len(d.content)) - 8 + e.maxMatchOff
+ for i := e.maxMatchOff + 1; i < end; i++ {
+ cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
+ e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+ val: uint32(cv),
+ offset: i,
+ }
+ }
+ }
+ e.lastDictID = d.id
+ e.allDirty = true
+ }
+ // Reset table to initial state
+ e.cur = e.maxMatchOff
+
+ dirtyShardCnt := 0
+ if !allDirty {
+ for i := range e.longTableShardDirty {
+ if e.longTableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+
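+ // Once more than half the shards are dirty, a single bulk copy of the
+ // whole table beats per-shard restores.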
+ if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
+ copy(e.longTable[:], e.dictLongTable)
+ for i := range e.longTableShardDirty {
+ e.longTableShardDirty[i] = false
+ }
+ return
+ }
+ for i := range e.longTableShardDirty {
+ if !e.longTableShardDirty[i] {
+ continue
+ }
+
+ copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
+ e.longTableShardDirty[i] = false
+ }
+}
+
+func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
+ e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
index 0bdddac..ba4a17e 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@@ -5,15 +5,17 @@
package zstd
import (
+ "fmt"
+ "math"
"math/bits"
-
- "github.com/klauspost/compress/zstd/internal/xxhash"
)
const (
- tableBits = 15 // Bits used in the table
- tableSize = 1 << tableBits // Size of the table
- tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+ tableBits = 15 // Bits used in the table
+ tableSize = 1 << tableBits // Size of the table
+ tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
+ tableShardSize = tableSize / tableShardCnt // Size of an individual shard
+ tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
)
@@ -23,47 +25,15 @@
}
type fastEncoder struct {
- o encParams
- // cur is the offset at the start of hist
- cur int32
- // maximum offset. Should be at least 2x block size.
- maxMatchOff int32
- hist []byte
- crc *xxhash.Digest
- table [tableSize]tableEntry
- tmp [8]byte
- blk *blockEnc
+ fastBase
+ table [tableSize]tableEntry
}
-// CRC returns the underlying CRC writer.
-func (e *fastEncoder) CRC() *xxhash.Digest {
- return e.crc
-}
-
-// AppendCRC will append the CRC to the destination slice and return it.
-func (e *fastEncoder) AppendCRC(dst []byte) []byte {
- crc := e.crc.Sum(e.tmp[:0])
- dst = append(dst, crc[7], crc[6], crc[5], crc[4])
- return dst
-}
-
-// WindowSize returns the window size of the encoder,
-// or a window size small enough to contain the input size, if > 0.
-func (e *fastEncoder) WindowSize(size int) int32 {
- if size > 0 && size < int(e.maxMatchOff) {
- b := int32(1) << uint(bits.Len(uint(size)))
- // Keep minimum window.
- if b < 1024 {
- b = 1024
- }
- return b
- }
- return e.maxMatchOff
-}
-
-// Block returns the current block.
-func (e *fastEncoder) Block() *blockEnc {
- return e.blk
+type fastEncoderDict struct {
+ fastEncoder
+ dictTable []tableEntry
+ tableShardDirty [tableShardCnt]bool
+ allDirty bool
}
// Encode mimics functionality in zstd_fast.c
@@ -74,7 +44,7 @@
)
// Protect against e.cur wraparound.
- for e.cur > (1<<30)+e.maxMatchOff {
+ for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
@@ -94,6 +64,7 @@
e.table[i].offset = v
}
e.cur = e.maxMatchOff
+ break
}
s := e.addBlock(src)
@@ -110,16 +81,12 @@
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 2.
- stepSize := int32(e.o.targetLength)
- if stepSize == 0 {
- stepSize++
- }
- stepSize++
+ const stepSize = 2
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
- const kSearchStrength = 8
+ const kSearchStrength = 7
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
@@ -151,7 +118,7 @@
canRepeat := len(blk.sequences) > 2
for {
- if debug && canRepeat && offset1 == 0 {
+ if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
@@ -167,9 +134,22 @@
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
- lenght := 4 + e.matchlen(s+6, repIndex+4, src)
+ var length int32
+ // length = 4 + e.matchlen(s+6, repIndex+4, src)
+ {
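+ // Inlined match length: XOR-compare 8 bytes at a time; the first nonzero
+ // diff pinpoints the mismatching byte via TrailingZeros64/8. endI rounds
+ // len(a) down to a multiple of 8 for the wide loop.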
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -195,11 +175,11 @@
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + 2
+ s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -212,10 +192,10 @@
if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
// found a regular match
t = candidate.offset - e.cur
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
break
@@ -225,13 +205,13 @@
// found a regular match
t = candidate2.offset - e.cur
s++
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
- if debug && t < 0 {
+ if debugAsserts && t < 0 {
panic("t<0")
}
break
@@ -246,16 +226,29 @@
offset2 = offset1
offset1 = s - t
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && canRepeat && int(offset1) > len(src) {
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the 4-byte match as long as possible.
- l := e.matchlen(s+4, t+4, src) + 4
+ //l := e.matchlen(s+4, t+4, src) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
// Extend backwards
tMin := s - e.maxMatchOff
@@ -292,7 +285,20 @@
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
- l := 4 + e.matchlen(s+4, o2+4, src)
+ //l := 4 + e.matchlen(s+4, o2+4, src)
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
@@ -342,8 +348,9 @@
panic("src too big")
}
}
+
// Protect against e.cur wraparound.
- if e.cur > (1<<30)+e.maxMatchOff {
+ if e.cur >= bufferReset {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
@@ -410,10 +417,23 @@
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
- // lenght := 4 + e.matchlen(s+6, repIndex+4, src)
- lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
+ // length := 4 + e.matchlen(s+6, repIndex+4, src)
+ // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
+ var length int32
+ {
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
- seq.matchLen = uint32(lenght - zstdMinMatch)
+ seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@@ -439,11 +459,11 @@
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
- s += lenght + 2
+ s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
- println("repeat ended", s, lenght)
+ println("repeat ended", s, length)
}
break encodeLoop
@@ -456,12 +476,15 @@
if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
// found a regular match
t = candidate.offset - e.cur
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
+ if debugAsserts && t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
+ }
break
}
@@ -469,13 +492,13 @@
// found a regular match
t = candidate2.offset - e.cur
s++
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
- if debug && s-t > e.maxMatchOff {
+ if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
- if debug && t < 0 {
+ if debugAsserts && t < 0 {
panic("t<0")
}
break
@@ -490,13 +513,29 @@
offset2 = offset1
offset1 = s - t
- if debug && s <= t {
- panic("s <= t")
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
+ if debugAsserts && t < 0 {
+ panic(fmt.Sprintf("t (%d) < 0 ", t))
+ }
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
- l := int32(matchLen(src[s+4:], src[t+4:])) + 4
+ // l := int32(matchLen(src[s+4:], src[t+4:])) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
// Extend backwards
tMin := s - e.maxMatchOff
@@ -534,7 +573,20 @@
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
- l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
+ // l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
@@ -567,90 +619,400 @@
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
-}
-
-func (e *fastEncoder) addBlock(src []byte) int32 {
- // check if we have space already
- if len(e.hist)+len(src) > cap(e.hist) {
- if cap(e.hist) == 0 {
- l := e.maxMatchOff * 2
- // Make it at least 1MB.
- if l < 1<<20 {
- l = 1 << 20
- }
- e.hist = make([]byte, 0, l)
- } else {
- if cap(e.hist) < int(e.maxMatchOff*2) {
- panic("unexpected buffer size")
- }
- // Move down
- offset := int32(len(e.hist)) - e.maxMatchOff
- copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
- e.cur += offset
- e.hist = e.hist[:e.maxMatchOff]
- }
+ // We do not store history, so we must offset e.cur to avoid false matches for next user.
+ if e.cur < bufferReset {
+ e.cur += int32(len(src))
}
- s := int32(len(e.hist))
- e.hist = append(e.hist, src...)
- return s
}
-// useBlock will replace the block with the provided one,
-// but transfer recent offsets from the previous.
-func (e *fastEncoder) UseBlock(enc *blockEnc) {
- enc.reset(e.blk)
- e.blk = enc
-}
+// Encode will encode the content, with a dictionary if initialized for it.
+func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
+ const (
+ inputMargin = 8
+ minNonLiteralBlockSize = 1 + 1 + inputMargin
+ )
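+ // For inputs over 32K (or when everything is already dirty), per-store
+ // shard tracking costs more than it saves, so defer to the plain encoder
+ // and mark every shard dirty.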
+ if e.allDirty || len(src) > 32<<10 {
+ e.fastEncoder.Encode(blk, src)
+ e.allDirty = true
+ return
+ }
+ // Protect against e.cur wraparound.
+ for e.cur >= bufferReset {
+ if len(e.hist) == 0 {
+ for i := range e.table[:] {
+ e.table[i] = tableEntry{}
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
+ // Shift down everything in the table that isn't already too far away.
+ minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
+ for i := range e.table[:] {
+ v := e.table[i].offset
+ if v < minOff {
+ v = 0
+ } else {
+ v = v - e.cur + e.maxMatchOff
+ }
+ e.table[i].offset = v
+ }
+ e.cur = e.maxMatchOff
+ break
+ }
-func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 {
- // Extend the match to be as long as possible.
- return int32(matchLen(src[s:], src[t:]))
-}
+ s := e.addBlock(src)
+ blk.size = len(src)
+ if len(src) < minNonLiteralBlockSize {
+ blk.extraLits = len(src)
+ blk.literals = blk.literals[:len(src)]
+ copy(blk.literals, src)
+ return
+ }
-func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
+ // Override src
+ src = e.hist
+ sLimit := int32(len(src)) - inputMargin
+ // stepSize is the number of bytes to skip on every main loop iteration.
+ // It should be >= 2.
+ const stepSize = 2
+
+ // TEMPLATE
+ const hashLog = tableBits
+ // seems global, but would be nice to tweak.
+ const kSearchStrength = 7
+
+ // nextEmit is where in src the next emitLiteral should start from.
+ nextEmit := s
+ cv := load6432(src, s)
+
+ // Relative offsets
+ offset1 := int32(blk.recentOffsets[0])
+ offset2 := int32(blk.recentOffsets[1])
+
+ addLiterals := func(s *seq, until int32) {
+ if until == nextEmit {
+ return
+ }
+ blk.literals = append(blk.literals, src[nextEmit:until]...)
+ s.litLen = uint32(until - nextEmit)
+ }
if debug {
- if s < 0 {
- panic("s<0")
- }
- if t < 0 {
- panic("t<0")
- }
- if s-t > e.maxMatchOff {
- panic(s - t)
- }
- }
- s1 := int(s) + maxMatchLength - 4
- if s1 > len(src) {
- s1 = len(src)
+ println("recent offsets:", blk.recentOffsets)
}
- // Extend the match to be as long as possible.
- return int32(matchLen(src[s:s1], src[t:]))
+encodeLoop:
+ for {
+ // t will contain the match offset when we find one.
+ // When exiting the search loop, we have already checked 4 bytes.
+ var t int32
+
+ // We will not use repeat offsets across blocks;
+ // this is enforced by not using them for the first 3 matches.
+ canRepeat := len(blk.sequences) > 2
+
+ for {
+ if debugAsserts && canRepeat && offset1 == 0 {
+ panic("offset0 was 0")
+ }
+
+ nextHash := hash6(cv, hashLog)
+ nextHash2 := hash6(cv>>8, hashLog)
+ candidate := e.table[nextHash]
+ candidate2 := e.table[nextHash2]
+ repIndex := s - offset1 + 2
+
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShardDirty(nextHash)
+ e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
+ e.markShardDirty(nextHash2)
+
+ if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
+ // Consider history as well.
+ var seq seq
+ var length int32
+ // length = 4 + e.matchlen(s+6, repIndex+4, src)
+ {
+ a := src[s+6:]
+ b := src[repIndex+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ length = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ seq.matchLen = uint32(length - zstdMinMatch)
+
+ // We might be able to match backwards.
+ // Extend as long as we can.
+ start := s + 2
+ // We end the search early, so we don't risk 0 literals
+ // and have to do special offset treatment.
+ startLimit := nextEmit + 1
+
+ sMin := s - e.maxMatchOff
+ if sMin < 0 {
+ sMin = 0
+ }
+ for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
+ repIndex--
+ start--
+ seq.matchLen++
+ }
+ addLiterals(&seq, start)
+
+ // rep 0
+ seq.offset = 1
+ if debugSequences {
+ println("repeat sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ s += length + 2
+ nextEmit = s
+ if s >= sLimit {
+ if debug {
+ println("repeat ended", s, length)
+
+ }
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ continue
+ }
+ coffset0 := s - (candidate.offset - e.cur)
+ coffset1 := s - (candidate2.offset - e.cur) + 1
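+ // candidate2 indexes position s+1 (it was hashed from cv>>8), so the +1
+ // makes coffset1 the true match distance for that candidate.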
+ if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
+ // found a regular match
+ t = candidate.offset - e.cur
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ break
+ }
+
+ if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
+ // found a regular match
+ t = candidate2.offset - e.cur
+ s++
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+ if debugAsserts && s-t > e.maxMatchOff {
+ panic("s - t >e.maxMatchOff")
+ }
+ if debugAsserts && t < 0 {
+ panic("t<0")
+ }
+ break
+ }
+ s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+ }
+ // A 4-byte match has been found. We'll later see if it is longer than 4 bytes.
+ offset2 = offset1
+ offset1 = s - t
+
+ if debugAsserts && s <= t {
+ panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
+ }
+
+ if debugAsserts && canRepeat && int(offset1) > len(src) {
+ panic("invalid offset")
+ }
+
+ // Extend the 4-byte match as long as possible.
+ //l := e.matchlen(s+4, t+4, src) + 4
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[t+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Extend backwards
+ tMin := s - e.maxMatchOff
+ if tMin < 0 {
+ tMin = 0
+ }
+ for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
+ s--
+ t--
+ l++
+ }
+
+ // Write our sequence.
+ var seq seq
+ seq.litLen = uint32(s - nextEmit)
+ seq.matchLen = uint32(l - zstdMinMatch)
+ if seq.litLen > 0 {
+ blk.literals = append(blk.literals, src[nextEmit:s]...)
+ }
+ // Don't use repeat offsets
+ seq.offset = uint32(s-t) + 3
+ s += l
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+ nextEmit = s
+ if s >= sLimit {
+ break encodeLoop
+ }
+ cv = load6432(src, s)
+
+ // Check offset 2
+ if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
+ // We have at least a 4-byte match.
+ // No need to check backwards: we come straight from a match.
+ //l := 4 + e.matchlen(s+4, o2+4, src)
+ var l int32
+ {
+ a := src[s+4:]
+ b := src[o2+4:]
+ endI := len(a) & (math.MaxInt32 - 7)
+ l = int32(endI) + 4
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
+ break
+ }
+ }
+ }
+
+ // Store this, since we have it.
+ nextHash := hash6(cv, hashLog)
+ e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
+ e.markShardDirty(nextHash)
+ seq.matchLen = uint32(l) - zstdMinMatch
+ seq.litLen = 0
+ // Since litlen is always 0, this is offset 1.
+ seq.offset = 1
+ s += l
+ nextEmit = s
+ if debugSequences {
+ println("sequence", seq, "next s:", s)
+ }
+ blk.sequences = append(blk.sequences, seq)
+
+ // Swap offset 1 and 2.
+ offset1, offset2 = offset2, offset1
+ if s >= sLimit {
+ break encodeLoop
+ }
+ // Prepare next loop.
+ cv = load6432(src, s)
+ }
+ }
+
+ if int(nextEmit) < len(src) {
+ blk.literals = append(blk.literals, src[nextEmit:]...)
+ blk.extraLits = len(src) - int(nextEmit)
+ }
+ blk.recentOffsets[0] = uint32(offset1)
+ blk.recentOffsets[1] = uint32(offset2)
+ if debug {
+ println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
+ }
}
-// Reset the encoding table.
-func (e *fastEncoder) Reset() {
- if e.blk == nil {
- e.blk = &blockEnc{}
- e.blk.init()
- } else {
- e.blk.reset(nil)
+// Reset will reset the encoder state. Setting a dictionary is not supported and will panic.
+func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d != nil {
+ panic("fastEncoder: Reset with dict")
}
- e.blk.initNewEncode()
- if e.crc == nil {
- e.crc = xxhash.New()
- } else {
- e.crc.Reset()
+}
+
+// Reset will reset the encoder and set a dictionary if not nil.
+func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
+ e.resetBase(d, singleBlock)
+ if d == nil {
+ return
}
- if cap(e.hist) < int(e.maxMatchOff*2) {
- l := e.maxMatchOff * 2
- // Make it at least 1MB.
- if l < 1<<20 {
- l = 1 << 20
+
+ // Init or copy dict table
+ if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
+ if len(e.dictTable) != len(e.table) {
+ e.dictTable = make([]tableEntry, len(e.table))
}
- e.hist = make([]byte, 0, l)
+ if true {
+ end := e.maxMatchOff + int32(len(d.content)) - 8
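+ // Hash three consecutive positions per 3-byte step so every dictionary
+ // position is indexed exactly once.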
+ for i := e.maxMatchOff; i < end; i += 3 {
+ const hashLog = tableBits
+
+ cv := load6432(d.content, i-e.maxMatchOff)
+ nextHash := hash6(cv, hashLog) // 0 -> 5
+ nextHash1 := hash6(cv>>8, hashLog) // 1 -> 6
+ nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
+ e.dictTable[nextHash] = tableEntry{
+ val: uint32(cv),
+ offset: i,
+ }
+ e.dictTable[nextHash1] = tableEntry{
+ val: uint32(cv >> 8),
+ offset: i + 1,
+ }
+ e.dictTable[nextHash2] = tableEntry{
+ val: uint32(cv >> 16),
+ offset: i + 2,
+ }
+ }
+ }
+ e.lastDictID = d.id
+ e.allDirty = true
}
- // We offset current position so everything will be out of reach
- e.cur += e.maxMatchOff + int32(len(e.hist))
- e.hist = e.hist[:0]
+
+ e.cur = e.maxMatchOff
+ dirtyShardCnt := 0
+ if !e.allDirty {
+ for i := range e.tableShardDirty {
+ if e.tableShardDirty[i] {
+ dirtyShardCnt++
+ }
+ }
+ }
+
+ const shardCnt = tableShardCnt
+ const shardSize = tableShardSize
+ if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
+ copy(e.table[:], e.dictTable)
+ for i := range e.tableShardDirty {
+ e.tableShardDirty[i] = false
+ }
+ e.allDirty = false
+ return
+ }
+ for i := range e.tableShardDirty {
+ if !e.tableShardDirty[i] {
+ continue
+ }
+
+ copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
+ e.tableShardDirty[i] = false
+ }
+ e.allDirty = false
+}
+
+func (e *fastEncoderDict) markAllShardsDirty() {
+ e.allDirty = true
+}
+
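+// markShardDirty records that the shard holding entryNum has diverged from
+// the dictionary snapshot and must be restored on the next Reset.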
+func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
+ e.tableShardDirty[entryNum/tableShardSize] = true
}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_params.go b/vendor/github.com/klauspost/compress/zstd/enc_params.go
deleted file mode 100644
index b6779ec..0000000
--- a/vendor/github.com/klauspost/compress/zstd/enc_params.go
+++ /dev/null
@@ -1,154 +0,0 @@
-// Copyright 2019+ Klaus Post. All rights reserved.
-// License information can be found in the LICENSE file.
-// Based on work by Yann Collet, released under BSD License.
-
-package zstd
-
-type encParams struct {
- // largest match distance : larger == more compression, more memory needed during decompression
- windowLog uint8
-
- // fully searched segment : larger == more compression, slower, more memory (useless for fast)
- chainLog uint8
-
- // dispatch table : larger == faster, more memory
- hashLog uint8
-
- // < nb of searches : larger == more compression, slower
- searchLog uint8
-
- // < match length searched : larger == faster decompression, sometimes less compression
- minMatch uint8
-
- // acceptable match size for optimal parser (only) : larger == more compression, slower
- targetLength uint32
-
- // see ZSTD_strategy definition above
- strategy strategy
-}
-
-// strategy defines the algorithm to use when generating sequences.
-type strategy uint8
-
-const (
- // Compression strategies, listed from fastest to strongest
- strategyFast strategy = iota + 1
- strategyDfast
- strategyGreedy
- strategyLazy
- strategyLazy2
- strategyBtlazy2
- strategyBtopt
- strategyBtultra
- strategyBtultra2
- // note : new strategies _might_ be added in the future.
- // Only the order (from fast to strong) is guaranteed
-
-)
-
-var defEncParams = [4][]encParams{
- { // "default" - for any srcSize > 256 KB
- // W, C, H, S, L, TL, strat
- {19, 12, 13, 1, 6, 1, strategyFast}, // base for negative levels
- {19, 13, 14, 1, 7, 0, strategyFast}, // level 1
- {20, 15, 16, 1, 6, 0, strategyFast}, // level 2
- {21, 16, 17, 1, 5, 1, strategyDfast}, // level 3
- {21, 18, 18, 1, 5, 1, strategyDfast}, // level 4
- {21, 18, 19, 2, 5, 2, strategyGreedy}, // level 5
- {21, 19, 19, 3, 5, 4, strategyGreedy}, // level 6
- {21, 19, 19, 3, 5, 8, strategyLazy}, // level 7
- {21, 19, 19, 3, 5, 16, strategyLazy2}, // level 8
- {21, 19, 20, 4, 5, 16, strategyLazy2}, // level 9
- {22, 20, 21, 4, 5, 16, strategyLazy2}, // level 10
- {22, 21, 22, 4, 5, 16, strategyLazy2}, // level 11
- {22, 21, 22, 5, 5, 16, strategyLazy2}, // level 12
- {22, 21, 22, 5, 5, 32, strategyBtlazy2}, // level 13
- {22, 22, 23, 5, 5, 32, strategyBtlazy2}, // level 14
- {22, 23, 23, 6, 5, 32, strategyBtlazy2}, // level 15
- {22, 22, 22, 5, 5, 48, strategyBtopt}, // level 16
- {23, 23, 22, 5, 4, 64, strategyBtopt}, // level 17
- {23, 23, 22, 6, 3, 64, strategyBtultra}, // level 18
- {23, 24, 22, 7, 3, 256, strategyBtultra2}, // level 19
- {25, 25, 23, 7, 3, 256, strategyBtultra2}, // level 20
- {26, 26, 24, 7, 3, 512, strategyBtultra2}, // level 21
- {27, 27, 25, 9, 3, 999, strategyBtultra2}, // level 22
- },
- { // for srcSize <= 256 KB
- // W, C, H, S, L, T, strat
- {18, 12, 13, 1, 5, 1, strategyFast}, // base for negative levels
- {18, 13, 14, 1, 6, 0, strategyFast}, // level 1
- {18, 14, 14, 1, 5, 1, strategyDfast}, // level 2
- {18, 16, 16, 1, 4, 1, strategyDfast}, // level 3
- {18, 16, 17, 2, 5, 2, strategyGreedy}, // level 4.
- {18, 18, 18, 3, 5, 2, strategyGreedy}, // level 5.
- {18, 18, 19, 3, 5, 4, strategyLazy}, // level 6.
- {18, 18, 19, 4, 4, 4, strategyLazy}, // level 7
- {18, 18, 19, 4, 4, 8, strategyLazy2}, // level 8
- {18, 18, 19, 5, 4, 8, strategyLazy2}, // level 9
- {18, 18, 19, 6, 4, 8, strategyLazy2}, // level 10
- {18, 18, 19, 5, 4, 12, strategyBtlazy2}, // level 11.
- {18, 19, 19, 7, 4, 12, strategyBtlazy2}, // level 12.
- {18, 18, 19, 4, 4, 16, strategyBtopt}, // level 13
- {18, 18, 19, 4, 3, 32, strategyBtopt}, // level 14.
- {18, 18, 19, 6, 3, 128, strategyBtopt}, // level 15.
- {18, 19, 19, 6, 3, 128, strategyBtultra}, // level 16.
- {18, 19, 19, 8, 3, 256, strategyBtultra}, // level 17.
- {18, 19, 19, 6, 3, 128, strategyBtultra2}, // level 18.
- {18, 19, 19, 8, 3, 256, strategyBtultra2}, // level 19.
- {18, 19, 19, 10, 3, 512, strategyBtultra2}, // level 20.
- {18, 19, 19, 12, 3, 512, strategyBtultra2}, // level 21.
- {18, 19, 19, 13, 3, 999, strategyBtultra2}, // level 22.
- },
- { // for srcSize <= 128 KB
- // W, C, H, S, L, T, strat
- {17, 12, 12, 1, 5, 1, strategyFast}, // base for negative levels
- {17, 12, 13, 1, 6, 0, strategyFast}, // level 1
- {17, 13, 15, 1, 5, 0, strategyFast}, // level 2
- {17, 15, 16, 2, 5, 1, strategyDfast}, // level 3
- {17, 17, 17, 2, 4, 1, strategyDfast}, // level 4
- {17, 16, 17, 3, 4, 2, strategyGreedy}, // level 5
- {17, 17, 17, 3, 4, 4, strategyLazy}, // level 6
- {17, 17, 17, 3, 4, 8, strategyLazy2}, // level 7
- {17, 17, 17, 4, 4, 8, strategyLazy2}, // level 8
- {17, 17, 17, 5, 4, 8, strategyLazy2}, // level 9
- {17, 17, 17, 6, 4, 8, strategyLazy2}, // level 10
- {17, 17, 17, 5, 4, 8, strategyBtlazy2}, // level 11
- {17, 18, 17, 7, 4, 12, strategyBtlazy2}, // level 12
- {17, 18, 17, 3, 4, 12, strategyBtopt}, // level 13.
- {17, 18, 17, 4, 3, 32, strategyBtopt}, // level 14.
- {17, 18, 17, 6, 3, 256, strategyBtopt}, // level 15.
- {17, 18, 17, 6, 3, 128, strategyBtultra}, // level 16.
- {17, 18, 17, 8, 3, 256, strategyBtultra}, // level 17.
- {17, 18, 17, 10, 3, 512, strategyBtultra}, // level 18.
- {17, 18, 17, 5, 3, 256, strategyBtultra2}, // level 19.
- {17, 18, 17, 7, 3, 512, strategyBtultra2}, // level 20.
- {17, 18, 17, 9, 3, 512, strategyBtultra2}, // level 21.
- {17, 18, 17, 11, 3, 999, strategyBtultra2}, // level 22.
- },
- { // for srcSize <= 16 KB
- // W, C, H, S, L, T, strat
- {14, 12, 13, 1, 5, 1, strategyFast}, // base for negative levels
- {14, 14, 15, 1, 5, 0, strategyFast}, // level 1
- {14, 14, 15, 1, 4, 0, strategyFast}, // level 2
- {14, 14, 15, 2, 4, 1, strategyDfast}, // level 3
- {14, 14, 14, 4, 4, 2, strategyGreedy}, // level 4
- {14, 14, 14, 3, 4, 4, strategyLazy}, // level 5.
- {14, 14, 14, 4, 4, 8, strategyLazy2}, // level 6
- {14, 14, 14, 6, 4, 8, strategyLazy2}, // level 7
- {14, 14, 14, 8, 4, 8, strategyLazy2}, // level 8.
- {14, 15, 14, 5, 4, 8, strategyBtlazy2}, // level 9.
- {14, 15, 14, 9, 4, 8, strategyBtlazy2}, // level 10.
- {14, 15, 14, 3, 4, 12, strategyBtopt}, // level 11.
- {14, 15, 14, 4, 3, 24, strategyBtopt}, // level 12.
- {14, 15, 14, 5, 3, 32, strategyBtultra}, // level 13.
- {14, 15, 15, 6, 3, 64, strategyBtultra}, // level 14.
- {14, 15, 15, 7, 3, 256, strategyBtultra}, // level 15.
- {14, 15, 15, 5, 3, 48, strategyBtultra2}, // level 16.
- {14, 15, 15, 6, 3, 128, strategyBtultra2}, // level 17.
- {14, 15, 15, 7, 3, 256, strategyBtultra2}, // level 18.
- {14, 15, 15, 8, 3, 256, strategyBtultra2}, // level 19.
- {14, 15, 15, 8, 3, 512, strategyBtultra2}, // level 20.
- {14, 15, 15, 9, 3, 512, strategyBtultra2}, // level 21.
- {14, 15, 15, 10, 3, 999, strategyBtultra2}, // level 22.
- },
-}
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index 366dd66..4871dd0 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -35,21 +35,22 @@
AppendCRC([]byte) []byte
WindowSize(size int) int32
UseBlock(*blockEnc)
- Reset()
+ Reset(d *dict, singleBlock bool)
}
type encoderState struct {
- w io.Writer
- filling []byte
- current []byte
- previous []byte
- encoder encoder
- writing *blockEnc
- err error
- writeErr error
- nWritten int64
- headerWritten bool
- eofWritten bool
+ w io.Writer
+ filling []byte
+ current []byte
+ previous []byte
+ encoder encoder
+ writing *blockEnc
+ err error
+ writeErr error
+ nWritten int64
+ headerWritten bool
+ eofWritten bool
+ fullFrameWritten bool
// This waitgroup indicates an encode is running.
wg sync.WaitGroup
@@ -71,27 +72,24 @@
}
if w != nil {
e.Reset(w)
- } else {
- e.init.Do(func() {
- e.initialize()
- })
}
return &e, nil
}
func (e *Encoder) initialize() {
+ if e.o.concurrent == 0 {
+ e.o.setDefault()
+ }
e.encoders = make(chan encoder, e.o.concurrent)
for i := 0; i < e.o.concurrent; i++ {
- e.encoders <- e.o.encoder()
+ enc := e.o.encoder()
+ e.encoders <- enc
}
}
// Reset will re-initialize the writer and new writes will encode to the supplied writer
// as a new, independent stream.
func (e *Encoder) Reset(w io.Writer) {
- e.init.Do(func() {
- e.initialize()
- })
s := &e.state
s.wg.Wait()
s.wWg.Wait()
@@ -108,16 +106,17 @@
s.encoder = e.o.encoder()
}
if s.writing == nil {
- s.writing = &blockEnc{}
+ s.writing = &blockEnc{lowMem: e.o.lowMem}
s.writing.init()
}
s.writing.initNewEncode()
s.filling = s.filling[:0]
s.current = s.current[:0]
s.previous = s.previous[:0]
- s.encoder.Reset()
+ s.encoder.Reset(e.o.dict, false)
s.headerWritten = false
s.eofWritten = false
+ s.fullFrameWritten = false
s.w = w
s.err = nil
s.nWritten = 0
@@ -156,7 +155,7 @@
if err != nil {
return n, err
}
- if debug && len(s.filling) > 0 {
+ if debugAsserts && len(s.filling) > 0 {
panic(len(s.filling))
}
}
@@ -176,14 +175,38 @@
return fmt.Errorf("block > maxStoreBlockSize")
}
if !s.headerWritten {
+ // If we have a single block to encode, do a synchronous compression.
+ if final && len(s.filling) == 0 && !e.o.fullZero {
+ s.headerWritten = true
+ s.fullFrameWritten = true
+ s.eofWritten = true
+ return nil
+ }
+ if final && len(s.filling) > 0 {
+ s.current = e.EncodeAll(s.filling, s.current[:0])
+ var n2 int
+ n2, s.err = s.w.Write(s.current)
+ if s.err != nil {
+ return s.err
+ }
+ s.nWritten += int64(n2)
+ s.current = s.current[:0]
+ s.filling = s.filling[:0]
+ s.headerWritten = true
+ s.fullFrameWritten = true
+ s.eofWritten = true
+ return nil
+ }
+
var tmp [maxHeaderSize]byte
fh := frameHeader{
ContentSize: 0,
WindowSize: uint32(s.encoder.WindowSize(0)),
SingleSegment: false,
Checksum: e.o.crc,
- DictID: 0,
+ DictID: e.o.dict.ID(),
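+ // dict.ID() is assumed to be nil-safe, returning 0 when no dictionary is set.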
}
+
dst, err := fh.appendTo(tmp[:0])
if err != nil {
return err
@@ -263,7 +286,7 @@
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(src) != len(blk.literals) || len(src) != e.o.blockSize {
- err = blk.encode(e.o.noEntropy)
+ err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
case errIncompressible:
@@ -293,12 +316,20 @@
if debug {
println("Using ReadFrom")
}
- // Maybe handle stuff queued?
+
+ // Flush any current writes.
+ if len(e.state.filling) > 0 {
+ if err := e.nextBlock(false); err != nil {
+ return 0, err
+ }
+ }
e.state.filling = e.state.filling[:e.o.blockSize]
src := e.state.filling
for {
n2, err := r.Read(src)
- _, _ = e.state.encoder.CRC().Write(src[:n2])
+ if e.o.crc {
+ _, _ = e.state.encoder.CRC().Write(src[:n2])
+ }
// src is now the unfilled part...
src = src[n2:]
n += int64(n2)
@@ -308,14 +339,14 @@
if debug {
println("ReadFrom: got EOF final block:", len(e.state.filling))
}
- return n, e.nextBlock(true)
+ return n, nil
+ case nil:
default:
if debug {
println("ReadFrom: got error:", err)
}
e.state.err = err
return n, err
- case nil:
}
if len(src) > 0 {
if debug {
@@ -363,6 +394,9 @@
if err != nil {
return err
}
+ if e.state.fullFrameWritten {
+ return s.err
+ }
s.wg.Wait()
s.wWg.Wait()
@@ -422,18 +456,13 @@
}
return dst
}
- e.init.Do(func() {
- e.o.setDefault()
- e.initialize()
- })
+ e.init.Do(e.initialize)
enc := <-e.encoders
defer func() {
// Release encoder reference to last block.
- enc.Reset()
+ // If a non-single block is needed the encoder will reset again.
e.encoders <- enc
}()
- enc.Reset()
- blk := enc.Block()
// Use single segments when above minimum window and below 1MB.
single := len(src) < 1<<20 && len(src) > MinWindowSize
if e.o.single != nil {
@@ -444,11 +473,11 @@
WindowSize: uint32(enc.WindowSize(len(src))),
SingleSegment: single,
Checksum: e.o.crc,
- DictID: 0,
+ DictID: e.o.dict.ID(),
}
// If less than 1MB, allocate a buffer up front.
- if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 {
+ if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)
@@ -456,14 +485,20 @@
panic(err)
}
- if len(src) <= e.o.blockSize && len(src) <= maxBlockSize {
+ // If we can do everything in one block, prefer that.
+ if len(src) <= maxCompressedBlockSize {
+ enc.Reset(e.o.dict, true)
// Slightly faster with no history and everything in one block.
if e.o.crc {
_, _ = enc.CRC().Write(src)
}
- blk.reset(nil)
+ blk := enc.Block()
blk.last = true
- enc.EncodeNoHist(blk, src)
+ if e.o.dict == nil {
+ enc.EncodeNoHist(blk, src)
+ } else {
+ enc.Encode(blk, src)
+ }
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
@@ -472,7 +507,7 @@
if len(blk.literals) != len(src) || len(src) != e.o.blockSize {
// Output directly to dst
blk.output = dst
- err = blk.encode(e.o.noEntropy)
+ err = blk.encode(src, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
@@ -488,6 +523,8 @@
}
blk.output = oldout
} else {
+ enc.Reset(e.o.dict, false)
+ blk := enc.Block()
for len(src) > 0 {
todo := src
if len(todo) > e.o.blockSize {
@@ -497,7 +534,6 @@
if e.o.crc {
_, _ = enc.CRC().Write(todo)
}
- blk.reset(nil)
blk.pushOffsets()
enc.Encode(blk, todo)
if len(src) == 0 {
@@ -507,7 +543,7 @@
// If we got the exact same number of literals as input,
// assume the literals cannot be compressed.
if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize {
- err = blk.encode(e.o.noEntropy)
+ err = blk.encode(todo, e.o.noEntropy, !e.o.allLitEntropy)
}
switch err {
@@ -522,6 +558,7 @@
default:
panic(err)
}
+ blk.reset(nil)
}
}
if e.o.crc {
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
index 40eb457..16d4ab6 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@@ -12,36 +12,56 @@
// options retains accumulated state of multiple options.
type encoderOptions struct {
- concurrent int
- crc bool
- single *bool
- pad int
- blockSize int
- windowSize int
- level EncoderLevel
- fullZero bool
- noEntropy bool
+ concurrent int
+ level EncoderLevel
+ single *bool
+ pad int
+ blockSize int
+ windowSize int
+ crc bool
+ fullZero bool
+ noEntropy bool
+ allLitEntropy bool
+ customWindow bool
+ customALEntropy bool
+ lowMem bool
+ dict *dict
}
func (o *encoderOptions) setDefault() {
*o = encoderOptions{
- // use less ram: true for now, but may change.
- concurrent: runtime.GOMAXPROCS(0),
- crc: true,
- single: nil,
- blockSize: 1 << 16,
- windowSize: 1 << 22,
- level: SpeedDefault,
+ concurrent: runtime.GOMAXPROCS(0),
+ crc: true,
+ single: nil,
+ blockSize: 1 << 16,
+ windowSize: 8 << 20,
+ level: SpeedDefault,
+ allLitEntropy: true,
+ lowMem: false,
}
}
// encoder returns an encoder with the selected options.
func (o encoderOptions) encoder() encoder {
switch o.level {
- case SpeedDefault:
- return &doubleFastEncoder{fastEncoder: fastEncoder{maxMatchOff: int32(o.windowSize)}}
case SpeedFastest:
- return &fastEncoder{maxMatchOff: int32(o.windowSize)}
+ if o.dict != nil {
+ return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ }
+ return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+
+ case SpeedDefault:
+ if o.dict != nil {
+ return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
+ }
+ return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ case SpeedBetterCompression:
+ if o.dict != nil {
+ return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
+ }
+ return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
+ case SpeedBestCompression:
+ return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
}
panic("unknown compression level")
}
@@ -53,7 +73,7 @@
}
// WithEncoderConcurrency will set the concurrency,
-// meaning the maximum number of decoders to run concurrently.
+// meaning the maximum number of encoders to run concurrently.
// The value supplied must be at least 1.
// By default this will be set to GOMAXPROCS.
func WithEncoderConcurrency(n int) EOption {
@@ -67,7 +87,7 @@
}
// WithWindowSize will set the maximum allowed back-reference distance.
-// The value must be a power of two between WindowSizeMin and WindowSizeMax.
+// The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer.
// The default value is determined by the compression level.
@@ -83,6 +103,7 @@
}
o.windowSize = n
+ o.customWindow = true
if o.blockSize > o.windowSize {
o.blockSize = o.windowSize
}
@@ -130,25 +151,25 @@
// This is roughly equivalent to the default Zstandard mode (level 3).
SpeedDefault
+ // SpeedBetterCompression will yield better compression than the default.
+ // Currently it is about zstd level 7-8 with ~2x-3x the default CPU usage.
+ // Note that the CPU usage of this mode may increase in future versions.
+ SpeedBetterCompression
+
+ // SpeedBestCompression will choose the best available compression option.
+ // This will offer the best compression no matter the CPU cost.
+ SpeedBestCompression
+
// speedLast should be kept as the last actual compression option.
// This is not for external usage, but is used to keep track of the valid options.
speedLast
-
- // SpeedBetterCompression will (in the future) yield better compression than the default,
- // but at approximately 4x the CPU usage of the default.
- // For now this is not implemented.
- SpeedBetterCompression = SpeedDefault
-
- // SpeedBestCompression will choose the best available compression option.
- // For now this is not implemented.
- SpeedBestCompression = SpeedDefault
)
// EncoderLevelFromString will convert a string representation of an encoding level back
// to a compression level. The compare is not case sensitive.
// If the string wasn't recognized, (false, SpeedDefault) will be returned.
func EncoderLevelFromString(s string) (bool, EncoderLevel) {
- for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ {
+ for l := speedNotSet + 1; l < speedLast; l++ {
if strings.EqualFold(s, l.String()) {
return true, l
}
@@ -163,8 +184,12 @@
switch {
case level < 3:
return SpeedFastest
- case level >= 3:
+ case level >= 3 && level < 6:
return SpeedDefault
+ case level >= 6 && level < 10:
+ return SpeedBetterCompression
+ case level >= 10:
+ return SpeedBestCompression
}
return SpeedDefault
}
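
A small sketch of the two mapping helpers touched above, assuming the exported names from this diff:

    package example

    import "github.com/klauspost/compress/zstd"

    func levelMappings() (zstd.EncoderLevel, zstd.EncoderLevel) {
    	// "best" now resolves to the real SpeedBestCompression level
    	// instead of aliasing SpeedDefault.
    	_, fromString := zstd.EncoderLevelFromString("best")
    	// zstd CLI level 7 falls in the new 6-9 bucket.
    	fromZstd := zstd.EncoderLevelFromZstd(7)
    	return fromString, fromZstd
    }
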
@@ -176,6 +201,10 @@
return "fastest"
case SpeedDefault:
return "default"
+ case SpeedBetterCompression:
+ return "better"
+ case SpeedBestCompression:
+ return "best"
default:
return "invalid"
}
@@ -189,6 +218,22 @@
return fmt.Errorf("unknown encoder level")
}
o.level = l
+ if !o.customWindow {
+ switch o.level {
+ case SpeedFastest:
+ o.windowSize = 4 << 20
+ case SpeedDefault:
+ o.windowSize = 8 << 20
+ case SpeedBetterCompression:
+ o.windowSize = 16 << 20
+ case SpeedBestCompression:
+ o.windowSize = 32 << 20
+ }
+ }
+ if !o.customALEntropy {
+ o.allLitEntropy = l > SpeedFastest
+ }
+
return nil
}
}
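
Because WithEncoderLevel now installs a per-level default window size, combining it with an explicit window is order-independent: WithWindowSize sets customWindow, and WithEncoderLevel only applies its default when that flag is unset. A sketch:

    package example

    import (
    	"io"

    	"github.com/klauspost/compress/zstd"
    )

    // The explicit 1 MiB window wins in either option order.
    func newBestSmallWindow(w io.Writer) (*zstd.Encoder, error) {
    	return zstd.NewWriter(w,
    		zstd.WithWindowSize(1<<20),
    		zstd.WithEncoderLevel(zstd.SpeedBestCompression),
    	)
    }
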
@@ -203,6 +248,18 @@
}
}
+// WithAllLitEntropyCompression will apply entropy compression if no matches are found.
+// Disabling this will skip incompressible data faster, but when there are no
+// matches and the character distribution is skewed, compression is lost.
+// Default value depends on the compression level selected.
+func WithAllLitEntropyCompression(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.customALEntropy = true
+ o.allLitEntropy = b
+ return nil
+ }
+}
+
// WithNoEntropyCompression will always skip entropy compression of literals.
// This can be useful if content has matches, but unlikely to benefit from entropy
// compression. Usually the slight speed improvement is not worth enabling this.
@@ -229,3 +286,27 @@
return nil
}
}
+
+// WithLowerEncoderMem will trade lower memory usage for slower encoding speed
+// in some cases.
+// This will not change the window size, which is the primary means of reducing
+// memory usage. See WithWindowSize.
+func WithLowerEncoderMem(b bool) EOption {
+ return func(o *encoderOptions) error {
+ o.lowMem = b
+ return nil
+ }
+}
+
+// WithEncoderDict allows registering a dictionary that will be used for encoding.
+// The encoder *may* choose to use no dictionary instead for certain payloads.
+func WithEncoderDict(dict []byte) EOption {
+ return func(o *encoderOptions) error {
+ d, err := loadDict(dict)
+ if err != nil {
+ return err
+ }
+ o.dict = d
+ return nil
+ }
+}
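
Wiring the new encoder options together, with the matching decoder side; a minimal sketch that assumes the decoder's WithDecoderDicts option from the same package (not shown in this diff):

    package example

    import (
    	"io"

    	"github.com/klauspost/compress/zstd"
    )

    // newDictWriter combines the options added in this change. dict must be
    // a zstd dictionary blob; loadDict will reject anything else.
    func newDictWriter(w io.Writer, dict []byte) (*zstd.Encoder, error) {
    	return zstd.NewWriter(w,
    		zstd.WithEncoderDict(dict),
    		zstd.WithLowerEncoderMem(true),
    		zstd.WithAllLitEntropyCompression(true),
    	)
    }

    // newDictReader creates a decoder that can resolve the same dictionary ID.
    func newDictReader(r io.Reader, dict []byte) (*zstd.Decoder, error) {
    	return zstd.NewReader(r, zstd.WithDecoderDicts(dict))
    }
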
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 4079074..693c5f0 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -16,16 +16,11 @@
)
type frameDec struct {
- o decoderOptions
- crc hash.Hash64
- frameDone sync.WaitGroup
- offset int64
+ o decoderOptions
+ crc hash.Hash64
+ offset int64
- WindowSize uint64
- DictionaryID uint32
- FrameContentSize uint64
- HasCheckSum bool
- SingleSegment bool
+ WindowSize uint64
// maxWindowSize is the maximum window size to support.
// should never be bigger than max-int.
@@ -42,15 +37,22 @@
// Byte buffer that can be reused for small input blocks.
bBuf byteBuf
+ FrameContentSize uint64
+ frameDone sync.WaitGroup
+
+ DictionaryID *uint32
+ HasCheckSum bool
+ SingleSegment bool
+
// asyncRunning indicates whether the async routine processes input on 'decoding'.
- asyncRunning bool
asyncRunningMu sync.Mutex
+ asyncRunning bool
}
const (
// The minimum Window_Size is 1 KB.
MinWindowSize = 1 << 10
- MaxWindowSize = 1 << 30
+ MaxWindowSize = 1 << 29
)
var (
@@ -119,7 +121,7 @@
d.SingleSegment = fhd&(1<<5) != 0
if fhd&(1<<3) != 0 {
- return errors.New("Reserved bit set on frame header")
+ return errors.New("reserved bit set on frame header")
}
// Read Window_Descriptor
@@ -140,7 +142,7 @@
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
- d.DictionaryID = 0
+ d.DictionaryID = nil
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
@@ -152,19 +154,22 @@
}
return io.ErrUnexpectedEOF
}
+ var id uint32
switch size {
case 1:
- d.DictionaryID = uint32(b[0])
+ id = uint32(b[0])
case 2:
- d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
+ id = uint32(b[0]) | (uint32(b[1]) << 8)
case 4:
- d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
+ id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
if debug {
- println("Dict size", size, "ID:", d.DictionaryID)
+ println("Dict size", size, "ID:", id)
}
- if d.DictionaryID != 0 {
- return ErrUnknownDictionary
+ if id > 0 {
+ // ID 0 means "sorry, no dictionary anyway".
+ // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
+ d.DictionaryID = &id
}
}
@@ -231,7 +236,11 @@
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
- d.history.maxSize = d.history.windowSize + maxBlockSize
+ if d.o.lowMem && d.history.windowSize < maxBlockSize {
+ d.history.maxSize = d.history.windowSize * 2
+ } else {
+ d.history.maxSize = d.history.windowSize + maxBlockSize
+ }
// history contains input - maybe we do something
d.rawInput = br
return nil
@@ -318,8 +327,8 @@
func (d *frameDec) initAsync() {
if !d.o.lowMem && !d.SingleSegment {
- // set max extra size history to 20MB.
- d.history.maxSize = d.history.windowSize + maxBlockSize*10
+ // set max extra size history to 10MB.
+ d.history.maxSize = d.history.windowSize + maxBlockSize*5
}
// re-alloc if more than one extra block size.
if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize {
@@ -345,8 +354,6 @@
// When the frame has finished decoding the *bufio.Reader
// containing the remaining input will be sent on frameDec.frameDone.
func (d *frameDec) startDecoder(output chan decodeOutput) {
- // TODO: Init to dictionary
- d.history.reset()
written := int64(0)
defer func() {
@@ -439,8 +446,6 @@
// runDecoder will create a sync decoder that will decode a block of data.
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
- // TODO: Init to dictionary
- d.history.reset()
saved := d.history.b
// We use the history for output to avoid copying it.
diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go
index 4479cfe..4ef7f5a 100644
--- a/vendor/github.com/klauspost/compress/zstd/frameenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@@ -5,6 +5,7 @@
package zstd
import (
+ "encoding/binary"
"fmt"
"io"
"math"
@@ -16,7 +17,7 @@
WindowSize uint32
SingleSegment bool
Checksum bool
- DictID uint32 // Not stored.
+ DictID uint32
}
const maxHeaderSize = 14
@@ -30,6 +31,24 @@
if f.SingleSegment {
fhd |= 1 << 5
}
+
+ var dictIDContent []byte
+ if f.DictID > 0 {
+ var tmp [4]byte
+ if f.DictID < 256 {
+ fhd |= 1
+ tmp[0] = uint8(f.DictID)
+ dictIDContent = tmp[:1]
+ } else if f.DictID < 1<<16 {
+ fhd |= 2
+ binary.LittleEndian.PutUint16(tmp[:2], uint16(f.DictID))
+ dictIDContent = tmp[:2]
+ } else {
+ fhd |= 3
+ binary.LittleEndian.PutUint32(tmp[:4], f.DictID)
+ dictIDContent = tmp[:4]
+ }
+ }
var fcs uint8
if f.ContentSize >= 256 {
fcs++
@@ -40,6 +59,7 @@
if f.ContentSize >= 0xffffffff {
fcs++
}
+
fhd |= fcs << 6
dst = append(dst, fhd)
@@ -48,7 +68,9 @@
windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3
dst = append(dst, uint8(windowLog))
}
-
+ if f.DictID > 0 {
+ dst = append(dst, dictIDContent...)
+ }
switch fcs {
case 0:
if f.SingleSegment {
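
The width of the Dictionary_ID field written above is recorded in the low two bits of the frame header descriptor. A standalone illustration of that mapping (sketch only, not part of the library API):

    // dictIDFieldSize mirrors the fhd logic above: flag is OR'ed into the
    // frame header descriptor byte, n is the number of little-endian ID
    // bytes that follow the window descriptor.
    func dictIDFieldSize(id uint32) (flag uint8, n int) {
    	switch {
    	case id == 0:
    		return 0, 0 // no Dictionary_ID field at all
    	case id < 256:
    		return 1, 1
    	case id < 1<<16:
    		return 2, 2
    	default:
    		return 3, 4
    	}
    }
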
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
index 9efe34f..e6d3d49 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go
@@ -19,7 +19,7 @@
* Increasing memory usage improves compression ratio
* Reduced memory usage can improve speed, due to cache effect
* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
- maxMemoryUsage = 11
+ maxMemoryUsage = tablelogAbsoluteMax + 2
maxTableLog = maxMemoryUsage - 2
maxTablesize = 1 << maxTableLog
@@ -55,7 +55,7 @@
if b.remain() < 4 {
return errors.New("input too small")
}
- bitStream := b.Uint32()
+ bitStream := b.Uint32NC()
nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog
if nbBits > tablelogAbsoluteMax {
println("Invalid tablelog:", nbBits)
@@ -79,7 +79,8 @@
n0 += 24
if r := b.remain(); r > 5 {
b.advance(2)
- bitStream = b.Uint32() >> bitCount
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> bitCount
} else {
// end of bit stream
bitStream >>= 16
@@ -104,10 +105,11 @@
charnum++
}
- if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 {
+ if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
b.advance(bitCount >> 3)
bitCount &= 7
- bitStream = b.Uint32() >> bitCount
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> bitCount
} else {
bitStream >>= 2
}
@@ -118,7 +120,7 @@
if int32(bitStream)&(threshold-1) < max {
count = int32(bitStream) & (threshold - 1)
- if debug && nbBits < 1 {
+ if debugAsserts && nbBits < 1 {
panic("nbBits underflow")
}
bitCount += nbBits - 1
@@ -148,17 +150,16 @@
threshold >>= 1
}
- //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain())
- if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 {
+ if r := b.remain(); r >= 7 || r-int(bitCount>>3) >= 4 {
b.advance(bitCount >> 3)
bitCount &= 7
+ // The check above should make sure we can read 32 bits
+ bitStream = b.Uint32NC() >> (bitCount & 31)
} else {
bitCount -= (uint)(8 * (len(b.b) - 4 - b.off))
b.off = len(b.b) - 4
- //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend)
+ bitStream = b.Uint32() >> (bitCount & 31)
}
- bitStream = b.Uint32() >> (bitCount & 31)
- //printf("bitstream is now: 0b%b", bitStream)
}
s.symbolLen = charnum
if s.symbolLen <= 1 {
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
index 619836f..c74681b 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@@ -97,7 +97,7 @@
func (s *fseEncoder) allocCtable() {
tableSize := 1 << s.actualTableLog
// get tableSymbol that is big enough.
- if cap(s.ct.tableSymbol) < int(tableSize) {
+ if cap(s.ct.tableSymbol) < tableSize {
s.ct.tableSymbol = make([]byte, tableSize)
}
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
@@ -202,13 +202,13 @@
case 0:
case -1, 1:
symbolTT[i].deltaNbBits = tl
- symbolTT[i].deltaFindState = int16(total - 1)
+ symbolTT[i].deltaFindState = total - 1
total++
default:
maxBitsOut := uint32(tableLog) - highBit(uint32(v-1))
minStatePlus := uint32(v) << maxBitsOut
symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
- symbolTT[i].deltaFindState = int16(total - v)
+ symbolTT[i].deltaFindState = total - v
total += v
}
}
@@ -327,7 +327,7 @@
if err != nil {
return err
}
- if debug {
+ if debugAsserts {
err = s.validateNorm()
if err != nil {
return err
@@ -336,7 +336,7 @@
return s.buildCTable()
}
s.norm[largest] += stillToDistribute
- if debug {
+ if debugAsserts {
err := s.validateNorm()
if err != nil {
return err
@@ -353,8 +353,8 @@
distributed uint32
total = uint32(length)
tableLog = s.actualTableLog
- lowThreshold = uint32(total >> tableLog)
- lowOne = uint32((total * 3) >> (tableLog + 1))
+ lowThreshold = total >> tableLog
+ lowOne = (total * 3) >> (tableLog + 1)
)
for i, cnt := range s.count[:s.symbolLen] {
if cnt == 0 {
@@ -379,7 +379,7 @@
if (total / toDistribute) > lowOne {
// risk of rounding to zero
- lowOne = uint32((total * 3) / (toDistribute * 2))
+ lowOne = (total * 3) / (toDistribute * 2)
for i, cnt := range s.count[:s.symbolLen] {
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
s.norm[i] = 1
@@ -619,7 +619,7 @@
func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 {
minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16
threshold := (minNbBits + 1) << 16
- if debug {
+ if debugAsserts {
if !(s.actualTableLog < 16) {
panic("!s.actualTableLog < 16")
}
@@ -633,7 +633,7 @@
// linear interpolation (very approximate)
normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog
bitMultiplier := uint32(1) << accuracyLog
- if debug {
+ if debugAsserts {
if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold {
panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold")
}
@@ -708,7 +708,6 @@
im := int32((nbBitsOut << 16) - first.deltaNbBits)
lu := (im >> nbBitsOut) + int32(first.deltaFindState)
c.state = c.stateTable[lu]
- return
}
// encode the output symbol provided and write it to the bitstream.
diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
index 6c17dc1..474cb77 100644
--- a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go
@@ -59,7 +59,7 @@
}
for i, bit := range bits {
if base > math.MaxInt32 {
- panic(fmt.Sprintf("invalid decoding table, base overflows int32"))
+ panic("invalid decoding table, base overflows int32")
}
dst[i] = baseOffset{
diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go
index e8c419b..f783e32 100644
--- a/vendor/github.com/klauspost/compress/zstd/history.go
+++ b/vendor/github.com/klauspost/compress/zstd/history.go
@@ -17,6 +17,7 @@
windowSize int
maxSize int
error bool
+ dict *dict
}
// reset will reset the history to initial state of a frame.
@@ -36,12 +37,27 @@
}
h.decoders = sequenceDecs{}
if h.huffTree != nil {
- huffDecoderPool.Put(h.huffTree)
+ if h.dict == nil || h.dict.litEnc != h.huffTree {
+ huffDecoderPool.Put(h.huffTree)
+ }
}
h.huffTree = nil
+ h.dict = nil
//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
}
+func (h *history) setDict(dict *dict) {
+ if dict == nil {
+ return
+ }
+ h.dict = dict
+ h.decoders.litLengths = dict.llDec
+ h.decoders.offsets = dict.ofDec
+ h.decoders.matchLengths = dict.mlDec
+ h.recentOffsets = dict.offsets
+ h.huffTree = dict.litEnc
+}
+
// append bytes to history.
// This function will make sure there is space for it,
// if the buffer has been allocated with enough extra space.
diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
index d580e32..2c9c535 100644
--- a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s
@@ -179,13 +179,13 @@
MOVQ ·prime2v(SB), R14
// Load slice.
- MOVQ b_base+8(FP), CX
- MOVQ b_len+16(FP), DX
+ MOVQ arg1_base+8(FP), CX
+ MOVQ arg1_len+16(FP), DX
LEAQ (CX)(DX*1), BX
SUBQ $32, BX
// Load vN from d.
- MOVQ d+0(FP), AX
+ MOVQ arg+0(FP), AX
MOVQ 0(AX), R8 // v1
MOVQ 8(AX), R9 // v2
MOVQ 16(AX), R10 // v3
@@ -209,7 +209,7 @@
MOVQ R11, 24(AX)
// The number of bytes written is CX minus the old base pointer.
- SUBQ b_base+8(FP), CX
+ SUBQ arg1_base+8(FP), CX
MOVQ CX, ret+32(FP)
RET
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go
index 15a45f7..1dd39e6 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go
@@ -62,8 +62,10 @@
matchLengths sequenceDec
prevOffset [3]int
hist []byte
+ dict []byte
literals []byte
out []byte
+ windowSize int
maxBits uint8
}
@@ -82,7 +84,12 @@
s.hist = hist.b
s.prevOffset = hist.recentOffsets
s.maxBits = s.litLengths.fse.maxBits + s.offsets.fse.maxBits + s.matchLengths.fse.maxBits
+ s.windowSize = hist.windowSize
s.out = out
+ s.dict = nil
+ if hist.dict != nil {
+ s.dict = hist.dict.content
+ }
return nil
}
@@ -98,76 +105,159 @@
printf("reading sequence %d, exceeded available data\n", seqs-i)
return io.ErrUnexpectedEOF
}
- var litLen, matchOff, matchLen int
+ var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
- litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState)
+ // inlined function:
+ // ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
+
+ // Final will not read from stream.
+ var llB, mlB, moB uint8
+ ll, llB = llState.final()
+ ml, mlB = mlState.final()
+ mo, moB = ofState.final()
+
+ // extra bits are stored in reverse order.
+ br.fillFast()
+ mo += br.getBits(moB)
+ if s.maxBits > 32 {
+ br.fillFast()
+ }
+ ml += br.getBits(mlB)
+ ll += br.getBits(llB)
+
+ if moB > 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = mo
+ } else {
+ // mo = s.adjustOffset(mo, ll, moB)
+ // Inlined for rather big speedup
+ if ll == 0 {
+ // There is an exception though, when current sequence's literals_length = 0.
+ // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
+ // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
+ mo++
+ }
+
+ if mo == 0 {
+ mo = s.prevOffset[0]
+ } else {
+ var temp int
+ if mo == 3 {
+ temp = s.prevOffset[0] - 1
+ } else {
+ temp = s.prevOffset[mo]
+ }
+
+ if temp == 0 {
+ // 0 is not valid; input is corrupted; force offset to 1
+ println("temp was 0")
+ temp = 1
+ }
+
+ if mo != 1 {
+ s.prevOffset[2] = s.prevOffset[1]
+ }
+ s.prevOffset[1] = s.prevOffset[0]
+ s.prevOffset[0] = temp
+ mo = temp
+ }
+ }
br.fillFast()
} else {
- litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState)
+ ll, mo, ml = s.next(br, llState, mlState, ofState)
br.fill()
}
if debugSequences {
- println("Seq", seqs-i-1, "Litlen:", litLen, "matchOff:", matchOff, "(abs) matchLen:", matchLen)
+ println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
}
- if litLen > len(s.literals) {
- return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", litLen, len(s.literals))
+ if ll > len(s.literals) {
+ return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
}
- size := litLen + matchLen + len(s.out)
+ size := ll + ml + len(s.out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size", size)
}
if size > cap(s.out) {
- // Not enough size, will be extremely rarely triggered,
+ // Not enough size, which can happen under high volume block streaming conditions,
// or if the destination slice is too small for sync operations.
- // We add maxBlockSize to the capacity.
- s.out = append(s.out, make([]byte, maxBlockSize)...)
- s.out = s.out[:len(s.out)-maxBlockSize]
+ // over-allocating here can create a large amount of GC pressure so we try to keep
+ // it as contained as possible
+ used := len(s.out) - startSize
+ addBytes := 256 + ll + ml + used>>2
+ // Clamp to max block size.
+ if used+addBytes > maxBlockSize {
+ addBytes = maxBlockSize - used
+ }
+ s.out = append(s.out, make([]byte, addBytes)...)
+ s.out = s.out[:len(s.out)-addBytes]
}
- if matchLen > maxMatchLen {
- return fmt.Errorf("match len (%d) bigger than max allowed length", matchLen)
- }
- if matchOff > len(s.out)+len(hist)+litLen {
- return fmt.Errorf("match offset (%d) bigger than current history (%d)", matchOff, len(s.out)+len(hist)+litLen)
- }
- if matchOff == 0 && matchLen > 0 {
- return fmt.Errorf("zero matchoff and matchlen > 0")
+ if ml > maxMatchLen {
+ return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
- s.out = append(s.out, s.literals[:litLen]...)
- s.literals = s.literals[litLen:]
+ // Add literals
+ s.out = append(s.out, s.literals[:ll]...)
+ s.literals = s.literals[ll:]
out := s.out
+ if mo == 0 && ml > 0 {
+ return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
+ }
+
+ if mo > len(s.out)+len(hist) || mo > s.windowSize {
+ if len(s.dict) == 0 {
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+ }
+
+ // We may be in the dictionary.
+ dictO := len(s.dict) - (mo - (len(s.out) + len(hist)))
+ if dictO < 0 || dictO >= len(s.dict) {
+ return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist))
+ }
+ end := dictO + ml
+ if end > len(s.dict) {
+ out = append(out, s.dict[dictO:]...)
+ mo -= len(s.dict) - dictO
+ ml -= len(s.dict) - dictO
+ } else {
+ out = append(out, s.dict[dictO:end]...)
+ mo = 0
+ ml = 0
+ }
+ }
+
// Copy from history.
// TODO: Blocks without history could be made to ignore this completely.
- if v := matchOff - len(s.out); v > 0 {
+ if v := mo - len(s.out); v > 0 {
// v is the start position in history from end.
start := len(s.hist) - v
- if matchLen > v {
+ if ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, s.hist[start:]...)
- matchOff -= v
- matchLen -= v
+ mo -= v
+ ml -= v
} else {
- out = append(out, s.hist[start:start+matchLen]...)
- matchLen = 0
+ out = append(out, s.hist[start:start+ml]...)
+ ml = 0
}
}
// We must be in current buffer now
- if matchLen > 0 {
- start := len(s.out) - matchOff
- if matchLen <= len(s.out)-start {
+ if ml > 0 {
+ start := len(s.out) - mo
+ if ml <= len(s.out)-start {
// No overlap
- out = append(out, s.out[start:start+matchLen]...)
+ out = append(out, s.out[start:start+ml]...)
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
- out = out[:len(out)+matchLen]
- src := out[start : start+matchLen]
+ out = out[:len(out)+ml]
+ src := out[start : start+ml]
// Destination is the space we just added.
- dst := out[len(out)-matchLen:]
+ dst := out[len(out)-ml:]
dst = dst[:len(src)]
for i := range src {
dst[i] = src[i]
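
The byte-at-a-time loop above is deliberate: for overlapping matches the source range includes bytes the loop itself has just written, so a block copy would read stale data. A tiny runnable illustration with hypothetical values (offset 1, length 5):

    package main

    import "fmt"

    func main() {
    	out := make([]byte, 1, 6)
    	out[0] = 'a'
    	ml, mo := 5, 1 // match length and offset
    	start := len(out) - mo
    	out = out[:len(out)+ml]
    	src := out[start : start+ml]
    	dst := out[len(out)-ml:]
    	for i := range src {
    		dst[i] = src[i] // reads bytes written by earlier iterations
    	}
    	fmt.Println(string(out)) // aaaaaa
    }
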
diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go
index 36bcc3c..8014174 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go
@@ -35,7 +35,6 @@
// Ensure we cannot reuse by accident
prevEnc := *prev
prevEnc.symbolLen = 0
- return
}
compareSwap(ll, &s.llEnc, &s.llPrev)
compareSwap(ml, &s.mlEnc, &s.mlPrev)
diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go
index 356956b..9d9d1d5 100644
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@@ -10,8 +10,8 @@
"hash/crc32"
"io"
+ "github.com/golang/snappy"
"github.com/klauspost/compress/huff0"
- "github.com/klauspost/compress/snappy"
)
const (
@@ -111,7 +111,7 @@
// Add empty last block
r.block.reset(nil)
r.block.last = true
- err := r.block.encodeLits(false)
+ err := r.block.encodeLits(r.block.literals, false)
if err != nil {
return written, err
}
@@ -178,17 +178,16 @@
r.err = ErrSnappyCorrupt
return written, r.err
}
- err = r.block.encode(false)
+ err = r.block.encode(nil, false, false)
switch err {
case errIncompressible:
r.block.popOffsets()
r.block.reset(nil)
r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen])
if err != nil {
- println("snappy.Decode:", err)
return written, err
}
- err = r.block.encodeLits(false)
+ err = r.block.encodeLits(r.block.literals, false)
if err != nil {
return written, err
}
@@ -235,7 +234,7 @@
r.err = ErrSnappyCorrupt
return written, r.err
}
- err := r.block.encodeLits(false)
+ err := r.block.encodeLits(r.block.literals, false)
if err != nil {
return written, err
}
@@ -417,7 +416,7 @@
// https://github.com/google/snappy/blob/master/framing_format.txt
func snappyCRC(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
- return uint32(c>>15|c<<17) + 0xa282ead8
+ return c>>15 | c<<17 + 0xa282ead8
}
// snappyDecodedLen returns the length of the decoded block and the number of bytes
diff --git a/vendor/github.com/klauspost/compress/zstd/zip.go b/vendor/github.com/klauspost/compress/zstd/zip.go
new file mode 100644
index 0000000..e35a0a2
--- /dev/null
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@@ -0,0 +1,120 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+ "errors"
+ "io"
+ "sync"
+)
+
+// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip.
+// See https://www.winzip.com/win/en/comp_info.html
+const ZipMethodWinZip = 93
+
+// ZipMethodPKWare is the method number used by PKWARE to indicate Zstandard compression.
+// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.7.TXT
+const ZipMethodPKWare = 20
+
+var zipReaderPool sync.Pool
+
+// newZipReader is currently unused: callers that never Close the returned reader would leak the pooled decoder's goroutines.
+func newZipReader(r io.Reader) io.ReadCloser {
+ dec, ok := zipReaderPool.Get().(*Decoder)
+ if ok {
+ dec.Reset(r)
+ } else {
+ d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
+ if err != nil {
+ panic(err)
+ }
+ dec = d
+ }
+ return &pooledZipReader{dec: dec}
+}
+
+type pooledZipReader struct {
+ mu sync.Mutex // guards Close and Read
+ dec *Decoder
+}
+
+func (r *pooledZipReader) Read(p []byte) (n int, err error) {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ if r.dec == nil {
+ return 0, errors.New("Read after Close")
+ }
+ return r.dec.Read(p)
+}
+
+func (r *pooledZipReader) Close() error {
+ r.mu.Lock()
+ defer r.mu.Unlock()
+ var err error
+ if r.dec != nil {
+ err = r.dec.Reset(nil)
+ zipReaderPool.Put(r.dec)
+ r.dec = nil
+ }
+ return err
+}
+
+type pooledZipWriter struct {
+ mu sync.Mutex // guards Close and Write
+ enc *Encoder
+ pool *sync.Pool // pool to return the encoder to on Close
+}
+
+func (w *pooledZipWriter) Write(p []byte) (n int, err error) {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ if w.enc == nil {
+ return 0, errors.New("Write after Close")
+ }
+ return w.enc.Write(p)
+}
+
+func (w *pooledZipWriter) Close() error {
+ w.mu.Lock()
+ defer w.mu.Unlock()
+ var err error
+ if w.enc != nil {
+ err = w.enc.Close()
+ // Return the encoder to the pool it came from; zipReaderPool holds *Decoder values.
+ w.pool.Put(w.enc)
+ w.enc = nil
+ }
+ return err
+}
+
+// ZipCompressor returns a compressor that can be registered with zip libraries.
+// The provided encoder options will be used on all encodes.
+func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
+ var pool sync.Pool
+ return func(w io.Writer) (io.WriteCloser, error) {
+ enc, ok := pool.Get().(*Encoder)
+ if ok {
+ enc.Reset(w)
+ } else {
+ var err error
+ enc, err = NewWriter(w, opts...)
+ if err != nil {
+ return nil, err
+ }
+ }
+ return &pooledZipWriter{enc: enc, pool: &pool}, nil
+ }
+}
+
+// ZipDecompressor returns a decompressor that can be registered with zip libraries.
+// See ZipCompressor for example.
+func ZipDecompressor() func(r io.Reader) io.ReadCloser {
+ return func(r io.Reader) io.ReadCloser {
+ d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
+ if err != nil {
+ panic(err)
+ }
+ return d.IOReadCloser()
+ }
+}
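
Typical registration with the standard library's archive/zip; a sketch using only names from this diff plus the stdlib:

    package example

    import (
    	"archive/zip"
    	"io"

    	"github.com/klauspost/compress/zstd"
    )

    // writeZstdZip writes one Zstandard-compressed entry using the WinZip method ID.
    func writeZstdZip(w io.Writer, name string, data []byte) error {
    	zw := zip.NewWriter(w)
    	zw.RegisterCompressor(zstd.ZipMethodWinZip, zstd.ZipCompressor())
    	fw, err := zw.CreateHeader(&zip.FileHeader{Name: name, Method: zstd.ZipMethodWinZip})
    	if err != nil {
    		return err
    	}
    	if _, err := fw.Write(data); err != nil {
    		return err
    	}
    	return zw.Close()
    }

On the read side, (*zip.Reader).RegisterDecompressor(zstd.ZipMethodWinZip, zstd.ZipDecompressor()) is the counterpart.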
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index 57a8a2f..1ba308c 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -4,13 +4,24 @@
package zstd
import (
+ "bytes"
+ "encoding/binary"
"errors"
"log"
+ "math"
"math/bits"
)
+// enable debug printing
const debug = false
+
+// Enable extra assertions.
+const debugAsserts = debug || false
+
+// print sequence details
const debugSequences = false
+
+// print detailed matching information
const debugMatches = false
// force encoder to use predefined tables.
@@ -19,6 +30,9 @@
// zstdMinMatch is the minimum zstd match length.
const zstdMinMatch = 3
+// Reset the buffer offset when reaching this.
+const bufferReset = math.MaxInt32 - MaxWindowSize
+
var (
// ErrReservedBlockType is returned when a reserved block type is found.
// Typically this indicates wrong or corrupted input.
@@ -61,6 +75,10 @@
// ErrDecoderClosed will be returned if the Decoder was used after
// Close has been called.
ErrDecoderClosed = errors.New("decoder used after Close")
+
+ // ErrDecoderNilInput is returned when a nil Reader was provided
+ // and an operation other than Reset/DecodeAll/Close was attempted.
+ ErrDecoderNilInput = errors.New("nil input provided as reader")
)
func println(a ...interface{}) {
@@ -75,6 +93,17 @@
}
}
+// matchLenFast does matching, but does not examine the last len(a)%8 (up to 7) bytes.
+func matchLenFast(a, b []byte) int {
+ endI := len(a) & (math.MaxInt32 - 7)
+ for i := 0; i < endI; i += 8 {
+ if diff := load64(a, i) ^ load64(b, i); diff != 0 {
+ return i + bits.TrailingZeros64(diff)>>3
+ }
+ }
+ return endI
+}
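
The TrailingZeros trick above converts the first mismatching bit of the XOR into a byte index: load64 is little-endian, so lower-addressed bytes occupy lower bits. A tiny illustration with hypothetical inputs:

    package main

    import (
    	"encoding/binary"
    	"fmt"
    	"math/bits"
    )

    func main() {
    	a := []byte("01234567")
    	b := []byte("0123x567") // first difference at byte index 4
    	diff := binary.LittleEndian.Uint64(a) ^ binary.LittleEndian.Uint64(b)
    	fmt.Println(bits.TrailingZeros64(diff) >> 3) // prints 4
    }
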
+
// matchLen returns the maximum length.
// a must be the shorter of the two.
// The function also returns whether all bytes matched.
@@ -85,52 +114,33 @@
return i + (bits.TrailingZeros64(diff) >> 3)
}
}
+
checked := (len(a) >> 3) << 3
a = a[checked:]
b = b[checked:]
- // TODO: We could do a 4 check.
for i := range a {
if a[i] != b[i] {
- return int(i) + checked
+ return i + checked
}
}
return len(a) + checked
}
-// matchLen returns a match length in src between index s and t
-func matchLenIn(src []byte, s, t int32) int32 {
- s1 := len(src)
- b := src[t:]
- a := src[s:s1]
- b = b[:len(a)]
- // Extend the match to be as long as possible.
- for i := range a {
- if a[i] != b[i] {
- return int32(i)
- }
- }
- return int32(len(a))
-}
-
func load3232(b []byte, i int32) uint32 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:4]
- return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
+ return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:8]
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+ return binary.LittleEndian.Uint64(b[i:])
}
func load64(b []byte, i int) uint64 {
- // Help the compiler eliminate bounds checks on the read so it can be done in a single read.
- b = b[i:]
- b = b[:8]
- return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
- uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
+ return binary.LittleEndian.Uint64(b[i:])
}
+
+type byter interface {
+ Bytes() []byte
+ Len() int
+}
+
+var _ byter = &bytes.Buffer{}
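
The final assignment is the usual compile-time interface check: it costs nothing at runtime but breaks the build if *bytes.Buffer ever stops satisfying byter. The same idiom works for any pair; a sketch with a hypothetical type:

    type myBuf struct{ b []byte }

    func (m *myBuf) Bytes() []byte { return m.b }
    func (m *myBuf) Len() int      { return len(m.b) }

    // Fails to compile if *myBuf stops implementing byter.
    var _ byter = (*myBuf)(nil)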