[VOL-5292] Implementation for fetching the GEM port history data from the ONT
Change-Id: I4cf22555cbd13bcd5e49e620c8aa8b67cbd2891c
Signed-off-by: Akash Reddy Kankanala <akash.kankanala@radisys.com>
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 41703bb..c0c48bd 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -4,13 +4,13 @@
"errors"
"fmt"
"io"
+ "sync"
"github.com/klauspost/compress/fse"
)
type dTable struct {
single []dEntrySingle
- double []dEntryDouble
}
// single-symbols decoding
@@ -18,13 +18,6 @@
entry uint16
}
-// double-symbols decoding
-type dEntryDouble struct {
- seq uint16
- nBits uint8
- len uint8
-}
-
// Uses special code for all tables that are < 8 bits.
const use8BitTables = true
@@ -34,7 +27,7 @@
// If no Scratch is provided a new one is allocated.
// The returned Scratch can be used for encoding or decoding input using this table.
func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) {
- s, err = s.prepare(in)
+ s, err = s.prepare(nil)
if err != nil {
return s, nil, err
}
@@ -216,6 +209,7 @@
return &Decoder{
dt: s.dt,
actualTableLog: s.actualTableLog,
+ bufs: &s.decPool,
}
}
@@ -223,103 +217,15 @@
type Decoder struct {
dt dTable
actualTableLog uint8
+ bufs *sync.Pool
}
-// Decompress1X will decompress a 1X encoded stream.
-// The cap of the output buffer will be the maximum decompressed size.
-// The length of the supplied input must match the end of a block exactly.
-func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
- if len(d.dt.single) == 0 {
- return nil, errors.New("no table loaded")
+func (d *Decoder) buffer() *[4][256]byte {
+ buf, ok := d.bufs.Get().(*[4][256]byte)
+ if ok {
+ return buf
}
- if use8BitTables && d.actualTableLog <= 8 {
- return d.decompress1X8Bit(dst, src)
- }
- var br bitReaderShifted
- err := br.init(src)
- if err != nil {
- return dst, err
- }
- maxDecodedSize := cap(dst)
- dst = dst[:0]
-
- // Avoid bounds check by always having full sized table.
- const tlSize = 1 << tableLogMax
- const tlMask = tlSize - 1
- dt := d.dt.single[:tlSize]
-
- // Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
- var off uint8
-
- for br.off >= 8 {
- br.fillFast()
- v := dt[br.peekBitsFast(d.actualTableLog)&tlMask]
- br.advance(uint8(v.entry))
- buf[off+0] = uint8(v.entry >> 8)
-
- v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
- br.advance(uint8(v.entry))
- buf[off+1] = uint8(v.entry >> 8)
-
- // Refill
- br.fillFast()
-
- v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
- br.advance(uint8(v.entry))
- buf[off+2] = uint8(v.entry >> 8)
-
- v = dt[br.peekBitsFast(d.actualTableLog)&tlMask]
- br.advance(uint8(v.entry))
- buf[off+3] = uint8(v.entry >> 8)
-
- off += 4
- if off == 0 {
- if len(dst)+256 > maxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
- }
- dst = append(dst, buf[:]...)
- }
- }
-
- if len(dst)+int(off) > maxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
- }
- dst = append(dst, buf[:off]...)
-
- // br < 8, so uint8 is fine
- bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
- for bitsLeft > 0 {
- br.fill()
- if false && br.bitsRead >= 32 {
- if br.off >= 4 {
- v := br.in[br.off-4:]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
- br.value = (br.value << 32) | uint64(low)
- br.bitsRead -= 32
- br.off -= 4
- } else {
- for br.off > 0 {
- br.value = (br.value << 8) | uint64(br.in[br.off-1])
- br.bitsRead -= 8
- br.off--
- }
- }
- }
- if len(dst) >= maxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
- }
- v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
- nBits := uint8(v.entry)
- br.advance(nBits)
- bitsLeft -= nBits
- dst = append(dst, uint8(v.entry>>8))
- }
- return dst, br.close()
+ return &[4][256]byte{}
}
// decompress1X8Bit will decompress a 1X encoded stream with tablelog <= 8.
@@ -341,41 +247,258 @@
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ bufs := d.buffer()
+ buf := &bufs[0]
var off uint8
- shift := (8 - d.actualTableLog) & 7
+ switch d.actualTableLog {
+ case 8:
+ const shift = 8 - 8
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
- //fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
- for br.off >= 4 {
- br.fillFast()
- v := dt[br.peekByteFast()>>shift]
- br.advance(uint8(v.entry))
- buf[off+0] = uint8(v.entry >> 8)
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
- v = dt[br.peekByteFast()>>shift]
- br.advance(uint8(v.entry))
- buf[off+1] = uint8(v.entry >> 8)
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
- v = dt[br.peekByteFast()>>shift]
- br.advance(uint8(v.entry))
- buf[off+2] = uint8(v.entry >> 8)
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
- v = dt[br.peekByteFast()>>shift]
- br.advance(uint8(v.entry))
- buf[off+3] = uint8(v.entry >> 8)
-
- off += 4
- if off == 0 {
- if len(dst)+256 > maxDecodedSize {
- br.close()
- return nil, ErrMaxDecodedSizeExceeded
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ d.bufs.Put(bufs)
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
}
- dst = append(dst, buf[:]...)
}
+ case 7:
+ const shift = 8 - 7
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ br.close()
+ d.bufs.Put(bufs)
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ case 6:
+ const shift = 8 - 6
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ case 5:
+ const shift = 8 - 5
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ case 4:
+ const shift = 8 - 4
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ case 3:
+ const shift = 8 - 3
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ case 2:
+ const shift = 8 - 2
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ case 1:
+ const shift = 8 - 1
+ for br.off >= 4 {
+ br.fillFast()
+ v := dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+0] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+1] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+2] = uint8(v.entry >> 8)
+
+ v = dt[uint8(br.value>>(56+shift))]
+ br.advance(uint8(v.entry))
+ buf[off+3] = uint8(v.entry >> 8)
+
+ off += 4
+ if off == 0 {
+ if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
+ br.close()
+ return nil, ErrMaxDecodedSizeExceeded
+ }
+ dst = append(dst, buf[:]...)
+ }
+ }
+ default:
+ d.bufs.Put(bufs)
+ return nil, fmt.Errorf("invalid tablelog: %d", d.actualTableLog)
}
if len(dst)+int(off) > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -383,6 +506,8 @@
// br < 4, so uint8 is fine
bitsLeft := int8(uint8(br.off)*8 + (64 - br.bitsRead))
+ shift := (8 - d.actualTableLog) & 7
+
for bitsLeft > 0 {
if br.bitsRead >= 64-8 {
for br.off > 0 {
@@ -393,6 +518,7 @@
}
if len(dst) >= maxDecodedSize {
br.close()
+ d.bufs.Put(bufs)
return nil, ErrMaxDecodedSizeExceeded
}
v := dt[br.peekByteFast()>>shift]
@@ -401,6 +527,7 @@
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
+ d.bufs.Put(bufs)
return dst, br.close()
}
@@ -420,33 +547,35 @@
dt := d.dt.single[:256]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ bufs := d.buffer()
+ buf := &bufs[0]
var off uint8
- const shift = 0
+ const shift = 56
//fmt.Printf("mask: %b, tl:%d\n", mask, d.actualTableLog)
for br.off >= 4 {
br.fillFast()
- v := dt[br.peekByteFast()>>shift]
+ v := dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+0] = uint8(v.entry >> 8)
- v = dt[br.peekByteFast()>>shift]
+ v = dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+1] = uint8(v.entry >> 8)
- v = dt[br.peekByteFast()>>shift]
+ v = dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+2] = uint8(v.entry >> 8)
- v = dt[br.peekByteFast()>>shift]
+ v = dt[uint8(br.value>>shift)]
br.advance(uint8(v.entry))
buf[off+3] = uint8(v.entry >> 8)
off += 4
if off == 0 {
if len(dst)+256 > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -455,6 +584,7 @@
}
if len(dst)+int(off) > maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
@@ -471,15 +601,17 @@
}
}
if len(dst) >= maxDecodedSize {
+ d.bufs.Put(bufs)
br.close()
return nil, ErrMaxDecodedSizeExceeded
}
- v := dt[br.peekByteFast()>>shift]
+ v := dt[br.peekByteFast()]
nBits := uint8(v.entry)
br.advance(nBits)
bitsLeft -= int8(nBits)
dst = append(dst, uint8(v.entry>>8))
}
+ d.bufs.Put(bufs)
return dst, br.close()
}
@@ -487,196 +619,6 @@
// The length of the supplied input must match the end of a block exactly.
// The *capacity* of the dst slice must match the destination size of
// the uncompressed data exactly.
-func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
- if len(d.dt.single) == 0 {
- return nil, errors.New("no table loaded")
- }
- if len(src) < 6+(4*1) {
- return nil, errors.New("input too small")
- }
- if use8BitTables && d.actualTableLog <= 8 {
- return d.decompress4X8bit(dst, src)
- }
-
- var br [4]bitReaderShifted
- start := 6
- for i := 0; i < 3; i++ {
- length := int(src[i*2]) | (int(src[i*2+1]) << 8)
- if start+length >= len(src) {
- return nil, errors.New("truncated input (or invalid offset)")
- }
- err := br[i].init(src[start : start+length])
- if err != nil {
- return nil, err
- }
- start += length
- }
- err := br[3].init(src[start:])
- if err != nil {
- return nil, err
- }
-
- // destination, offset to match first output
- dstSize := cap(dst)
- dst = dst[:dstSize]
- out := dst
- dstEvery := (dstSize + 3) / 4
-
- const tlSize = 1 << tableLogMax
- const tlMask = tlSize - 1
- single := d.dt.single[:tlSize]
-
- // Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
- var off uint8
- var decoded int
-
- // Decode 2 values from each decoder/loop.
- const bufoff = 256 / 4
- for {
- if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
- break
- }
-
- {
- const stream = 0
- const stream2 = 1
- br[stream].fillFast()
- br[stream2].fillFast()
-
- val := br[stream].peekBitsFast(d.actualTableLog)
- v := single[val&tlMask]
- br[stream].advance(uint8(v.entry))
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
-
- val2 := br[stream2].peekBitsFast(d.actualTableLog)
- v2 := single[val2&tlMask]
- br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
-
- val = br[stream].peekBitsFast(d.actualTableLog)
- v = single[val&tlMask]
- br[stream].advance(uint8(v.entry))
- buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
-
- val2 = br[stream2].peekBitsFast(d.actualTableLog)
- v2 = single[val2&tlMask]
- br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
- }
-
- {
- const stream = 2
- const stream2 = 3
- br[stream].fillFast()
- br[stream2].fillFast()
-
- val := br[stream].peekBitsFast(d.actualTableLog)
- v := single[val&tlMask]
- br[stream].advance(uint8(v.entry))
- buf[off+bufoff*stream] = uint8(v.entry >> 8)
-
- val2 := br[stream2].peekBitsFast(d.actualTableLog)
- v2 := single[val2&tlMask]
- br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
-
- val = br[stream].peekBitsFast(d.actualTableLog)
- v = single[val&tlMask]
- br[stream].advance(uint8(v.entry))
- buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
-
- val2 = br[stream2].peekBitsFast(d.actualTableLog)
- v2 = single[val2&tlMask]
- br[stream2].advance(uint8(v2.entry))
- buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
- }
-
- off += 2
-
- if off == bufoff {
- if bufoff > dstEvery {
- return nil, errors.New("corruption detected: stream overrun 1")
- }
- copy(out, buf[:bufoff])
- copy(out[dstEvery:], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
- off = 0
- out = out[bufoff:]
- decoded += 256
- // There must at least be 3 buffers left.
- if len(out) < dstEvery*3 {
- return nil, errors.New("corruption detected: stream overrun 2")
- }
- }
- }
- if off > 0 {
- ioff := int(off)
- if len(out) < dstEvery*3+ioff {
- return nil, errors.New("corruption detected: stream overrun 3")
- }
- copy(out, buf[:off])
- copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
- decoded += int(off) * 4
- out = out[off:]
- }
-
- // Decode remaining.
- for i := range br {
- offset := dstEvery * i
- br := &br[i]
- bitsLeft := br.off*8 + uint(64-br.bitsRead)
- for bitsLeft > 0 {
- br.fill()
- if false && br.bitsRead >= 32 {
- if br.off >= 4 {
- v := br.in[br.off-4:]
- v = v[:4]
- low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
- br.value = (br.value << 32) | uint64(low)
- br.bitsRead -= 32
- br.off -= 4
- } else {
- for br.off > 0 {
- br.value = (br.value << 8) | uint64(br.in[br.off-1])
- br.bitsRead -= 8
- br.off--
- }
- }
- }
- // end inline...
- if offset >= len(out) {
- return nil, errors.New("corruption detected: stream overrun 4")
- }
-
- // Read value and increment offset.
- val := br.peekBitsFast(d.actualTableLog)
- v := single[val&tlMask].entry
- nBits := uint8(v)
- br.advance(nBits)
- bitsLeft -= uint(nBits)
- out[offset] = uint8(v >> 8)
- offset++
- }
- decoded += offset - dstEvery*i
- err = br.close()
- if err != nil {
- return nil, err
- }
- }
- if dstSize != decoded {
- return nil, errors.New("corruption detected: short output block")
- }
- return dst, nil
-}
-
-// Decompress4X will decompress a 4X encoded stream.
-// The length of the supplied input must match the end of a block exactly.
-// The *capacity* of the dst slice must match the destination size of
-// the uncompressed data exactly.
func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
if d.actualTableLog == 8 {
return d.decompress4X8bitExactly(dst, src)
@@ -706,19 +648,18 @@
out := dst
dstEvery := (dstSize + 3) / 4
- shift := (8 - d.actualTableLog) & 7
+ shift := (56 + (8 - d.actualTableLog)) & 63
const tlSize = 1 << 8
- const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ buf := d.buffer()
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
- const bufoff = 256 / 4
+ const bufoff = 256
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
@@ -728,96 +669,109 @@
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
- br[stream].fillFast()
- br[stream2].fillFast()
+ br1 := &br[stream]
+ br2 := &br[stream2]
+ br1.fillFast()
+ br2.fillFast()
- v := single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v := single[uint8(br1.value>>shift)].entry
+ v2 := single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
- v2 := single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+3] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
{
const stream = 2
const stream2 = 3
- br[stream].fillFast()
- br[stream2].fillFast()
+ br1 := &br[stream]
+ br2 := &br[stream2]
+ br1.fillFast()
+ br2.fillFast()
- v := single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v := single[uint8(br1.value>>shift)].entry
+ v2 := single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
- v2 := single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+3] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
off += 4
- if off == bufoff {
+ if off == 0 {
if bufoff > dstEvery {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[:bufoff])
- copy(out[dstEvery:], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
- off = 0
+ copy(out, buf[0][:])
+ copy(out[dstEvery:], buf[1][:])
+ copy(out[dstEvery*2:], buf[2][:])
+ copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
- decoded += 256
+ decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
}
@@ -825,23 +779,31 @@
if off > 0 {
ioff := int(off)
if len(out) < dstEvery*3+ioff {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 3")
}
- copy(out, buf[:off])
- copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ copy(out, buf[0][:off])
+ copy(out[dstEvery:], buf[1][:off])
+ copy(out[dstEvery*2:], buf[2][:off])
+ copy(out[dstEvery*3:], buf[3][:off])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
+ // Decode remaining.
+ remainBytes := dstEvery - (decoded / 4)
for i := range br {
offset := dstEvery * i
+ endsAt := offset + remainBytes
+ if endsAt > len(out) {
+ endsAt = len(out)
+ }
br := &br[i]
- bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ bitsLeft := br.remaining()
for bitsLeft > 0 {
if br.finished() {
+ d.bufs.Put(buf)
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
@@ -861,24 +823,31 @@
}
}
// end inline...
- if offset >= len(out) {
+ if offset >= endsAt {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
- v := single[br.peekByteFast()>>shift].entry
+ v := single[uint8(br.value>>shift)].entry
nBits := uint8(v)
br.advance(nBits)
- bitsLeft -= int(nBits)
+ bitsLeft -= uint(nBits)
out[offset] = uint8(v >> 8)
offset++
}
+ if offset != endsAt {
+ d.bufs.Put(buf)
+ return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
+ }
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
+ d.bufs.Put(buf)
return nil, err
}
}
+ d.bufs.Put(buf)
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}
@@ -914,18 +883,17 @@
out := dst
dstEvery := (dstSize + 3) / 4
- const shift = 0
+ const shift = 56
const tlSize = 1 << 8
- const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
// Use temp table to avoid bound checks/append penalty.
- var buf [256]byte
+ buf := d.buffer()
var off uint8
var decoded int
// Decode 4 values from each decoder/loop.
- const bufoff = 256 / 4
+ const bufoff = 256
for {
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
break
@@ -935,96 +903,109 @@
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
- br[stream].fillFast()
- br[stream2].fillFast()
+ br1 := &br[stream]
+ br2 := &br[stream2]
+ br1.fillFast()
+ br2.fillFast()
- v := single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v := single[uint8(br1.value>>shift)].entry
+ v2 := single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
- v2 := single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+3] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
{
const stream = 2
const stream2 = 3
- br[stream].fillFast()
- br[stream2].fillFast()
+ br1 := &br[stream]
+ br2 := &br[stream2]
+ br1.fillFast()
+ br2.fillFast()
- v := single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v := single[uint8(br1.value>>shift)].entry
+ v2 := single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off] = uint8(v >> 8)
+ buf[stream2][off] = uint8(v2 >> 8)
- v2 := single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+1] = uint8(v >> 8)
+ buf[stream2][off+1] = uint8(v2 >> 8)
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+1] = uint8(v >> 8)
- br[stream].advance(uint8(v))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+2] = uint8(v >> 8)
+ buf[stream2][off+2] = uint8(v2 >> 8)
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+2] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
-
- v = single[br[stream].peekByteFast()>>shift].entry
- buf[off+bufoff*stream+3] = uint8(v >> 8)
- br[stream].advance(uint8(v))
-
- v2 = single[br[stream2].peekByteFast()>>shift].entry
- buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
- br[stream2].advance(uint8(v2))
+ v = single[uint8(br1.value>>shift)].entry
+ v2 = single[uint8(br2.value>>shift)].entry
+ br1.bitsRead += uint8(v)
+ br1.value <<= v & 63
+ br2.bitsRead += uint8(v2)
+ br2.value <<= v2 & 63
+ buf[stream][off+3] = uint8(v >> 8)
+ buf[stream2][off+3] = uint8(v2 >> 8)
}
off += 4
- if off == bufoff {
+ if off == 0 {
if bufoff > dstEvery {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 1")
}
- copy(out, buf[:bufoff])
- copy(out[dstEvery:], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:], buf[bufoff*3:bufoff*4])
- off = 0
+ copy(out, buf[0][:])
+ copy(out[dstEvery:], buf[1][:])
+ copy(out[dstEvery*2:], buf[2][:])
+ copy(out[dstEvery*3:], buf[3][:])
out = out[bufoff:]
- decoded += 256
+ decoded += bufoff * 4
// There must at least be 3 buffers left.
if len(out) < dstEvery*3 {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 2")
}
}
@@ -1034,21 +1015,27 @@
if len(out) < dstEvery*3+ioff {
return nil, errors.New("corruption detected: stream overrun 3")
}
- copy(out, buf[:off])
- copy(out[dstEvery:dstEvery+ioff], buf[bufoff:bufoff*2])
- copy(out[dstEvery*2:dstEvery*2+ioff], buf[bufoff*2:bufoff*3])
- copy(out[dstEvery*3:dstEvery*3+ioff], buf[bufoff*3:bufoff*4])
+ copy(out, buf[0][:off])
+ copy(out[dstEvery:], buf[1][:off])
+ copy(out[dstEvery*2:], buf[2][:off])
+ copy(out[dstEvery*3:], buf[3][:off])
decoded += int(off) * 4
out = out[off:]
}
// Decode remaining.
+ remainBytes := dstEvery - (decoded / 4)
for i := range br {
offset := dstEvery * i
+ endsAt := offset + remainBytes
+ if endsAt > len(out) {
+ endsAt = len(out)
+ }
br := &br[i]
- bitsLeft := int(br.off*8) + int(64-br.bitsRead)
+ bitsLeft := br.remaining()
for bitsLeft > 0 {
if br.finished() {
+ d.bufs.Put(buf)
return nil, io.ErrUnexpectedEOF
}
if br.bitsRead >= 56 {
@@ -1068,24 +1055,32 @@
}
}
// end inline...
- if offset >= len(out) {
+ if offset >= endsAt {
+ d.bufs.Put(buf)
return nil, errors.New("corruption detected: stream overrun 4")
}
// Read value and increment offset.
- v := single[br.peekByteFast()>>shift].entry
+ v := single[br.peekByteFast()].entry
nBits := uint8(v)
br.advance(nBits)
- bitsLeft -= int(nBits)
+ bitsLeft -= uint(nBits)
out[offset] = uint8(v >> 8)
offset++
}
+ if offset != endsAt {
+ d.bufs.Put(buf)
+ return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
+ }
+
decoded += offset - dstEvery*i
err = br.close()
if err != nil {
+ d.bufs.Put(buf)
return nil, err
}
}
+ d.bufs.Put(buf)
if dstSize != decoded {
return nil, errors.New("corruption detected: short output block")
}