blob: 87dd72bd0db3e1bba49359689c43cf7548113364 [file] [log] [blame]
package lz4
import (
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"github.com/pierrec/lz4/internal/xxh32"
)
// Reader implements the LZ4 frame decoder.
// The Header is set after the first call to Read().
// The Header may change between Read() calls in case of concatenated frames.
type Reader struct {
Header
// Handler called when a block has been successfully read.
// It provides the number of bytes read.
OnBlockDone func(size int)
buf [8]byte // Scrap buffer.
pos int64 // Current position in src.
src io.Reader // Source.
zdata []byte // Compressed data.
data []byte // Uncompressed data.
idx int // Index of unread bytes into data.
checksum xxh32.XXHZero // Frame hash.
skip int64 // Bytes to skip before next read.
dpos int64 // Position in dest
}
// NewReader returns a new LZ4 frame decoder.
// No access to the underlying io.Reader is performed.
func NewReader(src io.Reader) *Reader {
r := &Reader{src: src}
return r
}
// readHeader checks the frame magic number and parses the frame descriptoz.
// Skippable frames are supported even as a first frame although the LZ4
// specifications recommends skippable frames not to be used as first frames.
func (z *Reader) readHeader(first bool) error {
defer z.checksum.Reset()
buf := z.buf[:]
for {
magic, err := z.readUint32()
if err != nil {
z.pos += 4
if !first && err == io.ErrUnexpectedEOF {
return io.EOF
}
return err
}
if magic == frameMagic {
break
}
if magic>>8 != frameSkipMagic>>8 {
return ErrInvalid
}
skipSize, err := z.readUint32()
if err != nil {
return err
}
z.pos += 4
m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
if err != nil {
return err
}
z.pos += m
}
// Header.
if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
return err
}
z.pos += 8
b := buf[0]
if v := b >> 6; v != Version {
return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
}
if b>>5&1 == 0 {
return ErrBlockDependency
}
z.BlockChecksum = b>>4&1 > 0
frameSize := b>>3&1 > 0
z.NoChecksum = b>>2&1 == 0
bmsID := buf[1] >> 4 & 0x7
if bmsID < 4 || bmsID > 7 {
return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
}
bSize := blockSizeIndexToValue(bmsID - 4)
z.BlockMaxSize = bSize
// Allocate the compressed/uncompressed buffers.
// The compressed buffer cannot exceed the uncompressed one.
if n := 2 * bSize; cap(z.zdata) < n {
z.zdata = make([]byte, n, n)
}
if debugFlag {
debug("header block max size id=%d size=%d", bmsID, bSize)
}
z.zdata = z.zdata[:bSize]
z.data = z.zdata[:cap(z.zdata)][bSize:]
z.idx = len(z.data)
_, _ = z.checksum.Write(buf[0:2])
if frameSize {
buf := buf[:8]
if _, err := io.ReadFull(z.src, buf); err != nil {
return err
}
z.Size = binary.LittleEndian.Uint64(buf)
z.pos += 8
_, _ = z.checksum.Write(buf)
}
// Header checksum.
if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
return err
}
z.pos++
if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
}
z.Header.done = true
if debugFlag {
debug("header read: %v", z.Header)
}
return nil
}
// Read decompresses data from the underlying source into the supplied buffer.
//
// Since there can be multiple streams concatenated, Header values may
// change between calls to Read(). If that is the case, no data is actually read from
// the underlying io.Reader, to allow for potential input buffer resizing.
func (z *Reader) Read(buf []byte) (int, error) {
if debugFlag {
debug("Read buf len=%d", len(buf))
}
if !z.Header.done {
if err := z.readHeader(true); err != nil {
return 0, err
}
if debugFlag {
debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
}
}
if len(buf) == 0 {
return 0, nil
}
if z.idx == len(z.data) {
// No data ready for reading, process the next block.
if debugFlag {
debug("reading block from writer")
}
// Reset uncompressed buffer
z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
// Block length: 0 = end of frame, highest bit set: uncompressed.
bLen, err := z.readUint32()
if err != nil {
return 0, err
}
z.pos += 4
if bLen == 0 {
// End of frame reached.
if !z.NoChecksum {
// Validate the frame checksum.
checksum, err := z.readUint32()
if err != nil {
return 0, err
}
if debugFlag {
debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
}
z.pos += 4
if h := z.checksum.Sum32(); checksum != h {
return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
}
}
// Get ready for the next concatenated frame and keep the position.
pos := z.pos
z.Reset(z.src)
z.pos = pos
// Since multiple frames can be concatenated, check for more.
return 0, z.readHeader(false)
}
if debugFlag {
debug("raw block size %d", bLen)
}
if bLen&compressedBlockFlag > 0 {
// Uncompressed block.
bLen &= compressedBlockMask
if debugFlag {
debug("uncompressed block size %d", bLen)
}
if int(bLen) > cap(z.data) {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
z.data = z.data[:bLen]
if _, err := io.ReadFull(z.src, z.data); err != nil {
return 0, err
}
z.pos += int64(bLen)
if z.OnBlockDone != nil {
z.OnBlockDone(int(bLen))
}
if z.BlockChecksum {
checksum, err := z.readUint32()
if err != nil {
return 0, err
}
z.pos += 4
if h := xxh32.ChecksumZero(z.data); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
} else {
// Compressed block.
if debugFlag {
debug("compressed block size %d", bLen)
}
if int(bLen) > cap(z.data) {
return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
}
zdata := z.zdata[:bLen]
if _, err := io.ReadFull(z.src, zdata); err != nil {
return 0, err
}
z.pos += int64(bLen)
if z.BlockChecksum {
checksum, err := z.readUint32()
if err != nil {
return 0, err
}
z.pos += 4
if h := xxh32.ChecksumZero(zdata); h != checksum {
return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
}
}
n, err := UncompressBlock(zdata, z.data)
if err != nil {
return 0, err
}
z.data = z.data[:n]
if z.OnBlockDone != nil {
z.OnBlockDone(n)
}
}
if !z.NoChecksum {
_, _ = z.checksum.Write(z.data)
if debugFlag {
debug("current frame checksum %x", z.checksum.Sum32())
}
}
z.idx = 0
}
if z.skip > int64(len(z.data[z.idx:])) {
z.skip -= int64(len(z.data[z.idx:]))
z.dpos += int64(len(z.data[z.idx:]))
z.idx = len(z.data)
return 0, nil
}
z.idx += int(z.skip)
z.dpos += z.skip
z.skip = 0
n := copy(buf, z.data[z.idx:])
z.idx += n
z.dpos += int64(n)
if debugFlag {
debug("copied %d bytes to input", n)
}
return n, nil
}
// Seek implements io.Seeker, but supports seeking forward from the current
// position only. Any other seek will return an error. Allows skipping output
// bytes which aren't needed, which in some scenarios is faster than reading
// and discarding them.
// Note this may cause future calls to Read() to read 0 bytes if all of the
// data they would have returned is skipped.
func (z *Reader) Seek(offset int64, whence int) (int64, error) {
if offset < 0 || whence != io.SeekCurrent {
return z.dpos + z.skip, ErrUnsupportedSeek
}
z.skip += offset
return z.dpos + z.skip, nil
}
// Reset discards the Reader's state and makes it equivalent to the
// result of its original state from NewReader, but reading from r instead.
// This permits reusing a Reader rather than allocating a new one.
func (z *Reader) Reset(r io.Reader) {
z.Header = Header{}
z.pos = 0
z.src = r
z.zdata = z.zdata[:0]
z.data = z.data[:0]
z.idx = 0
z.checksum.Reset()
}
// readUint32 reads an uint32 into the supplied buffer.
// The idea is to make use of the already allocated buffers avoiding additional allocations.
func (z *Reader) readUint32() (uint32, error) {
buf := z.buf[:4]
_, err := io.ReadFull(z.src, buf)
x := binary.LittleEndian.Uint32(buf)
return x, err
}