blob: 126b792e71275c60d3fffc4f9cbbf17b95b48de2 [file] [log] [blame]
Scott Bakere7144bc2019-10-01 14:16:47 -07001package lz4
2
3import (
4 "encoding/binary"
5 "fmt"
6 "io"
7 "io/ioutil"
8
9 "github.com/pierrec/lz4/internal/xxh32"
10)
11
12// Reader implements the LZ4 frame decoder.
13// The Header is set after the first call to Read().
14// The Header may change between Read() calls in case of concatenated frames.
15type Reader struct {
16 Header
Scott Bakerf579f132019-10-24 14:31:41 -070017 // Handler called when a block has been successfully read.
18 // It provides the number of bytes read.
19 OnBlockDone func(size int)
Scott Bakere7144bc2019-10-01 14:16:47 -070020
21 buf [8]byte // Scrap buffer.
22 pos int64 // Current position in src.
23 src io.Reader // Source.
24 zdata []byte // Compressed data.
25 data []byte // Uncompressed data.
26 idx int // Index of unread bytes into data.
27 checksum xxh32.XXHZero // Frame hash.
Scott Bakerf579f132019-10-24 14:31:41 -070028 skip int64 // Bytes to skip before next read.
29 dpos int64 // Position in dest
Scott Bakere7144bc2019-10-01 14:16:47 -070030}
31
32// NewReader returns a new LZ4 frame decoder.
33// No access to the underlying io.Reader is performed.
34func NewReader(src io.Reader) *Reader {
35 r := &Reader{src: src}
36 return r
37}
38
39// readHeader checks the frame magic number and parses the frame descriptoz.
40// Skippable frames are supported even as a first frame although the LZ4
41// specifications recommends skippable frames not to be used as first frames.
42func (z *Reader) readHeader(first bool) error {
43 defer z.checksum.Reset()
44
45 buf := z.buf[:]
46 for {
47 magic, err := z.readUint32()
48 if err != nil {
49 z.pos += 4
50 if !first && err == io.ErrUnexpectedEOF {
51 return io.EOF
52 }
53 return err
54 }
55 if magic == frameMagic {
56 break
57 }
58 if magic>>8 != frameSkipMagic>>8 {
59 return ErrInvalid
60 }
61 skipSize, err := z.readUint32()
62 if err != nil {
63 return err
64 }
65 z.pos += 4
66 m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
67 if err != nil {
68 return err
69 }
70 z.pos += m
71 }
72
73 // Header.
74 if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
75 return err
76 }
77 z.pos += 8
78
79 b := buf[0]
80 if v := b >> 6; v != Version {
81 return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
82 }
83 if b>>5&1 == 0 {
Scott Bakerf579f132019-10-24 14:31:41 -070084 return ErrBlockDependency
Scott Bakere7144bc2019-10-01 14:16:47 -070085 }
86 z.BlockChecksum = b>>4&1 > 0
87 frameSize := b>>3&1 > 0
88 z.NoChecksum = b>>2&1 == 0
89
90 bmsID := buf[1] >> 4 & 0x7
91 bSize, ok := bsMapID[bmsID]
92 if !ok {
93 return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
94 }
95 z.BlockMaxSize = bSize
96
97 // Allocate the compressed/uncompressed buffers.
98 // The compressed buffer cannot exceed the uncompressed one.
99 if n := 2 * bSize; cap(z.zdata) < n {
100 z.zdata = make([]byte, n, n)
101 }
102 if debugFlag {
103 debug("header block max size id=%d size=%d", bmsID, bSize)
104 }
105 z.zdata = z.zdata[:bSize]
106 z.data = z.zdata[:cap(z.zdata)][bSize:]
107 z.idx = len(z.data)
108
Scott Bakerf579f132019-10-24 14:31:41 -0700109 _, _ = z.checksum.Write(buf[0:2])
Scott Bakere7144bc2019-10-01 14:16:47 -0700110
111 if frameSize {
112 buf := buf[:8]
113 if _, err := io.ReadFull(z.src, buf); err != nil {
114 return err
115 }
116 z.Size = binary.LittleEndian.Uint64(buf)
117 z.pos += 8
Scott Bakerf579f132019-10-24 14:31:41 -0700118 _, _ = z.checksum.Write(buf)
Scott Bakere7144bc2019-10-01 14:16:47 -0700119 }
120
121 // Header checksum.
122 if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
123 return err
124 }
125 z.pos++
126 if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
127 return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
128 }
129
130 z.Header.done = true
131 if debugFlag {
132 debug("header read: %v", z.Header)
133 }
134
135 return nil
136}
137
138// Read decompresses data from the underlying source into the supplied buffer.
139//
140// Since there can be multiple streams concatenated, Header values may
141// change between calls to Read(). If that is the case, no data is actually read from
142// the underlying io.Reader, to allow for potential input buffer resizing.
143func (z *Reader) Read(buf []byte) (int, error) {
144 if debugFlag {
145 debug("Read buf len=%d", len(buf))
146 }
147 if !z.Header.done {
148 if err := z.readHeader(true); err != nil {
149 return 0, err
150 }
151 if debugFlag {
152 debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
153 len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
154 }
155 }
156
157 if len(buf) == 0 {
158 return 0, nil
159 }
160
161 if z.idx == len(z.data) {
162 // No data ready for reading, process the next block.
163 if debugFlag {
164 debug("reading block from writer")
165 }
Scott Bakerf579f132019-10-24 14:31:41 -0700166 // Reset uncompressed buffer
167 z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
168
Scott Bakere7144bc2019-10-01 14:16:47 -0700169 // Block length: 0 = end of frame, highest bit set: uncompressed.
170 bLen, err := z.readUint32()
171 if err != nil {
172 return 0, err
173 }
174 z.pos += 4
175
176 if bLen == 0 {
177 // End of frame reached.
178 if !z.NoChecksum {
179 // Validate the frame checksum.
180 checksum, err := z.readUint32()
181 if err != nil {
182 return 0, err
183 }
184 if debugFlag {
185 debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
186 }
187 z.pos += 4
188 if h := z.checksum.Sum32(); checksum != h {
189 return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
190 }
191 }
192
193 // Get ready for the next concatenated frame and keep the position.
194 pos := z.pos
195 z.Reset(z.src)
196 z.pos = pos
197
198 // Since multiple frames can be concatenated, check for more.
199 return 0, z.readHeader(false)
200 }
201
202 if debugFlag {
203 debug("raw block size %d", bLen)
204 }
205 if bLen&compressedBlockFlag > 0 {
206 // Uncompressed block.
207 bLen &= compressedBlockMask
208 if debugFlag {
209 debug("uncompressed block size %d", bLen)
210 }
211 if int(bLen) > cap(z.data) {
212 return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
213 }
214 z.data = z.data[:bLen]
215 if _, err := io.ReadFull(z.src, z.data); err != nil {
216 return 0, err
217 }
218 z.pos += int64(bLen)
Scott Bakerf579f132019-10-24 14:31:41 -0700219 if z.OnBlockDone != nil {
220 z.OnBlockDone(int(bLen))
221 }
Scott Bakere7144bc2019-10-01 14:16:47 -0700222
223 if z.BlockChecksum {
224 checksum, err := z.readUint32()
225 if err != nil {
226 return 0, err
227 }
228 z.pos += 4
229
230 if h := xxh32.ChecksumZero(z.data); h != checksum {
231 return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
232 }
233 }
234
235 } else {
236 // Compressed block.
237 if debugFlag {
238 debug("compressed block size %d", bLen)
239 }
240 if int(bLen) > cap(z.data) {
241 return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
242 }
243 zdata := z.zdata[:bLen]
244 if _, err := io.ReadFull(z.src, zdata); err != nil {
245 return 0, err
246 }
247 z.pos += int64(bLen)
248
249 if z.BlockChecksum {
250 checksum, err := z.readUint32()
251 if err != nil {
252 return 0, err
253 }
254 z.pos += 4
255
256 if h := xxh32.ChecksumZero(zdata); h != checksum {
257 return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
258 }
259 }
260
261 n, err := UncompressBlock(zdata, z.data)
262 if err != nil {
263 return 0, err
264 }
265 z.data = z.data[:n]
Scott Bakerf579f132019-10-24 14:31:41 -0700266 if z.OnBlockDone != nil {
267 z.OnBlockDone(n)
268 }
Scott Bakere7144bc2019-10-01 14:16:47 -0700269 }
270
271 if !z.NoChecksum {
Scott Bakerf579f132019-10-24 14:31:41 -0700272 _, _ = z.checksum.Write(z.data)
Scott Bakere7144bc2019-10-01 14:16:47 -0700273 if debugFlag {
274 debug("current frame checksum %x", z.checksum.Sum32())
275 }
276 }
277 z.idx = 0
278 }
279
Scott Bakerf579f132019-10-24 14:31:41 -0700280 if z.skip > int64(len(z.data[z.idx:])) {
281 z.skip -= int64(len(z.data[z.idx:]))
282 z.dpos += int64(len(z.data[z.idx:]))
283 z.idx = len(z.data)
284 return 0, nil
285 }
286
287 z.idx += int(z.skip)
288 z.dpos += z.skip
289 z.skip = 0
290
Scott Bakere7144bc2019-10-01 14:16:47 -0700291 n := copy(buf, z.data[z.idx:])
292 z.idx += n
Scott Bakerf579f132019-10-24 14:31:41 -0700293 z.dpos += int64(n)
Scott Bakere7144bc2019-10-01 14:16:47 -0700294 if debugFlag {
295 debug("copied %d bytes to input", n)
296 }
297
298 return n, nil
299}
300
Scott Bakerf579f132019-10-24 14:31:41 -0700301// Seek implements io.Seeker, but supports seeking forward from the current
302// position only. Any other seek will return an error. Allows skipping output
303// bytes which aren't needed, which in some scenarios is faster than reading
304// and discarding them.
305// Note this may cause future calls to Read() to read 0 bytes if all of the
306// data they would have returned is skipped.
307func (z *Reader) Seek(offset int64, whence int) (int64, error) {
308 if offset < 0 || whence != io.SeekCurrent {
309 return z.dpos + z.skip, ErrUnsupportedSeek
310 }
311 z.skip += offset
312 return z.dpos + z.skip, nil
313}
314
Scott Bakere7144bc2019-10-01 14:16:47 -0700315// Reset discards the Reader's state and makes it equivalent to the
316// result of its original state from NewReader, but reading from r instead.
317// This permits reusing a Reader rather than allocating a new one.
318func (z *Reader) Reset(r io.Reader) {
319 z.Header = Header{}
320 z.pos = 0
321 z.src = r
322 z.zdata = z.zdata[:0]
323 z.data = z.data[:0]
324 z.idx = 0
325 z.checksum.Reset()
326}
327
328// readUint32 reads an uint32 into the supplied buffer.
329// The idea is to make use of the already allocated buffers avoiding additional allocations.
330func (z *Reader) readUint32() (uint32, error) {
331 buf := z.buf[:4]
332 _, err := io.ReadFull(z.src, buf)
333 x := binary.LittleEndian.Uint32(buf)
334 return x, err
335}