blob: 90e8efe2eb0ae313ca3c95ea52833ad3642b1240 [file] [log] [blame]
Scott Bakered4efab2020-01-13 19:12:25 -08001package lz4
2
3import (
4 "encoding/binary"
5 "fmt"
6 "io"
7 "io/ioutil"
8
9 "github.com/pierrec/lz4/internal/xxh32"
10)
11
12// Reader implements the LZ4 frame decoder.
13// The Header is set after the first call to Read().
14// The Header may change between Read() calls in case of concatenated frames.
15type Reader struct {
16 Header
17 // Handler called when a block has been successfully read.
18 // It provides the number of bytes read.
19 OnBlockDone func(size int)
20
21 buf [8]byte // Scrap buffer.
22 pos int64 // Current position in src.
23 src io.Reader // Source.
24 zdata []byte // Compressed data.
25 data []byte // Uncompressed data.
26 idx int // Index of unread bytes into data.
27 checksum xxh32.XXHZero // Frame hash.
28}
29
30// NewReader returns a new LZ4 frame decoder.
31// No access to the underlying io.Reader is performed.
32func NewReader(src io.Reader) *Reader {
33 r := &Reader{src: src}
34 return r
35}
36
37// readHeader checks the frame magic number and parses the frame descriptoz.
38// Skippable frames are supported even as a first frame although the LZ4
39// specifications recommends skippable frames not to be used as first frames.
40func (z *Reader) readHeader(first bool) error {
41 defer z.checksum.Reset()
42
43 buf := z.buf[:]
44 for {
45 magic, err := z.readUint32()
46 if err != nil {
47 z.pos += 4
48 if !first && err == io.ErrUnexpectedEOF {
49 return io.EOF
50 }
51 return err
52 }
53 if magic == frameMagic {
54 break
55 }
56 if magic>>8 != frameSkipMagic>>8 {
57 return ErrInvalid
58 }
59 skipSize, err := z.readUint32()
60 if err != nil {
61 return err
62 }
63 z.pos += 4
64 m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
65 if err != nil {
66 return err
67 }
68 z.pos += m
69 }
70
71 // Header.
72 if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
73 return err
74 }
75 z.pos += 8
76
77 b := buf[0]
78 if v := b >> 6; v != Version {
79 return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
80 }
81 if b>>5&1 == 0 {
82 return ErrBlockDependency
83 }
84 z.BlockChecksum = b>>4&1 > 0
85 frameSize := b>>3&1 > 0
86 z.NoChecksum = b>>2&1 == 0
87
88 bmsID := buf[1] >> 4 & 0x7
89 bSize, ok := bsMapID[bmsID]
90 if !ok {
91 return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
92 }
93 z.BlockMaxSize = bSize
94
95 // Allocate the compressed/uncompressed buffers.
96 // The compressed buffer cannot exceed the uncompressed one.
97 if n := 2 * bSize; cap(z.zdata) < n {
98 z.zdata = make([]byte, n, n)
99 }
100 if debugFlag {
101 debug("header block max size id=%d size=%d", bmsID, bSize)
102 }
103 z.zdata = z.zdata[:bSize]
104 z.data = z.zdata[:cap(z.zdata)][bSize:]
105 z.idx = len(z.data)
106
107 _, _ = z.checksum.Write(buf[0:2])
108
109 if frameSize {
110 buf := buf[:8]
111 if _, err := io.ReadFull(z.src, buf); err != nil {
112 return err
113 }
114 z.Size = binary.LittleEndian.Uint64(buf)
115 z.pos += 8
116 _, _ = z.checksum.Write(buf)
117 }
118
119 // Header checksum.
120 if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
121 return err
122 }
123 z.pos++
124 if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
125 return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
126 }
127
128 z.Header.done = true
129 if debugFlag {
130 debug("header read: %v", z.Header)
131 }
132
133 return nil
134}
135
136// Read decompresses data from the underlying source into the supplied buffer.
137//
138// Since there can be multiple streams concatenated, Header values may
139// change between calls to Read(). If that is the case, no data is actually read from
140// the underlying io.Reader, to allow for potential input buffer resizing.
141func (z *Reader) Read(buf []byte) (int, error) {
142 if debugFlag {
143 debug("Read buf len=%d", len(buf))
144 }
145 if !z.Header.done {
146 if err := z.readHeader(true); err != nil {
147 return 0, err
148 }
149 if debugFlag {
150 debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
151 len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
152 }
153 }
154
155 if len(buf) == 0 {
156 return 0, nil
157 }
158
159 if z.idx == len(z.data) {
160 // No data ready for reading, process the next block.
161 if debugFlag {
162 debug("reading block from writer")
163 }
164 // Reset uncompressed buffer
165 z.data = z.zdata[:cap(z.zdata)][len(z.zdata):]
166
167 // Block length: 0 = end of frame, highest bit set: uncompressed.
168 bLen, err := z.readUint32()
169 if err != nil {
170 return 0, err
171 }
172 z.pos += 4
173
174 if bLen == 0 {
175 // End of frame reached.
176 if !z.NoChecksum {
177 // Validate the frame checksum.
178 checksum, err := z.readUint32()
179 if err != nil {
180 return 0, err
181 }
182 if debugFlag {
183 debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
184 }
185 z.pos += 4
186 if h := z.checksum.Sum32(); checksum != h {
187 return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
188 }
189 }
190
191 // Get ready for the next concatenated frame and keep the position.
192 pos := z.pos
193 z.Reset(z.src)
194 z.pos = pos
195
196 // Since multiple frames can be concatenated, check for more.
197 return 0, z.readHeader(false)
198 }
199
200 if debugFlag {
201 debug("raw block size %d", bLen)
202 }
203 if bLen&compressedBlockFlag > 0 {
204 // Uncompressed block.
205 bLen &= compressedBlockMask
206 if debugFlag {
207 debug("uncompressed block size %d", bLen)
208 }
209 if int(bLen) > cap(z.data) {
210 return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
211 }
212 z.data = z.data[:bLen]
213 if _, err := io.ReadFull(z.src, z.data); err != nil {
214 return 0, err
215 }
216 z.pos += int64(bLen)
217 if z.OnBlockDone != nil {
218 z.OnBlockDone(int(bLen))
219 }
220
221 if z.BlockChecksum {
222 checksum, err := z.readUint32()
223 if err != nil {
224 return 0, err
225 }
226 z.pos += 4
227
228 if h := xxh32.ChecksumZero(z.data); h != checksum {
229 return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
230 }
231 }
232
233 } else {
234 // Compressed block.
235 if debugFlag {
236 debug("compressed block size %d", bLen)
237 }
238 if int(bLen) > cap(z.data) {
239 return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
240 }
241 zdata := z.zdata[:bLen]
242 if _, err := io.ReadFull(z.src, zdata); err != nil {
243 return 0, err
244 }
245 z.pos += int64(bLen)
246
247 if z.BlockChecksum {
248 checksum, err := z.readUint32()
249 if err != nil {
250 return 0, err
251 }
252 z.pos += 4
253
254 if h := xxh32.ChecksumZero(zdata); h != checksum {
255 return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
256 }
257 }
258
259 n, err := UncompressBlock(zdata, z.data)
260 if err != nil {
261 return 0, err
262 }
263 z.data = z.data[:n]
264 if z.OnBlockDone != nil {
265 z.OnBlockDone(n)
266 }
267 }
268
269 if !z.NoChecksum {
270 _, _ = z.checksum.Write(z.data)
271 if debugFlag {
272 debug("current frame checksum %x", z.checksum.Sum32())
273 }
274 }
275 z.idx = 0
276 }
277
278 n := copy(buf, z.data[z.idx:])
279 z.idx += n
280 if debugFlag {
281 debug("copied %d bytes to input", n)
282 }
283
284 return n, nil
285}
286
287// Reset discards the Reader's state and makes it equivalent to the
288// result of its original state from NewReader, but reading from r instead.
289// This permits reusing a Reader rather than allocating a new one.
290func (z *Reader) Reset(r io.Reader) {
291 z.Header = Header{}
292 z.pos = 0
293 z.src = r
294 z.zdata = z.zdata[:0]
295 z.data = z.data[:0]
296 z.idx = 0
297 z.checksum.Reset()
298}
299
300// readUint32 reads an uint32 into the supplied buffer.
301// The idea is to make use of the already allocated buffers avoiding additional allocations.
302func (z *Reader) readUint32() (uint32, error) {
303 buf := z.buf[:4]
304 _, err := io.ReadFull(z.src, buf)
305 x := binary.LittleEndian.Uint32(buf)
306 return x, err
307}