blob: f08db47df7bac06e5be312fe8d2c340c31882328 [file] [log] [blame]
Scott Baker2d897982019-09-24 11:50:08 -07001package lz4
2
3import (
4 "encoding/binary"
5 "fmt"
6 "io"
7 "io/ioutil"
8
9 "github.com/pierrec/lz4/internal/xxh32"
10)
11
12// Reader implements the LZ4 frame decoder.
13// The Header is set after the first call to Read().
14// The Header may change between Read() calls in case of concatenated frames.
15type Reader struct {
16 Header
17
18 buf [8]byte // Scrap buffer.
19 pos int64 // Current position in src.
20 src io.Reader // Source.
21 zdata []byte // Compressed data.
22 data []byte // Uncompressed data.
23 idx int // Index of unread bytes into data.
24 checksum xxh32.XXHZero // Frame hash.
25}
26
27// NewReader returns a new LZ4 frame decoder.
28// No access to the underlying io.Reader is performed.
29func NewReader(src io.Reader) *Reader {
30 r := &Reader{src: src}
31 return r
32}
33
34// readHeader checks the frame magic number and parses the frame descriptoz.
35// Skippable frames are supported even as a first frame although the LZ4
36// specifications recommends skippable frames not to be used as first frames.
37func (z *Reader) readHeader(first bool) error {
38 defer z.checksum.Reset()
39
40 buf := z.buf[:]
41 for {
42 magic, err := z.readUint32()
43 if err != nil {
44 z.pos += 4
45 if !first && err == io.ErrUnexpectedEOF {
46 return io.EOF
47 }
48 return err
49 }
50 if magic == frameMagic {
51 break
52 }
53 if magic>>8 != frameSkipMagic>>8 {
54 return ErrInvalid
55 }
56 skipSize, err := z.readUint32()
57 if err != nil {
58 return err
59 }
60 z.pos += 4
61 m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
62 if err != nil {
63 return err
64 }
65 z.pos += m
66 }
67
68 // Header.
69 if _, err := io.ReadFull(z.src, buf[:2]); err != nil {
70 return err
71 }
72 z.pos += 8
73
74 b := buf[0]
75 if v := b >> 6; v != Version {
76 return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version)
77 }
78 if b>>5&1 == 0 {
79 return fmt.Errorf("lz4: block dependency not supported")
80 }
81 z.BlockChecksum = b>>4&1 > 0
82 frameSize := b>>3&1 > 0
83 z.NoChecksum = b>>2&1 == 0
84
85 bmsID := buf[1] >> 4 & 0x7
86 bSize, ok := bsMapID[bmsID]
87 if !ok {
88 return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID)
89 }
90 z.BlockMaxSize = bSize
91
92 // Allocate the compressed/uncompressed buffers.
93 // The compressed buffer cannot exceed the uncompressed one.
94 if n := 2 * bSize; cap(z.zdata) < n {
95 z.zdata = make([]byte, n, n)
96 }
97 if debugFlag {
98 debug("header block max size id=%d size=%d", bmsID, bSize)
99 }
100 z.zdata = z.zdata[:bSize]
101 z.data = z.zdata[:cap(z.zdata)][bSize:]
102 z.idx = len(z.data)
103
104 z.checksum.Write(buf[0:2])
105
106 if frameSize {
107 buf := buf[:8]
108 if _, err := io.ReadFull(z.src, buf); err != nil {
109 return err
110 }
111 z.Size = binary.LittleEndian.Uint64(buf)
112 z.pos += 8
113 z.checksum.Write(buf)
114 }
115
116 // Header checksum.
117 if _, err := io.ReadFull(z.src, buf[:1]); err != nil {
118 return err
119 }
120 z.pos++
121 if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] {
122 return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h)
123 }
124
125 z.Header.done = true
126 if debugFlag {
127 debug("header read: %v", z.Header)
128 }
129
130 return nil
131}
132
133// Read decompresses data from the underlying source into the supplied buffer.
134//
135// Since there can be multiple streams concatenated, Header values may
136// change between calls to Read(). If that is the case, no data is actually read from
137// the underlying io.Reader, to allow for potential input buffer resizing.
138func (z *Reader) Read(buf []byte) (int, error) {
139 if debugFlag {
140 debug("Read buf len=%d", len(buf))
141 }
142 if !z.Header.done {
143 if err := z.readHeader(true); err != nil {
144 return 0, err
145 }
146 if debugFlag {
147 debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d",
148 len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx)
149 }
150 }
151
152 if len(buf) == 0 {
153 return 0, nil
154 }
155
156 if z.idx == len(z.data) {
157 // No data ready for reading, process the next block.
158 if debugFlag {
159 debug("reading block from writer")
160 }
161 // Block length: 0 = end of frame, highest bit set: uncompressed.
162 bLen, err := z.readUint32()
163 if err != nil {
164 return 0, err
165 }
166 z.pos += 4
167
168 if bLen == 0 {
169 // End of frame reached.
170 if !z.NoChecksum {
171 // Validate the frame checksum.
172 checksum, err := z.readUint32()
173 if err != nil {
174 return 0, err
175 }
176 if debugFlag {
177 debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum)
178 }
179 z.pos += 4
180 if h := z.checksum.Sum32(); checksum != h {
181 return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum)
182 }
183 }
184
185 // Get ready for the next concatenated frame and keep the position.
186 pos := z.pos
187 z.Reset(z.src)
188 z.pos = pos
189
190 // Since multiple frames can be concatenated, check for more.
191 return 0, z.readHeader(false)
192 }
193
194 if debugFlag {
195 debug("raw block size %d", bLen)
196 }
197 if bLen&compressedBlockFlag > 0 {
198 // Uncompressed block.
199 bLen &= compressedBlockMask
200 if debugFlag {
201 debug("uncompressed block size %d", bLen)
202 }
203 if int(bLen) > cap(z.data) {
204 return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
205 }
206 z.data = z.data[:bLen]
207 if _, err := io.ReadFull(z.src, z.data); err != nil {
208 return 0, err
209 }
210 z.pos += int64(bLen)
211
212 if z.BlockChecksum {
213 checksum, err := z.readUint32()
214 if err != nil {
215 return 0, err
216 }
217 z.pos += 4
218
219 if h := xxh32.ChecksumZero(z.data); h != checksum {
220 return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
221 }
222 }
223
224 } else {
225 // Compressed block.
226 if debugFlag {
227 debug("compressed block size %d", bLen)
228 }
229 if int(bLen) > cap(z.data) {
230 return 0, fmt.Errorf("lz4: invalid block size: %d", bLen)
231 }
232 zdata := z.zdata[:bLen]
233 if _, err := io.ReadFull(z.src, zdata); err != nil {
234 return 0, err
235 }
236 z.pos += int64(bLen)
237
238 if z.BlockChecksum {
239 checksum, err := z.readUint32()
240 if err != nil {
241 return 0, err
242 }
243 z.pos += 4
244
245 if h := xxh32.ChecksumZero(zdata); h != checksum {
246 return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum)
247 }
248 }
249
250 n, err := UncompressBlock(zdata, z.data)
251 if err != nil {
252 return 0, err
253 }
254 z.data = z.data[:n]
255 }
256
257 if !z.NoChecksum {
258 z.checksum.Write(z.data)
259 if debugFlag {
260 debug("current frame checksum %x", z.checksum.Sum32())
261 }
262 }
263 z.idx = 0
264 }
265
266 n := copy(buf, z.data[z.idx:])
267 z.idx += n
268 if debugFlag {
269 debug("copied %d bytes to input", n)
270 }
271
272 return n, nil
273}
274
275// Reset discards the Reader's state and makes it equivalent to the
276// result of its original state from NewReader, but reading from r instead.
277// This permits reusing a Reader rather than allocating a new one.
278func (z *Reader) Reset(r io.Reader) {
279 z.Header = Header{}
280 z.pos = 0
281 z.src = r
282 z.zdata = z.zdata[:0]
283 z.data = z.data[:0]
284 z.idx = 0
285 z.checksum.Reset()
286}
287
288// readUint32 reads an uint32 into the supplied buffer.
289// The idea is to make use of the already allocated buffers avoiding additional allocations.
290func (z *Reader) readUint32() (uint32, error) {
291 buf := z.buf[:4]
292 _, err := io.ReadFull(z.src, buf)
293 x := binary.LittleEndian.Uint32(buf)
294 return x, err
295}