blob: 23c6e26c6b9b345d10a67713177010bc359ee64a [file] [log] [blame]
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5package snappy
6
7import (
8 "encoding/binary"
9 "errors"
10 "io"
11)
12
// Errors that the decoder exposes to callers.
var (
	// ErrCorrupt is returned when the input is not valid Snappy data.
	ErrCorrupt = errors.New("snappy: corrupt input")

	// ErrTooLarge is returned when the uncompressed length declared by the
	// input is too large.
	ErrTooLarge = errors.New("snappy: decoded block is too large")

	// ErrUnsupported is returned when the input isn't supported.
	ErrUnsupported = errors.New("snappy: unsupported input")
)

// errUnsupportedLiteralLength is what Decode returns when the low-level
// decoder reports decodeErrCodeUnsupportedLiteralLength.
var errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
23
24// DecodedLen returns the length of the decoded block.
25func DecodedLen(src []byte) (int, error) {
26 v, _, err := decodedLen(src)
27 return v, err
28}
29
30// decodedLen returns the length of the decoded block and the number of bytes
31// that the length header occupied.
32func decodedLen(src []byte) (blockLen, headerLen int, err error) {
33 v, n := binary.Uvarint(src)
34 if n <= 0 || v > 0xffffffff {
35 return 0, 0, ErrCorrupt
36 }
37
38 const wordSize = 32 << (^uint(0) >> 32 & 1)
39 if wordSize == 32 && v > 0x7fffffff {
40 return 0, 0, ErrTooLarge
41 }
42 return int(v), n, nil
43}
44
// Non-zero status codes that Decode compares against the result of decode.
// A result of 0 (not named here) means success.
const (
	decodeErrCodeCorrupt = iota + 1
	decodeErrCodeUnsupportedLiteralLength
)
49
50// Decode returns the decoded form of src. The returned slice may be a sub-
51// slice of dst if dst was large enough to hold the entire decoded block.
52// Otherwise, a newly allocated slice will be returned.
53//
54// The dst and src must not overlap. It is valid to pass a nil dst.
kesavandc71914f2022-03-25 11:19:03 +053055//
56// Decode handles the Snappy block format, not the Snappy stream format.
kesavand2cde6582020-06-22 04:56:23 -040057func Decode(dst, src []byte) ([]byte, error) {
58 dLen, s, err := decodedLen(src)
59 if err != nil {
60 return nil, err
61 }
62 if dLen <= len(dst) {
63 dst = dst[:dLen]
64 } else {
65 dst = make([]byte, dLen)
66 }
67 switch decode(dst, src[s:]) {
68 case 0:
69 return dst, nil
70 case decodeErrCodeUnsupportedLiteralLength:
71 return nil, errUnsupportedLiteralLength
72 }
73 return nil, ErrCorrupt
74}
75
76// NewReader returns a new Reader that decompresses from r, using the framing
77// format described at
78// https://github.com/google/snappy/blob/master/framing_format.txt
79func NewReader(r io.Reader) *Reader {
80 return &Reader{
81 r: r,
82 decoded: make([]byte, maxBlockSize),
83 buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
84 }
85}
86
// Reader is an io.Reader that can read Snappy-compressed bytes.
//
// Reader handles the Snappy stream format, not the Snappy block format.
type Reader struct {
	// r is the underlying source of compressed bytes.
	r io.Reader
	// err is the first error encountered; once set, Read and ReadByte
	// return it without touching r again.
	err error
	// decoded holds the bytes of the most recently decoded chunk.
	decoded []byte
	// buf is scratch space for chunk headers and chunk bodies.
	buf []byte
	// decoded[i:j] contains decoded bytes that have not yet been passed on.
	i int
	j int
	// readHeader records that the first chunk of the stream has been seen
	// and was a stream identifier.
	readHeader bool
}
99
100// Reset discards any buffered data, resets all state, and switches the Snappy
101// reader to read from r. This permits reusing a Reader rather than allocating
102// a new one.
103func (r *Reader) Reset(reader io.Reader) {
104 r.r = reader
105 r.err = nil
106 r.i = 0
107 r.j = 0
108 r.readHeader = false
109}
110
111func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
112 if _, r.err = io.ReadFull(r.r, p); r.err != nil {
113 if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
114 r.err = ErrCorrupt
115 }
116 return false
117 }
118 return true
119}
120
kesavandc71914f2022-03-25 11:19:03 +0530121func (r *Reader) fill() error {
122 for r.i >= r.j {
kesavand2cde6582020-06-22 04:56:23 -0400123 if !r.readFull(r.buf[:4], true) {
kesavandc71914f2022-03-25 11:19:03 +0530124 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400125 }
126 chunkType := r.buf[0]
127 if !r.readHeader {
128 if chunkType != chunkTypeStreamIdentifier {
129 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530130 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400131 }
132 r.readHeader = true
133 }
134 chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
135 if chunkLen > len(r.buf) {
136 r.err = ErrUnsupported
kesavandc71914f2022-03-25 11:19:03 +0530137 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400138 }
139
140 // The chunk types are specified at
141 // https://github.com/google/snappy/blob/master/framing_format.txt
142 switch chunkType {
143 case chunkTypeCompressedData:
144 // Section 4.2. Compressed data (chunk type 0x00).
145 if chunkLen < checksumSize {
146 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530147 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400148 }
149 buf := r.buf[:chunkLen]
150 if !r.readFull(buf, false) {
kesavandc71914f2022-03-25 11:19:03 +0530151 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400152 }
153 checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
154 buf = buf[checksumSize:]
155
156 n, err := DecodedLen(buf)
157 if err != nil {
158 r.err = err
kesavandc71914f2022-03-25 11:19:03 +0530159 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400160 }
161 if n > len(r.decoded) {
162 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530163 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400164 }
165 if _, err := Decode(r.decoded, buf); err != nil {
166 r.err = err
kesavandc71914f2022-03-25 11:19:03 +0530167 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400168 }
169 if crc(r.decoded[:n]) != checksum {
170 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530171 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400172 }
173 r.i, r.j = 0, n
174 continue
175
176 case chunkTypeUncompressedData:
177 // Section 4.3. Uncompressed data (chunk type 0x01).
178 if chunkLen < checksumSize {
179 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530180 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400181 }
182 buf := r.buf[:checksumSize]
183 if !r.readFull(buf, false) {
kesavandc71914f2022-03-25 11:19:03 +0530184 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400185 }
186 checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
187 // Read directly into r.decoded instead of via r.buf.
188 n := chunkLen - checksumSize
189 if n > len(r.decoded) {
190 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530191 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400192 }
193 if !r.readFull(r.decoded[:n], false) {
kesavandc71914f2022-03-25 11:19:03 +0530194 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400195 }
196 if crc(r.decoded[:n]) != checksum {
197 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530198 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400199 }
200 r.i, r.j = 0, n
201 continue
202
203 case chunkTypeStreamIdentifier:
204 // Section 4.1. Stream identifier (chunk type 0xff).
205 if chunkLen != len(magicBody) {
206 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530207 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400208 }
209 if !r.readFull(r.buf[:len(magicBody)], false) {
kesavandc71914f2022-03-25 11:19:03 +0530210 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400211 }
212 for i := 0; i < len(magicBody); i++ {
213 if r.buf[i] != magicBody[i] {
214 r.err = ErrCorrupt
kesavandc71914f2022-03-25 11:19:03 +0530215 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400216 }
217 }
218 continue
219 }
220
221 if chunkType <= 0x7f {
222 // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
223 r.err = ErrUnsupported
kesavandc71914f2022-03-25 11:19:03 +0530224 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400225 }
226 // Section 4.4 Padding (chunk type 0xfe).
227 // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
228 if !r.readFull(r.buf[:chunkLen], false) {
kesavandc71914f2022-03-25 11:19:03 +0530229 return r.err
kesavand2cde6582020-06-22 04:56:23 -0400230 }
231 }
kesavandc71914f2022-03-25 11:19:03 +0530232
233 return nil
234}
235
236// Read satisfies the io.Reader interface.
237func (r *Reader) Read(p []byte) (int, error) {
238 if r.err != nil {
239 return 0, r.err
240 }
241
242 if err := r.fill(); err != nil {
243 return 0, err
244 }
245
246 n := copy(p, r.decoded[r.i:r.j])
247 r.i += n
248 return n, nil
249}
250
251// ReadByte satisfies the io.ByteReader interface.
252func (r *Reader) ReadByte() (byte, error) {
253 if r.err != nil {
254 return 0, r.err
255 }
256
257 if err := r.fill(); err != nil {
258 return 0, err
259 }
260
261 c := r.decoded[r.i]
262 r.i++
263 return c, nil
kesavand2cde6582020-06-22 04:56:23 -0400264}