Scott Baker | e7144bc | 2019-10-01 14:16:47 -0700 | [diff] [blame] | 1 | package lz4 |
| 2 | |
| 3 | import ( |
| 4 | "encoding/binary" |
| 5 | "fmt" |
| 6 | "io" |
| 7 | |
| 8 | "github.com/pierrec/lz4/internal/xxh32" |
| 9 | ) |
| 10 | |
| 11 | // Writer implements the LZ4 frame encoder. |
| 12 | type Writer struct { |
| 13 | Header |
| 14 | |
| 15 | buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes |
| 16 | dst io.Writer // Destination. |
| 17 | checksum xxh32.XXHZero // Frame checksum. |
| 18 | zdata []byte // Compressed data. |
| 19 | data []byte // Data to be compressed. |
| 20 | idx int // Index into data. |
| 21 | hashtable [winSize]int // Hash table used in CompressBlock(). |
| 22 | } |
| 23 | |
| 24 | // NewWriter returns a new LZ4 frame encoder. |
| 25 | // No access to the underlying io.Writer is performed. |
| 26 | // The supplied Header is checked at the first Write. |
| 27 | // It is ok to change it before the first Write but then not until a Reset() is performed. |
| 28 | func NewWriter(dst io.Writer) *Writer { |
| 29 | return &Writer{dst: dst} |
| 30 | } |
| 31 | |
| 32 | // writeHeader builds and writes the header (magic+header) to the underlying io.Writer. |
| 33 | func (z *Writer) writeHeader() error { |
| 34 | // Default to 4Mb if BlockMaxSize is not set. |
| 35 | if z.Header.BlockMaxSize == 0 { |
| 36 | z.Header.BlockMaxSize = bsMapID[7] |
| 37 | } |
| 38 | // The only option that needs to be validated. |
| 39 | bSize := z.Header.BlockMaxSize |
| 40 | bSizeID, ok := bsMapValue[bSize] |
| 41 | if !ok { |
| 42 | return fmt.Errorf("lz4: invalid block max size: %d", bSize) |
| 43 | } |
| 44 | // Allocate the compressed/uncompressed buffers. |
| 45 | // The compressed buffer cannot exceed the uncompressed one. |
| 46 | if n := 2 * bSize; cap(z.zdata) < n { |
| 47 | z.zdata = make([]byte, n, n) |
| 48 | } |
| 49 | z.zdata = z.zdata[:bSize] |
| 50 | z.data = z.zdata[:cap(z.zdata)][bSize:] |
| 51 | z.idx = 0 |
| 52 | |
| 53 | // Size is optional. |
| 54 | buf := z.buf[:] |
| 55 | |
| 56 | // Set the fixed size data: magic number, block max size and flags. |
| 57 | binary.LittleEndian.PutUint32(buf[0:], frameMagic) |
| 58 | flg := byte(Version << 6) |
| 59 | flg |= 1 << 5 // No block dependency. |
| 60 | if z.Header.BlockChecksum { |
| 61 | flg |= 1 << 4 |
| 62 | } |
| 63 | if z.Header.Size > 0 { |
| 64 | flg |= 1 << 3 |
| 65 | } |
| 66 | if !z.Header.NoChecksum { |
| 67 | flg |= 1 << 2 |
| 68 | } |
| 69 | buf[4] = flg |
| 70 | buf[5] = bSizeID << 4 |
| 71 | |
| 72 | // Current buffer size: magic(4) + flags(1) + block max size (1). |
| 73 | n := 6 |
| 74 | // Optional items. |
| 75 | if z.Header.Size > 0 { |
| 76 | binary.LittleEndian.PutUint64(buf[n:], z.Header.Size) |
| 77 | n += 8 |
| 78 | } |
| 79 | |
| 80 | // The header checksum includes the flags, block max size and optional Size. |
| 81 | buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF) |
| 82 | z.checksum.Reset() |
| 83 | |
| 84 | // Header ready, write it out. |
| 85 | if _, err := z.dst.Write(buf[0 : n+1]); err != nil { |
| 86 | return err |
| 87 | } |
| 88 | z.Header.done = true |
| 89 | if debugFlag { |
| 90 | debug("wrote header %v", z.Header) |
| 91 | } |
| 92 | |
| 93 | return nil |
| 94 | } |
| 95 | |
| 96 | // Write compresses data from the supplied buffer into the underlying io.Writer. |
| 97 | // Write does not return until the data has been written. |
| 98 | func (z *Writer) Write(buf []byte) (int, error) { |
| 99 | if !z.Header.done { |
| 100 | if err := z.writeHeader(); err != nil { |
| 101 | return 0, err |
| 102 | } |
| 103 | } |
| 104 | if debugFlag { |
| 105 | debug("input buffer len=%d index=%d", len(buf), z.idx) |
| 106 | } |
| 107 | |
| 108 | zn := len(z.data) |
| 109 | var n int |
| 110 | for len(buf) > 0 { |
| 111 | if z.idx == 0 && len(buf) >= zn { |
| 112 | // Avoid a copy as there is enough data for a block. |
| 113 | if err := z.compressBlock(buf[:zn]); err != nil { |
| 114 | return n, err |
| 115 | } |
| 116 | n += zn |
| 117 | buf = buf[zn:] |
| 118 | continue |
| 119 | } |
| 120 | // Accumulate the data to be compressed. |
| 121 | m := copy(z.data[z.idx:], buf) |
| 122 | n += m |
| 123 | z.idx += m |
| 124 | buf = buf[m:] |
| 125 | if debugFlag { |
| 126 | debug("%d bytes copied to buf, current index %d", n, z.idx) |
| 127 | } |
| 128 | |
| 129 | if z.idx < len(z.data) { |
| 130 | // Buffer not filled. |
| 131 | if debugFlag { |
| 132 | debug("need more data for compression") |
| 133 | } |
| 134 | return n, nil |
| 135 | } |
| 136 | |
| 137 | // Buffer full. |
| 138 | if err := z.compressBlock(z.data); err != nil { |
| 139 | return n, err |
| 140 | } |
| 141 | z.idx = 0 |
| 142 | } |
| 143 | |
| 144 | return n, nil |
| 145 | } |
| 146 | |
| 147 | // compressBlock compresses a block. |
| 148 | func (z *Writer) compressBlock(data []byte) error { |
| 149 | if !z.NoChecksum { |
| 150 | z.checksum.Write(data) |
| 151 | } |
| 152 | |
| 153 | // The compressed block size cannot exceed the input's. |
| 154 | var zn int |
| 155 | var err error |
| 156 | |
| 157 | if level := z.Header.CompressionLevel; level != 0 { |
| 158 | zn, err = CompressBlockHC(data, z.zdata, level) |
| 159 | } else { |
| 160 | zn, err = CompressBlock(data, z.zdata, z.hashtable[:]) |
| 161 | } |
| 162 | |
| 163 | var zdata []byte |
| 164 | var bLen uint32 |
| 165 | if debugFlag { |
| 166 | debug("block compression %d => %d", len(data), zn) |
| 167 | } |
| 168 | if err == nil && zn > 0 && zn < len(data) { |
| 169 | // Compressible and compressed size smaller than uncompressed: ok! |
| 170 | bLen = uint32(zn) |
| 171 | zdata = z.zdata[:zn] |
| 172 | } else { |
| 173 | // Uncompressed block. |
| 174 | bLen = uint32(len(data)) | compressedBlockFlag |
| 175 | zdata = data |
| 176 | } |
| 177 | if debugFlag { |
| 178 | debug("block compression to be written len=%d data len=%d", bLen, len(zdata)) |
| 179 | } |
| 180 | |
| 181 | // Write the block. |
| 182 | if err := z.writeUint32(bLen); err != nil { |
| 183 | return err |
| 184 | } |
| 185 | if _, err := z.dst.Write(zdata); err != nil { |
| 186 | return err |
| 187 | } |
| 188 | |
| 189 | if z.BlockChecksum { |
| 190 | checksum := xxh32.ChecksumZero(zdata) |
| 191 | if debugFlag { |
| 192 | debug("block checksum %x", checksum) |
| 193 | } |
| 194 | if err := z.writeUint32(checksum); err != nil { |
| 195 | return err |
| 196 | } |
| 197 | } |
| 198 | if debugFlag { |
| 199 | debug("current frame checksum %x", z.checksum.Sum32()) |
| 200 | } |
| 201 | |
| 202 | return nil |
| 203 | } |
| 204 | |
| 205 | // Flush flushes any pending compressed data to the underlying writer. |
| 206 | // Flush does not return until the data has been written. |
| 207 | // If the underlying writer returns an error, Flush returns that error. |
| 208 | func (z *Writer) Flush() error { |
| 209 | if debugFlag { |
| 210 | debug("flush with index %d", z.idx) |
| 211 | } |
| 212 | if z.idx == 0 { |
| 213 | return nil |
| 214 | } |
| 215 | |
| 216 | return z.compressBlock(z.data[:z.idx]) |
| 217 | } |
| 218 | |
| 219 | // Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer. |
| 220 | func (z *Writer) Close() error { |
| 221 | if !z.Header.done { |
| 222 | if err := z.writeHeader(); err != nil { |
| 223 | return err |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | if err := z.Flush(); err != nil { |
| 228 | return err |
| 229 | } |
| 230 | |
| 231 | if debugFlag { |
| 232 | debug("writing last empty block") |
| 233 | } |
| 234 | if err := z.writeUint32(0); err != nil { |
| 235 | return err |
| 236 | } |
| 237 | if !z.NoChecksum { |
| 238 | checksum := z.checksum.Sum32() |
| 239 | if debugFlag { |
| 240 | debug("stream checksum %x", checksum) |
| 241 | } |
| 242 | if err := z.writeUint32(checksum); err != nil { |
| 243 | return err |
| 244 | } |
| 245 | } |
| 246 | return nil |
| 247 | } |
| 248 | |
| 249 | // Reset clears the state of the Writer z such that it is equivalent to its |
| 250 | // initial state from NewWriter, but instead writing to w. |
| 251 | // No access to the underlying io.Writer is performed. |
| 252 | func (z *Writer) Reset(w io.Writer) { |
| 253 | z.Header = Header{} |
| 254 | z.dst = w |
| 255 | z.checksum.Reset() |
| 256 | z.zdata = z.zdata[:0] |
| 257 | z.data = z.data[:0] |
| 258 | z.idx = 0 |
| 259 | } |
| 260 | |
| 261 | // writeUint32 writes a uint32 to the underlying writer. |
| 262 | func (z *Writer) writeUint32(x uint32) error { |
| 263 | buf := z.buf[:4] |
| 264 | binary.LittleEndian.PutUint32(buf, x) |
| 265 | _, err := z.dst.Write(buf) |
| 266 | return err |
| 267 | } |