khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 1 | package lz4 |
| 2 | |
| 3 | import ( |
| 4 | "encoding/binary" |
| 5 | "fmt" |
| 6 | "io" |
| 7 | |
| 8 | "github.com/pierrec/lz4/internal/xxh32" |
| 9 | ) |
| 10 | |
| 11 | // Writer implements the LZ4 frame encoder. |
| 12 | type Writer struct { |
| 13 | Header |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 14 | // Handler called when a block has been successfully written out. |
| 15 | // It provides the number of bytes written. |
| 16 | OnBlockDone func(size int) |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 17 | |
| 18 | buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes |
| 19 | dst io.Writer // Destination. |
| 20 | checksum xxh32.XXHZero // Frame checksum. |
| 21 | zdata []byte // Compressed data. |
| 22 | data []byte // Data to be compressed. |
| 23 | idx int // Index into data. |
| 24 | hashtable [winSize]int // Hash table used in CompressBlock(). |
| 25 | } |
| 26 | |
| 27 | // NewWriter returns a new LZ4 frame encoder. |
| 28 | // No access to the underlying io.Writer is performed. |
| 29 | // The supplied Header is checked at the first Write. |
| 30 | // It is ok to change it before the first Write but then not until a Reset() is performed. |
| 31 | func NewWriter(dst io.Writer) *Writer { |
| 32 | return &Writer{dst: dst} |
| 33 | } |
| 34 | |
| 35 | // writeHeader builds and writes the header (magic+header) to the underlying io.Writer. |
| 36 | func (z *Writer) writeHeader() error { |
| 37 | // Default to 4Mb if BlockMaxSize is not set. |
| 38 | if z.Header.BlockMaxSize == 0 { |
| 39 | z.Header.BlockMaxSize = bsMapID[7] |
| 40 | } |
| 41 | // The only option that needs to be validated. |
| 42 | bSize := z.Header.BlockMaxSize |
| 43 | bSizeID, ok := bsMapValue[bSize] |
| 44 | if !ok { |
| 45 | return fmt.Errorf("lz4: invalid block max size: %d", bSize) |
| 46 | } |
| 47 | // Allocate the compressed/uncompressed buffers. |
| 48 | // The compressed buffer cannot exceed the uncompressed one. |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 49 | if cap(z.zdata) < bSize { |
| 50 | // Only allocate if there is not enough capacity. |
| 51 | // Allocate both buffers at once. |
| 52 | z.zdata = make([]byte, 2*bSize) |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 53 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 54 | z.data = z.zdata[:bSize] // Uncompressed buffer is the first half. |
| 55 | z.zdata = z.zdata[:cap(z.zdata)][bSize:] // Compressed buffer is the second half. |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 56 | z.idx = 0 |
| 57 | |
| 58 | // Size is optional. |
| 59 | buf := z.buf[:] |
| 60 | |
| 61 | // Set the fixed size data: magic number, block max size and flags. |
| 62 | binary.LittleEndian.PutUint32(buf[0:], frameMagic) |
| 63 | flg := byte(Version << 6) |
| 64 | flg |= 1 << 5 // No block dependency. |
| 65 | if z.Header.BlockChecksum { |
| 66 | flg |= 1 << 4 |
| 67 | } |
| 68 | if z.Header.Size > 0 { |
| 69 | flg |= 1 << 3 |
| 70 | } |
| 71 | if !z.Header.NoChecksum { |
| 72 | flg |= 1 << 2 |
| 73 | } |
| 74 | buf[4] = flg |
| 75 | buf[5] = bSizeID << 4 |
| 76 | |
| 77 | // Current buffer size: magic(4) + flags(1) + block max size (1). |
| 78 | n := 6 |
| 79 | // Optional items. |
| 80 | if z.Header.Size > 0 { |
| 81 | binary.LittleEndian.PutUint64(buf[n:], z.Header.Size) |
| 82 | n += 8 |
| 83 | } |
| 84 | |
| 85 | // The header checksum includes the flags, block max size and optional Size. |
| 86 | buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF) |
| 87 | z.checksum.Reset() |
| 88 | |
| 89 | // Header ready, write it out. |
| 90 | if _, err := z.dst.Write(buf[0 : n+1]); err != nil { |
| 91 | return err |
| 92 | } |
| 93 | z.Header.done = true |
| 94 | if debugFlag { |
| 95 | debug("wrote header %v", z.Header) |
| 96 | } |
| 97 | |
| 98 | return nil |
| 99 | } |
| 100 | |
| 101 | // Write compresses data from the supplied buffer into the underlying io.Writer. |
| 102 | // Write does not return until the data has been written. |
| 103 | func (z *Writer) Write(buf []byte) (int, error) { |
| 104 | if !z.Header.done { |
| 105 | if err := z.writeHeader(); err != nil { |
| 106 | return 0, err |
| 107 | } |
| 108 | } |
| 109 | if debugFlag { |
| 110 | debug("input buffer len=%d index=%d", len(buf), z.idx) |
| 111 | } |
| 112 | |
| 113 | zn := len(z.data) |
| 114 | var n int |
| 115 | for len(buf) > 0 { |
| 116 | if z.idx == 0 && len(buf) >= zn { |
| 117 | // Avoid a copy as there is enough data for a block. |
| 118 | if err := z.compressBlock(buf[:zn]); err != nil { |
| 119 | return n, err |
| 120 | } |
| 121 | n += zn |
| 122 | buf = buf[zn:] |
| 123 | continue |
| 124 | } |
| 125 | // Accumulate the data to be compressed. |
| 126 | m := copy(z.data[z.idx:], buf) |
| 127 | n += m |
| 128 | z.idx += m |
| 129 | buf = buf[m:] |
| 130 | if debugFlag { |
| 131 | debug("%d bytes copied to buf, current index %d", n, z.idx) |
| 132 | } |
| 133 | |
| 134 | if z.idx < len(z.data) { |
| 135 | // Buffer not filled. |
| 136 | if debugFlag { |
| 137 | debug("need more data for compression") |
| 138 | } |
| 139 | return n, nil |
| 140 | } |
| 141 | |
| 142 | // Buffer full. |
| 143 | if err := z.compressBlock(z.data); err != nil { |
| 144 | return n, err |
| 145 | } |
| 146 | z.idx = 0 |
| 147 | } |
| 148 | |
| 149 | return n, nil |
| 150 | } |
| 151 | |
| 152 | // compressBlock compresses a block. |
| 153 | func (z *Writer) compressBlock(data []byte) error { |
| 154 | if !z.NoChecksum { |
| 155 | z.checksum.Write(data) |
| 156 | } |
| 157 | |
| 158 | // The compressed block size cannot exceed the input's. |
| 159 | var zn int |
| 160 | var err error |
| 161 | |
| 162 | if level := z.Header.CompressionLevel; level != 0 { |
| 163 | zn, err = CompressBlockHC(data, z.zdata, level) |
| 164 | } else { |
| 165 | zn, err = CompressBlock(data, z.zdata, z.hashtable[:]) |
| 166 | } |
| 167 | |
| 168 | var zdata []byte |
| 169 | var bLen uint32 |
| 170 | if debugFlag { |
| 171 | debug("block compression %d => %d", len(data), zn) |
| 172 | } |
| 173 | if err == nil && zn > 0 && zn < len(data) { |
| 174 | // Compressible and compressed size smaller than uncompressed: ok! |
| 175 | bLen = uint32(zn) |
| 176 | zdata = z.zdata[:zn] |
| 177 | } else { |
| 178 | // Uncompressed block. |
| 179 | bLen = uint32(len(data)) | compressedBlockFlag |
| 180 | zdata = data |
| 181 | } |
| 182 | if debugFlag { |
| 183 | debug("block compression to be written len=%d data len=%d", bLen, len(zdata)) |
| 184 | } |
| 185 | |
| 186 | // Write the block. |
| 187 | if err := z.writeUint32(bLen); err != nil { |
| 188 | return err |
| 189 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 190 | written, err := z.dst.Write(zdata) |
| 191 | if err != nil { |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 192 | return err |
| 193 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 194 | if h := z.OnBlockDone; h != nil { |
| 195 | h(written) |
| 196 | } |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 197 | |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 198 | if !z.BlockChecksum { |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 199 | if debugFlag { |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 200 | debug("current frame checksum %x", z.checksum.Sum32()) |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 201 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 202 | return nil |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 203 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 204 | checksum := xxh32.ChecksumZero(zdata) |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 205 | if debugFlag { |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 206 | debug("block checksum %x", checksum) |
| 207 | defer func() { debug("current frame checksum %x", z.checksum.Sum32()) }() |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 208 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 209 | return z.writeUint32(checksum) |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 210 | } |
| 211 | |
| 212 | // Flush flushes any pending compressed data to the underlying writer. |
| 213 | // Flush does not return until the data has been written. |
| 214 | // If the underlying writer returns an error, Flush returns that error. |
| 215 | func (z *Writer) Flush() error { |
| 216 | if debugFlag { |
| 217 | debug("flush with index %d", z.idx) |
| 218 | } |
| 219 | if z.idx == 0 { |
| 220 | return nil |
| 221 | } |
| 222 | |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 223 | if err := z.compressBlock(z.data[:z.idx]); err != nil { |
| 224 | return err |
| 225 | } |
| 226 | z.idx = 0 |
| 227 | return nil |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 228 | } |
| 229 | |
| 230 | // Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer. |
| 231 | func (z *Writer) Close() error { |
| 232 | if !z.Header.done { |
| 233 | if err := z.writeHeader(); err != nil { |
| 234 | return err |
| 235 | } |
| 236 | } |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 237 | if err := z.Flush(); err != nil { |
| 238 | return err |
| 239 | } |
| 240 | |
| 241 | if debugFlag { |
| 242 | debug("writing last empty block") |
| 243 | } |
| 244 | if err := z.writeUint32(0); err != nil { |
| 245 | return err |
| 246 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 247 | if z.NoChecksum { |
| 248 | return nil |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 249 | } |
Scott Baker | beb3cfa | 2019-10-01 14:44:30 -0700 | [diff] [blame] | 250 | checksum := z.checksum.Sum32() |
| 251 | if debugFlag { |
| 252 | debug("stream checksum %x", checksum) |
| 253 | } |
| 254 | return z.writeUint32(checksum) |
khenaidoo | ac63710 | 2019-01-14 15:44:34 -0500 | [diff] [blame] | 255 | } |
| 256 | |
| 257 | // Reset clears the state of the Writer z such that it is equivalent to its |
| 258 | // initial state from NewWriter, but instead writing to w. |
| 259 | // No access to the underlying io.Writer is performed. |
| 260 | func (z *Writer) Reset(w io.Writer) { |
| 261 | z.Header = Header{} |
| 262 | z.dst = w |
| 263 | z.checksum.Reset() |
| 264 | z.zdata = z.zdata[:0] |
| 265 | z.data = z.data[:0] |
| 266 | z.idx = 0 |
| 267 | } |
| 268 | |
| 269 | // writeUint32 writes a uint32 to the underlying writer. |
| 270 | func (z *Writer) writeUint32(x uint32) error { |
| 271 | buf := z.buf[:4] |
| 272 | binary.LittleEndian.PutUint32(buf, x) |
| 273 | _, err := z.dst.Write(buf) |
| 274 | return err |
| 275 | } |