// Package lz4 implements reading and writing lz4 compressed data (a frame),
// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
//
// Although the block level compression and decompression functions are exposed and are fully compatible
// with the lz4 block format definition, they are low level and should not be used directly.
// For a complete description of an lz4 compressed block, see:
// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
//
// See https://github.com/Cyan4973/lz4 for the reference C implementation.
//
| 11 | package lz4 |
| 12 | |
| 13 | import "math/bits" |
| 14 | |
| 15 | import "sync" |
| 16 | |
const (
	// Extension is the LZ4 frame file name extension.
	Extension = ".lz4"
	// Version is the LZ4 frame format version.
	Version = 1

	// Frame magic numbers: 0x184D2204 opens a regular LZ4 frame and 0x184D2A50
	// is the first of the skippable-frame magic values in the LZ4 frame format.
	frameMagic     uint32 = 0x184D2204
	frameSkipMagic uint32 = 0x184D2A50

	// The following constants are used to setup the compression algorithm.
	// compressedBlockFlag is the top bit of a 32-bit word and compressedBlockMask
	// selects the remaining 31 bits.
	// NOTE(review): presumably both are applied to a frame's block-size word;
	// confirm against the reader and writer.
	minMatch            = 4  // the minimum size of the match sequence size (4 bytes)
	winSizeLog          = 16 // LZ4 64KB window size limit
	winSize             = 1 << winSizeLog
	winMask             = winSize - 1 // 64KB window of previous data for dependent blocks
	compressedBlockFlag = 1 << 31
	compressedBlockMask = compressedBlockFlag - 1

	// hashLog determines the size of the hash table used to quickly find a previous match position.
	// Its value influences the compression speed and memory usage, the lower the faster,
	// but at the expense of the compression ratio.
	// 16 seems to be the best compromise for fast compression.
	hashLog = 16
	htSize  = 1 << hashLog

	mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)
| 43 | |
// Block maximum sizes in bytes, listed in block-size-index order:
// 64KB, 256KB, 1MB and 4MB. Each step of iota multiplies the size by four.
const (
	blockSize64K = 1 << (16 + 2*iota)
	blockSize256K
	blockSize1M
	blockSize4M
)
| 51 | |
| 52 | var ( |
| 53 | // Keep a pool of buffers for each valid block sizes. |
| 54 | bsMapValue = [...]*sync.Pool{ |
| 55 | newBufferPool(2 * blockSize64K), |
| 56 | newBufferPool(2 * blockSize256K), |
| 57 | newBufferPool(2 * blockSize1M), |
| 58 | newBufferPool(2 * blockSize4M), |
| 59 | } |
| 60 | ) |
| 61 | |
// newBufferPool returns a pool for buffers of the given size.
// When the pool is empty, Get allocates a fresh []byte whose length and
// capacity are both size.
func newBufferPool(size int) *sync.Pool {
	return &sync.Pool{
		New: func() interface{} {
			return make([]byte, size)
		},
	}
}
| 70 | |
| 71 | // getBuffer returns a buffer to its pool. |
| 72 | func getBuffer(size int) []byte { |
| 73 | idx := blockSizeValueToIndex(size) - 4 |
| 74 | return bsMapValue[idx].Get().([]byte) |
| 75 | } |
| 76 | |
| 77 | // putBuffer returns a buffer to its pool. |
| 78 | func putBuffer(size int, buf []byte) { |
| 79 | if cap(buf) > 0 { |
| 80 | idx := blockSizeValueToIndex(size) - 4 |
| 81 | bsMapValue[idx].Put(buf[:cap(buf)]) |
| 82 | } |
| 83 | } |
// blockSizeIndexToValue converts a zero-based block size index into a size in
// bytes, computed as 1 << (16 + 2*i): 0 -> 64KB, 1 -> 256KB, 2 -> 1MB, 3 -> 4MB.
// The index is not range-checked.
func blockSizeIndexToValue(i byte) int {
	return 1 << (16 + 2*uint(i))
}
| 87 | func isValidBlockSize(size int) bool { |
| 88 | const blockSizeMask = blockSize64K | blockSize256K | blockSize1M | blockSize4M |
| 89 | |
| 90 | return size&blockSizeMask > 0 && bits.OnesCount(uint(size)) == 1 |
| 91 | } |
// blockSizeValueToIndex converts a block size in bytes into its 4-based index:
// 64KB -> 4, 256KB -> 5, 1MB -> 6, 4MB -> 7.
// size is not validated; the result is only meaningful for those four sizes.
func blockSizeValueToIndex(size int) byte {
	// The sizes are 1<<16, 1<<18, 1<<20 and 1<<22, so after dropping the low
	// 16 bits the number of trailing zeros is 0, 2, 4 or 6.
	return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
}
| 95 | |
// Header describes the various flags that can be set on a Writer or obtained from a Reader.
// The default values match those of the LZ4 frame format definition
// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
//
// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
// It is the caller's responsibility to check them if necessary.
type Header struct {
	BlockChecksum    bool   // Compressed blocks checksum flag.
	NoChecksum       bool   // Frame checksum flag.
	BlockMaxSize     int    // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
	Size             uint64 // Frame total size. It is _not_ computed by the Writer.
	CompressionLevel int    // Compression level (higher is better, use 0 for fastest compression).
	done             bool   // Header processed flag (Read or Write and checked).
}

// Reset clears the internal "header processed" flag.
// All exported fields are left untouched.
func (h *Header) Reset() {
	h.done = false
}