blob: a3284bdf708b7e76ae72b5883c80c6751871d539 [file] [log] [blame]
// Package lz4 implements reading and writing lz4 compressed data (a frame),
// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
//
// Although the block level compression and decompression functions are exposed and are fully compatible
// with the lz4 block format definition, they are low level and should not be used directly.
// For a complete description of an lz4 compressed block, see:
// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
//
// See https://github.com/Cyan4973/lz4 for the reference C implementation.
//
11package lz4
12
khenaidoo106c61a2021-08-11 18:05:46 -040013import (
14 "math/bits"
15 "sync"
16)
17
const (
	// Extension is the LZ4 frame file name extension.
	Extension = ".lz4"
	// Version is the LZ4 frame format version.
	Version = 1

	// Frame magic numbers, typed as 32-bit values. Going by their names they
	// identify regular, skippable and legacy frames respectively; the values
	// are the ones listed in the LZ4 frame format description.
	frameMagic       uint32 = 0x184D2204
	frameSkipMagic   uint32 = 0x184D2A50
	frameMagicLegacy uint32 = 0x184C2102

	// The following constants are used to set up the compression algorithm.
	minMatch   = 4  // the minimum size of a match sequence (4 bytes)
	winSizeLog = 16 // LZ4 64KB window size limit
	winSize    = 1 << winSizeLog
	winMask    = winSize - 1 // 64KB window of previous data for dependent blocks
	// compressedBlockFlag is bit 31, i.e. the top bit of a 32-bit value.
	compressedBlockFlag = 1 << 31
	// compressedBlockMask selects the 31 bits below compressedBlockFlag.
	compressedBlockMask = compressedBlockFlag - 1

	// hashLog determines the size of the hash table used to quickly find a previous match position.
	// Its value influences the compression speed and memory usage, the lower the faster,
	// but at the expense of the compression ratio.
	// 16 seems to be the best compromise for fast compression.
	hashLog = 16
	htSize  = 1 << hashLog

	mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)
45
// Supported block maximum sizes in bytes: 64KB, 256KB, 1MB and 4MB.
// Each size is four times the previous one, so the n-th constant (counting
// from zero) equals 1 << (16 + 2*n).
const (
	blockSize64K  = 1 << (16 + 2*iota) // 1 << 16
	blockSize256K                      // 1 << 18
	blockSize1M                        // 1 << 20
	blockSize4M                        // 1 << 22
)
53
Abhilash S.L3b494632019-07-16 15:51:09 +053054var (
khenaidoo106c61a2021-08-11 18:05:46 -040055 // Keep a pool of buffers for each valid block sizes.
56 bsMapValue = [...]*sync.Pool{
57 newBufferPool(2 * blockSize64K),
58 newBufferPool(2 * blockSize256K),
59 newBufferPool(2 * blockSize1M),
60 newBufferPool(2 * blockSize4M),
61 }
Abhilash S.L3b494632019-07-16 15:51:09 +053062)
William Kurkianea869482019-04-09 15:16:11 -040063
// newBufferPool returns a pool for buffers of the given size.
//
// When the pool has nothing to reuse, it allocates a fresh byte slice whose
// length is size.
func newBufferPool(size int) *sync.Pool {
	return &sync.Pool{
		New: func() interface{} {
			return make([]byte, size)
		},
	}
}
72
73// getBuffer returns a buffer to its pool.
74func getBuffer(size int) []byte {
75 idx := blockSizeValueToIndex(size) - 4
76 return bsMapValue[idx].Get().([]byte)
77}
78
79// putBuffer returns a buffer to its pool.
80func putBuffer(size int, buf []byte) {
81 if cap(buf) > 0 {
82 idx := blockSizeValueToIndex(size) - 4
83 bsMapValue[idx].Put(buf[:cap(buf)])
84 }
85}
// blockSizeIndexToValue converts a zero-based block size index into the
// matching size in bytes: 0 gives 64KB, 1 gives 256KB, 2 gives 1MB and
// 3 gives 4MB.
//
// Note that blockSizeValueToIndex returns indexes offset by 4, so its result
// must have 4 subtracted before being passed to this function.
func blockSizeIndexToValue(i byte) int {
	return 1 << (16 + 2*uint(i))
}
89func isValidBlockSize(size int) bool {
90 const blockSizeMask = blockSize64K | blockSize256K | blockSize1M | blockSize4M
91
92 return size&blockSizeMask > 0 && bits.OnesCount(uint(size)) == 1
93}
// blockSizeValueToIndex converts a block size in bytes into its index,
// numbered from 4: 64KB gives 4, 256KB gives 5, 1MB gives 6 and 4MB gives 7.
//
// size is expected to be one of those supported block sizes; the result for
// any other value is not meaningful.
func blockSizeValueToIndex(size int) byte {
	// Supported sizes are 1 << (16 + 2*n): drop the low 16 bits, then halve
	// the position of the remaining bit to recover n.
	return 4 + byte(bits.TrailingZeros(uint(size)>>16)/2)
}
97
// Header describes the various flags that can be set on a Writer or obtained from a Reader.
// The default values match those of the LZ4 frame format definition
// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html).
//
// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls.
// It is the caller's responsibility to check them if necessary.
type Header struct {
	BlockChecksum    bool   // Compressed blocks checksum flag.
	NoChecksum       bool   // Frame checksum flag.
	BlockMaxSize     int    // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB.
	Size             uint64 // Frame total size. It is _not_ computed by the Writer.
	CompressionLevel int    // Compression level (higher is better, use 0 for fastest compression).
	done             bool   // Header processed flag (Read or Write and checked).
}
khenaidoo106c61a2021-08-11 18:05:46 -0400112
113// Reset reset internal status
114func (h *Header) Reset() {
115 h.done = false
116}