package zstd

import (
	"fmt"
	"math/bits"

	"github.com/klauspost/compress/zstd/internal/xxhash"
)

const (
	dictShardBits = 6
)
| 13 | |
// fastBase holds the state shared by the fast-style encoders: the sliding
// history window, the running frame checksum and the block being built.
type fastBase struct {
	// cur is the offset at the start of hist
	cur int32
	// maximum offset. Should be at least 2x block size.
	maxMatchOff int32
	// hist is the history window plus the current block; match positions
	// are indices into this slice (see addBlock).
	hist []byte
	// crc is the running xxhash64 digest of the frame content.
	crc *xxhash.Digest
	// tmp is scratch space used when serializing the CRC (see AppendCRC).
	tmp [8]byte
	// blk is the block currently being encoded.
	blk *blockEnc
	// lastDictID presumably records the ID of the most recently loaded
	// dictionary — not used within this section; confirm against callers.
	lastDictID uint32
	// lowMem selects smaller buffer allocations (see ensureHist).
	lowMem bool
}
| 26 | |
| 27 | // CRC returns the underlying CRC writer. |
// CRC returns the underlying CRC writer (the frame's xxhash64 digest).
func (e *fastBase) CRC() *xxhash.Digest {
	return e.crc
}
| 31 | |
| 32 | // AppendCRC will append the CRC to the destination slice and return it. |
| 33 | func (e *fastBase) AppendCRC(dst []byte) []byte { |
| 34 | crc := e.crc.Sum(e.tmp[:0]) |
| 35 | dst = append(dst, crc[7], crc[6], crc[5], crc[4]) |
| 36 | return dst |
| 37 | } |
| 38 | |
| 39 | // WindowSize returns the window size of the encoder, |
| 40 | // or a window size small enough to contain the input size, if > 0. |
| 41 | func (e *fastBase) WindowSize(size int) int32 { |
| 42 | if size > 0 && size < int(e.maxMatchOff) { |
| 43 | b := int32(1) << uint(bits.Len(uint(size))) |
| 44 | // Keep minimum window. |
| 45 | if b < 1024 { |
| 46 | b = 1024 |
| 47 | } |
| 48 | return b |
| 49 | } |
| 50 | return e.maxMatchOff |
| 51 | } |
| 52 | |
| 53 | // Block returns the current block. |
// Block returns the current block being encoded.
func (e *fastBase) Block() *blockEnc {
	return e.blk
}
| 57 | |
| 58 | func (e *fastBase) addBlock(src []byte) int32 { |
| 59 | if debugAsserts && e.cur > bufferReset { |
| 60 | panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) |
| 61 | } |
| 62 | // check if we have space already |
| 63 | if len(e.hist)+len(src) > cap(e.hist) { |
| 64 | if cap(e.hist) == 0 { |
| 65 | e.ensureHist(len(src)) |
| 66 | } else { |
| 67 | if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) { |
| 68 | panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff)) |
| 69 | } |
| 70 | // Move down |
| 71 | offset := int32(len(e.hist)) - e.maxMatchOff |
| 72 | copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) |
| 73 | e.cur += offset |
| 74 | e.hist = e.hist[:e.maxMatchOff] |
| 75 | } |
| 76 | } |
| 77 | s := int32(len(e.hist)) |
| 78 | e.hist = append(e.hist, src...) |
| 79 | return s |
| 80 | } |
| 81 | |
| 82 | // ensureHist will ensure that history can keep at least this many bytes. |
| 83 | func (e *fastBase) ensureHist(n int) { |
| 84 | if cap(e.hist) >= n { |
| 85 | return |
| 86 | } |
| 87 | l := e.maxMatchOff |
| 88 | if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize { |
| 89 | l += maxCompressedBlockSize |
| 90 | } else { |
| 91 | l += e.maxMatchOff |
| 92 | } |
| 93 | // Make it at least 1MB. |
| 94 | if l < 1<<20 && !e.lowMem { |
| 95 | l = 1 << 20 |
| 96 | } |
| 97 | // Make it at least the requested size. |
| 98 | if l < int32(n) { |
| 99 | l = int32(n) |
| 100 | } |
| 101 | e.hist = make([]byte, 0, l) |
| 102 | } |
| 103 | |
| 104 | // useBlock will replace the block with the provided one, |
| 105 | // but transfer recent offsets from the previous. |
| 106 | func (e *fastBase) UseBlock(enc *blockEnc) { |
| 107 | enc.reset(e.blk) |
| 108 | e.blk = enc |
| 109 | } |
| 110 | |
// matchlenNoHist returns the length of the common prefix of src[s:] and
// src[t:]. Unlike matchlen it performs no debug assertions on s and t.
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
	// Extend the match to be as long as possible.
	return int32(matchLen(src[s:], src[t:]))
}
| 115 | |
| 116 | func (e *fastBase) matchlen(s, t int32, src []byte) int32 { |
| 117 | if debugAsserts { |
| 118 | if s < 0 { |
| 119 | err := fmt.Sprintf("s (%d) < 0", s) |
| 120 | panic(err) |
| 121 | } |
| 122 | if t < 0 { |
| 123 | err := fmt.Sprintf("s (%d) < 0", s) |
| 124 | panic(err) |
| 125 | } |
| 126 | if s-t > e.maxMatchOff { |
| 127 | err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) |
| 128 | panic(err) |
| 129 | } |
| 130 | if len(src)-int(s) > maxCompressedBlockSize { |
| 131 | panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | // Extend the match to be as long as possible. |
| 136 | return int32(matchLen(src[s:], src[t:])) |
| 137 | } |
| 138 | |
| 139 | // Reset the encoding table. |
| 140 | func (e *fastBase) resetBase(d *dict, singleBlock bool) { |
| 141 | if e.blk == nil { |
| 142 | e.blk = &blockEnc{lowMem: e.lowMem} |
| 143 | e.blk.init() |
| 144 | } else { |
| 145 | e.blk.reset(nil) |
| 146 | } |
| 147 | e.blk.initNewEncode() |
| 148 | if e.crc == nil { |
| 149 | e.crc = xxhash.New() |
| 150 | } else { |
| 151 | e.crc.Reset() |
| 152 | } |
| 153 | if d != nil { |
| 154 | low := e.lowMem |
| 155 | if singleBlock { |
| 156 | e.lowMem = true |
| 157 | } |
| 158 | e.ensureHist(d.DictContentSize() + maxCompressedBlockSize) |
| 159 | e.lowMem = low |
| 160 | } |
| 161 | |
| 162 | // We offset current position so everything will be out of reach. |
| 163 | // If above reset line, history will be purged. |
| 164 | if e.cur < bufferReset { |
| 165 | e.cur += e.maxMatchOff + int32(len(e.hist)) |
| 166 | } |
| 167 | e.hist = e.hist[:0] |
| 168 | if d != nil { |
| 169 | // Set offsets (currently not used) |
| 170 | for i, off := range d.offsets { |
| 171 | e.blk.recentOffsets[i] = uint32(off) |
| 172 | e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i] |
| 173 | } |
| 174 | // Transfer litenc. |
| 175 | e.blk.dictLitEnc = d.litEnc |
| 176 | e.hist = append(e.hist, d.content...) |
| 177 | } |
| 178 | } |