blob: 535cbadfdea192827c17417deaa8a476a4f3f715 [file] [log] [blame]
Scott Bakered4efab2020-01-13 19:12:25 -08001// Copyright 2018 Klaus Post. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5
6// Package fse provides Finite State Entropy encoding and decoding.
7//
8// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
9// for byte blocks as implemented in zstd.
10//
11// See https://github.com/klauspost/compress/tree/master/fse for more information.
12package fse
13
14import (
15 "errors"
16 "fmt"
17 "math/bits"
18)
19
20const (
21 /*!MEMORY_USAGE :
22 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
23 * Increasing memory usage improves compression ratio
24 * Reduced memory usage can improve speed, due to cache effect
25 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
26 maxMemoryUsage = 14
27 defaultMemoryUsage = 13
28
29 maxTableLog = maxMemoryUsage - 2
30 maxTablesize = 1 << maxTableLog
31 defaultTablelog = defaultMemoryUsage - 2
32 minTablelog = 5
33 maxSymbolValue = 255
34)
35
36var (
37 // ErrIncompressible is returned when input is judged to be too hard to compress.
38 ErrIncompressible = errors.New("input is not compressible")
39
40 // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
41 ErrUseRLE = errors.New("input is single value repeated")
42)
43
44// Scratch provides temporary storage for compression and decompression.
45type Scratch struct {
46 // Private
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000047 count [maxSymbolValue + 1]uint32
48 norm [maxSymbolValue + 1]int16
49 br byteReader
50 bits bitReader
51 bw bitWriter
52 ct cTable // Compression tables.
53 decTable []decSymbol // Decompression table.
54 maxCount int // count of the most probable symbol
Scott Bakered4efab2020-01-13 19:12:25 -080055
56 // Per block parameters.
57 // These can be used to override compression parameters of the block.
58 // Do not touch, unless you know what you are doing.
59
60 // Out is output buffer.
61 // If the scratch is re-used before the caller is done processing the output,
62 // set this field to nil.
63 // Otherwise the output buffer will be re-used for next Compression/Decompression step
64 // and allocation will be avoided.
65 Out []byte
66
Scott Bakered4efab2020-01-13 19:12:25 -080067 // DecompressLimit limits the maximum decoded size acceptable.
68 // If > 0 decompression will stop when approximately this many bytes
69 // has been decoded.
70 // If 0, maximum size will be 2GB.
71 DecompressLimit int
David K. Bainbridgebd6b2882021-08-26 13:31:02 +000072
73 symbolLen uint16 // Length of active part of the symbol table.
74 actualTableLog uint8 // Selected tablelog.
75 zeroBits bool // no bits has prob > 50%.
76 clearCount bool // clear count
77
78 // MaxSymbolValue will override the maximum symbol value of the next block.
79 MaxSymbolValue uint8
80
81 // TableLog will attempt to override the tablelog for the next block.
82 TableLog uint8
Scott Bakered4efab2020-01-13 19:12:25 -080083}
84
85// Histogram allows to populate the histogram and skip that step in the compression,
86// It otherwise allows to inspect the histogram when compression is done.
87// To indicate that you have populated the histogram call HistogramFinished
88// with the value of the highest populated symbol, as well as the number of entries
89// in the most populated entry. These are accepted at face value.
90// The returned slice will always be length 256.
91func (s *Scratch) Histogram() []uint32 {
92 return s.count[:]
93}
94
95// HistogramFinished can be called to indicate that the histogram has been populated.
96// maxSymbol is the index of the highest set symbol of the next data segment.
97// maxCount is the number of entries in the most populated entry.
98// These are accepted at face value.
99func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
100 s.maxCount = maxCount
101 s.symbolLen = uint16(maxSymbol) + 1
102 s.clearCount = maxCount != 0
103}
104
105// prepare will prepare and allocate scratch tables used for both compression and decompression.
106func (s *Scratch) prepare(in []byte) (*Scratch, error) {
107 if s == nil {
108 s = &Scratch{}
109 }
110 if s.MaxSymbolValue == 0 {
111 s.MaxSymbolValue = 255
112 }
113 if s.TableLog == 0 {
114 s.TableLog = defaultTablelog
115 }
116 if s.TableLog > maxTableLog {
117 return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
118 }
119 if cap(s.Out) == 0 {
120 s.Out = make([]byte, 0, len(in))
121 }
122 if s.clearCount && s.maxCount == 0 {
123 for i := range s.count {
124 s.count[i] = 0
125 }
126 s.clearCount = false
127 }
128 s.br.init(in)
129 if s.DecompressLimit == 0 {
130 // Max size 2GB.
131 s.DecompressLimit = (2 << 30) - 1
132 }
133
134 return s, nil
135}
136
137// tableStep returns the next table index.
138func tableStep(tableSize uint32) uint32 {
139 return (tableSize >> 1) + (tableSize >> 3) + 3
140}
141
142func highBits(val uint32) (n uint32) {
143 return uint32(bits.Len32(val) - 1)
144}