blob: 075357b5b1604c9e5a1bb41489efdc18c2e62005 [file] [log] [blame]
Scott Bakered4efab2020-01-13 19:12:25 -08001// Copyright 2018 Klaus Post. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
5
6// Package fse provides Finite State Entropy encoding and decoding.
7//
8// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding
9// for byte blocks as implemented in zstd.
10//
11// See https://github.com/klauspost/compress/tree/master/fse for more information.
12package fse
13
14import (
15 "errors"
16 "fmt"
17 "math/bits"
18)
19
20const (
21 /*!MEMORY_USAGE :
22 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
23 * Increasing memory usage improves compression ratio
24 * Reduced memory usage can improve speed, due to cache effect
25 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
26 maxMemoryUsage = 14
27 defaultMemoryUsage = 13
28
29 maxTableLog = maxMemoryUsage - 2
30 maxTablesize = 1 << maxTableLog
31 defaultTablelog = defaultMemoryUsage - 2
32 minTablelog = 5
33 maxSymbolValue = 255
34)
35
36var (
37 // ErrIncompressible is returned when input is judged to be too hard to compress.
38 ErrIncompressible = errors.New("input is not compressible")
39
40 // ErrUseRLE is returned from the compressor when the input is a single byte value repeated.
41 ErrUseRLE = errors.New("input is single value repeated")
42)
43
44// Scratch provides temporary storage for compression and decompression.
45type Scratch struct {
46 // Private
47 count [maxSymbolValue + 1]uint32
48 norm [maxSymbolValue + 1]int16
49 symbolLen uint16 // Length of active part of the symbol table.
50 actualTableLog uint8 // Selected tablelog.
51 br byteReader
52 bits bitReader
53 bw bitWriter
54 ct cTable // Compression tables.
55 decTable []decSymbol // Decompression table.
56 zeroBits bool // no bits has prob > 50%.
57 clearCount bool // clear count
58 maxCount int // count of the most probable symbol
59
60 // Per block parameters.
61 // These can be used to override compression parameters of the block.
62 // Do not touch, unless you know what you are doing.
63
64 // Out is output buffer.
65 // If the scratch is re-used before the caller is done processing the output,
66 // set this field to nil.
67 // Otherwise the output buffer will be re-used for next Compression/Decompression step
68 // and allocation will be avoided.
69 Out []byte
70
71 // MaxSymbolValue will override the maximum symbol value of the next block.
72 MaxSymbolValue uint8
73
74 // TableLog will attempt to override the tablelog for the next block.
75 TableLog uint8
76
77 // DecompressLimit limits the maximum decoded size acceptable.
78 // If > 0 decompression will stop when approximately this many bytes
79 // has been decoded.
80 // If 0, maximum size will be 2GB.
81 DecompressLimit int
82}
83
84// Histogram allows to populate the histogram and skip that step in the compression,
85// It otherwise allows to inspect the histogram when compression is done.
86// To indicate that you have populated the histogram call HistogramFinished
87// with the value of the highest populated symbol, as well as the number of entries
88// in the most populated entry. These are accepted at face value.
89// The returned slice will always be length 256.
90func (s *Scratch) Histogram() []uint32 {
91 return s.count[:]
92}
93
94// HistogramFinished can be called to indicate that the histogram has been populated.
95// maxSymbol is the index of the highest set symbol of the next data segment.
96// maxCount is the number of entries in the most populated entry.
97// These are accepted at face value.
98func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) {
99 s.maxCount = maxCount
100 s.symbolLen = uint16(maxSymbol) + 1
101 s.clearCount = maxCount != 0
102}
103
104// prepare will prepare and allocate scratch tables used for both compression and decompression.
105func (s *Scratch) prepare(in []byte) (*Scratch, error) {
106 if s == nil {
107 s = &Scratch{}
108 }
109 if s.MaxSymbolValue == 0 {
110 s.MaxSymbolValue = 255
111 }
112 if s.TableLog == 0 {
113 s.TableLog = defaultTablelog
114 }
115 if s.TableLog > maxTableLog {
116 return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog)
117 }
118 if cap(s.Out) == 0 {
119 s.Out = make([]byte, 0, len(in))
120 }
121 if s.clearCount && s.maxCount == 0 {
122 for i := range s.count {
123 s.count[i] = 0
124 }
125 s.clearCount = false
126 }
127 s.br.init(in)
128 if s.DecompressLimit == 0 {
129 // Max size 2GB.
130 s.DecompressLimit = (2 << 30) - 1
131 }
132
133 return s, nil
134}
135
136// tableStep returns the next table index.
137func tableStep(tableSize uint32) uint32 {
138 return (tableSize >> 1) + (tableSize >> 3) + 3
139}
140
141func highBits(val uint32) (n uint32) {
142 return uint32(bits.Len32(val) - 1)
143}