blob: f074f1e0a95de61ac85d7898c2c1229cccaf147c [file] [log] [blame]
khenaidooac637102019-01-14 15:44:34 -05001/* ******************************************************************
2 Huffman encoder, part of New Generation Entropy library
3 Copyright (C) 2013-2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33****************************************************************** */
34
35/* **************************************************************
36* Compiler specifics
37****************************************************************/
38#ifdef _MSC_VER /* Visual Studio */
39# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
40#endif
41
42
43/* **************************************************************
44* Includes
45****************************************************************/
46#include <string.h> /* memcpy, memset */
47#include <stdio.h> /* printf (debug) */
khenaidooac637102019-01-14 15:44:34 -050048#include "compiler.h"
Scott Bakerbeb3cfa2019-10-01 14:44:30 -070049#include "bitstream.h"
50#include "hist.h"
khenaidooac637102019-01-14 15:44:34 -050051#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
52#include "fse.h" /* header compression */
53#define HUF_STATIC_LINKING_ONLY
54#include "huf.h"
55#include "error_private.h"
56
57
58/* **************************************************************
59* Error Management
60****************************************************************/
61#define HUF_isError ERR_isError
Scott Bakerbeb3cfa2019-10-01 14:44:30 -070062#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
khenaidooac637102019-01-14 15:44:34 -050063#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
64#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
65
66
67/* **************************************************************
68* Utils
69****************************************************************/
70unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
71{
72 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
73}
74
75
76/* *******************************************************
77* HUF : Huffman block compression
78*********************************************************/
79/* HUF_compressWeights() :
80 * Same as FSE_compress(), but dedicated to huff0's weights compression.
81 * The use case needs much less stack memory.
82 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
83 */
84#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
Scott Bakerbeb3cfa2019-10-01 14:44:30 -070085static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
khenaidooac637102019-01-14 15:44:34 -050086{
87 BYTE* const ostart = (BYTE*) dst;
88 BYTE* op = ostart;
89 BYTE* const oend = ostart + dstSize;
90
Scott Bakerbeb3cfa2019-10-01 14:44:30 -070091 unsigned maxSymbolValue = HUF_TABLELOG_MAX;
khenaidooac637102019-01-14 15:44:34 -050092 U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
93
94 FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
95 BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
96
Scott Bakerbeb3cfa2019-10-01 14:44:30 -070097 unsigned count[HUF_TABLELOG_MAX+1];
khenaidooac637102019-01-14 15:44:34 -050098 S16 norm[HUF_TABLELOG_MAX+1];
99
100 /* init conditions */
101 if (wtSize <= 1) return 0; /* Not compressible */
102
103 /* Scan input and build symbol stats */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700104 { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
khenaidooac637102019-01-14 15:44:34 -0500105 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700106 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
khenaidooac637102019-01-14 15:44:34 -0500107 }
108
109 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
110 CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
111
112 /* Write table description header */
113 { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
114 op += hSize;
115 }
116
117 /* Compress */
118 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
119 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
120 if (cSize == 0) return 0; /* not enough space for compressed data */
121 op += cSize;
122 }
123
124 return op-ostart;
125}
126
127
128struct HUF_CElt_s {
129 U16 val;
130 BYTE nbBits;
131}; /* typedef'd to HUF_CElt within "huf.h" */
132
133/*! HUF_writeCTable() :
134 `CTable` : Huffman tree to save, using huf representation.
135 @return : size of saved CTable */
136size_t HUF_writeCTable (void* dst, size_t maxDstSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700137 const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
khenaidooac637102019-01-14 15:44:34 -0500138{
139 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
140 BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
141 BYTE* op = (BYTE*)dst;
142 U32 n;
143
144 /* check conditions */
145 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
146
147 /* convert to weight */
148 bitsToWeight[0] = 0;
149 for (n=1; n<huffLog+1; n++)
150 bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
151 for (n=0; n<maxSymbolValue; n++)
152 huffWeight[n] = bitsToWeight[CTable[n].nbBits];
153
154 /* attempt weights compression by FSE */
155 { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
156 if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
157 op[0] = (BYTE)hSize;
158 return hSize+1;
159 } }
160
161 /* write raw values as 4-bits (max : 15) */
162 if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
163 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
164 op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
165 huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
166 for (n=0; n<maxSymbolValue; n+=2)
167 op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
168 return ((maxSymbolValue+1)/2) + 1;
169}
170
171
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700172size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
khenaidooac637102019-01-14 15:44:34 -0500173{
174 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */
175 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
176 U32 tableLog = 0;
177 U32 nbSymbols = 0;
178
179 /* get symbol weights */
180 CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
181
182 /* check result */
183 if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
184 if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
185
186 /* Prepare base value per rank */
187 { U32 n, nextRankStart = 0;
188 for (n=1; n<=tableLog; n++) {
189 U32 current = nextRankStart;
190 nextRankStart += (rankVal[n] << (n-1));
191 rankVal[n] = current;
192 } }
193
194 /* fill nbBits */
195 { U32 n; for (n=0; n<nbSymbols; n++) {
196 const U32 w = huffWeight[n];
197 CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
198 } }
199
200 /* fill val */
201 { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
202 U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
203 { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
204 /* determine stating value per rank */
205 valPerRank[tableLog+1] = 0; /* for w==0 */
206 { U16 min = 0;
207 U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */
208 valPerRank[n] = min; /* get starting value within each rank */
209 min += nbPerRank[n];
210 min >>= 1;
211 } }
212 /* assign value within rank, symbol order */
213 { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
214 }
215
216 *maxSymbolValuePtr = nbSymbols - 1;
217 return readSize;
218}
219
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700220U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
221{
222 const HUF_CElt* table = (const HUF_CElt*)symbolTable;
223 assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224 return table[symbolValue].nbBits;
225}
226
khenaidooac637102019-01-14 15:44:34 -0500227
228typedef struct nodeElt_s {
229 U32 count;
230 U16 parent;
231 BYTE byte;
232 BYTE nbBits;
233} nodeElt;
234
235static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
236{
237 const U32 largestBits = huffNode[lastNonNull].nbBits;
238 if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
239
240 /* there are several too large elements (at least >= 2) */
241 { int totalCost = 0;
242 const U32 baseCost = 1 << (largestBits - maxNbBits);
243 U32 n = lastNonNull;
244
245 while (huffNode[n].nbBits > maxNbBits) {
246 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
247 huffNode[n].nbBits = (BYTE)maxNbBits;
248 n --;
249 } /* n stops at huffNode[n].nbBits <= maxNbBits */
250 while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
251
252 /* renorm totalCost */
253 totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
254
255 /* repay normalized cost */
256 { U32 const noSymbol = 0xF0F0F0F0;
257 U32 rankLast[HUF_TABLELOG_MAX+2];
258 int pos;
259
260 /* Get pos of last (smallest) symbol per rank */
261 memset(rankLast, 0xF0, sizeof(rankLast));
262 { U32 currentNbBits = maxNbBits;
263 for (pos=n ; pos >= 0; pos--) {
264 if (huffNode[pos].nbBits >= currentNbBits) continue;
265 currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
266 rankLast[maxNbBits-currentNbBits] = pos;
267 } }
268
269 while (totalCost > 0) {
270 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
271 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
272 U32 highPos = rankLast[nBitsToDecrease];
273 U32 lowPos = rankLast[nBitsToDecrease-1];
274 if (highPos == noSymbol) continue;
275 if (lowPos == noSymbol) break;
276 { U32 const highTotal = huffNode[highPos].count;
277 U32 const lowTotal = 2 * huffNode[lowPos].count;
278 if (highTotal <= lowTotal) break;
279 } }
280 /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
281 /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
282 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
283 nBitsToDecrease ++;
284 totalCost -= 1 << (nBitsToDecrease-1);
285 if (rankLast[nBitsToDecrease-1] == noSymbol)
286 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
287 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
288 if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
289 rankLast[nBitsToDecrease] = noSymbol;
290 else {
291 rankLast[nBitsToDecrease]--;
292 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
293 rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
294 } } /* while (totalCost > 0) */
295
296 while (totalCost < 0) { /* Sometimes, cost correction overshoot */
297 if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
298 while (huffNode[n].nbBits == maxNbBits) n--;
299 huffNode[n+1].nbBits--;
300 rankLast[1] = n+1;
301 totalCost++;
302 continue;
303 }
304 huffNode[ rankLast[1] + 1 ].nbBits--;
305 rankLast[1]++;
306 totalCost ++;
307 } } } /* there are several too large elements (at least >= 2) */
308
309 return maxNbBits;
310}
311
312
313typedef struct {
314 U32 base;
315 U32 current;
316} rankPos;
317
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700318static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
khenaidooac637102019-01-14 15:44:34 -0500319{
320 rankPos rank[32];
321 U32 n;
322
323 memset(rank, 0, sizeof(rank));
324 for (n=0; n<=maxSymbolValue; n++) {
325 U32 r = BIT_highbit32(count[n] + 1);
326 rank[r].base ++;
327 }
328 for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
329 for (n=0; n<32; n++) rank[n].current = rank[n].base;
330 for (n=0; n<=maxSymbolValue; n++) {
331 U32 const c = count[n];
332 U32 const r = BIT_highbit32(c+1) + 1;
333 U32 pos = rank[r].current++;
334 while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
335 huffNode[pos] = huffNode[pos-1];
336 pos--;
337 }
338 huffNode[pos].count = c;
339 huffNode[pos].byte = (BYTE)n;
340 }
341}
342
343
344/** HUF_buildCTable_wksp() :
345 * Same as HUF_buildCTable(), but using externally allocated scratch buffer.
346 * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
347 */
348#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
349typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700350size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
khenaidooac637102019-01-14 15:44:34 -0500351{
352 nodeElt* const huffNode0 = (nodeElt*)workSpace;
353 nodeElt* const huffNode = huffNode0+1;
354 U32 n, nonNullRank;
355 int lowS, lowN;
356 U16 nodeNb = STARTNODE;
357 U32 nodeRoot;
358
359 /* safety checks */
360 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
361 if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
362 if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
363 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
364 memset(huffNode0, 0, sizeof(huffNodeTable));
365
366 /* sort, decreasing order */
367 HUF_sort(huffNode, count, maxSymbolValue);
368
369 /* init for parents */
370 nonNullRank = maxSymbolValue;
371 while(huffNode[nonNullRank].count == 0) nonNullRank--;
372 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
373 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
374 huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
375 nodeNb++; lowS-=2;
376 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
377 huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */
378
379 /* create parents */
380 while (nodeNb <= nodeRoot) {
381 U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
382 U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
383 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
384 huffNode[n1].parent = huffNode[n2].parent = nodeNb;
385 nodeNb++;
386 }
387
388 /* distribute weights (unlimited tree height) */
389 huffNode[nodeRoot].nbBits = 0;
390 for (n=nodeRoot-1; n>=STARTNODE; n--)
391 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
392 for (n=0; n<=nonNullRank; n++)
393 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
394
395 /* enforce maxTableLog */
396 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
397
398 /* fill result into tree (val, nbBits) */
399 { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
400 U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
401 if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
402 for (n=0; n<=nonNullRank; n++)
403 nbPerRank[huffNode[n].nbBits]++;
404 /* determine stating value per rank */
405 { U16 min = 0;
406 for (n=maxNbBits; n>0; n--) {
407 valPerRank[n] = min; /* get starting value within each rank */
408 min += nbPerRank[n];
409 min >>= 1;
410 } }
411 for (n=0; n<=maxSymbolValue; n++)
412 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
413 for (n=0; n<=maxSymbolValue; n++)
414 tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
415 }
416
417 return maxNbBits;
418}
419
420/** HUF_buildCTable() :
421 * @return : maxNbBits
422 * Note : count is used before tree is written, so they can safely overlap
423 */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700424size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
khenaidooac637102019-01-14 15:44:34 -0500425{
426 huffNodeTable nodeTable;
427 return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
428}
429
430static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
431{
432 size_t nbBits = 0;
433 int s;
434 for (s = 0; s <= (int)maxSymbolValue; ++s) {
435 nbBits += CTable[s].nbBits * count[s];
436 }
437 return nbBits >> 3;
438}
439
440static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
441 int bad = 0;
442 int s;
443 for (s = 0; s <= (int)maxSymbolValue; ++s) {
444 bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
445 }
446 return !bad;
447}
448
449size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
450
451FORCE_INLINE_TEMPLATE void
452HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
453{
454 BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
455}
456
457#define HUF_FLUSHBITS(s) BIT_flushBits(s)
458
459#define HUF_FLUSHBITS_1(stream) \
460 if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
461
462#define HUF_FLUSHBITS_2(stream) \
463 if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464
465FORCE_INLINE_TEMPLATE size_t
466HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
467 const void* src, size_t srcSize,
468 const HUF_CElt* CTable)
469{
470 const BYTE* ip = (const BYTE*) src;
471 BYTE* const ostart = (BYTE*)dst;
472 BYTE* const oend = ostart + dstSize;
473 BYTE* op = ostart;
474 size_t n;
475 BIT_CStream_t bitC;
476
477 /* init */
478 if (dstSize < 8) return 0; /* not enough space to compress */
479 { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
480 if (HUF_isError(initErr)) return 0; }
481
482 n = srcSize & ~3; /* join to mod 4 */
483 switch (srcSize & 3)
484 {
485 case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
486 HUF_FLUSHBITS_2(&bitC);
487 /* fall-through */
488 case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
489 HUF_FLUSHBITS_1(&bitC);
490 /* fall-through */
491 case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
492 HUF_FLUSHBITS(&bitC);
493 /* fall-through */
494 case 0 : /* fall-through */
495 default: break;
496 }
497
498 for (; n>0; n-=4) { /* note : n&3==0 at this stage */
499 HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
500 HUF_FLUSHBITS_1(&bitC);
501 HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
502 HUF_FLUSHBITS_2(&bitC);
503 HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
504 HUF_FLUSHBITS_1(&bitC);
505 HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
506 HUF_FLUSHBITS(&bitC);
507 }
508
509 return BIT_closeCStream(&bitC);
510}
511
512#if DYNAMIC_BMI2
513
514static TARGET_ATTRIBUTE("bmi2") size_t
515HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
516 const void* src, size_t srcSize,
517 const HUF_CElt* CTable)
518{
519 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
520}
521
522static size_t
523HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
524 const void* src, size_t srcSize,
525 const HUF_CElt* CTable)
526{
527 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
528}
529
530static size_t
531HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
532 const void* src, size_t srcSize,
533 const HUF_CElt* CTable, const int bmi2)
534{
535 if (bmi2) {
536 return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
537 }
538 return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
539}
540
541#else
542
543static size_t
544HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
545 const void* src, size_t srcSize,
546 const HUF_CElt* CTable, const int bmi2)
547{
548 (void)bmi2;
549 return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
550}
551
552#endif
553
554size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
555{
556 return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
557}
558
559
560static size_t
561HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
562 const void* src, size_t srcSize,
563 const HUF_CElt* CTable, int bmi2)
564{
565 size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
566 const BYTE* ip = (const BYTE*) src;
567 const BYTE* const iend = ip + srcSize;
568 BYTE* const ostart = (BYTE*) dst;
569 BYTE* const oend = ostart + dstSize;
570 BYTE* op = ostart;
571
572 if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
573 if (srcSize < 12) return 0; /* no saving possible : too small input */
574 op += 6; /* jumpTable */
575
576 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
577 if (cSize==0) return 0;
578 assert(cSize <= 65535);
579 MEM_writeLE16(ostart, (U16)cSize);
580 op += cSize;
581 }
582
583 ip += segmentSize;
584 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
585 if (cSize==0) return 0;
586 assert(cSize <= 65535);
587 MEM_writeLE16(ostart+2, (U16)cSize);
588 op += cSize;
589 }
590
591 ip += segmentSize;
592 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
593 if (cSize==0) return 0;
594 assert(cSize <= 65535);
595 MEM_writeLE16(ostart+4, (U16)cSize);
596 op += cSize;
597 }
598
599 ip += segmentSize;
600 { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
601 if (cSize==0) return 0;
602 op += cSize;
603 }
604
605 return op-ostart;
606}
607
608size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
609{
610 return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
611}
612
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700613typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
khenaidooac637102019-01-14 15:44:34 -0500614
615static size_t HUF_compressCTable_internal(
616 BYTE* const ostart, BYTE* op, BYTE* const oend,
617 const void* src, size_t srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700618 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
khenaidooac637102019-01-14 15:44:34 -0500619{
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700620 size_t const cSize = (nbStreams==HUF_singleStream) ?
khenaidooac637102019-01-14 15:44:34 -0500621 HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
622 HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
623 if (HUF_isError(cSize)) { return cSize; }
624 if (cSize==0) { return 0; } /* uncompressible */
625 op += cSize;
626 /* check compressibility */
627 if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
628 return op-ostart;
629}
630
631typedef struct {
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700632 unsigned count[HUF_SYMBOLVALUE_MAX + 1];
khenaidooac637102019-01-14 15:44:34 -0500633 HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
634 huffNodeTable nodeTable;
635} HUF_compress_tables_t;
636
637/* HUF_compress_internal() :
638 * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700639static size_t
640HUF_compress_internal (void* dst, size_t dstSize,
641 const void* src, size_t srcSize,
642 unsigned maxSymbolValue, unsigned huffLog,
643 HUF_nbStreams_e nbStreams,
644 void* workSpace, size_t wkspSize,
645 HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
646 const int bmi2)
khenaidooac637102019-01-14 15:44:34 -0500647{
648 HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
649 BYTE* const ostart = (BYTE*)dst;
650 BYTE* const oend = ostart + dstSize;
651 BYTE* op = ostart;
652
653 /* checks & inits */
654 if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700655 if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
khenaidooac637102019-01-14 15:44:34 -0500656 if (!srcSize) return 0; /* Uncompressed */
657 if (!dstSize) return 0; /* cannot fit anything within dst budget */
658 if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
659 if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
660 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
661 if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
662 if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
663
664 /* Heuristic : If old table is valid, use it for small inputs */
665 if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
666 return HUF_compressCTable_internal(ostart, op, oend,
667 src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700668 nbStreams, oldHufTable, bmi2);
khenaidooac637102019-01-14 15:44:34 -0500669 }
670
671 /* Scan input and build symbol stats */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700672 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
khenaidooac637102019-01-14 15:44:34 -0500673 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700674 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
khenaidooac637102019-01-14 15:44:34 -0500675 }
676
677 /* Check validity of previous table */
678 if ( repeat
679 && *repeat == HUF_repeat_check
680 && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
681 *repeat = HUF_repeat_none;
682 }
683 /* Heuristic : use existing table for small inputs */
684 if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
685 return HUF_compressCTable_internal(ostart, op, oend,
686 src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700687 nbStreams, oldHufTable, bmi2);
khenaidooac637102019-01-14 15:44:34 -0500688 }
689
690 /* Build Huffman Tree */
691 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700692 { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
693 maxSymbolValue, huffLog,
694 table->nodeTable, sizeof(table->nodeTable));
695 CHECK_F(maxBits);
khenaidooac637102019-01-14 15:44:34 -0500696 huffLog = (U32)maxBits;
697 /* Zero unused symbols in CTable, so we can check it for validity */
698 memset(table->CTable + (maxSymbolValue + 1), 0,
699 sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
700 }
701
702 /* Write table description header */
703 { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
704 /* Check if using previous huffman table is beneficial */
705 if (repeat && *repeat != HUF_repeat_none) {
706 size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
707 size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
708 if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
709 return HUF_compressCTable_internal(ostart, op, oend,
710 src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700711 nbStreams, oldHufTable, bmi2);
khenaidooac637102019-01-14 15:44:34 -0500712 } }
713
714 /* Use the new huffman table */
715 if (hSize + 12ul >= srcSize) { return 0; }
716 op += hSize;
717 if (repeat) { *repeat = HUF_repeat_none; }
718 if (oldHufTable)
719 memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
720 }
721 return HUF_compressCTable_internal(ostart, op, oend,
722 src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700723 nbStreams, table->CTable, bmi2);
khenaidooac637102019-01-14 15:44:34 -0500724}
725
726
727size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
728 const void* src, size_t srcSize,
729 unsigned maxSymbolValue, unsigned huffLog,
730 void* workSpace, size_t wkspSize)
731{
732 return HUF_compress_internal(dst, dstSize, src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700733 maxSymbolValue, huffLog, HUF_singleStream,
khenaidooac637102019-01-14 15:44:34 -0500734 workSpace, wkspSize,
735 NULL, NULL, 0, 0 /*bmi2*/);
736}
737
738size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
739 const void* src, size_t srcSize,
740 unsigned maxSymbolValue, unsigned huffLog,
741 void* workSpace, size_t wkspSize,
742 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
743{
744 return HUF_compress_internal(dst, dstSize, src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700745 maxSymbolValue, huffLog, HUF_singleStream,
khenaidooac637102019-01-14 15:44:34 -0500746 workSpace, wkspSize, hufTable,
747 repeat, preferRepeat, bmi2);
748}
749
750size_t HUF_compress1X (void* dst, size_t dstSize,
751 const void* src, size_t srcSize,
752 unsigned maxSymbolValue, unsigned huffLog)
753{
754 unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
755 return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
756}
757
758/* HUF_compress4X_repeat():
759 * compress input using 4 streams.
760 * provide workspace to generate compression tables */
761size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
762 const void* src, size_t srcSize,
763 unsigned maxSymbolValue, unsigned huffLog,
764 void* workSpace, size_t wkspSize)
765{
766 return HUF_compress_internal(dst, dstSize, src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700767 maxSymbolValue, huffLog, HUF_fourStreams,
khenaidooac637102019-01-14 15:44:34 -0500768 workSpace, wkspSize,
769 NULL, NULL, 0, 0 /*bmi2*/);
770}
771
772/* HUF_compress4X_repeat():
773 * compress input using 4 streams.
774 * re-use an existing huffman compression table */
775size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
776 const void* src, size_t srcSize,
777 unsigned maxSymbolValue, unsigned huffLog,
778 void* workSpace, size_t wkspSize,
779 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
780{
781 return HUF_compress_internal(dst, dstSize, src, srcSize,
Scott Bakerbeb3cfa2019-10-01 14:44:30 -0700782 maxSymbolValue, huffLog, HUF_fourStreams,
khenaidooac637102019-01-14 15:44:34 -0500783 workSpace, wkspSize,
784 hufTable, repeat, preferRepeat, bmi2);
785}
786
787size_t HUF_compress2 (void* dst, size_t dstSize,
788 const void* src, size_t srcSize,
789 unsigned maxSymbolValue, unsigned huffLog)
790{
791 unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
792 return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
793}
794
795size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
796{
797 return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
798}