Matteo Scandolo | 9a2772a | 2018-11-19 14:56:26 -0800 | [diff] [blame^] | 1 | /* ****************************************************************** |
| 2 | Huffman encoder, part of New Generation Entropy library |
| 3 | Copyright (C) 2013-2016, Yann Collet. |
| 4 | |
| 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 6 | |
| 7 | Redistribution and use in source and binary forms, with or without |
| 8 | modification, are permitted provided that the following conditions are |
| 9 | met: |
| 10 | |
| 11 | * Redistributions of source code must retain the above copyright |
| 12 | notice, this list of conditions and the following disclaimer. |
| 13 | * Redistributions in binary form must reproduce the above |
| 14 | copyright notice, this list of conditions and the following disclaimer |
| 15 | in the documentation and/or other materials provided with the |
| 16 | distribution. |
| 17 | |
| 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | You can contact the author at : |
| 31 | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy |
| 32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
| 33 | ****************************************************************** */ |
| 34 | |
| 35 | /* ************************************************************** |
| 36 | * Compiler specifics |
| 37 | ****************************************************************/ |
| 38 | #ifdef _MSC_VER /* Visual Studio */ |
| 39 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ |
| 40 | #endif |
| 41 | |
| 42 | |
| 43 | /* ************************************************************** |
| 44 | * Includes |
| 45 | ****************************************************************/ |
| 46 | #include <string.h> /* memcpy, memset */ |
| 47 | #include <stdio.h> /* printf (debug) */ |
| 48 | #include "bitstream.h" |
| 49 | #include "compiler.h" |
| 50 | #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ |
| 51 | #include "fse.h" /* header compression */ |
| 52 | #define HUF_STATIC_LINKING_ONLY |
| 53 | #include "huf.h" |
| 54 | #include "error_private.h" |
| 55 | |
| 56 | |
/* **************************************************************
*  Error Management
****************************************************************/
/* HUF_isError       : alias of ERR_isError.
 * HUF_STATIC_ASSERT : compile-time assertion; a false `c` yields a division by zero inside
 *                     an enum initializer. It opens a block, so use it only *after*
 *                     variable declarations.
 * CHECK_V_F(e, f)   : declares `size_t const e = (f)`, then returns `e` from the enclosing
 *                     function when ERR_isError(e). It expands to two statements : never use
 *                     it as the sole body of an unbraced if / else / loop.
 * CHECK_F(f)        : same check, with the temporary confined to its own block. */
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }
#define CHECK_V_F(e, f) size_t const e = (f); if (ERR_isError(e)) return e
#define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
| 64 | |
| 65 | |
| 66 | /* ************************************************************** |
| 67 | * Utils |
| 68 | ****************************************************************/ |
/* HUF_optimalTableLog() :
 * Thin forwarder to FSE_optimalTableLog_internal(), called with 1 as its last argument.
 * @return : the value produced by FSE_optimalTableLog_internal(). */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    unsigned const tableLog = FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
    return tableLog;
}
| 73 | |
| 74 | |
| 75 | /* ******************************************************* |
| 76 | * HUF : Huffman block compression |
| 77 | *********************************************************/ |
| 78 | /* HUF_compressWeights() : |
| 79 | * Same as FSE_compress(), but dedicated to huff0's weights compression. |
| 80 | * The use case needs much less stack memory. |
| 81 | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. |
| 82 | */ |
| 83 | #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 |
| 84 | size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) |
| 85 | { |
| 86 | BYTE* const ostart = (BYTE*) dst; |
| 87 | BYTE* op = ostart; |
| 88 | BYTE* const oend = ostart + dstSize; |
| 89 | |
| 90 | U32 maxSymbolValue = HUF_TABLELOG_MAX; |
| 91 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; |
| 92 | |
| 93 | FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; |
| 94 | BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; |
| 95 | |
| 96 | U32 count[HUF_TABLELOG_MAX+1]; |
| 97 | S16 norm[HUF_TABLELOG_MAX+1]; |
| 98 | |
| 99 | /* init conditions */ |
| 100 | if (wtSize <= 1) return 0; /* Not compressible */ |
| 101 | |
| 102 | /* Scan input and build symbol stats */ |
| 103 | { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) ); |
| 104 | if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ |
| 105 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
| 106 | } |
| 107 | |
| 108 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); |
| 109 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); |
| 110 | |
| 111 | /* Write table description header */ |
| 112 | { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); |
| 113 | op += hSize; |
| 114 | } |
| 115 | |
| 116 | /* Compress */ |
| 117 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); |
| 118 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); |
| 119 | if (cSize == 0) return 0; /* not enough space for compressed data */ |
| 120 | op += cSize; |
| 121 | } |
| 122 | |
| 123 | return op-ostart; |
| 124 | } |
| 125 | |
| 126 | |
| 127 | struct HUF_CElt_s { |
| 128 | U16 val; |
| 129 | BYTE nbBits; |
| 130 | }; /* typedef'd to HUF_CElt within "huf.h" */ |
| 131 | |
| 132 | /*! HUF_writeCTable() : |
| 133 | `CTable` : Huffman tree to save, using huf representation. |
| 134 | @return : size of saved CTable */ |
| 135 | size_t HUF_writeCTable (void* dst, size_t maxDstSize, |
| 136 | const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) |
| 137 | { |
| 138 | BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ |
| 139 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; |
| 140 | BYTE* op = (BYTE*)dst; |
| 141 | U32 n; |
| 142 | |
| 143 | /* check conditions */ |
| 144 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
| 145 | |
| 146 | /* convert to weight */ |
| 147 | bitsToWeight[0] = 0; |
| 148 | for (n=1; n<huffLog+1; n++) |
| 149 | bitsToWeight[n] = (BYTE)(huffLog + 1 - n); |
| 150 | for (n=0; n<maxSymbolValue; n++) |
| 151 | huffWeight[n] = bitsToWeight[CTable[n].nbBits]; |
| 152 | |
| 153 | /* attempt weights compression by FSE */ |
| 154 | { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); |
| 155 | if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ |
| 156 | op[0] = (BYTE)hSize; |
| 157 | return hSize+1; |
| 158 | } } |
| 159 | |
| 160 | /* write raw values as 4-bits (max : 15) */ |
| 161 | if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ |
| 162 | if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ |
| 163 | op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); |
| 164 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ |
| 165 | for (n=0; n<maxSymbolValue; n+=2) |
| 166 | op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); |
| 167 | return ((maxSymbolValue+1)/2) + 1; |
| 168 | } |
| 169 | |
| 170 | |
| 171 | size_t HUF_readCTable (HUF_CElt* CTable, U32* maxSymbolValuePtr, const void* src, size_t srcSize) |
| 172 | { |
| 173 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ |
| 174 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ |
| 175 | U32 tableLog = 0; |
| 176 | U32 nbSymbols = 0; |
| 177 | |
| 178 | /* get symbol weights */ |
| 179 | CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); |
| 180 | |
| 181 | /* check result */ |
| 182 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
| 183 | if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); |
| 184 | |
| 185 | /* Prepare base value per rank */ |
| 186 | { U32 n, nextRankStart = 0; |
| 187 | for (n=1; n<=tableLog; n++) { |
| 188 | U32 current = nextRankStart; |
| 189 | nextRankStart += (rankVal[n] << (n-1)); |
| 190 | rankVal[n] = current; |
| 191 | } } |
| 192 | |
| 193 | /* fill nbBits */ |
| 194 | { U32 n; for (n=0; n<nbSymbols; n++) { |
| 195 | const U32 w = huffWeight[n]; |
| 196 | CTable[n].nbBits = (BYTE)(tableLog + 1 - w); |
| 197 | } } |
| 198 | |
| 199 | /* fill val */ |
| 200 | { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ |
| 201 | U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; |
| 202 | { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } |
| 203 | /* determine stating value per rank */ |
| 204 | valPerRank[tableLog+1] = 0; /* for w==0 */ |
| 205 | { U16 min = 0; |
| 206 | U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ |
| 207 | valPerRank[n] = min; /* get starting value within each rank */ |
| 208 | min += nbPerRank[n]; |
| 209 | min >>= 1; |
| 210 | } } |
| 211 | /* assign value within rank, symbol order */ |
| 212 | { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } |
| 213 | } |
| 214 | |
| 215 | *maxSymbolValuePtr = nbSymbols - 1; |
| 216 | return readSize; |
| 217 | } |
| 218 | |
| 219 | |
| 220 | typedef struct nodeElt_s { |
| 221 | U32 count; |
| 222 | U16 parent; |
| 223 | BYTE byte; |
| 224 | BYTE nbBits; |
| 225 | } nodeElt; |
| 226 | |
| 227 | static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) |
| 228 | { |
| 229 | const U32 largestBits = huffNode[lastNonNull].nbBits; |
| 230 | if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ |
| 231 | |
| 232 | /* there are several too large elements (at least >= 2) */ |
| 233 | { int totalCost = 0; |
| 234 | const U32 baseCost = 1 << (largestBits - maxNbBits); |
| 235 | U32 n = lastNonNull; |
| 236 | |
| 237 | while (huffNode[n].nbBits > maxNbBits) { |
| 238 | totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); |
| 239 | huffNode[n].nbBits = (BYTE)maxNbBits; |
| 240 | n --; |
| 241 | } /* n stops at huffNode[n].nbBits <= maxNbBits */ |
| 242 | while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ |
| 243 | |
| 244 | /* renorm totalCost */ |
| 245 | totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ |
| 246 | |
| 247 | /* repay normalized cost */ |
| 248 | { U32 const noSymbol = 0xF0F0F0F0; |
| 249 | U32 rankLast[HUF_TABLELOG_MAX+2]; |
| 250 | int pos; |
| 251 | |
| 252 | /* Get pos of last (smallest) symbol per rank */ |
| 253 | memset(rankLast, 0xF0, sizeof(rankLast)); |
| 254 | { U32 currentNbBits = maxNbBits; |
| 255 | for (pos=n ; pos >= 0; pos--) { |
| 256 | if (huffNode[pos].nbBits >= currentNbBits) continue; |
| 257 | currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ |
| 258 | rankLast[maxNbBits-currentNbBits] = pos; |
| 259 | } } |
| 260 | |
| 261 | while (totalCost > 0) { |
| 262 | U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; |
| 263 | for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { |
| 264 | U32 highPos = rankLast[nBitsToDecrease]; |
| 265 | U32 lowPos = rankLast[nBitsToDecrease-1]; |
| 266 | if (highPos == noSymbol) continue; |
| 267 | if (lowPos == noSymbol) break; |
| 268 | { U32 const highTotal = huffNode[highPos].count; |
| 269 | U32 const lowTotal = 2 * huffNode[lowPos].count; |
| 270 | if (highTotal <= lowTotal) break; |
| 271 | } } |
| 272 | /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ |
| 273 | /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ |
| 274 | while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) |
| 275 | nBitsToDecrease ++; |
| 276 | totalCost -= 1 << (nBitsToDecrease-1); |
| 277 | if (rankLast[nBitsToDecrease-1] == noSymbol) |
| 278 | rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ |
| 279 | huffNode[rankLast[nBitsToDecrease]].nbBits ++; |
| 280 | if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ |
| 281 | rankLast[nBitsToDecrease] = noSymbol; |
| 282 | else { |
| 283 | rankLast[nBitsToDecrease]--; |
| 284 | if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) |
| 285 | rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ |
| 286 | } } /* while (totalCost > 0) */ |
| 287 | |
| 288 | while (totalCost < 0) { /* Sometimes, cost correction overshoot */ |
| 289 | if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ |
| 290 | while (huffNode[n].nbBits == maxNbBits) n--; |
| 291 | huffNode[n+1].nbBits--; |
| 292 | rankLast[1] = n+1; |
| 293 | totalCost++; |
| 294 | continue; |
| 295 | } |
| 296 | huffNode[ rankLast[1] + 1 ].nbBits--; |
| 297 | rankLast[1]++; |
| 298 | totalCost ++; |
| 299 | } } } /* there are several too large elements (at least >= 2) */ |
| 300 | |
| 301 | return maxNbBits; |
| 302 | } |
| 303 | |
| 304 | |
| 305 | typedef struct { |
| 306 | U32 base; |
| 307 | U32 current; |
| 308 | } rankPos; |
| 309 | |
| 310 | static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue) |
| 311 | { |
| 312 | rankPos rank[32]; |
| 313 | U32 n; |
| 314 | |
| 315 | memset(rank, 0, sizeof(rank)); |
| 316 | for (n=0; n<=maxSymbolValue; n++) { |
| 317 | U32 r = BIT_highbit32(count[n] + 1); |
| 318 | rank[r].base ++; |
| 319 | } |
| 320 | for (n=30; n>0; n--) rank[n-1].base += rank[n].base; |
| 321 | for (n=0; n<32; n++) rank[n].current = rank[n].base; |
| 322 | for (n=0; n<=maxSymbolValue; n++) { |
| 323 | U32 const c = count[n]; |
| 324 | U32 const r = BIT_highbit32(c+1) + 1; |
| 325 | U32 pos = rank[r].current++; |
| 326 | while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { |
| 327 | huffNode[pos] = huffNode[pos-1]; |
| 328 | pos--; |
| 329 | } |
| 330 | huffNode[pos].count = c; |
| 331 | huffNode[pos].byte = (BYTE)n; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | |
| 336 | /** HUF_buildCTable_wksp() : |
| 337 | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. |
| 338 | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. |
| 339 | */ |
| 340 | #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) |
| 341 | typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; |
| 342 | size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) |
| 343 | { |
| 344 | nodeElt* const huffNode0 = (nodeElt*)workSpace; |
| 345 | nodeElt* const huffNode = huffNode0+1; |
| 346 | U32 n, nonNullRank; |
| 347 | int lowS, lowN; |
| 348 | U16 nodeNb = STARTNODE; |
| 349 | U32 nodeRoot; |
| 350 | |
| 351 | /* safety checks */ |
| 352 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
| 353 | if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); |
| 354 | if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; |
| 355 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
| 356 | memset(huffNode0, 0, sizeof(huffNodeTable)); |
| 357 | |
| 358 | /* sort, decreasing order */ |
| 359 | HUF_sort(huffNode, count, maxSymbolValue); |
| 360 | |
| 361 | /* init for parents */ |
| 362 | nonNullRank = maxSymbolValue; |
| 363 | while(huffNode[nonNullRank].count == 0) nonNullRank--; |
| 364 | lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; |
| 365 | huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; |
| 366 | huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; |
| 367 | nodeNb++; lowS-=2; |
| 368 | for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); |
| 369 | huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ |
| 370 | |
| 371 | /* create parents */ |
| 372 | while (nodeNb <= nodeRoot) { |
| 373 | U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
| 374 | U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
| 375 | huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; |
| 376 | huffNode[n1].parent = huffNode[n2].parent = nodeNb; |
| 377 | nodeNb++; |
| 378 | } |
| 379 | |
| 380 | /* distribute weights (unlimited tree height) */ |
| 381 | huffNode[nodeRoot].nbBits = 0; |
| 382 | for (n=nodeRoot-1; n>=STARTNODE; n--) |
| 383 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; |
| 384 | for (n=0; n<=nonNullRank; n++) |
| 385 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; |
| 386 | |
| 387 | /* enforce maxTableLog */ |
| 388 | maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); |
| 389 | |
| 390 | /* fill result into tree (val, nbBits) */ |
| 391 | { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; |
| 392 | U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; |
| 393 | if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ |
| 394 | for (n=0; n<=nonNullRank; n++) |
| 395 | nbPerRank[huffNode[n].nbBits]++; |
| 396 | /* determine stating value per rank */ |
| 397 | { U16 min = 0; |
| 398 | for (n=maxNbBits; n>0; n--) { |
| 399 | valPerRank[n] = min; /* get starting value within each rank */ |
| 400 | min += nbPerRank[n]; |
| 401 | min >>= 1; |
| 402 | } } |
| 403 | for (n=0; n<=maxSymbolValue; n++) |
| 404 | tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ |
| 405 | for (n=0; n<=maxSymbolValue; n++) |
| 406 | tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ |
| 407 | } |
| 408 | |
| 409 | return maxNbBits; |
| 410 | } |
| 411 | |
| 412 | /** HUF_buildCTable() : |
| 413 | * @return : maxNbBits |
| 414 | * Note : count is used before tree is written, so they can safely overlap |
| 415 | */ |
| 416 | size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) |
| 417 | { |
| 418 | huffNodeTable nodeTable; |
| 419 | return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); |
| 420 | } |
| 421 | |
| 422 | static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) |
| 423 | { |
| 424 | size_t nbBits = 0; |
| 425 | int s; |
| 426 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
| 427 | nbBits += CTable[s].nbBits * count[s]; |
| 428 | } |
| 429 | return nbBits >> 3; |
| 430 | } |
| 431 | |
| 432 | static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { |
| 433 | int bad = 0; |
| 434 | int s; |
| 435 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
| 436 | bad |= (count[s] != 0) & (CTable[s].nbBits == 0); |
| 437 | } |
| 438 | return !bad; |
| 439 | } |
| 440 | |
/* HUF_compressBound() :
 * Function form of the HUF_COMPRESSBOUND() macro. */
size_t HUF_compressBound(size_t size)
{
    size_t const bound = HUF_COMPRESSBOUND(size);
    return bound;
}
| 442 | |
| 443 | FORCE_INLINE_TEMPLATE void |
| 444 | HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) |
| 445 | { |
| 446 | BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); |
| 447 | } |
| 448 | |
/* Bit-container flush helpers.
 * HUF_FLUSHBITS()   : unconditional flush.
 * HUF_FLUSHBITS_1() : flush only when the bit container is too small to hold
 *                     2 codes of HUF_TABLELOG_MAX bits (plus 7 bits) between two flushes.
 * HUF_FLUSHBITS_2() : same, for 4 codes.
 * The conditions are compile-time constants. The conditional forms are wrapped into
 * do { } while (0), so that they behave as a single statement, including in front of an `else`. */
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

#define HUF_FLUSHBITS_1(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream); } while (0)

#define HUF_FLUSHBITS_2(stream) \
    do { if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream); } while (0)
| 456 | |
| 457 | FORCE_INLINE_TEMPLATE size_t |
| 458 | HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, |
| 459 | const void* src, size_t srcSize, |
| 460 | const HUF_CElt* CTable) |
| 461 | { |
| 462 | const BYTE* ip = (const BYTE*) src; |
| 463 | BYTE* const ostart = (BYTE*)dst; |
| 464 | BYTE* const oend = ostart + dstSize; |
| 465 | BYTE* op = ostart; |
| 466 | size_t n; |
| 467 | BIT_CStream_t bitC; |
| 468 | |
| 469 | /* init */ |
| 470 | if (dstSize < 8) return 0; /* not enough space to compress */ |
| 471 | { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); |
| 472 | if (HUF_isError(initErr)) return 0; } |
| 473 | |
| 474 | n = srcSize & ~3; /* join to mod 4 */ |
| 475 | switch (srcSize & 3) |
| 476 | { |
| 477 | case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); |
| 478 | HUF_FLUSHBITS_2(&bitC); |
| 479 | /* fall-through */ |
| 480 | case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); |
| 481 | HUF_FLUSHBITS_1(&bitC); |
| 482 | /* fall-through */ |
| 483 | case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); |
| 484 | HUF_FLUSHBITS(&bitC); |
| 485 | /* fall-through */ |
| 486 | case 0 : /* fall-through */ |
| 487 | default: break; |
| 488 | } |
| 489 | |
| 490 | for (; n>0; n-=4) { /* note : n&3==0 at this stage */ |
| 491 | HUF_encodeSymbol(&bitC, ip[n- 1], CTable); |
| 492 | HUF_FLUSHBITS_1(&bitC); |
| 493 | HUF_encodeSymbol(&bitC, ip[n- 2], CTable); |
| 494 | HUF_FLUSHBITS_2(&bitC); |
| 495 | HUF_encodeSymbol(&bitC, ip[n- 3], CTable); |
| 496 | HUF_FLUSHBITS_1(&bitC); |
| 497 | HUF_encodeSymbol(&bitC, ip[n- 4], CTable); |
| 498 | HUF_FLUSHBITS(&bitC); |
| 499 | } |
| 500 | |
| 501 | return BIT_closeCStream(&bitC); |
| 502 | } |
| 503 | |
| 504 | #if DYNAMIC_BMI2 |
| 505 | |
| 506 | static TARGET_ATTRIBUTE("bmi2") size_t |
| 507 | HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, |
| 508 | const void* src, size_t srcSize, |
| 509 | const HUF_CElt* CTable) |
| 510 | { |
| 511 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
| 512 | } |
| 513 | |
| 514 | static size_t |
| 515 | HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, |
| 516 | const void* src, size_t srcSize, |
| 517 | const HUF_CElt* CTable) |
| 518 | { |
| 519 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
| 520 | } |
| 521 | |
| 522 | static size_t |
| 523 | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, |
| 524 | const void* src, size_t srcSize, |
| 525 | const HUF_CElt* CTable, const int bmi2) |
| 526 | { |
| 527 | if (bmi2) { |
| 528 | return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); |
| 529 | } |
| 530 | return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); |
| 531 | } |
| 532 | |
| 533 | #else |
| 534 | |
| 535 | static size_t |
| 536 | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, |
| 537 | const void* src, size_t srcSize, |
| 538 | const HUF_CElt* CTable, const int bmi2) |
| 539 | { |
| 540 | (void)bmi2; |
| 541 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
| 542 | } |
| 543 | |
| 544 | #endif |
| 545 | |
| 546 | size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
| 547 | { |
| 548 | return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
| 549 | } |
| 550 | |
| 551 | |
| 552 | static size_t |
| 553 | HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, |
| 554 | const void* src, size_t srcSize, |
| 555 | const HUF_CElt* CTable, int bmi2) |
| 556 | { |
| 557 | size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ |
| 558 | const BYTE* ip = (const BYTE*) src; |
| 559 | const BYTE* const iend = ip + srcSize; |
| 560 | BYTE* const ostart = (BYTE*) dst; |
| 561 | BYTE* const oend = ostart + dstSize; |
| 562 | BYTE* op = ostart; |
| 563 | |
| 564 | if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ |
| 565 | if (srcSize < 12) return 0; /* no saving possible : too small input */ |
| 566 | op += 6; /* jumpTable */ |
| 567 | |
| 568 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
| 569 | if (cSize==0) return 0; |
| 570 | assert(cSize <= 65535); |
| 571 | MEM_writeLE16(ostart, (U16)cSize); |
| 572 | op += cSize; |
| 573 | } |
| 574 | |
| 575 | ip += segmentSize; |
| 576 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
| 577 | if (cSize==0) return 0; |
| 578 | assert(cSize <= 65535); |
| 579 | MEM_writeLE16(ostart+2, (U16)cSize); |
| 580 | op += cSize; |
| 581 | } |
| 582 | |
| 583 | ip += segmentSize; |
| 584 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
| 585 | if (cSize==0) return 0; |
| 586 | assert(cSize <= 65535); |
| 587 | MEM_writeLE16(ostart+4, (U16)cSize); |
| 588 | op += cSize; |
| 589 | } |
| 590 | |
| 591 | ip += segmentSize; |
| 592 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); |
| 593 | if (cSize==0) return 0; |
| 594 | op += cSize; |
| 595 | } |
| 596 | |
| 597 | return op-ostart; |
| 598 | } |
| 599 | |
| 600 | size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
| 601 | { |
| 602 | return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
| 603 | } |
| 604 | |
| 605 | |
| 606 | static size_t HUF_compressCTable_internal( |
| 607 | BYTE* const ostart, BYTE* op, BYTE* const oend, |
| 608 | const void* src, size_t srcSize, |
| 609 | unsigned singleStream, const HUF_CElt* CTable, const int bmi2) |
| 610 | { |
| 611 | size_t const cSize = singleStream ? |
| 612 | HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : |
| 613 | HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); |
| 614 | if (HUF_isError(cSize)) { return cSize; } |
| 615 | if (cSize==0) { return 0; } /* uncompressible */ |
| 616 | op += cSize; |
| 617 | /* check compressibility */ |
| 618 | if ((size_t)(op-ostart) >= srcSize-1) { return 0; } |
| 619 | return op-ostart; |
| 620 | } |
| 621 | |
| 622 | typedef struct { |
| 623 | U32 count[HUF_SYMBOLVALUE_MAX + 1]; |
| 624 | HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; |
| 625 | huffNodeTable nodeTable; |
| 626 | } HUF_compress_tables_t; |
| 627 | |
| 628 | /* HUF_compress_internal() : |
| 629 | * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ |
| 630 | static size_t HUF_compress_internal ( |
| 631 | void* dst, size_t dstSize, |
| 632 | const void* src, size_t srcSize, |
| 633 | unsigned maxSymbolValue, unsigned huffLog, |
| 634 | unsigned singleStream, |
| 635 | void* workSpace, size_t wkspSize, |
| 636 | HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, |
| 637 | const int bmi2) |
| 638 | { |
| 639 | HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; |
| 640 | BYTE* const ostart = (BYTE*)dst; |
| 641 | BYTE* const oend = ostart + dstSize; |
| 642 | BYTE* op = ostart; |
| 643 | |
| 644 | /* checks & inits */ |
| 645 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
| 646 | if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); |
| 647 | if (!srcSize) return 0; /* Uncompressed */ |
| 648 | if (!dstSize) return 0; /* cannot fit anything within dst budget */ |
| 649 | if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ |
| 650 | if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
| 651 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
| 652 | if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
| 653 | if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; |
| 654 | |
| 655 | /* Heuristic : If old table is valid, use it for small inputs */ |
| 656 | if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { |
| 657 | return HUF_compressCTable_internal(ostart, op, oend, |
| 658 | src, srcSize, |
| 659 | singleStream, oldHufTable, bmi2); |
| 660 | } |
| 661 | |
| 662 | /* Scan input and build symbol stats */ |
| 663 | { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) ); |
| 664 | if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ |
| 665 | if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */ |
| 666 | } |
| 667 | |
| 668 | /* Check validity of previous table */ |
| 669 | if ( repeat |
| 670 | && *repeat == HUF_repeat_check |
| 671 | && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { |
| 672 | *repeat = HUF_repeat_none; |
| 673 | } |
| 674 | /* Heuristic : use existing table for small inputs */ |
| 675 | if (preferRepeat && repeat && *repeat != HUF_repeat_none) { |
| 676 | return HUF_compressCTable_internal(ostart, op, oend, |
| 677 | src, srcSize, |
| 678 | singleStream, oldHufTable, bmi2); |
| 679 | } |
| 680 | |
| 681 | /* Build Huffman Tree */ |
| 682 | huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); |
| 683 | { CHECK_V_F(maxBits, HUF_buildCTable_wksp(table->CTable, table->count, |
| 684 | maxSymbolValue, huffLog, |
| 685 | table->nodeTable, sizeof(table->nodeTable)) ); |
| 686 | huffLog = (U32)maxBits; |
| 687 | /* Zero unused symbols in CTable, so we can check it for validity */ |
| 688 | memset(table->CTable + (maxSymbolValue + 1), 0, |
| 689 | sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); |
| 690 | } |
| 691 | |
| 692 | /* Write table description header */ |
| 693 | { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); |
| 694 | /* Check if using previous huffman table is beneficial */ |
| 695 | if (repeat && *repeat != HUF_repeat_none) { |
| 696 | size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); |
| 697 | size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); |
| 698 | if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { |
| 699 | return HUF_compressCTable_internal(ostart, op, oend, |
| 700 | src, srcSize, |
| 701 | singleStream, oldHufTable, bmi2); |
| 702 | } } |
| 703 | |
| 704 | /* Use the new huffman table */ |
| 705 | if (hSize + 12ul >= srcSize) { return 0; } |
| 706 | op += hSize; |
| 707 | if (repeat) { *repeat = HUF_repeat_none; } |
| 708 | if (oldHufTable) |
| 709 | memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ |
| 710 | } |
| 711 | return HUF_compressCTable_internal(ostart, op, oend, |
| 712 | src, srcSize, |
| 713 | singleStream, table->CTable, bmi2); |
| 714 | } |
| 715 | |
| 716 | |
/* HUF_compress1X_wksp() :
 * compress input using a single stream.
 * `workSpace` is used to generate compression tables; no previous table is involved. */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    unsigned const singleStream = 1;
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, singleStream,
                                 workSpace, wkspSize,
                                 NULL /*oldHufTable*/, NULL /*repeat*/, 0 /*preferRepeat*/,
                                 0 /*bmi2*/);
}
| 727 | |
| 728 | size_t HUF_compress1X_repeat (void* dst, size_t dstSize, |
| 729 | const void* src, size_t srcSize, |
| 730 | unsigned maxSymbolValue, unsigned huffLog, |
| 731 | void* workSpace, size_t wkspSize, |
| 732 | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
| 733 | { |
| 734 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
| 735 | maxSymbolValue, huffLog, 1 /*single stream*/, |
| 736 | workSpace, wkspSize, hufTable, |
| 737 | repeat, preferRepeat, bmi2); |
| 738 | } |
| 739 | |
| 740 | size_t HUF_compress1X (void* dst, size_t dstSize, |
| 741 | const void* src, size_t srcSize, |
| 742 | unsigned maxSymbolValue, unsigned huffLog) |
| 743 | { |
| 744 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
| 745 | return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
| 746 | } |
| 747 | |
/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables; no previous table is involved. */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    unsigned const singleStream = 0;   /* 4 streams */
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, singleStream,
                                 workSpace, wkspSize,
                                 NULL /*oldHufTable*/, NULL /*repeat*/, 0 /*preferRepeat*/,
                                 0 /*bmi2*/);
}
| 761 | |
| 762 | /* HUF_compress4X_repeat(): |
| 763 | * compress input using 4 streams. |
| 764 | * re-use an existing huffman compression table */ |
| 765 | size_t HUF_compress4X_repeat (void* dst, size_t dstSize, |
| 766 | const void* src, size_t srcSize, |
| 767 | unsigned maxSymbolValue, unsigned huffLog, |
| 768 | void* workSpace, size_t wkspSize, |
| 769 | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
| 770 | { |
| 771 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
| 772 | maxSymbolValue, huffLog, 0 /* 4 streams */, |
| 773 | workSpace, wkspSize, |
| 774 | hufTable, repeat, preferRepeat, bmi2); |
| 775 | } |
| 776 | |
| 777 | size_t HUF_compress2 (void* dst, size_t dstSize, |
| 778 | const void* src, size_t srcSize, |
| 779 | unsigned maxSymbolValue, unsigned huffLog) |
| 780 | { |
| 781 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
| 782 | return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
| 783 | } |
| 784 | |
| 785 | size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) |
| 786 | { |
| 787 | return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); |
| 788 | } |