Scott Baker | 2c1c482 | 2019-10-16 11:02:41 -0700 | [diff] [blame^] | 1 | /* ****************************************************************** |
| 2 | Huffman encoder, part of New Generation Entropy library |
| 3 | Copyright (C) 2013-2016, Yann Collet. |
| 4 | |
| 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 6 | |
| 7 | Redistribution and use in source and binary forms, with or without |
| 8 | modification, are permitted provided that the following conditions are |
| 9 | met: |
| 10 | |
| 11 | * Redistributions of source code must retain the above copyright |
| 12 | notice, this list of conditions and the following disclaimer. |
| 13 | * Redistributions in binary form must reproduce the above |
| 14 | copyright notice, this list of conditions and the following disclaimer |
| 15 | in the documentation and/or other materials provided with the |
| 16 | distribution. |
| 17 | |
| 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | |
| 30 | You can contact the author at : |
| 31 | - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy |
| 32 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
| 33 | ****************************************************************** */ |
| 34 | |
| 35 | /* ************************************************************** |
| 36 | * Compiler specifics |
| 37 | ****************************************************************/ |
| 38 | #ifdef _MSC_VER /* Visual Studio */ |
| 39 | # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ |
| 40 | #endif |
| 41 | |
| 42 | |
| 43 | /* ************************************************************** |
| 44 | * Includes |
| 45 | ****************************************************************/ |
| 46 | #include <string.h> /* memcpy, memset */ |
| 47 | #include <stdio.h> /* printf (debug) */ |
| 48 | #include "compiler.h" |
| 49 | #include "bitstream.h" |
| 50 | #include "hist.h" |
| 51 | #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ |
| 52 | #include "fse.h" /* header compression */ |
| 53 | #define HUF_STATIC_LINKING_ONLY |
| 54 | #include "huf.h" |
| 55 | #include "error_private.h" |
| 56 | |
| 57 | |
| 58 | /* ************************************************************** |
| 59 | * Error Management |
| 60 | ****************************************************************/ |
| 61 | #define HUF_isError ERR_isError |
| 62 | #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ |
| 63 | #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e |
| 64 | #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } |
| 65 | |
| 66 | |
| 67 | /* ************************************************************** |
| 68 | * Utils |
| 69 | ****************************************************************/ |
| 70 | unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) |
| 71 | { |
| 72 | return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); |
| 73 | } |
| 74 | |
| 75 | |
| 76 | /* ******************************************************* |
| 77 | * HUF : Huffman block compression |
| 78 | *********************************************************/ |
| 79 | /* HUF_compressWeights() : |
| 80 | * Same as FSE_compress(), but dedicated to huff0's weights compression. |
| 81 | * The use case needs much less stack memory. |
| 82 | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. |
| 83 | */ |
| 84 | #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 |
| 85 | static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) |
| 86 | { |
| 87 | BYTE* const ostart = (BYTE*) dst; |
| 88 | BYTE* op = ostart; |
| 89 | BYTE* const oend = ostart + dstSize; |
| 90 | |
| 91 | unsigned maxSymbolValue = HUF_TABLELOG_MAX; |
| 92 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; |
| 93 | |
| 94 | FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; |
| 95 | BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; |
| 96 | |
| 97 | unsigned count[HUF_TABLELOG_MAX+1]; |
| 98 | S16 norm[HUF_TABLELOG_MAX+1]; |
| 99 | |
| 100 | /* init conditions */ |
| 101 | if (wtSize <= 1) return 0; /* Not compressible */ |
| 102 | |
| 103 | /* Scan input and build symbol stats */ |
| 104 | { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */ |
| 105 | if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ |
| 106 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
| 107 | } |
| 108 | |
| 109 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); |
| 110 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); |
| 111 | |
| 112 | /* Write table description header */ |
| 113 | { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); |
| 114 | op += hSize; |
| 115 | } |
| 116 | |
| 117 | /* Compress */ |
| 118 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); |
| 119 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); |
| 120 | if (cSize == 0) return 0; /* not enough space for compressed data */ |
| 121 | op += cSize; |
| 122 | } |
| 123 | |
| 124 | return op-ostart; |
| 125 | } |
| 126 | |
| 127 | |
| 128 | struct HUF_CElt_s { |
| 129 | U16 val; |
| 130 | BYTE nbBits; |
| 131 | }; /* typedef'd to HUF_CElt within "huf.h" */ |
| 132 | |
| 133 | /*! HUF_writeCTable() : |
| 134 | `CTable` : Huffman tree to save, using huf representation. |
| 135 | @return : size of saved CTable */ |
| 136 | size_t HUF_writeCTable (void* dst, size_t maxDstSize, |
| 137 | const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) |
| 138 | { |
| 139 | BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ |
| 140 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; |
| 141 | BYTE* op = (BYTE*)dst; |
| 142 | U32 n; |
| 143 | |
| 144 | /* check conditions */ |
| 145 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
| 146 | |
| 147 | /* convert to weight */ |
| 148 | bitsToWeight[0] = 0; |
| 149 | for (n=1; n<huffLog+1; n++) |
| 150 | bitsToWeight[n] = (BYTE)(huffLog + 1 - n); |
| 151 | for (n=0; n<maxSymbolValue; n++) |
| 152 | huffWeight[n] = bitsToWeight[CTable[n].nbBits]; |
| 153 | |
| 154 | /* attempt weights compression by FSE */ |
| 155 | { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); |
| 156 | if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ |
| 157 | op[0] = (BYTE)hSize; |
| 158 | return hSize+1; |
| 159 | } } |
| 160 | |
| 161 | /* write raw values as 4-bits (max : 15) */ |
| 162 | if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ |
| 163 | if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ |
| 164 | op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); |
| 165 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ |
| 166 | for (n=0; n<maxSymbolValue; n+=2) |
| 167 | op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); |
| 168 | return ((maxSymbolValue+1)/2) + 1; |
| 169 | } |
| 170 | |
| 171 | |
| 172 | size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize) |
| 173 | { |
| 174 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ |
| 175 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ |
| 176 | U32 tableLog = 0; |
| 177 | U32 nbSymbols = 0; |
| 178 | |
| 179 | /* get symbol weights */ |
| 180 | CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); |
| 181 | |
| 182 | /* check result */ |
| 183 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
| 184 | if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); |
| 185 | |
| 186 | /* Prepare base value per rank */ |
| 187 | { U32 n, nextRankStart = 0; |
| 188 | for (n=1; n<=tableLog; n++) { |
| 189 | U32 current = nextRankStart; |
| 190 | nextRankStart += (rankVal[n] << (n-1)); |
| 191 | rankVal[n] = current; |
| 192 | } } |
| 193 | |
| 194 | /* fill nbBits */ |
| 195 | { U32 n; for (n=0; n<nbSymbols; n++) { |
| 196 | const U32 w = huffWeight[n]; |
| 197 | CTable[n].nbBits = (BYTE)(tableLog + 1 - w); |
| 198 | } } |
| 199 | |
| 200 | /* fill val */ |
| 201 | { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */ |
| 202 | U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; |
| 203 | { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; } |
| 204 | /* determine stating value per rank */ |
| 205 | valPerRank[tableLog+1] = 0; /* for w==0 */ |
| 206 | { U16 min = 0; |
| 207 | U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */ |
| 208 | valPerRank[n] = min; /* get starting value within each rank */ |
| 209 | min += nbPerRank[n]; |
| 210 | min >>= 1; |
| 211 | } } |
| 212 | /* assign value within rank, symbol order */ |
| 213 | { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; } |
| 214 | } |
| 215 | |
| 216 | *maxSymbolValuePtr = nbSymbols - 1; |
| 217 | return readSize; |
| 218 | } |
| 219 | |
| 220 | U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue) |
| 221 | { |
| 222 | const HUF_CElt* table = (const HUF_CElt*)symbolTable; |
| 223 | assert(symbolValue <= HUF_SYMBOLVALUE_MAX); |
| 224 | return table[symbolValue].nbBits; |
| 225 | } |
| 226 | |
| 227 | |
| 228 | typedef struct nodeElt_s { |
| 229 | U32 count; |
| 230 | U16 parent; |
| 231 | BYTE byte; |
| 232 | BYTE nbBits; |
| 233 | } nodeElt; |
| 234 | |
| 235 | static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) |
| 236 | { |
| 237 | const U32 largestBits = huffNode[lastNonNull].nbBits; |
| 238 | if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */ |
| 239 | |
| 240 | /* there are several too large elements (at least >= 2) */ |
| 241 | { int totalCost = 0; |
| 242 | const U32 baseCost = 1 << (largestBits - maxNbBits); |
| 243 | U32 n = lastNonNull; |
| 244 | |
| 245 | while (huffNode[n].nbBits > maxNbBits) { |
| 246 | totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); |
| 247 | huffNode[n].nbBits = (BYTE)maxNbBits; |
| 248 | n --; |
| 249 | } /* n stops at huffNode[n].nbBits <= maxNbBits */ |
| 250 | while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */ |
| 251 | |
| 252 | /* renorm totalCost */ |
| 253 | totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ |
| 254 | |
| 255 | /* repay normalized cost */ |
| 256 | { U32 const noSymbol = 0xF0F0F0F0; |
| 257 | U32 rankLast[HUF_TABLELOG_MAX+2]; |
| 258 | int pos; |
| 259 | |
| 260 | /* Get pos of last (smallest) symbol per rank */ |
| 261 | memset(rankLast, 0xF0, sizeof(rankLast)); |
| 262 | { U32 currentNbBits = maxNbBits; |
| 263 | for (pos=n ; pos >= 0; pos--) { |
| 264 | if (huffNode[pos].nbBits >= currentNbBits) continue; |
| 265 | currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ |
| 266 | rankLast[maxNbBits-currentNbBits] = pos; |
| 267 | } } |
| 268 | |
| 269 | while (totalCost > 0) { |
| 270 | U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; |
| 271 | for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { |
| 272 | U32 highPos = rankLast[nBitsToDecrease]; |
| 273 | U32 lowPos = rankLast[nBitsToDecrease-1]; |
| 274 | if (highPos == noSymbol) continue; |
| 275 | if (lowPos == noSymbol) break; |
| 276 | { U32 const highTotal = huffNode[highPos].count; |
| 277 | U32 const lowTotal = 2 * huffNode[lowPos].count; |
| 278 | if (highTotal <= lowTotal) break; |
| 279 | } } |
| 280 | /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ |
| 281 | /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ |
| 282 | while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) |
| 283 | nBitsToDecrease ++; |
| 284 | totalCost -= 1 << (nBitsToDecrease-1); |
| 285 | if (rankLast[nBitsToDecrease-1] == noSymbol) |
| 286 | rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ |
| 287 | huffNode[rankLast[nBitsToDecrease]].nbBits ++; |
| 288 | if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ |
| 289 | rankLast[nBitsToDecrease] = noSymbol; |
| 290 | else { |
| 291 | rankLast[nBitsToDecrease]--; |
| 292 | if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) |
| 293 | rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ |
| 294 | } } /* while (totalCost > 0) */ |
| 295 | |
| 296 | while (totalCost < 0) { /* Sometimes, cost correction overshoot */ |
| 297 | if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ |
| 298 | while (huffNode[n].nbBits == maxNbBits) n--; |
| 299 | huffNode[n+1].nbBits--; |
| 300 | rankLast[1] = n+1; |
| 301 | totalCost++; |
| 302 | continue; |
| 303 | } |
| 304 | huffNode[ rankLast[1] + 1 ].nbBits--; |
| 305 | rankLast[1]++; |
| 306 | totalCost ++; |
| 307 | } } } /* there are several too large elements (at least >= 2) */ |
| 308 | |
| 309 | return maxNbBits; |
| 310 | } |
| 311 | |
| 312 | |
| 313 | typedef struct { |
| 314 | U32 base; |
| 315 | U32 current; |
| 316 | } rankPos; |
| 317 | |
| 318 | static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue) |
| 319 | { |
| 320 | rankPos rank[32]; |
| 321 | U32 n; |
| 322 | |
| 323 | memset(rank, 0, sizeof(rank)); |
| 324 | for (n=0; n<=maxSymbolValue; n++) { |
| 325 | U32 r = BIT_highbit32(count[n] + 1); |
| 326 | rank[r].base ++; |
| 327 | } |
| 328 | for (n=30; n>0; n--) rank[n-1].base += rank[n].base; |
| 329 | for (n=0; n<32; n++) rank[n].current = rank[n].base; |
| 330 | for (n=0; n<=maxSymbolValue; n++) { |
| 331 | U32 const c = count[n]; |
| 332 | U32 const r = BIT_highbit32(c+1) + 1; |
| 333 | U32 pos = rank[r].current++; |
| 334 | while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) { |
| 335 | huffNode[pos] = huffNode[pos-1]; |
| 336 | pos--; |
| 337 | } |
| 338 | huffNode[pos].count = c; |
| 339 | huffNode[pos].byte = (BYTE)n; |
| 340 | } |
| 341 | } |
| 342 | |
| 343 | |
| 344 | /** HUF_buildCTable_wksp() : |
| 345 | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. |
| 346 | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned. |
| 347 | */ |
| 348 | #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) |
| 349 | typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; |
| 350 | size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) |
| 351 | { |
| 352 | nodeElt* const huffNode0 = (nodeElt*)workSpace; |
| 353 | nodeElt* const huffNode = huffNode0+1; |
| 354 | U32 n, nonNullRank; |
| 355 | int lowS, lowN; |
| 356 | U16 nodeNb = STARTNODE; |
| 357 | U32 nodeRoot; |
| 358 | |
| 359 | /* safety checks */ |
| 360 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
| 361 | if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall); |
| 362 | if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; |
| 363 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
| 364 | memset(huffNode0, 0, sizeof(huffNodeTable)); |
| 365 | |
| 366 | /* sort, decreasing order */ |
| 367 | HUF_sort(huffNode, count, maxSymbolValue); |
| 368 | |
| 369 | /* init for parents */ |
| 370 | nonNullRank = maxSymbolValue; |
| 371 | while(huffNode[nonNullRank].count == 0) nonNullRank--; |
| 372 | lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; |
| 373 | huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; |
| 374 | huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; |
| 375 | nodeNb++; lowS-=2; |
| 376 | for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); |
| 377 | huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ |
| 378 | |
| 379 | /* create parents */ |
| 380 | while (nodeNb <= nodeRoot) { |
| 381 | U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
| 382 | U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; |
| 383 | huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; |
| 384 | huffNode[n1].parent = huffNode[n2].parent = nodeNb; |
| 385 | nodeNb++; |
| 386 | } |
| 387 | |
| 388 | /* distribute weights (unlimited tree height) */ |
| 389 | huffNode[nodeRoot].nbBits = 0; |
| 390 | for (n=nodeRoot-1; n>=STARTNODE; n--) |
| 391 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; |
| 392 | for (n=0; n<=nonNullRank; n++) |
| 393 | huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; |
| 394 | |
| 395 | /* enforce maxTableLog */ |
| 396 | maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); |
| 397 | |
| 398 | /* fill result into tree (val, nbBits) */ |
| 399 | { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; |
| 400 | U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; |
| 401 | if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ |
| 402 | for (n=0; n<=nonNullRank; n++) |
| 403 | nbPerRank[huffNode[n].nbBits]++; |
| 404 | /* determine stating value per rank */ |
| 405 | { U16 min = 0; |
| 406 | for (n=maxNbBits; n>0; n--) { |
| 407 | valPerRank[n] = min; /* get starting value within each rank */ |
| 408 | min += nbPerRank[n]; |
| 409 | min >>= 1; |
| 410 | } } |
| 411 | for (n=0; n<=maxSymbolValue; n++) |
| 412 | tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ |
| 413 | for (n=0; n<=maxSymbolValue; n++) |
| 414 | tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ |
| 415 | } |
| 416 | |
| 417 | return maxNbBits; |
| 418 | } |
| 419 | |
| 420 | /** HUF_buildCTable() : |
| 421 | * @return : maxNbBits |
| 422 | * Note : count is used before tree is written, so they can safely overlap |
| 423 | */ |
| 424 | size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) |
| 425 | { |
| 426 | huffNodeTable nodeTable; |
| 427 | return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); |
| 428 | } |
| 429 | |
| 430 | static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) |
| 431 | { |
| 432 | size_t nbBits = 0; |
| 433 | int s; |
| 434 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
| 435 | nbBits += CTable[s].nbBits * count[s]; |
| 436 | } |
| 437 | return nbBits >> 3; |
| 438 | } |
| 439 | |
| 440 | static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { |
| 441 | int bad = 0; |
| 442 | int s; |
| 443 | for (s = 0; s <= (int)maxSymbolValue; ++s) { |
| 444 | bad |= (count[s] != 0) & (CTable[s].nbBits == 0); |
| 445 | } |
| 446 | return !bad; |
| 447 | } |
| 448 | |
| 449 | size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } |
| 450 | |
| 451 | FORCE_INLINE_TEMPLATE void |
| 452 | HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) |
| 453 | { |
| 454 | BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); |
| 455 | } |
| 456 | |
| 457 | #define HUF_FLUSHBITS(s) BIT_flushBits(s) |
| 458 | |
| 459 | #define HUF_FLUSHBITS_1(stream) \ |
| 460 | if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) |
| 461 | |
| 462 | #define HUF_FLUSHBITS_2(stream) \ |
| 463 | if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) |
| 464 | |
| 465 | FORCE_INLINE_TEMPLATE size_t |
| 466 | HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, |
| 467 | const void* src, size_t srcSize, |
| 468 | const HUF_CElt* CTable) |
| 469 | { |
| 470 | const BYTE* ip = (const BYTE*) src; |
| 471 | BYTE* const ostart = (BYTE*)dst; |
| 472 | BYTE* const oend = ostart + dstSize; |
| 473 | BYTE* op = ostart; |
| 474 | size_t n; |
| 475 | BIT_CStream_t bitC; |
| 476 | |
| 477 | /* init */ |
| 478 | if (dstSize < 8) return 0; /* not enough space to compress */ |
| 479 | { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); |
| 480 | if (HUF_isError(initErr)) return 0; } |
| 481 | |
| 482 | n = srcSize & ~3; /* join to mod 4 */ |
| 483 | switch (srcSize & 3) |
| 484 | { |
| 485 | case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); |
| 486 | HUF_FLUSHBITS_2(&bitC); |
| 487 | /* fall-through */ |
| 488 | case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); |
| 489 | HUF_FLUSHBITS_1(&bitC); |
| 490 | /* fall-through */ |
| 491 | case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); |
| 492 | HUF_FLUSHBITS(&bitC); |
| 493 | /* fall-through */ |
| 494 | case 0 : /* fall-through */ |
| 495 | default: break; |
| 496 | } |
| 497 | |
| 498 | for (; n>0; n-=4) { /* note : n&3==0 at this stage */ |
| 499 | HUF_encodeSymbol(&bitC, ip[n- 1], CTable); |
| 500 | HUF_FLUSHBITS_1(&bitC); |
| 501 | HUF_encodeSymbol(&bitC, ip[n- 2], CTable); |
| 502 | HUF_FLUSHBITS_2(&bitC); |
| 503 | HUF_encodeSymbol(&bitC, ip[n- 3], CTable); |
| 504 | HUF_FLUSHBITS_1(&bitC); |
| 505 | HUF_encodeSymbol(&bitC, ip[n- 4], CTable); |
| 506 | HUF_FLUSHBITS(&bitC); |
| 507 | } |
| 508 | |
| 509 | return BIT_closeCStream(&bitC); |
| 510 | } |
| 511 | |
| 512 | #if DYNAMIC_BMI2 |
| 513 | |
| 514 | static TARGET_ATTRIBUTE("bmi2") size_t |
| 515 | HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, |
| 516 | const void* src, size_t srcSize, |
| 517 | const HUF_CElt* CTable) |
| 518 | { |
| 519 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
| 520 | } |
| 521 | |
| 522 | static size_t |
| 523 | HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, |
| 524 | const void* src, size_t srcSize, |
| 525 | const HUF_CElt* CTable) |
| 526 | { |
| 527 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
| 528 | } |
| 529 | |
| 530 | static size_t |
| 531 | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, |
| 532 | const void* src, size_t srcSize, |
| 533 | const HUF_CElt* CTable, const int bmi2) |
| 534 | { |
| 535 | if (bmi2) { |
| 536 | return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); |
| 537 | } |
| 538 | return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); |
| 539 | } |
| 540 | |
| 541 | #else |
| 542 | |
| 543 | static size_t |
| 544 | HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, |
| 545 | const void* src, size_t srcSize, |
| 546 | const HUF_CElt* CTable, const int bmi2) |
| 547 | { |
| 548 | (void)bmi2; |
| 549 | return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); |
| 550 | } |
| 551 | |
| 552 | #endif |
| 553 | |
| 554 | size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
| 555 | { |
| 556 | return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
| 557 | } |
| 558 | |
| 559 | |
| 560 | static size_t |
| 561 | HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, |
| 562 | const void* src, size_t srcSize, |
| 563 | const HUF_CElt* CTable, int bmi2) |
| 564 | { |
| 565 | size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ |
| 566 | const BYTE* ip = (const BYTE*) src; |
| 567 | const BYTE* const iend = ip + srcSize; |
| 568 | BYTE* const ostart = (BYTE*) dst; |
| 569 | BYTE* const oend = ostart + dstSize; |
| 570 | BYTE* op = ostart; |
| 571 | |
| 572 | if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ |
| 573 | if (srcSize < 12) return 0; /* no saving possible : too small input */ |
| 574 | op += 6; /* jumpTable */ |
| 575 | |
| 576 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
| 577 | if (cSize==0) return 0; |
| 578 | assert(cSize <= 65535); |
| 579 | MEM_writeLE16(ostart, (U16)cSize); |
| 580 | op += cSize; |
| 581 | } |
| 582 | |
| 583 | ip += segmentSize; |
| 584 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
| 585 | if (cSize==0) return 0; |
| 586 | assert(cSize <= 65535); |
| 587 | MEM_writeLE16(ostart+2, (U16)cSize); |
| 588 | op += cSize; |
| 589 | } |
| 590 | |
| 591 | ip += segmentSize; |
| 592 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) ); |
| 593 | if (cSize==0) return 0; |
| 594 | assert(cSize <= 65535); |
| 595 | MEM_writeLE16(ostart+4, (U16)cSize); |
| 596 | op += cSize; |
| 597 | } |
| 598 | |
| 599 | ip += segmentSize; |
| 600 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) ); |
| 601 | if (cSize==0) return 0; |
| 602 | op += cSize; |
| 603 | } |
| 604 | |
| 605 | return op-ostart; |
| 606 | } |
| 607 | |
| 608 | size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) |
| 609 | { |
| 610 | return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); |
| 611 | } |
| 612 | |
| 613 | typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; |
| 614 | |
| 615 | static size_t HUF_compressCTable_internal( |
| 616 | BYTE* const ostart, BYTE* op, BYTE* const oend, |
| 617 | const void* src, size_t srcSize, |
| 618 | HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) |
| 619 | { |
| 620 | size_t const cSize = (nbStreams==HUF_singleStream) ? |
| 621 | HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) : |
| 622 | HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2); |
| 623 | if (HUF_isError(cSize)) { return cSize; } |
| 624 | if (cSize==0) { return 0; } /* uncompressible */ |
| 625 | op += cSize; |
| 626 | /* check compressibility */ |
| 627 | if ((size_t)(op-ostart) >= srcSize-1) { return 0; } |
| 628 | return op-ostart; |
| 629 | } |
| 630 | |
| 631 | typedef struct { |
| 632 | unsigned count[HUF_SYMBOLVALUE_MAX + 1]; |
| 633 | HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; |
| 634 | huffNodeTable nodeTable; |
| 635 | } HUF_compress_tables_t; |
| 636 | |
| 637 | /* HUF_compress_internal() : |
| 638 | * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ |
| 639 | static size_t |
| 640 | HUF_compress_internal (void* dst, size_t dstSize, |
| 641 | const void* src, size_t srcSize, |
| 642 | unsigned maxSymbolValue, unsigned huffLog, |
| 643 | HUF_nbStreams_e nbStreams, |
| 644 | void* workSpace, size_t wkspSize, |
| 645 | HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, |
| 646 | const int bmi2) |
| 647 | { |
| 648 | HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace; |
| 649 | BYTE* const ostart = (BYTE*)dst; |
| 650 | BYTE* const oend = ostart + dstSize; |
| 651 | BYTE* op = ostart; |
| 652 | |
| 653 | /* checks & inits */ |
| 654 | if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ |
| 655 | if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); |
| 656 | if (!srcSize) return 0; /* Uncompressed */ |
| 657 | if (!dstSize) return 0; /* cannot fit anything within dst budget */ |
| 658 | if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ |
| 659 | if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
| 660 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); |
| 661 | if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; |
| 662 | if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; |
| 663 | |
| 664 | /* Heuristic : If old table is valid, use it for small inputs */ |
| 665 | if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { |
| 666 | return HUF_compressCTable_internal(ostart, op, oend, |
| 667 | src, srcSize, |
| 668 | nbStreams, oldHufTable, bmi2); |
| 669 | } |
| 670 | |
| 671 | /* Scan input and build symbol stats */ |
| 672 | { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) ); |
| 673 | if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ |
| 674 | if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ |
| 675 | } |
| 676 | |
| 677 | /* Check validity of previous table */ |
| 678 | if ( repeat |
| 679 | && *repeat == HUF_repeat_check |
| 680 | && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { |
| 681 | *repeat = HUF_repeat_none; |
| 682 | } |
| 683 | /* Heuristic : use existing table for small inputs */ |
| 684 | if (preferRepeat && repeat && *repeat != HUF_repeat_none) { |
| 685 | return HUF_compressCTable_internal(ostart, op, oend, |
| 686 | src, srcSize, |
| 687 | nbStreams, oldHufTable, bmi2); |
| 688 | } |
| 689 | |
| 690 | /* Build Huffman Tree */ |
| 691 | huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); |
| 692 | { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, |
| 693 | maxSymbolValue, huffLog, |
| 694 | table->nodeTable, sizeof(table->nodeTable)); |
| 695 | CHECK_F(maxBits); |
| 696 | huffLog = (U32)maxBits; |
| 697 | /* Zero unused symbols in CTable, so we can check it for validity */ |
| 698 | memset(table->CTable + (maxSymbolValue + 1), 0, |
| 699 | sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); |
| 700 | } |
| 701 | |
| 702 | /* Write table description header */ |
| 703 | { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) ); |
| 704 | /* Check if using previous huffman table is beneficial */ |
| 705 | if (repeat && *repeat != HUF_repeat_none) { |
| 706 | size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); |
| 707 | size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); |
| 708 | if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { |
| 709 | return HUF_compressCTable_internal(ostart, op, oend, |
| 710 | src, srcSize, |
| 711 | nbStreams, oldHufTable, bmi2); |
| 712 | } } |
| 713 | |
| 714 | /* Use the new huffman table */ |
| 715 | if (hSize + 12ul >= srcSize) { return 0; } |
| 716 | op += hSize; |
| 717 | if (repeat) { *repeat = HUF_repeat_none; } |
| 718 | if (oldHufTable) |
| 719 | memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ |
| 720 | } |
| 721 | return HUF_compressCTable_internal(ostart, op, oend, |
| 722 | src, srcSize, |
| 723 | nbStreams, table->CTable, bmi2); |
| 724 | } |
| 725 | |
| 726 | |
| 727 | size_t HUF_compress1X_wksp (void* dst, size_t dstSize, |
| 728 | const void* src, size_t srcSize, |
| 729 | unsigned maxSymbolValue, unsigned huffLog, |
| 730 | void* workSpace, size_t wkspSize) |
| 731 | { |
| 732 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
| 733 | maxSymbolValue, huffLog, HUF_singleStream, |
| 734 | workSpace, wkspSize, |
| 735 | NULL, NULL, 0, 0 /*bmi2*/); |
| 736 | } |
| 737 | |
| 738 | size_t HUF_compress1X_repeat (void* dst, size_t dstSize, |
| 739 | const void* src, size_t srcSize, |
| 740 | unsigned maxSymbolValue, unsigned huffLog, |
| 741 | void* workSpace, size_t wkspSize, |
| 742 | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
| 743 | { |
| 744 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
| 745 | maxSymbolValue, huffLog, HUF_singleStream, |
| 746 | workSpace, wkspSize, hufTable, |
| 747 | repeat, preferRepeat, bmi2); |
| 748 | } |
| 749 | |
| 750 | size_t HUF_compress1X (void* dst, size_t dstSize, |
| 751 | const void* src, size_t srcSize, |
| 752 | unsigned maxSymbolValue, unsigned huffLog) |
| 753 | { |
| 754 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
| 755 | return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
| 756 | } |
| 757 | |
| 758 | /* HUF_compress4X_repeat(): |
| 759 | * compress input using 4 streams. |
| 760 | * provide workspace to generate compression tables */ |
| 761 | size_t HUF_compress4X_wksp (void* dst, size_t dstSize, |
| 762 | const void* src, size_t srcSize, |
| 763 | unsigned maxSymbolValue, unsigned huffLog, |
| 764 | void* workSpace, size_t wkspSize) |
| 765 | { |
| 766 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
| 767 | maxSymbolValue, huffLog, HUF_fourStreams, |
| 768 | workSpace, wkspSize, |
| 769 | NULL, NULL, 0, 0 /*bmi2*/); |
| 770 | } |
| 771 | |
| 772 | /* HUF_compress4X_repeat(): |
| 773 | * compress input using 4 streams. |
| 774 | * re-use an existing huffman compression table */ |
| 775 | size_t HUF_compress4X_repeat (void* dst, size_t dstSize, |
| 776 | const void* src, size_t srcSize, |
| 777 | unsigned maxSymbolValue, unsigned huffLog, |
| 778 | void* workSpace, size_t wkspSize, |
| 779 | HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) |
| 780 | { |
| 781 | return HUF_compress_internal(dst, dstSize, src, srcSize, |
| 782 | maxSymbolValue, huffLog, HUF_fourStreams, |
| 783 | workSpace, wkspSize, |
| 784 | hufTable, repeat, preferRepeat, bmi2); |
| 785 | } |
| 786 | |
| 787 | size_t HUF_compress2 (void* dst, size_t dstSize, |
| 788 | const void* src, size_t srcSize, |
| 789 | unsigned maxSymbolValue, unsigned huffLog) |
| 790 | { |
| 791 | unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; |
| 792 | return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); |
| 793 | } |
| 794 | |
| 795 | size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) |
| 796 | { |
| 797 | return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); |
| 798 | } |