Prince Pereira | c1c21d6 | 2021-04-22 08:38:15 +0000 | [diff] [blame] | 1 | /* ****************************************************************** |
| 2 | hist : Histogram functions |
| 3 | part of Finite State Entropy project |
| 4 | Copyright (C) 2013-present, Yann Collet. |
| 5 | |
| 6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 7 | |
| 8 | Redistribution and use in source and binary forms, with or without |
| 9 | modification, are permitted provided that the following conditions are |
| 10 | met: |
| 11 | |
| 12 | * Redistributions of source code must retain the above copyright |
| 13 | notice, this list of conditions and the following disclaimer. |
| 14 | * Redistributions in binary form must reproduce the above |
| 15 | copyright notice, this list of conditions and the following disclaimer |
| 16 | in the documentation and/or other materials provided with the |
| 17 | distribution. |
| 18 | |
| 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | You can contact the author at : |
| 32 | - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy |
| 33 | - Public forum : https://groups.google.com/forum/#!forum/lz4c |
| 34 | ****************************************************************** */ |
| 35 | |
| 36 | /* --- dependencies --- */ |
| 37 | #include <stddef.h> /* size_t */ |
| 38 | |
| 39 | |
| 40 | /* --- simple histogram functions --- */ |
| 41 | |
| 42 | /*! HIST_count(): |
| 43 | * Counts the occurrences of each byte value of `src` (`srcSize` bytes) into the table `count`. |
| 44 | * `count` is a caller-provided table of unsigned int, of minimum size (*maxSymbolValuePtr+1) entries. |
| 45 | * On return, *maxSymbolValuePtr is updated with the actual largest symbol value detected. |
| 46 | * @return : count of the most frequent symbol (the symbol itself is not identified), |
| 47 | * or an error code, which can be tested using HIST_isError(). |
| 48 | * note : if return == srcSize, `src` contains only one distinct symbol. |
| 49 | */ |
| 50 | size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, |
| 51 | const void* src, size_t srcSize); |
| 52 | |
| 53 | unsigned HIST_isError(size_t code); /**< tells whether `code`, a size_t result such as the one returned by HIST_count(), is an error code */ |
| 54 | |
| 55 | |
| 56 | /* --- advanced histogram functions --- */ |
| 57 | |
| 58 | #define HIST_WKSP_SIZE_U32 1024 /* minimum workspace size, counted in `unsigned` elements */ |
| 59 | #define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) /* same minimum workspace size, in bytes */ |
| 60 | /** HIST_count_wksp() : |
| 61 | * Same contract as HIST_count(), but works inside an externally provided scratch buffer. |
| 62 | * The benefit is that this function uses very little stack space. |
| 63 | * `workSpace` must be a writable buffer, aligned on a 4-byte boundary. |
| 64 | * `workSpaceSize` is compared against a byte size : it must be >= HIST_WKSP_SIZE. |
| 65 | */ |
| 66 | size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
| 67 | const void* src, size_t srcSize, |
| 68 | void* workSpace, size_t workSpaceSize); |
| 69 | |
| 70 | /** HIST_countFast() : |
| 71 | * Same as HIST_count(), but blindly trusts that all byte values within `src` are <= *maxSymbolValuePtr. |
| 72 | * This function is therefore unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`. |
| 73 | */ |
| 74 | size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
| 75 | const void* src, size_t srcSize); |
| 76 | |
| 77 | /** HIST_countFast_wksp() : |
| 78 | * Same as HIST_countFast() (including its unsafe trust in the content of `src`), but using an externally provided scratch buffer. |
| 79 | * `workSpace` must be a writable buffer, aligned on a 4-byte boundary. |
| 80 | * `workSpaceSize` is compared against a byte size : it must be >= HIST_WKSP_SIZE. |
| 81 | */ |
| 82 | size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
| 83 | const void* src, size_t srcSize, |
| 84 | void* workSpace, size_t workSpaceSize); |
| 85 | |
| 86 | /*! HIST_count_simple() : |
| 87 | * Like HIST_countFast(), this function is unsafe, |
| 88 | * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. |
| 89 | * It is also a bit slower for large inputs. |
| 90 | * However, it does not need any additional memory (not even on stack). |
| 91 | * @return : count of the most frequent symbol, as a plain `unsigned` (the other HIST_count*() variants return a size_t). |
| 92 | * Note : this function doesn't produce any error (i.e. it must succeed), so its result needs no HIST_isError() test. |
| 93 | */ |
| 94 | unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, |
| 95 | const void* src, size_t srcSize); |