blob: 45b7babc1e23a9abf51d4f3588e15d8ee641df21 [file] [log] [blame]
Scott Baker611f6bd2019-10-18 13:45:19 -07001/* ******************************************************************
2 hist : Histogram functions
3 part of Finite State Entropy project
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34****************************************************************** */
35
36/* --- dependencies --- */
37#include "mem.h" /* U32, BYTE, etc. */
38#include "debug.h" /* assert, DEBUGLOG */
39#include "error_private.h" /* ERROR */
40#include "hist.h"
41
42
43/* --- Error management --- */
44unsigned HIST_isError(size_t code) { return ERR_isError(code); }
45
46/*-**************************************************************
47 * Histogram functions
48 ****************************************************************/
49unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
50 const void* src, size_t srcSize)
51{
52 const BYTE* ip = (const BYTE*)src;
53 const BYTE* const end = ip + srcSize;
54 unsigned maxSymbolValue = *maxSymbolValuePtr;
55 unsigned largestCount=0;
56
57 memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
58 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
59
60 while (ip<end) {
61 assert(*ip <= maxSymbolValue);
62 count[*ip++]++;
63 }
64
65 while (!count[maxSymbolValue]) maxSymbolValue--;
66 *maxSymbolValuePtr = maxSymbolValue;
67
68 { U32 s;
69 for (s=0; s<=maxSymbolValue; s++)
70 if (count[s] > largestCount) largestCount = count[s];
71 }
72
73 return largestCount;
74}
75
76typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
77
78/* HIST_count_parallel_wksp() :
79 * store histogram into 4 intermediate tables, recombined at the end.
80 * this design makes better use of OoO cpus,
81 * and is noticeably faster when some values are heavily repeated.
82 * But it needs some additional workspace for intermediate tables.
83 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
84 * @return : largest histogram frequency,
85 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
86static size_t HIST_count_parallel_wksp(
87 unsigned* count, unsigned* maxSymbolValuePtr,
88 const void* source, size_t sourceSize,
89 HIST_checkInput_e check,
90 U32* const workSpace)
91{
92 const BYTE* ip = (const BYTE*)source;
93 const BYTE* const iend = ip+sourceSize;
94 unsigned maxSymbolValue = *maxSymbolValuePtr;
95 unsigned max=0;
96 U32* const Counting1 = workSpace;
97 U32* const Counting2 = Counting1 + 256;
98 U32* const Counting3 = Counting2 + 256;
99 U32* const Counting4 = Counting3 + 256;
100
101 memset(workSpace, 0, 4*256*sizeof(unsigned));
102
103 /* safety checks */
104 if (!sourceSize) {
105 memset(count, 0, maxSymbolValue + 1);
106 *maxSymbolValuePtr = 0;
107 return 0;
108 }
109 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
110
111 /* by stripes of 16 bytes */
112 { U32 cached = MEM_read32(ip); ip += 4;
113 while (ip < iend-15) {
114 U32 c = cached; cached = MEM_read32(ip); ip += 4;
115 Counting1[(BYTE) c ]++;
116 Counting2[(BYTE)(c>>8) ]++;
117 Counting3[(BYTE)(c>>16)]++;
118 Counting4[ c>>24 ]++;
119 c = cached; cached = MEM_read32(ip); ip += 4;
120 Counting1[(BYTE) c ]++;
121 Counting2[(BYTE)(c>>8) ]++;
122 Counting3[(BYTE)(c>>16)]++;
123 Counting4[ c>>24 ]++;
124 c = cached; cached = MEM_read32(ip); ip += 4;
125 Counting1[(BYTE) c ]++;
126 Counting2[(BYTE)(c>>8) ]++;
127 Counting3[(BYTE)(c>>16)]++;
128 Counting4[ c>>24 ]++;
129 c = cached; cached = MEM_read32(ip); ip += 4;
130 Counting1[(BYTE) c ]++;
131 Counting2[(BYTE)(c>>8) ]++;
132 Counting3[(BYTE)(c>>16)]++;
133 Counting4[ c>>24 ]++;
134 }
135 ip-=4;
136 }
137
138 /* finish last symbols */
139 while (ip<iend) Counting1[*ip++]++;
140
141 if (check) { /* verify stats will fit into destination table */
142 U32 s; for (s=255; s>maxSymbolValue; s--) {
143 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
144 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
145 } }
146
147 { U32 s;
148 if (maxSymbolValue > 255) maxSymbolValue = 255;
149 for (s=0; s<=maxSymbolValue; s++) {
150 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
151 if (count[s] > max) max = count[s];
152 } }
153
154 while (!count[maxSymbolValue]) maxSymbolValue--;
155 *maxSymbolValuePtr = maxSymbolValue;
156 return (size_t)max;
157}
158
159/* HIST_countFast_wksp() :
160 * Same as HIST_countFast(), but using an externally provided scratch buffer.
161 * `workSpace` is a writable buffer which must be 4-bytes aligned,
162 * `workSpaceSize` must be >= HIST_WKSP_SIZE
163 */
164size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
165 const void* source, size_t sourceSize,
166 void* workSpace, size_t workSpaceSize)
167{
168 if (sourceSize < 1500) /* heuristic threshold */
169 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
170 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
171 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
172 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace);
173}
174
175/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
176size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
177 const void* source, size_t sourceSize)
178{
179 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
180 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters));
181}
182
183/* HIST_count_wksp() :
184 * Same as HIST_count(), but using an externally provided scratch buffer.
185 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
186size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
187 const void* source, size_t sourceSize,
188 void* workSpace, size_t workSpaceSize)
189{
190 if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
191 if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall);
192 if (*maxSymbolValuePtr < 255)
193 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace);
194 *maxSymbolValuePtr = 255;
195 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize);
196}
197
198size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
199 const void* src, size_t srcSize)
200{
201 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
202 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters));
203}