blob: 1476512580b039bac192b77d34c801133d6737ff [file] [log] [blame]
Scott Baker2d897982019-09-24 11:50:08 -07001/*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
Scott Baker2d897982019-09-24 11:50:08 -070011/*-*************************************
12* Dependencies
13***************************************/
Scott Baker8487c5d2019-10-18 12:49:46 -070014#include <limits.h> /* INT_MAX */
Scott Baker2d897982019-09-24 11:50:08 -070015#include <string.h> /* memset */
16#include "cpu.h"
17#include "mem.h"
Scott Baker8487c5d2019-10-18 12:49:46 -070018#include "hist.h" /* HIST_countFast_wksp */
Scott Baker2d897982019-09-24 11:50:08 -070019#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
20#include "fse.h"
21#define HUF_STATIC_LINKING_ONLY
22#include "huf.h"
23#include "zstd_compress_internal.h"
24#include "zstd_fast.h"
25#include "zstd_double_fast.h"
26#include "zstd_lazy.h"
27#include "zstd_opt.h"
28#include "zstd_ldm.h"
29
30
31/*-*************************************
32* Helper functions
33***************************************/
34size_t ZSTD_compressBound(size_t srcSize) {
35 return ZSTD_COMPRESSBOUND(srcSize);
36}
37
38
39/*-*************************************
40* Context memory management
41***************************************/
struct ZSTD_CDict_s {
    void* dictBuffer;                 /* owned copy of the dictionary; NULL when referenced by-ref — TODO(review) confirm against CDict creation path */
    const void* dictContent;          /* read-only view of the dictionary bytes */
    size_t dictContentSize;           /* size of dictContent, in bytes */
    void* workspace;                  /* single allocation backing the match state tables */
    size_t workspaceSize;
    ZSTD_matchState_t matchState;
    ZSTD_compressedBlockState_t cBlockState;
    ZSTD_customMem customMem;         /* allocator used for this CDict's memory */
    U32 dictID;                       /* presumably the dictionary's ID from its header; 0 otherwise — verify at load site */
};  /* typedef'd to ZSTD_CDict within "zstd.h" */
53
54ZSTD_CCtx* ZSTD_createCCtx(void)
55{
56 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
57}
58
Scott Baker8487c5d2019-10-18 12:49:46 -070059static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
60{
61 assert(cctx != NULL);
62 memset(cctx, 0, sizeof(*cctx));
63 cctx->customMem = memManager;
64 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
65 { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
66 assert(!ZSTD_isError(err));
67 (void)err;
68 }
69}
70
Scott Baker2d897982019-09-24 11:50:08 -070071ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
72{
73 ZSTD_STATIC_ASSERT(zcss_init==0);
74 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
75 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
Scott Baker8487c5d2019-10-18 12:49:46 -070076 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
Scott Baker2d897982019-09-24 11:50:08 -070077 if (!cctx) return NULL;
Scott Baker8487c5d2019-10-18 12:49:46 -070078 ZSTD_initCCtx(cctx, customMem);
Scott Baker2d897982019-09-24 11:50:08 -070079 return cctx;
80 }
81}
82
/* ZSTD_initStaticCCtx() :
 * Builds a CCtx inside a caller-provided buffer: the ZSTD_CCtx struct sits
 * at the start, the remainder becomes its internal workSpace.
 * @return NULL if the buffer is too small or not 8-byte aligned.
 * The returned CCtx must not be freed with ZSTD_freeCCtx(). */
ZSTD_CCtx* ZSTD_initStaticCCtx(void *workspace, size_t workspaceSize)
{
    ZSTD_CCtx* const cctx = (ZSTD_CCtx*) workspace;
    if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL;  /* minimum size */
    if ((size_t)workspace & 7) return NULL;  /* must be 8-aligned */
    memset(workspace, 0, workspaceSize);   /* may be a bit generous, could memset be smaller ? */
    cctx->staticSize = workspaceSize;      /* non-zero staticSize marks the CCtx as non-freeable */
    cctx->workSpace = (void*)(cctx+1);     /* workSpace starts right after the struct */
    cctx->workSpaceSize = workspaceSize - sizeof(ZSTD_CCtx);

    /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
    if (cctx->workSpaceSize < HUF_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t)) return NULL;
    assert(((size_t)cctx->workSpace & (sizeof(void*)-1)) == 0);   /* ensure correct alignment */
    /* carve the two block states, then the entropy workspace, from the front */
    cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)cctx->workSpace;
    cctx->blockState.nextCBlock = cctx->blockState.prevCBlock + 1;
    {
        void* const ptr = cctx->blockState.nextCBlock + 1;
        cctx->entropyWorkspace = (U32*)ptr;
    }
    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
    return cctx;
}
105
Scott Baker8487c5d2019-10-18 12:49:46 -0700106/**
107 * Clears and frees all of the dictionaries in the CCtx.
108 */
109static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx)
Scott Baker2d897982019-09-24 11:50:08 -0700110{
Scott Baker8487c5d2019-10-18 12:49:46 -0700111 ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem);
112 ZSTD_freeCDict(cctx->localDict.cdict);
113 memset(&cctx->localDict, 0, sizeof(cctx->localDict));
114 memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));
115 cctx->cdict = NULL;
116}
117
118static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict)
119{
120 size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0;
121 size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict);
122 return bufferSize + cdictSize;
123}
124
/* ZSTD_freeCCtxContent() :
 * Releases everything a CCtx owns (workspace, dictionaries, and the
 * multithreading context when compiled in) without freeing the CCtx
 * struct itself. Must not be called on a static CCtx. */
static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
{
    assert(cctx != NULL);
    assert(cctx->staticSize == 0);   /* static CCtx memory is caller-owned */
    ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
    ZSTD_clearAllDicts(cctx);
#ifdef ZSTD_MULTITHREAD
    ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
#endif
}
135
136size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
137{
138 if (cctx==NULL) return 0; /* support free on NULL */
139 RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
140 "not compatible with static CCtx");
141 ZSTD_freeCCtxContent(cctx);
Scott Baker2d897982019-09-24 11:50:08 -0700142 ZSTD_free(cctx, cctx->customMem);
Scott Baker8487c5d2019-10-18 12:49:46 -0700143 return 0;
Scott Baker2d897982019-09-24 11:50:08 -0700144}
145
146
/* Footprint of the multithreading sub-context; 0 when MT support
 * is compiled out. */
static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    return ZSTDMT_sizeof_CCtx(cctx->mtctx);
#else
    (void)cctx;   /* unused in single-threaded builds */
    return 0;
#endif
}
156
157
158size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
159{
160 if (cctx==NULL) return 0; /* support sizeof on NULL */
161 return sizeof(*cctx) + cctx->workSpaceSize
Scott Baker8487c5d2019-10-18 12:49:46 -0700162 + ZSTD_sizeof_localDict(cctx->localDict)
Scott Baker2d897982019-09-24 11:50:08 -0700163 + ZSTD_sizeof_mtctx(cctx);
164}
165
166size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
167{
168 return ZSTD_sizeof_CCtx(zcs); /* same object */
169}
170
171/* private API call, for dictBuilder only */
172const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
173
Scott Baker2d897982019-09-24 11:50:08 -0700174static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
175 ZSTD_compressionParameters cParams)
176{
177 ZSTD_CCtx_params cctxParams;
178 memset(&cctxParams, 0, sizeof(cctxParams));
179 cctxParams.cParams = cParams;
180 cctxParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
181 assert(!ZSTD_checkCParams(cParams));
182 cctxParams.fParams.contentSizeFlag = 1;
183 return cctxParams;
184}
185
186static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced(
187 ZSTD_customMem customMem)
188{
189 ZSTD_CCtx_params* params;
190 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
191 params = (ZSTD_CCtx_params*)ZSTD_calloc(
192 sizeof(ZSTD_CCtx_params), customMem);
193 if (!params) { return NULL; }
194 params->customMem = customMem;
195 params->compressionLevel = ZSTD_CLEVEL_DEFAULT;
196 params->fParams.contentSizeFlag = 1;
197 return params;
198}
199
200ZSTD_CCtx_params* ZSTD_createCCtxParams(void)
201{
202 return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem);
203}
204
205size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params)
206{
207 if (params == NULL) { return 0; }
208 ZSTD_free(params, params->customMem);
209 return 0;
210}
211
212size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params)
213{
214 return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT);
215}
216
217size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) {
Scott Baker8487c5d2019-10-18 12:49:46 -0700218 RETURN_ERROR_IF(!cctxParams, GENERIC);
Scott Baker2d897982019-09-24 11:50:08 -0700219 memset(cctxParams, 0, sizeof(*cctxParams));
220 cctxParams->compressionLevel = compressionLevel;
221 cctxParams->fParams.contentSizeFlag = 1;
222 return 0;
223}
224
225size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
226{
Scott Baker8487c5d2019-10-18 12:49:46 -0700227 RETURN_ERROR_IF(!cctxParams, GENERIC);
228 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
Scott Baker2d897982019-09-24 11:50:08 -0700229 memset(cctxParams, 0, sizeof(*cctxParams));
230 cctxParams->cParams = params.cParams;
231 cctxParams->fParams = params.fParams;
232 cctxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
233 assert(!ZSTD_checkCParams(params.cParams));
234 return 0;
235}
236
237/* ZSTD_assignParamsToCCtxParams() :
238 * params is presumed valid at this stage */
239static ZSTD_CCtx_params ZSTD_assignParamsToCCtxParams(
240 ZSTD_CCtx_params cctxParams, ZSTD_parameters params)
241{
242 ZSTD_CCtx_params ret = cctxParams;
243 ret.cParams = params.cParams;
244 ret.fParams = params.fParams;
245 ret.compressionLevel = ZSTD_CLEVEL_DEFAULT; /* should not matter, as all cParams are presumed properly defined */
246 assert(!ZSTD_checkCParams(params.cParams));
247 return ret;
248}
249
/* ZSTD_cParam_getBounds() :
 * Advertised [lowerBound, upperBound] range of accepted values for @param.
 * An unknown @param is reported through bounds.error (parameter_unsupported). */
ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
{
    ZSTD_bounds bounds = { 0, 0, 0 };

    switch(param)
    {
    case ZSTD_c_compressionLevel:
        bounds.lowerBound = ZSTD_minCLevel();
        bounds.upperBound = ZSTD_maxCLevel();
        return bounds;

    case ZSTD_c_windowLog:
        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
        return bounds;

    case ZSTD_c_hashLog:
        bounds.lowerBound = ZSTD_HASHLOG_MIN;
        bounds.upperBound = ZSTD_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_chainLog:
        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
        bounds.upperBound = ZSTD_CHAINLOG_MAX;
        return bounds;

    case ZSTD_c_searchLog:
        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
        return bounds;

    case ZSTD_c_minMatch:
        bounds.lowerBound = ZSTD_MINMATCH_MIN;
        bounds.upperBound = ZSTD_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_targetLength:
        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
        return bounds;

    case ZSTD_c_strategy:
        bounds.lowerBound = ZSTD_STRATEGY_MIN;
        bounds.upperBound = ZSTD_STRATEGY_MAX;
        return bounds;

    /* boolean frame parameters : [0,1] */
    case ZSTD_c_contentSizeFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_checksumFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_dictIDFlag:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    /* multithreading parameters : upper bound collapses to 0
     * when compiled without ZSTD_MULTITHREAD */
    case ZSTD_c_nbWorkers:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_jobSize:
        bounds.lowerBound = 0;
#ifdef ZSTD_MULTITHREAD
        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
#else
        bounds.upperBound = 0;
#endif
        return bounds;

    case ZSTD_c_overlapLog:
        bounds.lowerBound = ZSTD_OVERLAPLOG_MIN;
        bounds.upperBound = ZSTD_OVERLAPLOG_MAX;
        return bounds;

    case ZSTD_c_enableLongDistanceMatching:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_ldmHashLog:
        bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHLOG_MAX;
        return bounds;

    case ZSTD_c_ldmMinMatch:
        bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN;
        bounds.upperBound = ZSTD_LDM_MINMATCH_MAX;
        return bounds;

    case ZSTD_c_ldmBucketSizeLog:
        bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN;
        bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX;
        return bounds;

    case ZSTD_c_ldmHashRateLog:
        bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN;
        bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX;
        return bounds;

    /* experimental parameters */
    case ZSTD_c_rsyncable:
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_forceMaxWindow :
        bounds.lowerBound = 0;
        bounds.upperBound = 1;
        return bounds;

    case ZSTD_c_format:
        ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless);
        bounds.lowerBound = ZSTD_f_zstd1;
        bounds.upperBound = ZSTD_f_zstd1_magicless;   /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_forceAttachDict:
        ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceCopy);
        bounds.lowerBound = ZSTD_dictDefaultAttach;
        bounds.upperBound = ZSTD_dictForceCopy;       /* note : how to ensure at compile time that this is the highest value enum ? */
        return bounds;

    case ZSTD_c_literalCompressionMode:
        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
        bounds.lowerBound = ZSTD_lcm_auto;
        bounds.upperBound = ZSTD_lcm_uncompressed;
        return bounds;

    case ZSTD_c_targetCBlockSize:
        bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN;
        bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX;
        return bounds;

    default:
        {   ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 };
            return boundError;
        }
    }
}
399
400/* ZSTD_cParam_withinBounds:
401 * @return 1 if value is within cParam bounds,
402 * 0 otherwise */
403static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value)
404{
405 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
406 if (ZSTD_isError(bounds.error)) return 0;
407 if (value < bounds.lowerBound) return 0;
408 if (value > bounds.upperBound) return 0;
409 return 1;
410}
411
412/* ZSTD_cParam_clampBounds:
413 * Clamps the value into the bounded range.
414 */
415static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
416{
417 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);
418 if (ZSTD_isError(bounds.error)) return bounds.error;
419 if (*value < bounds.lowerBound) *value = bounds.lowerBound;
420 if (*value > bounds.upperBound) *value = bounds.upperBound;
421 return 0;
422}
423
/* BOUNDCHECK() :
 * Return parameter_outOfBound from the enclosing function when `val`
 * lies outside the advertised bounds for `cParam`. */
#define BOUNDCHECK(cParam, val) { \
    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
                    parameter_outOfBound); \
}
Scott Baker2d897982019-09-24 11:50:08 -0700428
429
/* ZSTD_isUpdateAuthorized() :
 * @return 1 if `param` may still be changed after compression has started
 *         (streamStage != zcss_init), 0 otherwise. */
static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
{
    switch(param)
    {
    /* updatable mid-stream */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
        return 1;

    /* frozen once a frame has started */
    case ZSTD_c_format:
    case ZSTD_c_windowLog:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow :
    case ZSTD_c_nbWorkers:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_targetCBlockSize:
    default:
        return 0;
    }
}
465
/* ZSTD_CCtx_setParameter() :
 * Validates that `param` may be set in the CCtx's current state, then
 * forwards the assignment to the requested (pending) parameter set.
 * Once a frame has started, only parameters allowed by
 * ZSTD_isUpdateAuthorized() may still change. */
size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
{
    DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value);
    if (cctx->streamStage != zcss_init) {
        if (ZSTD_isUpdateAuthorized(param)) {
            cctx->cParamsChanged = 1;   /* signal that cParams need re-application */
        } else {
            RETURN_ERROR(stage_wrong);
    }   }

    switch(param)
    {
    case ZSTD_c_nbWorkers:
        /* MT requires a dynamically-allocated sub-context */
        RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported,
                        "MT not compatible with static alloc");
        break;

    /* all other known parameters: validated downstream */
    case ZSTD_c_compressionLevel:
    case ZSTD_c_windowLog:
    case ZSTD_c_hashLog:
    case ZSTD_c_chainLog:
    case ZSTD_c_searchLog:
    case ZSTD_c_minMatch:
    case ZSTD_c_targetLength:
    case ZSTD_c_strategy:
    case ZSTD_c_ldmHashRateLog:
    case ZSTD_c_format:
    case ZSTD_c_contentSizeFlag:
    case ZSTD_c_checksumFlag:
    case ZSTD_c_dictIDFlag:
    case ZSTD_c_forceMaxWindow:
    case ZSTD_c_forceAttachDict:
    case ZSTD_c_literalCompressionMode:
    case ZSTD_c_jobSize:
    case ZSTD_c_overlapLog:
    case ZSTD_c_rsyncable:
    case ZSTD_c_enableLongDistanceMatching:
    case ZSTD_c_ldmHashLog:
    case ZSTD_c_ldmMinMatch:
    case ZSTD_c_ldmBucketSizeLog:
    case ZSTD_c_targetCBlockSize:
        break;

    default: RETURN_ERROR(parameter_unsupported);
    }
    return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value);
}
513
Scott Baker8487c5d2019-10-18 12:49:46 -0700514size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
515 ZSTD_cParameter param, int value)
Scott Baker2d897982019-09-24 11:50:08 -0700516{
Scott Baker8487c5d2019-10-18 12:49:46 -0700517 DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value);
Scott Baker2d897982019-09-24 11:50:08 -0700518 switch(param)
519 {
Scott Baker8487c5d2019-10-18 12:49:46 -0700520 case ZSTD_c_format :
521 BOUNDCHECK(ZSTD_c_format, value);
Scott Baker2d897982019-09-24 11:50:08 -0700522 CCtxParams->format = (ZSTD_format_e)value;
523 return (size_t)CCtxParams->format;
524
Scott Baker8487c5d2019-10-18 12:49:46 -0700525 case ZSTD_c_compressionLevel : {
526 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
527 if (value) { /* 0 : does not change current level */
528 CCtxParams->compressionLevel = value;
Scott Baker2d897982019-09-24 11:50:08 -0700529 }
530 if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
531 return 0; /* return type (size_t) cannot represent negative values */
532 }
533
Scott Baker8487c5d2019-10-18 12:49:46 -0700534 case ZSTD_c_windowLog :
535 if (value!=0) /* 0 => use default */
536 BOUNDCHECK(ZSTD_c_windowLog, value);
Scott Baker2d897982019-09-24 11:50:08 -0700537 CCtxParams->cParams.windowLog = value;
538 return CCtxParams->cParams.windowLog;
539
Scott Baker8487c5d2019-10-18 12:49:46 -0700540 case ZSTD_c_hashLog :
541 if (value!=0) /* 0 => use default */
542 BOUNDCHECK(ZSTD_c_hashLog, value);
Scott Baker2d897982019-09-24 11:50:08 -0700543 CCtxParams->cParams.hashLog = value;
544 return CCtxParams->cParams.hashLog;
545
Scott Baker8487c5d2019-10-18 12:49:46 -0700546 case ZSTD_c_chainLog :
547 if (value!=0) /* 0 => use default */
548 BOUNDCHECK(ZSTD_c_chainLog, value);
Scott Baker2d897982019-09-24 11:50:08 -0700549 CCtxParams->cParams.chainLog = value;
550 return CCtxParams->cParams.chainLog;
551
Scott Baker8487c5d2019-10-18 12:49:46 -0700552 case ZSTD_c_searchLog :
553 if (value!=0) /* 0 => use default */
554 BOUNDCHECK(ZSTD_c_searchLog, value);
Scott Baker2d897982019-09-24 11:50:08 -0700555 CCtxParams->cParams.searchLog = value;
556 return value;
557
Scott Baker8487c5d2019-10-18 12:49:46 -0700558 case ZSTD_c_minMatch :
559 if (value!=0) /* 0 => use default */
560 BOUNDCHECK(ZSTD_c_minMatch, value);
561 CCtxParams->cParams.minMatch = value;
562 return CCtxParams->cParams.minMatch;
Scott Baker2d897982019-09-24 11:50:08 -0700563
Scott Baker8487c5d2019-10-18 12:49:46 -0700564 case ZSTD_c_targetLength :
565 BOUNDCHECK(ZSTD_c_targetLength, value);
Scott Baker2d897982019-09-24 11:50:08 -0700566 CCtxParams->cParams.targetLength = value;
567 return CCtxParams->cParams.targetLength;
568
Scott Baker8487c5d2019-10-18 12:49:46 -0700569 case ZSTD_c_strategy :
570 if (value!=0) /* 0 => use default */
571 BOUNDCHECK(ZSTD_c_strategy, value);
Scott Baker2d897982019-09-24 11:50:08 -0700572 CCtxParams->cParams.strategy = (ZSTD_strategy)value;
573 return (size_t)CCtxParams->cParams.strategy;
574
Scott Baker8487c5d2019-10-18 12:49:46 -0700575 case ZSTD_c_contentSizeFlag :
Scott Baker2d897982019-09-24 11:50:08 -0700576 /* Content size written in frame header _when known_ (default:1) */
Scott Baker8487c5d2019-10-18 12:49:46 -0700577 DEBUGLOG(4, "set content size flag = %u", (value!=0));
578 CCtxParams->fParams.contentSizeFlag = value != 0;
Scott Baker2d897982019-09-24 11:50:08 -0700579 return CCtxParams->fParams.contentSizeFlag;
580
Scott Baker8487c5d2019-10-18 12:49:46 -0700581 case ZSTD_c_checksumFlag :
Scott Baker2d897982019-09-24 11:50:08 -0700582 /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */
Scott Baker8487c5d2019-10-18 12:49:46 -0700583 CCtxParams->fParams.checksumFlag = value != 0;
Scott Baker2d897982019-09-24 11:50:08 -0700584 return CCtxParams->fParams.checksumFlag;
585
Scott Baker8487c5d2019-10-18 12:49:46 -0700586 case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */
587 DEBUGLOG(4, "set dictIDFlag = %u", (value!=0));
Scott Baker2d897982019-09-24 11:50:08 -0700588 CCtxParams->fParams.noDictIDFlag = !value;
589 return !CCtxParams->fParams.noDictIDFlag;
590
Scott Baker8487c5d2019-10-18 12:49:46 -0700591 case ZSTD_c_forceMaxWindow :
592 CCtxParams->forceWindow = (value != 0);
Scott Baker2d897982019-09-24 11:50:08 -0700593 return CCtxParams->forceWindow;
594
Scott Baker8487c5d2019-10-18 12:49:46 -0700595 case ZSTD_c_forceAttachDict : {
596 const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
597 BOUNDCHECK(ZSTD_c_forceAttachDict, pref);
598 CCtxParams->attachDictPref = pref;
599 return CCtxParams->attachDictPref;
600 }
601
602 case ZSTD_c_literalCompressionMode : {
603 const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
604 BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
605 CCtxParams->literalCompressionMode = lcm;
606 return CCtxParams->literalCompressionMode;
607 }
608
609 case ZSTD_c_nbWorkers :
Scott Baker2d897982019-09-24 11:50:08 -0700610#ifndef ZSTD_MULTITHREAD
Scott Baker8487c5d2019-10-18 12:49:46 -0700611 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
Scott Baker2d897982019-09-24 11:50:08 -0700612 return 0;
613#else
Scott Baker8487c5d2019-10-18 12:49:46 -0700614 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
615 CCtxParams->nbWorkers = value;
616 return CCtxParams->nbWorkers;
Scott Baker2d897982019-09-24 11:50:08 -0700617#endif
618
Scott Baker8487c5d2019-10-18 12:49:46 -0700619 case ZSTD_c_jobSize :
Scott Baker2d897982019-09-24 11:50:08 -0700620#ifndef ZSTD_MULTITHREAD
Scott Baker8487c5d2019-10-18 12:49:46 -0700621 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
622 return 0;
Scott Baker2d897982019-09-24 11:50:08 -0700623#else
Scott Baker8487c5d2019-10-18 12:49:46 -0700624 /* Adjust to the minimum non-default value. */
625 if (value != 0 && value < ZSTDMT_JOBSIZE_MIN)
626 value = ZSTDMT_JOBSIZE_MIN;
627 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value));
628 assert(value >= 0);
629 CCtxParams->jobSize = value;
630 return CCtxParams->jobSize;
Scott Baker2d897982019-09-24 11:50:08 -0700631#endif
632
Scott Baker8487c5d2019-10-18 12:49:46 -0700633 case ZSTD_c_overlapLog :
Scott Baker2d897982019-09-24 11:50:08 -0700634#ifndef ZSTD_MULTITHREAD
Scott Baker8487c5d2019-10-18 12:49:46 -0700635 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
636 return 0;
Scott Baker2d897982019-09-24 11:50:08 -0700637#else
Scott Baker8487c5d2019-10-18 12:49:46 -0700638 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
639 CCtxParams->overlapLog = value;
640 return CCtxParams->overlapLog;
Scott Baker2d897982019-09-24 11:50:08 -0700641#endif
642
Scott Baker8487c5d2019-10-18 12:49:46 -0700643 case ZSTD_c_rsyncable :
644#ifndef ZSTD_MULTITHREAD
645 RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading");
646 return 0;
647#else
648 FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value));
649 CCtxParams->rsyncable = value;
650 return CCtxParams->rsyncable;
651#endif
652
653 case ZSTD_c_enableLongDistanceMatching :
654 CCtxParams->ldmParams.enableLdm = (value!=0);
Scott Baker2d897982019-09-24 11:50:08 -0700655 return CCtxParams->ldmParams.enableLdm;
656
Scott Baker8487c5d2019-10-18 12:49:46 -0700657 case ZSTD_c_ldmHashLog :
658 if (value!=0) /* 0 ==> auto */
659 BOUNDCHECK(ZSTD_c_ldmHashLog, value);
Scott Baker2d897982019-09-24 11:50:08 -0700660 CCtxParams->ldmParams.hashLog = value;
661 return CCtxParams->ldmParams.hashLog;
662
Scott Baker8487c5d2019-10-18 12:49:46 -0700663 case ZSTD_c_ldmMinMatch :
664 if (value!=0) /* 0 ==> default */
665 BOUNDCHECK(ZSTD_c_ldmMinMatch, value);
Scott Baker2d897982019-09-24 11:50:08 -0700666 CCtxParams->ldmParams.minMatchLength = value;
667 return CCtxParams->ldmParams.minMatchLength;
668
Scott Baker8487c5d2019-10-18 12:49:46 -0700669 case ZSTD_c_ldmBucketSizeLog :
670 if (value!=0) /* 0 ==> default */
671 BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value);
Scott Baker2d897982019-09-24 11:50:08 -0700672 CCtxParams->ldmParams.bucketSizeLog = value;
673 return CCtxParams->ldmParams.bucketSizeLog;
674
Scott Baker8487c5d2019-10-18 12:49:46 -0700675 case ZSTD_c_ldmHashRateLog :
676 RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN,
677 parameter_outOfBound);
678 CCtxParams->ldmParams.hashRateLog = value;
679 return CCtxParams->ldmParams.hashRateLog;
Scott Baker2d897982019-09-24 11:50:08 -0700680
Scott Baker8487c5d2019-10-18 12:49:46 -0700681 case ZSTD_c_targetCBlockSize :
682 if (value!=0) /* 0 ==> default */
683 BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
684 CCtxParams->targetCBlockSize = value;
685 return CCtxParams->targetCBlockSize;
686
687 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
Scott Baker2d897982019-09-24 11:50:08 -0700688 }
689}
690
Scott Baker8487c5d2019-10-18 12:49:46 -0700691size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value)
692{
693 return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value);
694}
695
696size_t ZSTD_CCtxParams_getParameter(
697 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value)
698{
699 switch(param)
700 {
701 case ZSTD_c_format :
702 *value = CCtxParams->format;
703 break;
704 case ZSTD_c_compressionLevel :
705 *value = CCtxParams->compressionLevel;
706 break;
707 case ZSTD_c_windowLog :
708 *value = (int)CCtxParams->cParams.windowLog;
709 break;
710 case ZSTD_c_hashLog :
711 *value = (int)CCtxParams->cParams.hashLog;
712 break;
713 case ZSTD_c_chainLog :
714 *value = (int)CCtxParams->cParams.chainLog;
715 break;
716 case ZSTD_c_searchLog :
717 *value = CCtxParams->cParams.searchLog;
718 break;
719 case ZSTD_c_minMatch :
720 *value = CCtxParams->cParams.minMatch;
721 break;
722 case ZSTD_c_targetLength :
723 *value = CCtxParams->cParams.targetLength;
724 break;
725 case ZSTD_c_strategy :
726 *value = (unsigned)CCtxParams->cParams.strategy;
727 break;
728 case ZSTD_c_contentSizeFlag :
729 *value = CCtxParams->fParams.contentSizeFlag;
730 break;
731 case ZSTD_c_checksumFlag :
732 *value = CCtxParams->fParams.checksumFlag;
733 break;
734 case ZSTD_c_dictIDFlag :
735 *value = !CCtxParams->fParams.noDictIDFlag;
736 break;
737 case ZSTD_c_forceMaxWindow :
738 *value = CCtxParams->forceWindow;
739 break;
740 case ZSTD_c_forceAttachDict :
741 *value = CCtxParams->attachDictPref;
742 break;
743 case ZSTD_c_literalCompressionMode :
744 *value = CCtxParams->literalCompressionMode;
745 break;
746 case ZSTD_c_nbWorkers :
747#ifndef ZSTD_MULTITHREAD
748 assert(CCtxParams->nbWorkers == 0);
749#endif
750 *value = CCtxParams->nbWorkers;
751 break;
752 case ZSTD_c_jobSize :
753#ifndef ZSTD_MULTITHREAD
754 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
755#else
756 assert(CCtxParams->jobSize <= INT_MAX);
757 *value = (int)CCtxParams->jobSize;
758 break;
759#endif
760 case ZSTD_c_overlapLog :
761#ifndef ZSTD_MULTITHREAD
762 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
763#else
764 *value = CCtxParams->overlapLog;
765 break;
766#endif
767 case ZSTD_c_rsyncable :
768#ifndef ZSTD_MULTITHREAD
769 RETURN_ERROR(parameter_unsupported, "not compiled with multithreading");
770#else
771 *value = CCtxParams->rsyncable;
772 break;
773#endif
774 case ZSTD_c_enableLongDistanceMatching :
775 *value = CCtxParams->ldmParams.enableLdm;
776 break;
777 case ZSTD_c_ldmHashLog :
778 *value = CCtxParams->ldmParams.hashLog;
779 break;
780 case ZSTD_c_ldmMinMatch :
781 *value = CCtxParams->ldmParams.minMatchLength;
782 break;
783 case ZSTD_c_ldmBucketSizeLog :
784 *value = CCtxParams->ldmParams.bucketSizeLog;
785 break;
786 case ZSTD_c_ldmHashRateLog :
787 *value = CCtxParams->ldmParams.hashRateLog;
788 break;
789 case ZSTD_c_targetCBlockSize :
790 *value = (int)CCtxParams->targetCBlockSize;
791 break;
792 default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
793 }
794 return 0;
795}
796
Scott Baker2d897982019-09-24 11:50:08 -0700797/** ZSTD_CCtx_setParametersUsingCCtxParams() :
798 * just applies `params` into `cctx`
799 * no action is performed, parameters are merely stored.
800 * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx.
801 * This is possible even if a compression is ongoing.
802 * In which case, new parameters will be applied on the fly, starting with next compression job.
803 */
804size_t ZSTD_CCtx_setParametersUsingCCtxParams(
805 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
806{
Scott Baker8487c5d2019-10-18 12:49:46 -0700807 DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
808 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
809 RETURN_ERROR_IF(cctx->cdict, stage_wrong);
Scott Baker2d897982019-09-24 11:50:08 -0700810
811 cctx->requestedParams = *params;
812 return 0;
813}
814
815ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
816{
817 DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
Scott Baker8487c5d2019-10-18 12:49:46 -0700818 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
Scott Baker2d897982019-09-24 11:50:08 -0700819 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
820 return 0;
821}
822
/**
 * Initializes the local dict using the requested parameters.
 * NOTE: This does not use the pledged src size, because it may be used for more
 * than one compression.
 */
static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx)
{
    ZSTD_localDict* const dl = &cctx->localDict;
    /* srcSize=0 : unknown at this point, see NOTE above */
    ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams(
            &cctx->requestedParams, 0, dl->dictSize);
    if (dl->dict == NULL) {
        /* No local dictionary. */
        assert(dl->dictBuffer == NULL);
        assert(dl->cdict == NULL);
        assert(dl->dictSize == 0);
        return 0;
    }
    if (dl->cdict != NULL) {
        assert(cctx->cdict == dl->cdict);
        /* Local dictionary already initialized. */
        return 0;
    }
    assert(dl->dictSize > 0);
    assert(cctx->cdict == NULL);
    assert(cctx->prefixDict.dict == NULL);

    /* dict content stays owned by the CCtx => reference it, don't copy */
    dl->cdict = ZSTD_createCDict_advanced(
            dl->dict,
            dl->dictSize,
            ZSTD_dlm_byRef,
            dl->dictContentType,
            cParams,
            cctx->customMem);
    RETURN_ERROR_IF(!dl->cdict, memory_allocation);
    cctx->cdict = dl->cdict;
    return 0;
}
860
/** ZSTD_CCtx_loadDictionary_advanced() :
 *  registers `dict` as the dictionary for future compressions.
 *  byCopy : duplicates the content into an internally-owned buffer;
 *  byRef : stores the pointer only (caller must keep `dict` alive).
 *  Passing NULL or size 0 simply clears any existing dictionary.
 *  Note : the CDict itself is built lazily (see ZSTD_initLocalDict). */
size_t ZSTD_CCtx_loadDictionary_advanced(
        ZSTD_CCtx* cctx, const void* dict, size_t dictSize,
        ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                    "no malloc for static CCtx");
    DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize);
    ZSTD_clearAllDicts(cctx);  /* in case one already exists */
    if (dict == NULL || dictSize == 0)  /* no dictionary mode */
        return 0;
    if (dictLoadMethod == ZSTD_dlm_byRef) {
        cctx->localDict.dict = dict;   /* reference only : caller keeps ownership */
    } else {
        void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem);
        RETURN_ERROR_IF(!dictBuffer, memory_allocation);
        memcpy(dictBuffer, dict, dictSize);
        cctx->localDict.dictBuffer = dictBuffer;   /* owned copy, freed with the CCtx */
        cctx->localDict.dict = dictBuffer;
    }
    cctx->localDict.dictSize = dictSize;
    cctx->localDict.dictContentType = dictContentType;
    return 0;
}
885
/** ZSTD_CCtx_loadDictionary_byReference() :
 *  convenience wrapper : reference the dictionary without copying it,
 *  auto-detecting its content type. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
      ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
}
892
/** ZSTD_CCtx_loadDictionary() :
 *  convenience wrapper : copy the dictionary into the CCtx,
 *  auto-detecting its content type. */
ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
{
    return ZSTD_CCtx_loadDictionary_advanced(
            cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
}
898
899
/** ZSTD_CCtx_refCDict() :
 *  references an externally-owned, pre-digested dictionary for future compressions.
 *  Passing NULL clears the dictionary. Caller keeps ownership of `cdict`. */
size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    /* Free the existing local cdict (if any) to save memory. */
    ZSTD_clearAllDicts(cctx);
    cctx->cdict = cdict;
    return 0;
}
908
/** ZSTD_CCtx_refPrefix() :
 *  convenience wrapper : reference a prefix, interpreted as raw content. */
size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize)
{
    return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent);
}
913
/** ZSTD_CCtx_refPrefix_advanced() :
 *  references `prefix` (not copied) to be used as dictionary content for the
 *  next compression. The caller must keep the prefix memory valid.
 *  Any previously registered dictionary is dropped first. */
size_t ZSTD_CCtx_refPrefix_advanced(
        ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType)
{
    RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
    ZSTD_clearAllDicts(cctx);   /* prefix and dictionary are mutually exclusive */
    cctx->prefixDict.dict = prefix;
    cctx->prefixDict.dictSize = prefixSize;
    cctx->prefixDict.dictContentType = dictContentType;
    return 0;
}
924
Scott Baker2d897982019-09-24 11:50:08 -0700925/*! ZSTD_CCtx_reset() :
926 * Also dumps dictionary */
Scott Baker8487c5d2019-10-18 12:49:46 -0700927size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
Scott Baker2d897982019-09-24 11:50:08 -0700928{
Scott Baker8487c5d2019-10-18 12:49:46 -0700929 if ( (reset == ZSTD_reset_session_only)
930 || (reset == ZSTD_reset_session_and_parameters) ) {
931 cctx->streamStage = zcss_init;
932 cctx->pledgedSrcSizePlusOne = 0;
933 }
934 if ( (reset == ZSTD_reset_parameters)
935 || (reset == ZSTD_reset_session_and_parameters) ) {
936 RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong);
937 ZSTD_clearAllDicts(cctx);
938 return ZSTD_CCtxParams_reset(&cctx->requestedParams);
939 }
940 return 0;
Scott Baker2d897982019-09-24 11:50:08 -0700941}
942
Scott Baker8487c5d2019-10-18 12:49:46 -0700943
/** ZSTD_checkCParams() :
    control CParam values remain within authorized range.
    Each BOUNDCHECK returns an error code immediately if its value is out of bounds.
    @return : 0, or an error code if one value is beyond authorized range */
size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
{
    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
    return 0;
}
958
/** ZSTD_clampCParams() :
 *  make CParam values within valid range.
 *  Unlike ZSTD_checkCParams(), never fails : out-of-range values are
 *  silently clamped to the nearest bound.
 *  @return : valid CParams */
static ZSTD_compressionParameters
ZSTD_clampCParams(ZSTD_compressionParameters cParams)
{
    /* clamp `val` into [lowerBound, upperBound] for parameter `cParam`;
     * `type` restores the field's declared type after the (int) comparison */
#   define CLAMP_TYPE(cParam, val, type) {                                \
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
    }
#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
    return cParams;
}
980
981/** ZSTD_cycleLog() :
982 * condition for correct operation : hashLog > 1 */
983static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
984{
985 U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
986 return hashLog - btScale;
987}
988
/** ZSTD_adjustCParams_internal() :
 *  optimize `cPar` for a specified input (`srcSize` and `dictSize`).
 *  mostly downsize to reduce memory consumption and initialization latency.
 *  `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known.
 *  note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention.
 *  condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */
static ZSTD_compressionParameters
ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
                            unsigned long long srcSize,
                            size_t dictSize)
{
    static const U64 minSrcSize = 513; /* (1<<9) + 1 */
    static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
    assert(ZSTD_checkCParams(cPar)==0);

    /* srcSize+1<2 catches both 0 and ZSTD_CONTENTSIZE_UNKNOWN (== -1ULL, which wraps to 0) */
    if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ )
        srcSize = minSrcSize;  /* presumed small when there is a dictionary */
    else if (srcSize == 0)
        srcSize = ZSTD_CONTENTSIZE_UNKNOWN;  /* 0 == unknown : presumed large */

    /* resize windowLog if input is small enough, to use less memory */
    if ( (srcSize < maxWindowResize)
      && (dictSize < maxWindowResize) )  {
        U32 const tSize = (U32)(srcSize + dictSize);
        static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN;
        /* smallest power-of-2 window that covers src + dict */
        U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN :
                            ZSTD_highbit32(tSize-1) + 1;
        if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
    }
    /* no point hashing more positions than the window can reference */
    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
    {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
        /* shrink chainLog so the table's cycle fits within the window */
        if (cycleLog > cPar.windowLog)
            cPar.chainLog -= (cycleLog - cPar.windowLog);
    }

    if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN)
        cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN;  /* minimum wlog required for valid frame header */

    return cPar;
}
1029
/** ZSTD_adjustCParams() :
 *  public variant : clamps cPar into valid range first, then optimizes it
 *  for the given input/dictionary sizes. */
ZSTD_compressionParameters
ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
                   unsigned long long srcSize,
                   size_t dictSize)
{
    cPar = ZSTD_clampCParams(cPar);   /* resulting cPar is necessarily valid (all parameters within range) */
    return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
}
1038
/** ZSTD_getCParamsFromCCtxParams() :
 *  derives final compression parameters : start from the level-based defaults,
 *  then let any explicitly-set (non-zero) field of CCtxParams->cParams override,
 *  and finally adjust for the hinted source/dictionary sizes. */
ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
{
    ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
    /* 0 means "unset" for each field : keep the level-based default in that case */
    if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
    if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
    if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
    if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
    if (CCtxParams->cParams.minMatch) cParams.minMatch = CCtxParams->cParams.minMatch;
    if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
    if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
    assert(!ZSTD_checkCParams(cParams));
    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
}
1054
/** ZSTD_sizeof_matchState() :
 *  computes the workspace bytes needed by a match state for `cParams`.
 *  `forCCtx` : non-zero when sizing for a full CCtx (includes hashLog3 and
 *  optimal-parser tables); 0 when sizing for a CDict, which skips both. */
static size_t
ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                       const U32 forCCtx)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    /* the 3-byte hash table only exists for CCtx with minMatch==3 */
    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
    size_t const optPotentialSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits)) * sizeof(U32)
                                   + (ZSTD_OPT_NUM+1) * (sizeof(ZSTD_match_t)+sizeof(ZSTD_optimal_t));
    /* optimal-parser tables are only allocated for btopt+ strategies */
    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                ? optPotentialSpace
                                : 0;
    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                (U32)chainSize, (U32)hSize, (U32)h3Size);
    return tableSpace + optSpace;
}
1073
/** ZSTD_estimateCCtxSize_usingCCtxParams() :
 *  estimates the total memory footprint (context struct + workspace) needed to
 *  compress with `params`. Single-threaded only : multi-threaded sizing is
 *  not supported by this estimate. */
size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        /* minMatch==3 allows up to one sequence per 3 bytes; otherwise per 4 */
        U32    const divider = (cParams.minMatch==3) ? 3 : 4;
        size_t const maxNbSeq = blockSize / divider;
        size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
        size_t const entropySpace = HUF_WORKSPACE_SIZE;
        size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);  /* prev + next block states */
        size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);

        size_t const ldmSpace = ZSTD_ldm_getTableSize(params->ldmParams);
        size_t const ldmSeqSpace = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize) * sizeof(rawSeq);

        size_t const neededSpace = entropySpace + blockStateSpace + tokenSpace +
                                   matchStateSize + ldmSpace + ldmSeqSpace;

        DEBUGLOG(5, "sizeof(ZSTD_CCtx) : %u", (U32)sizeof(ZSTD_CCtx));
        DEBUGLOG(5, "estimate workSpace : %u", (U32)neededSpace);
        return sizeof(ZSTD_CCtx) + neededSpace;
    }
}
1098
/** ZSTD_estimateCCtxSize_usingCParams() :
 *  same estimate, starting from explicit compression parameters. */
size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
}
1104
/* estimate for a single compression level, with unknown src/dict sizes */
static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
    return ZSTD_estimateCCtxSize_usingCParams(cParams);
}
1110
1111size_t ZSTD_estimateCCtxSize(int compressionLevel)
1112{
1113 int level;
1114 size_t memBudget = 0;
Scott Baker8487c5d2019-10-18 12:49:46 -07001115 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
Scott Baker2d897982019-09-24 11:50:08 -07001116 size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
1117 if (newMB > memBudget) memBudget = newMB;
1118 }
1119 return memBudget;
1120}
1121
/** ZSTD_estimateCStreamSize_usingCCtxParams() :
 *  like the CCtx estimate, plus the internal streaming input/output buffers.
 *  Single-threaded only. */
size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
{
    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
    {   ZSTD_compressionParameters const cParams =
                ZSTD_getCParamsFromCCtxParams(params, 0, 0);
        size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params);
        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
        /* input buffer must hold one full window plus one block in flight */
        size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize;
        size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1;
        size_t const streamingSize = inBuffSize + outBuffSize;

        return CCtxSize + streamingSize;
    }
}
1136
/** ZSTD_estimateCStreamSize_usingCParams() :
 *  same streaming estimate, starting from explicit compression parameters. */
size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
{
    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
}
1142
/* streaming estimate for a single compression level, with unknown src/dict sizes */
static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
{
    ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
    return ZSTD_estimateCStreamSize_usingCParams(cParams);
}
1148
Scott Baker8487c5d2019-10-18 12:49:46 -07001149size_t ZSTD_estimateCStreamSize(int compressionLevel)
1150{
Scott Baker2d897982019-09-24 11:50:08 -07001151 int level;
1152 size_t memBudget = 0;
Scott Baker8487c5d2019-10-18 12:49:46 -07001153 for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
Scott Baker2d897982019-09-24 11:50:08 -07001154 size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
1155 if (newMB > memBudget) memBudget = newMB;
1156 }
1157 return memBudget;
1158}
1159
/* ZSTD_getFrameProgression():
 * tells how much data has been consumed (input) and produced (output) for current frame.
 * able to count progression inside worker threads (non-blocking mode).
 */
ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_getFrameProgression(cctx->mtctx);
    }
#endif
    {   ZSTD_frameProgression fp;
        /* bytes sitting in the input buffer, accepted but not yet compressed */
        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
                                cctx->inBuffPos - cctx->inToCompress;
        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
        fp.ingested = cctx->consumedSrcSize + buffered;
        fp.consumed = cctx->consumedSrcSize;
        fp.produced = cctx->producedCSize;
        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
        fp.currentJobID = 0;      /* no jobs in single-threaded mode */
        fp.nbActiveWorkers = 0;
        return fp;
}   }
1184
/*! ZSTD_toFlushNow()
 *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
 *  @return : number of bytes ready to flush, or 0 in single-threaded mode. */
size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
{
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        return ZSTDMT_toFlushNow(cctx->mtctx);
    }
#endif
    (void)cctx;   /* unused when ZSTD_MULTITHREAD is disabled */
    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
}
1198
1199
Scott Baker2d897982019-09-24 11:50:08 -07001200
1201static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
1202 ZSTD_compressionParameters cParams2)
1203{
1204 return (cParams1.hashLog == cParams2.hashLog)
1205 & (cParams1.chainLog == cParams2.chainLog)
1206 & (cParams1.strategy == cParams2.strategy) /* opt parser space */
Scott Baker8487c5d2019-10-18 12:49:46 -07001207 & ((cParams1.minMatch==3) == (cParams2.minMatch==3)); /* hashlog3 space */
1208}
1209
/** ZSTD_assertEqualCParams() :
 *  debug helper : asserts every field of the two parameter sets matches.
 *  Compiles to nothing in release builds (hence the (void) casts). */
static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
                                    ZSTD_compressionParameters cParams2)
{
    (void)cParams1;
    (void)cParams2;
    assert(cParams1.windowLog == cParams2.windowLog);
    assert(cParams1.chainLog  == cParams2.chainLog);
    assert(cParams1.hashLog   == cParams2.hashLog);
    assert(cParams1.searchLog == cParams2.searchLog);
    assert(cParams1.minMatch  == cParams2.minMatch);
    assert(cParams1.targetLength == cParams2.targetLength);
    assert(cParams1.strategy  == cParams2.strategy);
}
1223
1224/** The parameters are equivalent if ldm is not enabled in both sets or
1225 * all the parameters are equivalent. */
1226static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
1227 ldmParams_t ldmParams2)
1228{
1229 return (!ldmParams1.enableLdm && !ldmParams2.enableLdm) ||
1230 (ldmParams1.enableLdm == ldmParams2.enableLdm &&
1231 ldmParams1.hashLog == ldmParams2.hashLog &&
1232 ldmParams1.bucketSizeLog == ldmParams2.bucketSizeLog &&
1233 ldmParams1.minMatchLength == ldmParams2.minMatchLength &&
Scott Baker8487c5d2019-10-18 12:49:46 -07001234 ldmParams1.hashRateLog == ldmParams2.hashRateLog);
Scott Baker2d897982019-09-24 11:50:08 -07001235}
1236
/* whether the context manages its own streaming input/output buffers */
typedef enum { ZSTDb_not_buffered, ZSTDb_buffered } ZSTD_buffered_policy_e;
1238
/* ZSTD_sufficientBuff() :
 * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
 * Compares existing capacities ("1" suffix) with what the new parameters
 * ("2" suffix) would require.
 * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
                            size_t maxNbLit1,
                            ZSTD_buffered_policy_e buffPol2,
                            ZSTD_compressionParameters cParams2,
                            U64 pledgedSrcSize)
{
    size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
    size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
    /* minMatch==3 allows one sequence per 3 bytes; otherwise per 4 */
    size_t const maxNbSeq2 = blockSize2 / ((cParams2.minMatch == 3) ? 3 : 4);
    size_t const maxNbLit2 = blockSize2;
    size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
    DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
                (U32)neededBufferSize2, (U32)bufferSize1);
    DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
                (U32)maxNbSeq2, (U32)maxNbSeq1);
    DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
                (U32)maxNbLit2, (U32)maxNbLit1);
    return (maxNbLit2 <= maxNbLit1)
         & (maxNbSeq2 <= maxNbSeq1)
         & (neededBufferSize2 <= bufferSize1);
}
1263
/** Equivalence for resetCCtx purposes :
 *  @return 1 when the existing context (params1 + buffer capacities) can be
 *  re-used as-is for a compression with params2; 0 when a full reset/realloc
 *  is needed. */
static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
                                 ZSTD_CCtx_params params2,
                                 size_t buffSize1,
                                 size_t maxNbSeq1, size_t maxNbLit1,
                                 ZSTD_buffered_policy_e buffPol2,
                                 U64 pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
    if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
        DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
        return 0;
    }
    if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
        DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
        return 0;
    }
    if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
                             params2.cParams, pledgedSrcSize)) {
        DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
        return 0;
    }
    return 1;
}
1288
/** ZSTD_reset_compressedBlockState() :
 *  restores a block state to frame-start conditions : default repcodes,
 *  and no re-usable entropy tables. */
static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
{
    int i;
    for (i = 0; i < ZSTD_REP_NUM; ++i)
        bs->rep[i] = repStartValue[i];
    /* force regeneration of Huffman and FSE tables on next block */
    bs->entropy.huf.repeatMode = HUF_repeat_none;
    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
}
1299
/*! ZSTD_invalidateMatchState()
 *  Invalidate all the matches in the match finder tables.
 *  Requires nextSrc and base to be set (can be NULL).
 */
static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms)
{
    ZSTD_window_clear(&ms->window);

    ms->nextToUpdate = ms->window.dictLimit;
    ms->loadedDictEnd = 0;
    ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
    ms->dictMatchState = NULL; /* detach any attached dictionary state */
}
1313
/*! ZSTD_continueCCtx() :
 *  reuse CCtx without reset (note : requires no dictionary) :
 *  keeps the allocated workspace and tables, only re-initializes
 *  per-frame state. */
static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pledgedSrcSize)
{
    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
    size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
    DEBUGLOG(4, "ZSTD_continueCCtx: re-use context in place");

    cctx->blockSize = blockSize;   /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
    cctx->appliedParams = params;
    cctx->blockState.matchState.cParams = params.cParams;
    cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
    cctx->consumedSrcSize = 0;
    cctx->producedCSize = 0;
    if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
        cctx->appliedParams.fParams.contentSizeFlag = 0;   /* cannot write an unknown size into the frame header */
    DEBUGLOG(4, "pledged content size : %u ; flag : %u",
        (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
    cctx->stage = ZSTDcs_init;
    cctx->dictID = 0;
    if (params.ldmParams.enableLdm)
        ZSTD_window_clear(&cctx->ldmState.window);
    ZSTD_referenceExternalSequences(cctx, NULL, 0);
    ZSTD_invalidateMatchState(&cctx->blockState.matchState);
    ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
    XXH64_reset(&cctx->xxhState, 0);   /* restart content checksum */
    return 0;
}
1342
/* whether table memory must be zeroed on reset (noMemset skips the memset) */
typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;

/* who the match state is being reset for : CDict skips CCtx-only tables */
typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e;
1346
/** ZSTD_reset_matchState() :
 *  re-initializes a match state, carving its tables out of the workspace at `ptr`
 *  (bump-pointer allocation : optimal-parser tables first, then hash/chain/hash3).
 *  @return : pointer just past the space consumed, for the next workspace user. */
static void*
ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                      void* ptr,
                const ZSTD_compressionParameters* cParams,
                      ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho)
{
    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
    size_t const hSize = ((size_t)1) << cParams->hashLog;
    /* the 3-byte hash table only exists for CCtx with minMatch==3 */
    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
    size_t const h3Size = ((size_t)1) << hashLog3;
    size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);

    assert(((size_t)ptr & 3) == 0);   /* U32 alignment required */

    ms->hashLog3 = hashLog3;
    memset(&ms->window, 0, sizeof(ms->window));
    ms->window.dictLimit = 1;    /* start from 1, so that 1st position is valid */
    ms->window.lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
    ms->window.nextSrc = ms->window.base + 1;   /* see issue #1241 */
    ZSTD_invalidateMatchState(ms);

    /* opt parser space */
    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
        DEBUGLOG(4, "reserving optimal parser space");
        ms->opt.litFreq = (unsigned*)ptr;
        ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits);
        ms->opt.matchLengthFreq = ms->opt.litLengthFreq + (MaxLL+1);
        ms->opt.offCodeFreq = ms->opt.matchLengthFreq + (MaxML+1);
        ptr = ms->opt.offCodeFreq + (MaxOff+1);
        ms->opt.matchTable = (ZSTD_match_t*)ptr;
        ptr = ms->opt.matchTable + ZSTD_OPT_NUM+1;
        ms->opt.priceTable = (ZSTD_optimal_t*)ptr;
        ptr = ms->opt.priceTable + ZSTD_OPT_NUM+1;
    }

    /* table Space */
    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_noMemset);
    assert(((size_t)ptr & 3) == 0);  /* ensure ptr is properly aligned */
    if (crp!=ZSTDcrp_noMemset) memset(ptr, 0, tableSpace);   /* reset tables only */
    ms->hashTable = (U32*)(ptr);
    ms->chainTable = ms->hashTable + hSize;
    ms->hashTable3 = ms->chainTable + chainSize;
    ptr = ms->hashTable3 + h3Size;

    ms->cParams = *cParams;   /* remember the params the tables were sized for */

    assert(((size_t)ptr & 3) == 0);
    return ptr;
}
1396
/* ZSTD_indexTooCloseToMax() :
 * minor optimization : prefer memset() rather than reduceIndex()
 * which is measurably slow in some circumstances (reported for Visual Studio).
 * Works when re-using a context for a lot of smallish inputs :
 * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN,
 * memset() will be triggered before reduceIndex().
 */
#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB)
static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
{
    /* nextSrc - base == current index of the window */
    return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
}
1409
/* thresholds controlling opportunistic workspace shrinking on reset */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
                                                 * during at least this number of times,
                                                 * context's memory usage is considered wasteful,
                                                 * because it's sized to handle a worst case scenario which rarely happens.
                                                 * In which case, resize it down to free some memory */
1416
Scott Baker2d897982019-09-24 11:50:08 -07001417/*! ZSTD_resetCCtx_internal() :
1418 note : `params` are assumed fully validated at this stage */
1419static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
Scott Baker8487c5d2019-10-18 12:49:46 -07001420 ZSTD_CCtx_params params,
1421 U64 const pledgedSrcSize,
Scott Baker2d897982019-09-24 11:50:08 -07001422 ZSTD_compResetPolicy_e const crp,
1423 ZSTD_buffered_policy_e const zbuff)
1424{
1425 DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
1426 (U32)pledgedSrcSize, params.cParams.windowLog);
1427 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1428
1429 if (crp == ZSTDcrp_continue) {
1430 if (ZSTD_equivalentParams(zc->appliedParams, params,
Scott Baker8487c5d2019-10-18 12:49:46 -07001431 zc->inBuffSize,
1432 zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
1433 zbuff, pledgedSrcSize) ) {
1434 DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode");
1435 zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */
1436 if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) {
1437 DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)",
1438 zc->appliedParams.cParams.windowLog, zc->blockSize);
1439 if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) {
1440 /* prefer a reset, faster than a rescale */
1441 ZSTD_reset_matchState(&zc->blockState.matchState,
1442 zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
1443 &params.cParams,
1444 crp, ZSTD_resetTarget_CCtx);
1445 }
1446 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
1447 } } }
Scott Baker2d897982019-09-24 11:50:08 -07001448 DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
1449
1450 if (params.ldmParams.enableLdm) {
1451 /* Adjust long distance matching parameters */
Scott Baker2d897982019-09-24 11:50:08 -07001452 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
1453 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
Scott Baker8487c5d2019-10-18 12:49:46 -07001454 assert(params.ldmParams.hashRateLog < 32);
1455 zc->ldmState.hashPower = ZSTD_rollingHash_primePower(params.ldmParams.minMatchLength);
Scott Baker2d897982019-09-24 11:50:08 -07001456 }
1457
1458 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
1459 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
Scott Baker8487c5d2019-10-18 12:49:46 -07001460 U32 const divider = (params.cParams.minMatch==3) ? 3 : 4;
Scott Baker2d897982019-09-24 11:50:08 -07001461 size_t const maxNbSeq = blockSize / divider;
Scott Baker8487c5d2019-10-18 12:49:46 -07001462 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
Scott Baker2d897982019-09-24 11:50:08 -07001463 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
1464 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
1465 size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
1466 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
Scott Baker8487c5d2019-10-18 12:49:46 -07001467 void* ptr; /* used to partition workSpace */
Scott Baker2d897982019-09-24 11:50:08 -07001468
1469 /* Check if workSpace is large enough, alloc a new one if needed */
1470 { size_t const entropySpace = HUF_WORKSPACE_SIZE;
1471 size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
1472 size_t const bufferSpace = buffInSize + buffOutSize;
1473 size_t const ldmSpace = ZSTD_ldm_getTableSize(params.ldmParams);
1474 size_t const ldmSeqSpace = maxNbLdmSeq * sizeof(rawSeq);
1475
1476 size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
1477 ldmSeqSpace + matchStateSize + tokenSpace +
1478 bufferSpace;
Scott Baker2d897982019-09-24 11:50:08 -07001479
Scott Baker8487c5d2019-10-18 12:49:46 -07001480 int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
1481 int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
1482 int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
1483 zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;
1484
1485 DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
1486 neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
1487 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1488
1489 if (workSpaceTooSmall || workSpaceWasteful) {
1490 DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB",
1491 zc->workSpaceSize >> 10,
1492 neededSpace >> 10);
1493
1494 RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
Scott Baker2d897982019-09-24 11:50:08 -07001495
1496 zc->workSpaceSize = 0;
1497 ZSTD_free(zc->workSpace, zc->customMem);
1498 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
Scott Baker8487c5d2019-10-18 12:49:46 -07001499 RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation);
Scott Baker2d897982019-09-24 11:50:08 -07001500 zc->workSpaceSize = neededSpace;
Scott Baker8487c5d2019-10-18 12:49:46 -07001501 zc->workSpaceOversizedDuration = 0;
Scott Baker2d897982019-09-24 11:50:08 -07001502
Scott Baker8487c5d2019-10-18 12:49:46 -07001503 /* Statically sized space.
1504 * entropyWorkspace never moves,
1505 * though prev/next block swap places */
Scott Baker2d897982019-09-24 11:50:08 -07001506 assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
1507 assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
1508 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
1509 zc->blockState.nextCBlock = zc->blockState.prevCBlock + 1;
1510 ptr = zc->blockState.nextCBlock + 1;
1511 zc->entropyWorkspace = (U32*)ptr;
1512 } }
1513
1514 /* init params */
1515 zc->appliedParams = params;
Scott Baker8487c5d2019-10-18 12:49:46 -07001516 zc->blockState.matchState.cParams = params.cParams;
Scott Baker2d897982019-09-24 11:50:08 -07001517 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1518 zc->consumedSrcSize = 0;
1519 zc->producedCSize = 0;
1520 if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)
1521 zc->appliedParams.fParams.contentSizeFlag = 0;
1522 DEBUGLOG(4, "pledged content size : %u ; flag : %u",
Scott Baker8487c5d2019-10-18 12:49:46 -07001523 (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag);
Scott Baker2d897982019-09-24 11:50:08 -07001524 zc->blockSize = blockSize;
1525
1526 XXH64_reset(&zc->xxhState, 0);
1527 zc->stage = ZSTDcs_init;
1528 zc->dictID = 0;
1529
1530 ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock);
1531
Scott Baker8487c5d2019-10-18 12:49:46 -07001532 ptr = ZSTD_reset_matchState(&zc->blockState.matchState,
1533 zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32,
1534 &params.cParams,
1535 crp, ZSTD_resetTarget_CCtx);
Scott Baker2d897982019-09-24 11:50:08 -07001536
1537 /* ldm hash table */
1538 /* initialize bucketOffsets table later for pointer alignment */
1539 if (params.ldmParams.enableLdm) {
1540 size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
1541 memset(ptr, 0, ldmHSize * sizeof(ldmEntry_t));
1542 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
1543 zc->ldmState.hashTable = (ldmEntry_t*)ptr;
1544 ptr = zc->ldmState.hashTable + ldmHSize;
1545 zc->ldmSequences = (rawSeq*)ptr;
1546 ptr = zc->ldmSequences + maxNbLdmSeq;
1547 zc->maxNbLdmSequences = maxNbLdmSeq;
1548
1549 memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
1550 }
1551 assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */
1552
Scott Baker2d897982019-09-24 11:50:08 -07001553 /* sequences storage */
Scott Baker8487c5d2019-10-18 12:49:46 -07001554 zc->seqStore.maxNbSeq = maxNbSeq;
Scott Baker2d897982019-09-24 11:50:08 -07001555 zc->seqStore.sequencesStart = (seqDef*)ptr;
1556 ptr = zc->seqStore.sequencesStart + maxNbSeq;
1557 zc->seqStore.llCode = (BYTE*) ptr;
1558 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
1559 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
1560 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
Scott Baker8487c5d2019-10-18 12:49:46 -07001561 /* ZSTD_wildcopy() is used to copy into the literals buffer,
1562 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1563 */
1564 zc->seqStore.maxNbLit = blockSize;
1565 ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
Scott Baker2d897982019-09-24 11:50:08 -07001566
1567 /* ldm bucketOffsets table */
1568 if (params.ldmParams.enableLdm) {
1569 size_t const ldmBucketSize =
1570 ((size_t)1) << (params.ldmParams.hashLog -
1571 params.ldmParams.bucketSizeLog);
1572 memset(ptr, 0, ldmBucketSize);
1573 zc->ldmState.bucketOffsets = (BYTE*)ptr;
1574 ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
1575 ZSTD_window_clear(&zc->ldmState.window);
1576 }
1577 ZSTD_referenceExternalSequences(zc, NULL, 0);
1578
1579 /* buffers */
1580 zc->inBuffSize = buffInSize;
1581 zc->inBuff = (char*)ptr;
1582 zc->outBuffSize = buffOutSize;
1583 zc->outBuff = zc->inBuff + buffInSize;
1584
1585 return 0;
1586 }
1587}
1588
1589/* ZSTD_invalidateRepCodes() :
1590 * ensures next compression will not use repcodes from previous block.
1591 * Note : only works with regular variant;
1592 * do not use with extDict variant ! */
1593void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) {
1594 int i;
1595 for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0;
1596 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
1597}
1598
/* These are the approximate sizes for each strategy past which copying the
 * dictionary tables into the working context is faster than using them
 * in-place.
 * Indexed by ZSTD_strategy (see ZSTD_shouldAttachDict()). */
static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = {
    8 KB,  /* unused */
    8 KB,  /* ZSTD_fast */
    16 KB, /* ZSTD_dfast */
    32 KB, /* ZSTD_greedy */
    32 KB, /* ZSTD_lazy */
    32 KB, /* ZSTD_lazy2 */
    32 KB, /* ZSTD_btlazy2 */
    32 KB, /* ZSTD_btopt */
    8 KB,  /* ZSTD_btultra */
    8 KB   /* ZSTD_btultra2 */
};
1615
1616static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
1617 ZSTD_CCtx_params params,
1618 U64 pledgedSrcSize)
1619{
1620 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
1621 return ( pledgedSrcSize <= cutoff
1622 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
1623 || params.attachDictPref == ZSTD_dictForceAttach )
1624 && params.attachDictPref != ZSTD_dictForceCopy
1625 && !params.forceWindow; /* dictMatchState isn't correctly
1626 * handled in _enforceMaxDist */
1627}
1628
1629static size_t
1630ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
1631 const ZSTD_CDict* cdict,
1632 ZSTD_CCtx_params params,
1633 U64 pledgedSrcSize,
1634 ZSTD_buffered_policy_e zbuff)
1635{
1636 { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams;
1637 unsigned const windowLog = params.cParams.windowLog;
1638 assert(windowLog != 0);
1639 /* Resize working context table params for input only, since the dict
1640 * has its own tables. */
1641 params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
1642 params.cParams.windowLog = windowLog;
1643 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1644 ZSTDcrp_continue, zbuff);
1645 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
1646 }
1647
1648 { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
1649 - cdict->matchState.window.base);
1650 const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
1651 if (cdictLen == 0) {
1652 /* don't even attach dictionaries with no contents */
1653 DEBUGLOG(4, "skipping attaching empty dictionary");
1654 } else {
1655 DEBUGLOG(4, "attaching dictionary into context");
1656 cctx->blockState.matchState.dictMatchState = &cdict->matchState;
1657
1658 /* prep working match state so dict matches never have negative indices
1659 * when they are translated to the working context's index space. */
1660 if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
1661 cctx->blockState.matchState.window.nextSrc =
1662 cctx->blockState.matchState.window.base + cdictEnd;
1663 ZSTD_window_clear(&cctx->blockState.matchState.window);
1664 }
1665 /* loadedDictEnd is expressed within the referential of the active context */
1666 cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
1667 } }
1668
1669 cctx->dictID = cdict->dictID;
1670
1671 /* copy block state */
1672 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
1673
1674 return 0;
1675}
1676
1677static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
Scott Baker2d897982019-09-24 11:50:08 -07001678 const ZSTD_CDict* cdict,
Scott Baker8487c5d2019-10-18 12:49:46 -07001679 ZSTD_CCtx_params params,
Scott Baker2d897982019-09-24 11:50:08 -07001680 U64 pledgedSrcSize,
1681 ZSTD_buffered_policy_e zbuff)
1682{
Scott Baker8487c5d2019-10-18 12:49:46 -07001683 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
1684
1685 DEBUGLOG(4, "copying dictionary into context");
1686
1687 { unsigned const windowLog = params.cParams.windowLog;
1688 assert(windowLog != 0);
Scott Baker2d897982019-09-24 11:50:08 -07001689 /* Copy only compression parameters related to tables. */
Scott Baker8487c5d2019-10-18 12:49:46 -07001690 params.cParams = *cdict_cParams;
1691 params.cParams.windowLog = windowLog;
Scott Baker2d897982019-09-24 11:50:08 -07001692 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1693 ZSTDcrp_noMemset, zbuff);
Scott Baker8487c5d2019-10-18 12:49:46 -07001694 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
1695 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
1696 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
Scott Baker2d897982019-09-24 11:50:08 -07001697 }
1698
1699 /* copy tables */
Scott Baker8487c5d2019-10-18 12:49:46 -07001700 { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
1701 size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
Scott Baker2d897982019-09-24 11:50:08 -07001702 size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
1703 assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
1704 assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
1705 assert((U32*)cdict->matchState.chainTable == (U32*)cdict->matchState.hashTable + hSize); /* chainTable must follow hashTable */
1706 assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
1707 memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
1708 }
Scott Baker8487c5d2019-10-18 12:49:46 -07001709
Scott Baker2d897982019-09-24 11:50:08 -07001710 /* Zero the hashTable3, since the cdict never fills it */
1711 { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
1712 assert(cdict->matchState.hashLog3 == 0);
1713 memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32));
1714 }
1715
1716 /* copy dictionary offsets */
Scott Baker8487c5d2019-10-18 12:49:46 -07001717 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
Scott Baker2d897982019-09-24 11:50:08 -07001718 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
1719 dstMatchState->window = srcMatchState->window;
1720 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
Scott Baker2d897982019-09-24 11:50:08 -07001721 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
1722 }
Scott Baker8487c5d2019-10-18 12:49:46 -07001723
Scott Baker2d897982019-09-24 11:50:08 -07001724 cctx->dictID = cdict->dictID;
1725
1726 /* copy block state */
1727 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
1728
1729 return 0;
1730}
1731
Scott Baker8487c5d2019-10-18 12:49:46 -07001732/* We have a choice between copying the dictionary context into the working
1733 * context, or referencing the dictionary context from the working context
1734 * in-place. We decide here which strategy to use. */
1735static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
1736 const ZSTD_CDict* cdict,
1737 ZSTD_CCtx_params params,
1738 U64 pledgedSrcSize,
1739 ZSTD_buffered_policy_e zbuff)
1740{
1741
1742 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)",
1743 (unsigned)pledgedSrcSize);
1744
1745 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1746 return ZSTD_resetCCtx_byAttachingCDict(
1747 cctx, cdict, params, pledgedSrcSize, zbuff);
1748 } else {
1749 return ZSTD_resetCCtx_byCopyingCDict(
1750 cctx, cdict, params, pledgedSrcSize, zbuff);
1751 }
1752}
1753
Scott Baker2d897982019-09-24 11:50:08 -07001754/*! ZSTD_copyCCtx_internal() :
1755 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
1756 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
1757 * The "context", in this case, refers to the hash and chain tables,
1758 * entropy tables, and dictionary references.
1759 * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx.
1760 * @return : 0, or an error code */
1761static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
1762 const ZSTD_CCtx* srcCCtx,
1763 ZSTD_frameParameters fParams,
1764 U64 pledgedSrcSize,
1765 ZSTD_buffered_policy_e zbuff)
1766{
1767 DEBUGLOG(5, "ZSTD_copyCCtx_internal");
Scott Baker8487c5d2019-10-18 12:49:46 -07001768 RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong);
Scott Baker2d897982019-09-24 11:50:08 -07001769
1770 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
1771 { ZSTD_CCtx_params params = dstCCtx->requestedParams;
1772 /* Copy only compression parameters related to tables. */
1773 params.cParams = srcCCtx->appliedParams.cParams;
1774 params.fParams = fParams;
1775 ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
1776 ZSTDcrp_noMemset, zbuff);
1777 assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
1778 assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
1779 assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog);
1780 assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog);
1781 assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3);
1782 }
1783
1784 /* copy tables */
1785 { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
1786 size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
1787 size_t const h3Size = (size_t)1 << srcCCtx->blockState.matchState.hashLog3;
1788 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
1789 assert((U32*)dstCCtx->blockState.matchState.chainTable == (U32*)dstCCtx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
1790 assert((U32*)dstCCtx->blockState.matchState.hashTable3 == (U32*)dstCCtx->blockState.matchState.chainTable + chainSize);
1791 memcpy(dstCCtx->blockState.matchState.hashTable, srcCCtx->blockState.matchState.hashTable, tableSpace); /* presumes all tables follow each other */
1792 }
1793
1794 /* copy dictionary offsets */
1795 {
Scott Baker8487c5d2019-10-18 12:49:46 -07001796 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
Scott Baker2d897982019-09-24 11:50:08 -07001797 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
1798 dstMatchState->window = srcMatchState->window;
1799 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
Scott Baker2d897982019-09-24 11:50:08 -07001800 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
1801 }
1802 dstCCtx->dictID = srcCCtx->dictID;
1803
1804 /* copy block state */
1805 memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock));
1806
1807 return 0;
1808}
1809
1810/*! ZSTD_copyCCtx() :
1811 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
1812 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
1813 * pledgedSrcSize==0 means "unknown".
1814* @return : 0, or an error code */
1815size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
1816{
1817 ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
1818 ZSTD_buffered_policy_e const zbuff = (ZSTD_buffered_policy_e)(srcCCtx->inBuffSize>0);
1819 ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1);
1820 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
1821 fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN);
1822
1823 return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx,
1824 fParams, pledgedSrcSize,
1825 zbuff);
1826}
1827
1828
#define ZSTD_ROWSIZE 16
/*! ZSTD_reduceTable() :
 *  reduce table indexes by `reducerValue`, or squash to zero.
 *  PreserveMark preserves "unsorted mark" for btlazy2 strategy.
 *  It must be set to a clear 0/1 value, to remove branch during inlining.
 *  Presume table size is a multiple of ZSTD_ROWSIZE
 *  to help auto-vectorization */
FORCE_INLINE_TEMPLATE void
ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark)
{
    int const nbRows = (int)size / ZSTD_ROWSIZE;
    int cellNb = 0;
    int rowNb;
    assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
    assert(size < (1U<<31));   /* can be casted to int */
    /* fixed-trip-count inner loop over rows of 16 cells keeps the body
     * branch-light and vectorizable */
    for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
        int column;
        for (column=0; column<ZSTD_ROWSIZE; column++) {
            if (preserveMark) {
                /* bump marked cells by reducerValue so the subtraction below
                 * restores them to exactly ZSTD_DUBT_UNSORTED_MARK */
                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
                table[cellNb] += adder;
            }
            /* saturating subtraction: indexes below reducerValue are squashed to 0 */
            if (table[cellNb] < reducerValue) table[cellNb] = 0;
            else table[cellNb] -= reducerValue;
            cellNb++;
    }   }
}
1856
/* ZSTD_reduceTable() :
 * reduce all indexes of `table` by `reducerValue` (squashing to zero),
 * without preserving any special marks. */
static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 0);
}
1861
/* ZSTD_reduceTable_btlazy2() :
 * same as ZSTD_reduceTable(), but preserves ZSTD_DUBT_UNSORTED_MARK entries,
 * as required by the btlazy2 strategy. */
static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue)
{
    ZSTD_reduceTable_internal(table, size, reducerValue, 1);
}
1866
1867/*! ZSTD_reduceIndex() :
1868* rescale all indexes to avoid future overflow (indexes are U32) */
Scott Baker8487c5d2019-10-18 12:49:46 -07001869static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue)
Scott Baker2d897982019-09-24 11:50:08 -07001870{
Scott Baker8487c5d2019-10-18 12:49:46 -07001871 { U32 const hSize = (U32)1 << params->cParams.hashLog;
Scott Baker2d897982019-09-24 11:50:08 -07001872 ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
1873 }
1874
Scott Baker8487c5d2019-10-18 12:49:46 -07001875 if (params->cParams.strategy != ZSTD_fast) {
1876 U32 const chainSize = (U32)1 << params->cParams.chainLog;
1877 if (params->cParams.strategy == ZSTD_btlazy2)
Scott Baker2d897982019-09-24 11:50:08 -07001878 ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
1879 else
1880 ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue);
1881 }
1882
1883 if (ms->hashLog3) {
1884 U32 const h3Size = (U32)1 << ms->hashLog3;
1885 ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue);
1886 }
1887}
1888
1889
1890/*-*******************************************************
1891* Block entropic compression
1892*********************************************************/
1893
1894/* See doc/zstd_compression_format.md for detailed format description */
1895
Scott Baker8487c5d2019-10-18 12:49:46 -07001896static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
Scott Baker2d897982019-09-24 11:50:08 -07001897{
Scott Baker8487c5d2019-10-18 12:49:46 -07001898 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
1899 RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity,
1900 dstSize_tooSmall);
1901 MEM_writeLE24(dst, cBlockHeader24);
Scott Baker2d897982019-09-24 11:50:08 -07001902 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
Scott Baker8487c5d2019-10-18 12:49:46 -07001903 return ZSTD_blockHeaderSize + srcSize;
Scott Baker2d897982019-09-24 11:50:08 -07001904}
1905
Scott Baker2d897982019-09-24 11:50:08 -07001906static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
1907{
1908 BYTE* const ostart = (BYTE* const)dst;
1909 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
1910
Scott Baker8487c5d2019-10-18 12:49:46 -07001911 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall);
Scott Baker2d897982019-09-24 11:50:08 -07001912
1913 switch(flSize)
1914 {
1915 case 1: /* 2 - 1 - 5 */
1916 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
1917 break;
1918 case 2: /* 2 - 2 - 12 */
1919 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
1920 break;
1921 case 3: /* 2 - 2 - 20 */
1922 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
1923 break;
1924 default: /* not necessary : flSize is {1,2,3} */
1925 assert(0);
1926 }
1927
1928 memcpy(ostart + flSize, src, srcSize);
1929 return srcSize + flSize;
1930}
1931
1932static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
1933{
1934 BYTE* const ostart = (BYTE* const)dst;
1935 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
1936
1937 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
1938
1939 switch(flSize)
1940 {
1941 case 1: /* 2 - 1 - 5 */
1942 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
1943 break;
1944 case 2: /* 2 - 2 - 12 */
1945 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
1946 break;
1947 case 3: /* 2 - 2 - 20 */
1948 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
1949 break;
1950 default: /* not necessary : flSize is {1,2,3} */
1951 assert(0);
1952 }
1953
1954 ostart[flSize] = *(const BYTE*)src;
1955 return flSize+1;
1956}
1957
1958
Scott Baker8487c5d2019-10-18 12:49:46 -07001959/* ZSTD_minGain() :
1960 * minimum compression required
1961 * to generate a compress block or a compressed literals section.
1962 * note : use same formula for both situations */
1963static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
1964{
1965 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6;
1966 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8);
1967 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
1968 return (srcSize >> minlog) + 2;
1969}
Scott Baker2d897982019-09-24 11:50:08 -07001970
/* ZSTD_compressLiterals() :
 * Compress the literals section with Huffman, falling back to raw (set_basic)
 * or RLE storage when compression does not pay for itself.
 * `nextHuf` receives the entropy state to carry into the next block;
 * on fallback it is restored to `prevHuf`.
 * @return : nb of bytes written into dst, or an error code */
static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                                     ZSTD_hufCTables_t* nextHuf,
                                     ZSTD_strategy strategy, int disableLiteralCompression,
                                     void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     void* workspace, size_t wkspSize,
                                     const int bmi2)
{
    size_t const minGain = ZSTD_minGain(srcSize, strategy);
    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
    BYTE* const ostart = (BYTE*)dst;
    U32 singleStream = srcSize < 256;   /* small inputs use a single Huffman stream */
    symbolEncodingType_e hType = set_compressed;
    size_t cLitSize;

    DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)",
                disableLiteralCompression);

    /* Prepare nextEntropy assuming reusing the existing table */
    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));

    if (disableLiteralCompression)
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);

    /* small ? don't even attempt compression (speed opt) */
# define COMPRESS_LITERALS_SIZE_MIN 63
    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
        if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }

    RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
    {   HUF_repeat repeat = prevHuf->repeatMode;
        /* fast strategies prefer reusing the previous table on small inputs */
        int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
        if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
        cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
                                : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
                                      workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
        if (repeat != HUF_repeat_none) {
            /* reused the existing table */
            hType = set_repeat;
        }
    }

    /* fallback to raw storage when compression failed or gained too little */
    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));   /* discard tentative entropy update */
        return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
    }
    /* 1-byte output means all literals were identical : use RLE instead */
    if (cLitSize==1) {
        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));   /* discard tentative entropy update */
        return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
    }

    if (hType == set_compressed) {
        /* using a newly constructed table */
        nextHuf->repeatMode = HUF_repeat_check;
    }

    /* Build header (bit-layout comments read: type - sizeFormat - srcSize - cLitSize) */
    switch(lhSize)
    {
    case 3: /* 2 - 2 - 10 - 10 */
        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
            MEM_writeLE24(ostart, lhc);
            break;
        }
    case 4: /* 2 - 2 - 14 - 14 */
        {   U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
            MEM_writeLE32(ostart, lhc);
            break;
        }
    case 5: /* 2 - 2 - 18 - 18 */
        {   U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
            MEM_writeLE32(ostart, lhc);
            ostart[4] = (BYTE)(cLitSize >> 10);
            break;
        }
    default: /* not possible : lhSize is {3,4,5} */
        assert(0);
    }
    return lhSize+cLitSize;
}
2053
2054
/* ZSTD_seqToCodes() :
 * Translate each stored sequence's raw litLength / matchLength / offset
 * into its symbol code, filling the llCode / ofCode / mlCode tables.
 * Overlong lengths (flagged via longLengthID/longLengthPos) are patched
 * to the max symbol afterwards. */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
{
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    BYTE* const llCodeTable = seqStorePtr->llCode;
    BYTE* const ofCodeTable = seqStorePtr->ofCode;
    BYTE* const mlCodeTable = seqStorePtr->mlCode;
    U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
    U32 u;
    assert(nbSeq <= seqStorePtr->maxNbSeq);
    for (u=0; u<nbSeq; u++) {
        U32 const llv = sequences[u].litLength;
        U32 const mlv = sequences[u].matchLength;
        llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
        /* offset code is simply the position of the highest set bit */
        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
        mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
    }
    /* longLengthID==1 : one litLength exceeded the representable range */
    if (seqStorePtr->longLengthID==1)
        llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
    /* longLengthID==2 : one matchLength exceeded the representable range */
    if (seqStorePtr->longLengthID==2)
        mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
}
2076
Scott Baker8487c5d2019-10-18 12:49:46 -07002077
/**
 * -log2(x / 256) lookup table for x in [0, 256).
 * If x == 0: Return 0
 * Else: Return floor(-log2(x / 256) * 256)
 * Values are fixed-point with 8 fractional bits (cost in 1/256th of a bit);
 * consumers accumulate and shift right by 8 (see ZSTD_entropyCost() and
 * ZSTD_crossEntropyCost()).
 */
static unsigned const kInverseProbabilityLog256[256] = {
    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,
    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,
    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,
    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,
    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,
    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,
    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,
    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,
    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,
    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,
    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,
    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,
    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,
    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,
    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,
    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,
    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,
    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,
    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,
    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,
    5,    4,    2,    1,
};
2107
2108
2109/**
2110 * Returns the cost in bits of encoding the distribution described by count
2111 * using the entropy bound.
2112 */
2113static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
2114{
2115 unsigned cost = 0;
2116 unsigned s;
2117 for (s = 0; s <= max; ++s) {
2118 unsigned norm = (unsigned)((256 * count[s]) / total);
2119 if (count[s] != 0 && norm == 0)
2120 norm = 1;
2121 assert(count[s] < total);
2122 cost += count[s] * kInverseProbabilityLog256[norm];
2123 }
2124 return cost >> 8;
2125}
2126
2127
2128/**
2129 * Returns the cost in bits of encoding the distribution in count using the
2130 * table described by norm. The max symbol support by norm is assumed >= max.
2131 * norm must be valid for every symbol with non-zero probability in count.
2132 */
2133static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
2134 unsigned const* count, unsigned const max)
2135{
2136 unsigned const shift = 8 - accuracyLog;
2137 size_t cost = 0;
2138 unsigned s;
2139 assert(accuracyLog <= 8);
2140 for (s = 0; s <= max; ++s) {
2141 unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
2142 unsigned const norm256 = normAcc << shift;
2143 assert(norm256 > 0);
2144 assert(norm256 < 256);
2145 cost += count[s] * kInverseProbabilityLog256[norm256];
2146 }
2147 return cost >> 8;
2148}
2149
2150
2151static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
2152 void const* ptr = ctable;
2153 U16 const* u16ptr = (U16 const*)ptr;
2154 U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
2155 return maxSymbolValue;
2156}
2157
2158
/**
 * Returns the cost in bits of encoding the distribution in count using ctable.
 * Returns an error if ctable cannot represent all the symbols in count.
 */
static size_t ZSTD_fseBitCost(
    FSE_CTable const* ctable,
    unsigned const* count,
    unsigned const* max)
    FSE_CTable const* ctable,
    unsigned const* count,
    unsigned const max)
{
    unsigned const kAccuracyLog = 8;  /* costs are accumulated in 1/256th-of-a-bit units */
    size_t cost = 0;
    unsigned s;
    FSE_CState_t cstate;
    FSE_initCState(&cstate, ctable);
    /* ctable must cover at least symbols 0..max, otherwise it cannot encode this block */
    RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC,
                    "Repeat FSE_CTable has maxSymbolValue %u < %u",
                    ZSTD_getFSEMaxSymbolValue(ctable), max);
    for (s = 0; s <= max; ++s) {
        unsigned const tableLog = cstate.stateLog;
        /* a legitimately-encodable symbol can never cost tableLog+1 bits or more;
         * reaching badCost means the table assigns the symbol zero probability */
        unsigned const badCost = (tableLog + 1) << kAccuracyLog;
        unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
        if (count[s] == 0)
            continue;
        RETURN_ERROR_IF(bitCost >= badCost, GENERIC,
                        "Repeat FSE_CTable has Prob[%u] == 0", s);
        cost += count[s] * bitCost;
    }
    return cost >> kAccuracyLog;  /* convert back to whole bits */
}
2188
/**
 * Returns the cost in bytes of encoding the normalized count header.
 * Returns an error if any of the helper functions return an error.
 */
static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
                              size_t const nbSeq, unsigned const FSELog)
{
    BYTE wksp[FSE_NCOUNTBOUND];  /* scratch output buffer : only the written size matters */
    S16 norm[MaxSeq + 1];        /* normalized distribution, filled by FSE_normalizeCount */
    const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
    FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
    return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
}
2202
2203
/* Controls whether the predefined default FSE distribution (set_basic) may be
 * selected for a given symbol type. Offsets disallow it when they exceed the
 * range covered by the default table. */
typedef enum {
    ZSTD_defaultDisallowed = 0,
    ZSTD_defaultAllowed = 1
} ZSTD_defaultPolicy_e;
2208
/* ZSTD_selectEncodingType() :
 * Chooses how a symbol distribution will be represented in the block header :
 * set_rle, set_basic (predefined default table), set_repeat (reuse previous
 * table), or set_compressed (emit a new table). Fast strategies (< ZSTD_lazy)
 * use cheap heuristics; stronger strategies compare estimated bit costs.
 * Updates *repeatMode to reflect the decision. */
MEM_STATIC symbolEncodingType_e
ZSTD_selectEncodingType(
        FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
        size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
        FSE_CTable const* prevCTable,
        short const* defaultNorm, U32 defaultNormLog,
        ZSTD_defaultPolicy_e const isDefaultAllowed,
        ZSTD_strategy const strategy)
{
    ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
    if (mostFrequent == nbSeq) {
        /* all sequences use a single symbol */
        *repeatMode = FSE_repeat_none;
        if (isDefaultAllowed && nbSeq <= 2) {
            /* Prefer set_basic over set_rle when there are 2 or less symbols,
             * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
             * If basic encoding isn't possible, always choose RLE.
             */
            DEBUGLOG(5, "Selected set_basic");
            return set_basic;
        }
        DEBUGLOG(5, "Selected set_rle");
        return set_rle;
    }
    if (strategy < ZSTD_lazy) {
        /* fast strategies : decide from thresholds only, no cost estimation */
        if (isDefaultAllowed) {
            size_t const staticFse_nbSeq_max = 1000;
            size_t const mult = 10 - strategy;
            size_t const baseLog = 3;
            size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
            assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
            assert(mult <= 9 && mult >= 7);
            if ( (*repeatMode == FSE_repeat_valid)
              && (nbSeq < staticFse_nbSeq_max) ) {
                DEBUGLOG(5, "Selected set_repeat");
                return set_repeat;
            }
            if ( (nbSeq < dynamicFse_nbSeq_min)
              || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
                DEBUGLOG(5, "Selected set_basic");
                /* The format allows default tables to be repeated, but it isn't useful.
                 * When using simple heuristics to select encoding type, we don't want
                 * to confuse these tables with dictionaries. When running more careful
                 * analysis, we don't need to waste time checking both repeating tables
                 * and default tables.
                 */
                *repeatMode = FSE_repeat_none;
                return set_basic;
            }
        }
    } else {
        /* strong strategies : estimate the bit cost of every candidate encoding.
         * Unavailable options are given an error value, which always compares
         * greater than any valid cost. */
        size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
        size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
        size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
        /* compressed cost = table header (bytes -> bits) + entropy-coded payload */
        size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);

        if (isDefaultAllowed) {
            assert(!ZSTD_isError(basicCost));
            assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
        }
        assert(!ZSTD_isError(NCountCost));
        assert(compressedCost < ERROR(maxCode));
        DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
                    (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
        if (basicCost <= repeatCost && basicCost <= compressedCost) {
            DEBUGLOG(5, "Selected set_basic");
            assert(isDefaultAllowed);
            *repeatMode = FSE_repeat_none;
            return set_basic;
        }
        if (repeatCost <= compressedCost) {
            DEBUGLOG(5, "Selected set_repeat");
            assert(!ZSTD_isError(repeatCost));
            return set_repeat;
        }
        assert(compressedCost < basicCost && compressedCost < repeatCost);
    }
    DEBUGLOG(5, "Selected set_compressed");
    *repeatMode = FSE_repeat_check;
    return set_compressed;
}
2289
/* ZSTD_buildCTable() :
 * Builds nextCTable according to the chosen encoding `type`, and writes the
 * table description (if any) into dst.
 * @return : number of bytes written into dst (0 for set_repeat/set_basic,
 *           1 for set_rle, NCount size for set_compressed), or an error code. */
MEM_STATIC size_t
ZSTD_buildCTable(void* dst, size_t dstCapacity,
                FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
                unsigned* count, U32 max,
                const BYTE* codeTable, size_t nbSeq,
                const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                const FSE_CTable* prevCTable, size_t prevCTableSize,
                void* workspace, size_t workspaceSize)
{
    BYTE* op = (BYTE*)dst;
    const BYTE* const oend = op + dstCapacity;
    DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);

    switch (type) {
    case set_rle:
        FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max));
        RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall);
        *op = codeTable[0];  /* the single repeated symbol */
        return 1;
    case set_repeat:
        /* reuse the previous block's table unchanged */
        memcpy(nextCTable, prevCTable, prevCTableSize);
        return 0;
    case set_basic:
        FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize));  /* note : could be pre-calculated */
        return 0;
    case set_compressed: {
        S16 norm[MaxSeq + 1];
        size_t nbSeq_1 = nbSeq;
        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
        /* NOTE(review): the last sequence's symbol count is reduced before
         * normalization — presumably because that symbol is carried by the
         * FSE initial state rather than the bitstream; confirm against the
         * matching adjustment on the decode side. */
        if (count[codeTable[nbSeq-1]] > 1) {
            count[codeTable[nbSeq-1]]--;
            nbSeq_1--;
        }
        assert(nbSeq_1 > 1);
        FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max));
        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
            FORWARD_IF_ERROR(NCountSize);
            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize));
            return NCountSize;
        }
    }
    default: assert(0); RETURN_ERROR(GENERIC);
    }
}
2334
/* ZSTD_encodeSequences_body() :
 * Writes the interleaved FSE/raw-bits encoding of all sequences into dst.
 * Sequences are processed from last to first (note the intentionally
 * underflowing loop) : the last sequence seeds the three FSE states.
 * The margin comments track worst-case accumulator occupancy on 32b/64b.
 * @return : number of bytes written, or an error code. */
FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    BIT_CStream_t blockStream;
    FSE_CState_t stateMatchLength;
    FSE_CState_t stateOffsetBits;
    FSE_CState_t stateLitLength;

    RETURN_ERROR_IF(
        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
        dstSize_tooSmall, "not enough space remaining");
    DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
                (int)(blockStream.endPtr - blockStream.startPtr),
                (unsigned)dstCapacity);

    /* first symbols */
    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
    FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
    FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
    if (MEM_32bits()) BIT_flushBits(&blockStream);
    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
    if (MEM_32bits()) BIT_flushBits(&blockStream);
    if (longOffsets) {
        /* offset may exceed the accumulator capacity : emit the low
         * extraBits first, flush, then emit the remaining high bits */
        U32 const ofBits = ofCodeTable[nbSeq-1];
        int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
        if (extraBits) {
            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
            BIT_flushBits(&blockStream);
        }
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
                    ofBits - extraBits);
    } else {
        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
    }
    BIT_flushBits(&blockStream);

    {   size_t n;
        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
            BYTE const llCode = llCodeTable[n];
            BYTE const ofCode = ofCodeTable[n];
            BYTE const mlCode = mlCodeTable[n];
            U32 const llBits = LL_bits[llCode];
            U32 const ofBits = ofCode;  /* offset code == number of extra bits */
            U32 const mlBits = ML_bits[mlCode];
            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                        (unsigned)sequences[n].litLength,
                        (unsigned)sequences[n].matchLength + MINMATCH,
                        (unsigned)sequences[n].offset);
                                                                            /* 32b*/  /* 64b*/
                                                                            /* (7)*/  /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
                BIT_flushBits(&blockStream);                                /* (7)*/
            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
            if (longOffsets) {
                int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                if (extraBits) {
                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
                    BIT_flushBits(&blockStream);                            /* (7)*/
                }
                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
                            ofBits - extraBits);                            /* 31 */
            } else {
                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
            }
            BIT_flushBits(&blockStream);                                    /* (7)*/
            DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
    }   }

    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
    FSE_flushCState(&blockStream, &stateMatchLength);
    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
    FSE_flushCState(&blockStream, &stateOffsetBits);
    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
    FSE_flushCState(&blockStream, &stateLitLength);

    {   size_t const streamSize = BIT_closeCStream(&blockStream);
        RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
        return streamSize;
    }
}
2428
/* Baseline instantiation of ZSTD_encodeSequences_body, compiled with the
 * translation unit's default target options. */
static size_t
ZSTD_encodeSequences_default(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    return ZSTD_encodeSequences_body(dst, dstCapacity,
                                    CTable_MatchLength, mlCodeTable,
                                    CTable_OffsetBits, ofCodeTable,
                                    CTable_LitLength, llCodeTable,
                                    sequences, nbSeq, longOffsets);
}
2443
2444
#if DYNAMIC_BMI2

/* BMI2-targeted instantiation of ZSTD_encodeSequences_body : the "bmi2"
 * target attribute lets the compiler emit BMI2 instructions for this copy,
 * selected at runtime by ZSTD_encodeSequences(). */
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    return ZSTD_encodeSequences_body(dst, dstCapacity,
                                    CTable_MatchLength, mlCodeTable,
                                    CTable_OffsetBits, ofCodeTable,
                                    CTable_LitLength, llCodeTable,
                                    sequences, nbSeq, longOffsets);
}

#endif
2463
/* ZSTD_encodeSequences() :
 * Runtime dispatcher : uses the BMI2-compiled variant when DYNAMIC_BMI2 is
 * enabled and the caller reports BMI2 support, otherwise the default build. */
static size_t ZSTD_encodeSequences(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
    if (bmi2) {
        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
                                         CTable_MatchLength, mlCodeTable,
                                         CTable_OffsetBits, ofCodeTable,
                                         CTable_LitLength, llCodeTable,
                                         sequences, nbSeq, longOffsets);
    }
#endif
    (void)bmi2;  /* unused when DYNAMIC_BMI2 is disabled */
    return ZSTD_encodeSequences_default(dst, dstCapacity,
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq, longOffsets);
}
2488
Scott Baker8487c5d2019-10-18 12:49:46 -07002489static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
2490{
2491 switch (cctxParams->literalCompressionMode) {
2492 case ZSTD_lcm_huffman:
2493 return 0;
2494 case ZSTD_lcm_uncompressed:
2495 return 1;
2496 default:
2497 assert(0 /* impossible: pre-validated */);
2498 /* fall-through */
2499 case ZSTD_lcm_auto:
2500 return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
2501 }
2502}
2503
/* ZSTD_compressSequences_internal():
 * actually compresses both literals and sequences.
 * Layout written : compressed literals | nbSeq header | seqHead byte |
 *                  LL/OF/ML table descriptions | FSE bitstream.
 * @return : compressed size, 0 to request an uncompressed block
 *           (see the lastNCount workaround below), or an error code. */
MEM_STATIC size_t
ZSTD_compressSequences_internal(seqStore_t* seqStorePtr,
                          const ZSTD_entropyCTables_t* prevEntropy,
                                ZSTD_entropyCTables_t* nextEntropy,
                          const ZSTD_CCtx_params* cctxParams,
                                void* dst, size_t dstCapacity,
                                void* workspace, size_t wkspSize,
                          const int bmi2)
{
    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
    unsigned count[MaxSeq+1];
    FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
    FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
    FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
    const seqDef* const sequences = seqStorePtr->sequencesStart;
    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
    const BYTE* const llCodeTable = seqStorePtr->llCode;
    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstCapacity;
    BYTE* op = ostart;
    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
    BYTE* seqHead;
    BYTE* lastNCount = NULL;   /* position of the last set_compressed table header, if any */

    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    DEBUGLOG(5, "ZSTD_compressSequences_internal");

    /* Compress literals */
    {   const BYTE* const literals = seqStorePtr->litStart;
        size_t const litSize = seqStorePtr->lit - literals;
        size_t const cSize = ZSTD_compressLiterals(
                                    &prevEntropy->huf, &nextEntropy->huf,
                                    cctxParams->cParams.strategy,
                                    ZSTD_disableLiteralsCompression(cctxParams),
                                    op, dstCapacity,
                                    literals, litSize,
                                    workspace, wkspSize,
                                    bmi2);
        FORWARD_IF_ERROR(cSize);
        assert(cSize <= dstCapacity);
        op += cSize;
    }

    /* Sequences Header : nbSeq uses a 1-3 byte variable-length encoding */
    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                    dstSize_tooSmall);
    if (nbSeq < 0x7F)
        *op++ = (BYTE)nbSeq;
    else if (nbSeq < LONGNBSEQ)
        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
    else
        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
    assert(op <= oend);
    if (nbSeq==0) {
        /* Copy the old tables over as if we repeated them */
        memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
        return op - ostart;
    }

    /* seqHead : flags for FSE encoding type */
    seqHead = op++;
    assert(op <= oend);

    /* convert length/distances into codes */
    ZSTD_seqToCodes(seqStorePtr);
    /* build CTable for Literal Lengths */
    {   unsigned max = MaxLL;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
        DEBUGLOG(5, "Building LL table");
        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        LLFSELog, prevEntropy->fse.litlengthCTable,
                                        LL_defaultNorm, LL_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(set_basic < set_compressed && set_rle < set_compressed);
        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
                                                    count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
                                                    prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
                                                    workspace, wkspSize);
            FORWARD_IF_ERROR(countSize);
            if (LLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for Offsets */
    {   unsigned max = MaxOff;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
        DEBUGLOG(5, "Building OF table");
        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        OffFSELog, prevEntropy->fse.offcodeCTable,
                                        OF_defaultNorm, OF_defaultNormLog,
                                        defaultPolicy, strategy);
        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
                                                    count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
                                                    prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
                                                    workspace, wkspSize);
            FORWARD_IF_ERROR(countSize);
            if (Offtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }
    /* build CTable for MatchLengths */
    {   unsigned max = MaxML;
        size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
                                        count, max, mostFrequent, nbSeq,
                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
                                        ML_defaultNorm, ML_defaultNormLog,
                                        ZSTD_defaultAllowed, strategy);
        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
                                                    count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
                                                    prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
                                                    workspace, wkspSize);
            FORWARD_IF_ERROR(countSize);
            if (MLtype == set_compressed)
                lastNCount = op;
            op += countSize;
            assert(op <= oend);
    }   }

    /* pack the three 2-bit encoding types into the seqHead flag byte */
    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

    {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                        op, oend - op,
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq,
                                        longOffsets, bmi2);
        FORWARD_IF_ERROR(bitstreamSize);
        op += bitstreamSize;
        assert(op <= oend);
        /* zstd versions <= 1.3.4 mistakenly report corruption when
         * FSE_readNCount() receives a buffer < 4 bytes.
         * Fixed by https://github.com/facebook/zstd/pull/1146.
         * This can happen when the last set_compressed table present is 2
         * bytes and the bitstream is only one byte.
         * In this exceedingly rare case, we will simply emit an uncompressed
         * block, since it isn't worth optimizing.
         */
        if (lastNCount && (op - lastNCount) < 4) {
            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
            assert(op - lastNCount == 3);
            DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                        "emitting an uncompressed block.");
            return 0;
        }
    }

    DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart));
    return op - ostart;
}
2673
/* ZSTD_compressSequences() :
 * Wraps ZSTD_compressSequences_internal() with the "is it worth it?" policy :
 * @return : compressed size, 0 when the block should be emitted raw
 *           (not compressible, insufficient gain, or decoder workaround),
 *           or an error code. */
MEM_STATIC size_t
ZSTD_compressSequences(seqStore_t* seqStorePtr,
                       const ZSTD_entropyCTables_t* prevEntropy,
                             ZSTD_entropyCTables_t* nextEntropy,
                       const ZSTD_CCtx_params* cctxParams,
                       void* dst, size_t dstCapacity,
                       size_t srcSize,
                       void* workspace, size_t wkspSize,
                       int bmi2)
{
    size_t const cSize = ZSTD_compressSequences_internal(
                            seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                            dst, dstCapacity,
                            workspace, wkspSize, bmi2);
    if (cSize == 0) return 0;
    /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
     * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
     */
    if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
        return 0;  /* block not compressed */
    FORWARD_IF_ERROR(cSize);

    /* Check compressibility : require at least minGain bytes of savings */
    {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
        if (cSize >= maxCSize) return 0;  /* block not compressed */
    }

    return cSize;
}
2703
/* ZSTD_selectBlockCompressor() :
 * Not static, but internal use only (used by long distance matcher)
 * assumption : strat is a valid strategy */
ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
{
    /* Dispatch table : rows follow ZSTD_dictMode_e (plain, extDict,
     * dictMatchState variants, per the function-name suffixes); columns are
     * indexed by strategy value, with slot 0 doubling as the default.
     * NOTE(review): the extDict/dictMatchState rows reuse btultra for the
     * btultra2 slot — presumably no dedicated variant exists; confirm. */
    static const ZSTD_blockCompressor blockCompressor[3][ZSTD_STRATEGY_MAX+1] = {
        { ZSTD_compressBlock_fast  /* default for 0 */,
          ZSTD_compressBlock_fast,
          ZSTD_compressBlock_doubleFast,
          ZSTD_compressBlock_greedy,
          ZSTD_compressBlock_lazy,
          ZSTD_compressBlock_lazy2,
          ZSTD_compressBlock_btlazy2,
          ZSTD_compressBlock_btopt,
          ZSTD_compressBlock_btultra,
          ZSTD_compressBlock_btultra2 },
        { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
          ZSTD_compressBlock_fast_extDict,
          ZSTD_compressBlock_doubleFast_extDict,
          ZSTD_compressBlock_greedy_extDict,
          ZSTD_compressBlock_lazy_extDict,
          ZSTD_compressBlock_lazy2_extDict,
          ZSTD_compressBlock_btlazy2_extDict,
          ZSTD_compressBlock_btopt_extDict,
          ZSTD_compressBlock_btultra_extDict,
          ZSTD_compressBlock_btultra_extDict },
        { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
          ZSTD_compressBlock_fast_dictMatchState,
          ZSTD_compressBlock_doubleFast_dictMatchState,
          ZSTD_compressBlock_greedy_dictMatchState,
          ZSTD_compressBlock_lazy_dictMatchState,
          ZSTD_compressBlock_lazy2_dictMatchState,
          ZSTD_compressBlock_btlazy2_dictMatchState,
          ZSTD_compressBlock_btopt_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState,
          ZSTD_compressBlock_btultra_dictMatchState }
    };
    ZSTD_blockCompressor selectedCompressor;
    /* guarantees strategy values start at 1, so slot 0 stays the default */
    ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);

    assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
    assert(selectedCompressor != NULL);
    return selectedCompressor;
}
2749
/* ZSTD_storeLastLiterals() :
 * Appends the block's trailing literals (the bytes after the final sequence,
 * starting at `anchor`) into the seqStore's literal buffer and advances the
 * write cursor. Caller guarantees `lastLLSize` bytes of remaining capacity. */
static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
                                   const BYTE* anchor, size_t lastLLSize)
{
    memcpy(seqStorePtr->lit, anchor, lastLLSize);
    seqStorePtr->lit += lastLLSize;
}
2756
/* ZSTD_resetSeqStore() :
 * Empties the seqStore for a new block : rewinds the literal and sequence
 * write cursors to their buffer starts and clears the long-length marker. */
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    ssPtr->lit = ssPtr->litStart;
    ssPtr->sequences = ssPtr->sequencesStart;
    ssPtr->longLengthID = 0;
}
2763
/* Outcome of ZSTD_buildSeqStore() : whether the block is worth compressing
 * or should be emitted without even attempting compression. */
typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
2765
/*! ZSTD_buildSeqStore() :
 *  Run sequence selection (match finding) over one block of input and fill
 *  zc->seqStore with the resulting (literals, match) sequences.
 *  Sources of sequences, in priority order : externally-provided sequences
 *  (externSeqStore), long-distance-matching, or the regular block compressor
 *  selected from the configured strategy.
 * @return : ZSTDbss_compress or ZSTDbss_noCompress (members of ZSTD_buildSeqStore_e),
 *           or an error code (checkable with ZSTD_isError()) */
static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
{
    ZSTD_matchState_t* const ms = &zc->blockState.matchState;
    DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize);
    assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
    /* Assert that we have correctly flushed the ctx params into the ms's copy */
    ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
    if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
        /* still consume any pending external sequences covering this input */
        ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch);
        return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */
    }
    ZSTD_resetSeqStore(&(zc->seqStore));
    /* required for optimal parser to read stats from dictionary */
    ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy;
    /* tell the optimal parser how we expect to compress literals */
    ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode;
    /* a gap between an attached dict and the current window is not safe,
     * they must remain adjacent,
     * and when that stops being the case, the dict must be unset */
    assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);

    /* limited update after a very long match : cap how far back table
     * insertion resumes, so a single huge match cannot stall compression */
    {   const BYTE* const base = ms->window.base;
        const BYTE* const istart = (const BYTE*)src;
        const U32 current = (U32)(istart-base);
        if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1));   /* ensure no overflow */
        if (current > ms->nextToUpdate + 384)
            ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
    }

    /* select and store sequences */
    {   ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
        size_t lastLLSize;
        /* carry repcodes over from the previous block as the starting state */
        {   int i;
            for (i = 0; i < ZSTD_REP_NUM; ++i)
                zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
        }
        if (zc->externSeqStore.pos < zc->externSeqStore.size) {
            assert(!zc->appliedParams.ldmParams.enableLdm);
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
        } else if (zc->appliedParams.ldmParams.enableLdm) {
            rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};

            ldmSeqStore.seq = zc->ldmSequences;
            ldmSeqStore.capacity = zc->maxNbLdmSequences;
            /* Updates ldmSeqStore.size */
            FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
                                               &zc->appliedParams.ldmParams,
                                               src, srcSize));
            /* Updates ldmSeqStore.pos */
            lastLLSize =
                ZSTD_ldm_blockCompress(&ldmSeqStore,
                                       ms, &zc->seqStore,
                                       zc->blockState.nextCBlock->rep,
                                       src, srcSize);
            assert(ldmSeqStore.pos == ldmSeqStore.size);
        } else {   /* not long range mode */
            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
            lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
        }
        /* trailing bytes not covered by any sequence are stored as last literals */
        {   const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
            ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
    }   }
    return ZSTDbss_compress;
}
2837
/*! ZSTD_compressBlock_internal() :
 *  Compress one block : build the sequence store, then entropy-code it.
 * @return : compressed size of the block content (without the block header),
 *           0 if the block is not compressible (caller must emit a raw block),
 *           or an error code */
static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                        void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize)
{
    size_t cSize;
    DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate);

    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
        FORWARD_IF_ERROR(bss);
        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
    }

    /* encode sequences and literals */
    cSize = ZSTD_compressSequences(&zc->seqStore,
            &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
            &zc->appliedParams,
            dst, dstCapacity,
            srcSize,
            zc->entropyWorkspace, HUF_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
            zc->bmi2);

out:
    if (!ZSTD_isError(cSize) && cSize != 0) {
        /* confirm repcodes and entropy tables when emitting a compressed block :
         * swap prev/next so the next block starts from this block's state */
        ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
        zc->blockState.prevCBlock = zc->blockState.nextCBlock;
        zc->blockState.nextCBlock = tmp;
    }
    /* We check that dictionaries have offset codes available for the first
     * block. After the first block, the offcode table might not have large
     * enough codes to represent the offsets in the data.
     */
    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;

    return cSize;
}
2876
2877
/*! ZSTD_overflowCorrectIfNeeded() :
 *  When the 32-bit indexes of the match-state window are close to overflowing,
 *  rescale the window and all hash/chain-table indexes by a common correction,
 *  then invalidate any attached dictionary (its indexes are no longer adjacent). */
static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend)
{
    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
        U32 const maxDist = (U32)1 << params->cParams.windowLog;
        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
        U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
        ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
        ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
        /* shift every stored table index down by `correction` to match the window */
        ZSTD_reduceIndex(ms, params, correction);
        if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
        else ms->nextToUpdate -= correction;
        /* invalidate dictionaries on overflow correction */
        ms->loadedDictEnd = 0;
        ms->dictMatchState = NULL;
    }
}
2895
2896
/*! ZSTD_compress_frameChunk() :
 *  Compress a chunk of data into one or multiple blocks.
 *  All blocks will be terminated, all input will be consumed.
 *  Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
 *  Frame is supposed already started (header already produced)
 * @return : compressed size, or an error code
 */
static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize,
                                     U32 lastFrameChunk)
{
    size_t blockSize = cctx->blockSize;
    size_t remaining = srcSize;
    const BYTE* ip = (const BYTE*)src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog;
    assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX);

    DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize);
    /* fold this chunk into the running frame checksum before compressing */
    if (cctx->appliedParams.fParams.checksumFlag && srcSize)
        XXH64_update(&cctx->xxhState, src, srcSize);

    while (remaining) {
        ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
        /* only the final block of the final chunk carries the lastBlock flag */
        U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);

        RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE,
                        dstSize_tooSmall,
                        "not enough space to store compressed block");
        if (remaining < blockSize) blockSize = remaining;

        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize);
        ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);

        /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
        if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;

        {   size_t cSize = ZSTD_compressBlock_internal(cctx,
                                op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
                                ip, blockSize);
            FORWARD_IF_ERROR(cSize);

            if (cSize == 0) {  /* block is not compressible */
                cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
                FORWARD_IF_ERROR(cSize);
            } else {
                /* prepend the 3-byte block header : lastBlock bit, type, size */
                U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
                MEM_writeLE24(op, cBlockHeader24);
                cSize += ZSTD_blockHeaderSize;
            }

            ip += blockSize;
            assert(remaining >= blockSize);
            remaining -= blockSize;
            op += cSize;
            assert(dstCapacity >= cSize);
            dstCapacity -= cSize;
            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
                        (unsigned)cSize);
    }   }

    if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
    return (size_t)(op-ostart);
}
2963
2964
/*! ZSTD_writeFrameHeader() :
 *  Write a zstd frame header into `dst` : optional magic number,
 *  frame header descriptor byte, optional window descriptor,
 *  optional dictID (0/1/2/4 bytes), optional frame content size (0/1/2/4/8 bytes).
 * @return : number of bytes written, or an error code */
static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
                                    ZSTD_CCtx_params params, U64 pledgedSrcSize, U32 dictID)
{   BYTE* const op = (BYTE*)dst;
    U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536);   /* 0-3 */
    U32 const dictIDSizeCode = params.fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength;   /* 0-3 */
    U32 const checksumFlag = params.fParams.checksumFlag>0;
    U32 const windowSize = (U32)1 << params.cParams.windowLog;
    /* singleSegment : whole content fits in one window => no window descriptor byte */
    U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize);
    BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
    U32 const fcsCode = params.fParams.contentSizeFlag ?
                     (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
    BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
    size_t pos=0;

    assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall);
    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
                !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);

    if (params.format == ZSTD_f_zstd1) {
        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
        pos = 4;
    }
    op[pos++] = frameHeaderDescriptionByte;
    if (!singleSegment) op[pos++] = windowLogByte;
    switch(dictIDSizeCode)
    {
        default: assert(0); /* impossible */
        case 0 : break;
        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
    }
    switch(fcsCode)
    {
        default: assert(0); /* impossible */
        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
    }
    return pos;
}
3008
3009/* ZSTD_writeLastEmptyBlock() :
3010 * output an empty Block with end-of-frame mark to complete a frame
3011 * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
Scott Baker8487c5d2019-10-18 12:49:46 -07003012 * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
Scott Baker2d897982019-09-24 11:50:08 -07003013 */
3014size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
3015{
Scott Baker8487c5d2019-10-18 12:49:46 -07003016 RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall);
Scott Baker2d897982019-09-24 11:50:08 -07003017 { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */
3018 MEM_writeLE24(dst, cBlockHeader24);
3019 return ZSTD_blockHeaderSize;
3020 }
3021}
3022
3023size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
3024{
Scott Baker8487c5d2019-10-18 12:49:46 -07003025 RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong);
3026 RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
3027 parameter_unsupported);
Scott Baker2d897982019-09-24 11:50:08 -07003028 cctx->externSeqStore.seq = seq;
3029 cctx->externSeqStore.size = nbSeq;
3030 cctx->externSeqStore.capacity = nbSeq;
3031 cctx->externSeqStore.pos = 0;
3032 return 0;
3033}
3034
3035
/*! ZSTD_compressContinue_internal() :
 *  Core streaming entry point : optionally writes the frame header on first call,
 *  updates the match-state window with the new input, then compresses it either
 *  as a frame chunk (frame==1) or as a single block (frame==0).
 * @return : number of bytes written into dst (header + compressed data),
 *           or an error code */
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
                              void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize,
                               U32 frame, U32 lastFrameChunk)
{
    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
    size_t fhSize = 0;

    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
                cctx->stage, (unsigned)srcSize);
    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
                    "missing init (ZSTD_compressBegin)");

    if (frame && (cctx->stage==ZSTDcs_init)) {
        /* first call in frame mode : emit the frame header */
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams,
                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
        FORWARD_IF_ERROR(fhSize);
        assert(fhSize <= dstCapacity);
        dstCapacity -= fhSize;
        dst = (char*)dst + fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */

    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
        /* non-contiguous input : restart table insertion from the new segment */
        ms->nextToUpdate = ms->window.dictLimit;
    }
    if (cctx->appliedParams.ldmParams.enableLdm) {
        /* LDM keeps its own window over the same input */
        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
    }

    if (!frame) {
        /* overflow check and correction for block mode */
        ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize);
    }

    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
    {   size_t const cSize = frame ?
                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
        FORWARD_IF_ERROR(cSize);
        cctx->consumedSrcSize += srcSize;
        cctx->producedCSize += (cSize + fhSize);
        assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
        if (cctx->pledgedSrcSizePlusOne != 0) {  /* control src size */
            ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
            RETURN_ERROR_IF(
                cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne,
                srcSize_wrong,
                "error : pledgedSrcSize = %u, while realSrcSize >= %u",
                (unsigned)cctx->pledgedSrcSizePlusOne-1,
                (unsigned)cctx->consumedSrcSize);
        }
        return cSize + fhSize;
    }
}
3093
3094size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
3095 void* dst, size_t dstCapacity,
3096 const void* src, size_t srcSize)
3097{
Scott Baker8487c5d2019-10-18 12:49:46 -07003098 DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize);
Scott Baker2d897982019-09-24 11:50:08 -07003099 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */);
3100}
3101
3102
3103size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx)
3104{
3105 ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams;
3106 assert(!ZSTD_checkCParams(cParams));
3107 return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog);
3108}
3109
3110size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
3111{
3112 size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
Scott Baker8487c5d2019-10-18 12:49:46 -07003113 RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong);
3114
Scott Baker2d897982019-09-24 11:50:08 -07003115 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
3116}
3117
/*! ZSTD_loadDictionaryContent() :
 *  Index the dictionary content into the match-state tables so the
 *  compressor can reference it, processing in chunks of at most
 *  ZSTD_CHUNKSIZE_MAX to bound index-overflow exposure.
 * @return : 0, or an error code
 */
static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                                        ZSTD_CCtx_params const* params,
                                        const void* src, size_t srcSize,
                                        ZSTD_dictTableLoadMethod_e dtlm)
{
    const BYTE* ip = (const BYTE*) src;
    const BYTE* const iend = ip + srcSize;

    ZSTD_window_update(&ms->window, src, srcSize);
    ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);

    /* Assert that the ms params match the params we're being given */
    ZSTD_assertEqualCParams(params->cParams, ms->cParams);

    /* dictionaries too small to seed the tables are only windowed, not indexed */
    if (srcSize <= HASH_READ_SIZE) return 0;

    while (iend - ip > HASH_READ_SIZE) {
        size_t const remaining = iend - ip;
        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
        const BYTE* const ichunk = ip + chunk;

        ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk);

        /* each strategy family fills its own flavor of search structure */
        switch(params->cParams.strategy)
        {
        case ZSTD_fast:
            ZSTD_fillHashTable(ms, ichunk, dtlm);
            break;
        case ZSTD_dfast:
            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
            break;

        case ZSTD_greedy:
        case ZSTD_lazy:
        case ZSTD_lazy2:
            if (chunk >= HASH_READ_SIZE)
                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
            break;

        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
        case ZSTD_btopt:
        case ZSTD_btultra:
        case ZSTD_btultra2:
            if (chunk >= HASH_READ_SIZE)
                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
            break;

        default:
            assert(0);  /* not possible : not a valid strategy id */
        }

        ip = ichunk;
    }

    ms->nextToUpdate = (U32)(iend - ms->window.base);
    return 0;
}
3178
3179
3180/* Dictionaries that assign zero probability to symbols that show up causes problems
3181 when FSE encoding. Refuse dictionaries that assign zero probability to symbols
3182 that we may encounter during compression.
3183 NOTE: This behavior is not standard and could be improved in the future. */
3184static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
3185 U32 s;
Scott Baker8487c5d2019-10-18 12:49:46 -07003186 RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted);
Scott Baker2d897982019-09-24 11:50:08 -07003187 for (s = 0; s <= maxSymbolValue; ++s) {
Scott Baker8487c5d2019-10-18 12:49:46 -07003188 RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted);
Scott Baker2d897982019-09-24 11:50:08 -07003189 }
3190 return 0;
3191}
3192
3193
/* Dictionary format :
 * See :
 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format
 */
/*! ZSTD_loadZstdDictionary() :
 *  Parse a full zstd dictionary : Huffman literal table, offcode /
 *  matchlength / litlength FSE tables, the 3 starting repcodes, then
 *  index the remaining raw content.
 * @return : dictID, or an error code
 *  assumptions : magic number supposed already checked
 *                dictSize supposed > 8
 */
static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
                                      ZSTD_matchState_t* ms,
                                      ZSTD_CCtx_params const* params,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictTableLoadMethod_e dtlm,
                                      void* workspace)
{
    const BYTE* dictPtr = (const BYTE*)dict;
    const BYTE* const dictEnd = dictPtr + dictSize;
    short offcodeNCount[MaxOff+1];
    unsigned offcodeMaxValue = MaxOff;
    size_t dictID;

    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
    assert(dictSize > 8);
    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);

    dictPtr += 4;   /* skip magic number */
    dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
    dictPtr += 4;

    /* literal Huffman table : must cover the full byte range */
    {   unsigned maxSymbolValue = 255;
        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted);
        dictPtr += hufHeaderSize;
    }

    /* offset codes FSE table */
    {   unsigned offcodeLog;
        size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted);
        /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
        /* fill all offset symbols to avoid garbage at end of table */
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.offcodeCTable,
                offcodeNCount, MaxOff, offcodeLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += offcodeHeaderSize;
    }

    /* match-length FSE table */
    {   short matchlengthNCount[MaxML+1];
        unsigned matchlengthMaxValue = MaxML, matchlengthLog;
        size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted);
        /* Every match length code must have non-zero probability */
        FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.matchlengthCTable,
                matchlengthNCount, matchlengthMaxValue, matchlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += matchlengthHeaderSize;
    }

    /* literal-length FSE table */
    {   short litlengthNCount[MaxLL+1];
        unsigned litlengthMaxValue = MaxLL, litlengthLog;
        size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
        RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted);
        RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted);
        /* Every literal length code must have non-zero probability */
        FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
        RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp(
                bs->entropy.fse.litlengthCTable,
                litlengthNCount, litlengthMaxValue, litlengthLog,
                workspace, HUF_WORKSPACE_SIZE)),
            dictionary_corrupted);
        dictPtr += litlengthHeaderSize;
    }

    /* 3 starting repcodes (4 bytes each, little-endian) */
    RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted);
    bs->rep[0] = MEM_readLE32(dictPtr+0);
    bs->rep[1] = MEM_readLE32(dictPtr+4);
    bs->rep[2] = MEM_readLE32(dictPtr+8);
    dictPtr += 12;

    {   size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
        U32 offcodeMax = MaxOff;
        if (dictContentSize <= ((U32)-1) - 128 KB) {
            U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */
            offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */
        }
        /* All offset values <= dictContentSize + 128 KB must be representable */
        FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
        /* All repCodes must be <= dictContentSize and != 0*/
        {   U32 u;
            for (u=0; u<3; u++) {
                RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted);
                RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted);
        }   }

        /* all tables parsed successfully : mark them directly reusable */
        bs->entropy.huf.repeatMode = HUF_repeat_valid;
        bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
        bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
        bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
        return dictID;
    }
}
3304
/** ZSTD_compress_insertDictionary() :
 *  Dispatch dictionary loading according to dictContentType :
 *  raw content is only indexed; a full zstd dictionary (magic-prefixed)
 *  also seeds the entropy tables and repcodes.
 * @return : dictID, or an error code */
static size_t
ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
                               ZSTD_matchState_t* ms,
                         const ZSTD_CCtx_params* params,
                         const void* dict, size_t dictSize,
                               ZSTD_dictContentType_e dictContentType,
                               ZSTD_dictTableLoadMethod_e dtlm,
                               void* workspace)
{
    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
    /* tiny dictionaries cannot even carry the magic + dictID : ignore them */
    if ((dict==NULL) || (dictSize<=8)) return 0;

    ZSTD_reset_compressedBlockState(bs);

    /* dict restricted modes */
    if (dictContentType == ZSTD_dct_rawContent)
        return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);

    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
        if (dictContentType == ZSTD_dct_auto) {
            DEBUGLOG(4, "raw content dictionary detected");
            return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
        }
        /* caller required a full dictionary but the magic is absent */
        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong);
        assert(0);   /* impossible */
    }

    /* dict as full zstd dictionary */
    return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
}
3337
/*! ZSTD_compressBegin_internal() :
 *  Initialize the context for a new compression job : either reset from a
 *  prepared CDict, or reset internally then load the provided dictionary.
 *  `params` must already be validated; at most one of dict / cdict may be set.
 * @return : 0, or an error code */
static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    ZSTD_CCtx_params params, U64 pledgedSrcSize,
                                    ZSTD_buffered_policy_e zbuff)
{
    DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
    /* params are supposed to be fully validated at this point */
    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */

    if (cdict && cdict->dictContentSize>0) {
        return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
    }

    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                     ZSTDcrp_continue, zbuff) );
    {   size_t const dictID = ZSTD_compress_insertDictionary(
                cctx->blockState.prevCBlock, &cctx->blockState.matchState,
                &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
        FORWARD_IF_ERROR(dictID);
        assert(dictID <= UINT_MAX);
        cctx->dictID = (U32)dictID;
    }
    return 0;
}
3368
/*! ZSTD_compressBegin_advanced_internal() :
 *  Validate compression parameters, then start a non-buffered
 *  compression job via ZSTD_compressBegin_internal().
 * @return : 0, or an error code */
size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
                                    const void* dict, size_t dictSize,
                                    ZSTD_dictContentType_e dictContentType,
                                    ZSTD_dictTableLoadMethod_e dtlm,
                                    const ZSTD_CDict* cdict,
                                    ZSTD_CCtx_params params,
                                    unsigned long long pledgedSrcSize)
{
    DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog);
    /* compression parameters verification and optimization */
    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
    return ZSTD_compressBegin_internal(cctx,
                                       dict, dictSize, dictContentType, dtlm,
                                       cdict,
                                       params, pledgedSrcSize,
                                       ZSTDb_not_buffered);
}
3386
3387/*! ZSTD_compressBegin_advanced() :
3388* @return : 0, or an error code */
3389size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
3390 const void* dict, size_t dictSize,
3391 ZSTD_parameters params, unsigned long long pledgedSrcSize)
3392{
3393 ZSTD_CCtx_params const cctxParams =
3394 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3395 return ZSTD_compressBegin_advanced_internal(cctx,
Scott Baker8487c5d2019-10-18 12:49:46 -07003396 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
Scott Baker2d897982019-09-24 11:50:08 -07003397 NULL /*cdict*/,
3398 cctxParams, pledgedSrcSize);
3399}
3400
3401size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
3402{
3403 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize);
3404 ZSTD_CCtx_params const cctxParams =
3405 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
Scott Baker8487c5d2019-10-18 12:49:46 -07003406 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize);
3407 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
Scott Baker2d897982019-09-24 11:50:08 -07003408 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
3409}
3410
3411size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel)
3412{
3413 return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel);
3414}
3415
3416
/*! ZSTD_writeEpilogue() :
 *  Ends a frame : writes the frame header if nothing was emitted yet
 *  (empty frame), a final empty "last" block if the last data block did
 *  not carry the flag, and the optional XXH64 content checksum.
 * @return : nb of bytes written into dst (or an error code) */
static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
    BYTE* const ostart = (BYTE*)dst;
    BYTE* op = ostart;
    size_t fhSize = 0;

    DEBUGLOG(4, "ZSTD_writeEpilogue");
    RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");

    /* special case : empty frame */
    if (cctx->stage == ZSTDcs_init) {
        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0);
        FORWARD_IF_ERROR(fhSize);
        dstCapacity -= fhSize;
        op += fhSize;
        cctx->stage = ZSTDcs_ongoing;
    }

    if (cctx->stage != ZSTDcs_ending) {
        /* write one last empty block, make it the "last" block */
        U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
        MEM_writeLE32(op, cBlockHeader24);
        op += ZSTD_blockHeaderSize;
        dstCapacity -= ZSTD_blockHeaderSize;
    }

    if (cctx->appliedParams.fParams.checksumFlag) {
        U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall);
        DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum);
        MEM_writeLE32(op, checksum);
        op += 4;
    }

    cctx->stage = ZSTDcs_created;  /* return to "created but no init" status */
    return op-ostart;
}
3458
3459size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
3460 void* dst, size_t dstCapacity,
3461 const void* src, size_t srcSize)
3462{
3463 size_t endResult;
3464 size_t const cSize = ZSTD_compressContinue_internal(cctx,
3465 dst, dstCapacity, src, srcSize,
3466 1 /* frame mode */, 1 /* last chunk */);
Scott Baker8487c5d2019-10-18 12:49:46 -07003467 FORWARD_IF_ERROR(cSize);
Scott Baker2d897982019-09-24 11:50:08 -07003468 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
Scott Baker8487c5d2019-10-18 12:49:46 -07003469 FORWARD_IF_ERROR(endResult);
3470 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
3471 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
3472 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
Scott Baker2d897982019-09-24 11:50:08 -07003473 DEBUGLOG(4, "end of frame : controlling src size");
Scott Baker8487c5d2019-10-18 12:49:46 -07003474 RETURN_ERROR_IF(
3475 cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1,
3476 srcSize_wrong,
3477 "error : pledgedSrcSize = %u, while realSrcSize = %u",
3478 (unsigned)cctx->pledgedSrcSizePlusOne-1,
3479 (unsigned)cctx->consumedSrcSize);
3480 }
Scott Baker2d897982019-09-24 11:50:08 -07003481 return cSize + endResult;
3482}
3483
3484
3485static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
Scott Baker8487c5d2019-10-18 12:49:46 -07003486 void* dst, size_t dstCapacity,
3487 const void* src, size_t srcSize,
3488 const void* dict,size_t dictSize,
3489 ZSTD_parameters params)
Scott Baker2d897982019-09-24 11:50:08 -07003490{
3491 ZSTD_CCtx_params const cctxParams =
3492 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3493 DEBUGLOG(4, "ZSTD_compress_internal");
3494 return ZSTD_compress_advanced_internal(cctx,
Scott Baker8487c5d2019-10-18 12:49:46 -07003495 dst, dstCapacity,
3496 src, srcSize,
3497 dict, dictSize,
3498 cctxParams);
Scott Baker2d897982019-09-24 11:50:08 -07003499}
3500
Scott Baker8487c5d2019-10-18 12:49:46 -07003501size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
Scott Baker2d897982019-09-24 11:50:08 -07003502 void* dst, size_t dstCapacity,
3503 const void* src, size_t srcSize,
3504 const void* dict,size_t dictSize,
3505 ZSTD_parameters params)
3506{
3507 DEBUGLOG(4, "ZSTD_compress_advanced");
Scott Baker8487c5d2019-10-18 12:49:46 -07003508 FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams));
3509 return ZSTD_compress_internal(cctx,
3510 dst, dstCapacity,
3511 src, srcSize,
3512 dict, dictSize,
3513 params);
Scott Baker2d897982019-09-24 11:50:08 -07003514}
3515
3516/* Internal */
3517size_t ZSTD_compress_advanced_internal(
3518 ZSTD_CCtx* cctx,
3519 void* dst, size_t dstCapacity,
3520 const void* src, size_t srcSize,
3521 const void* dict,size_t dictSize,
3522 ZSTD_CCtx_params params)
3523{
Scott Baker8487c5d2019-10-18 12:49:46 -07003524 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize);
3525 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
3526 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3527 params, srcSize, ZSTDb_not_buffered) );
Scott Baker2d897982019-09-24 11:50:08 -07003528 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
3529}
3530
Scott Baker8487c5d2019-10-18 12:49:46 -07003531size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
3532 void* dst, size_t dstCapacity,
3533 const void* src, size_t srcSize,
3534 const void* dict, size_t dictSize,
3535 int compressionLevel)
Scott Baker2d897982019-09-24 11:50:08 -07003536{
Scott Baker8487c5d2019-10-18 12:49:46 -07003537 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
Scott Baker2d897982019-09-24 11:50:08 -07003538 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3539 assert(params.fParams.contentSizeFlag == 1);
Scott Baker2d897982019-09-24 11:50:08 -07003540 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
3541}
3542
Scott Baker8487c5d2019-10-18 12:49:46 -07003543size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
3544 void* dst, size_t dstCapacity,
3545 const void* src, size_t srcSize,
3546 int compressionLevel)
Scott Baker2d897982019-09-24 11:50:08 -07003547{
Scott Baker8487c5d2019-10-18 12:49:46 -07003548 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize);
3549 assert(cctx != NULL);
Scott Baker2d897982019-09-24 11:50:08 -07003550 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
3551}
3552
Scott Baker8487c5d2019-10-18 12:49:46 -07003553size_t ZSTD_compress(void* dst, size_t dstCapacity,
3554 const void* src, size_t srcSize,
3555 int compressionLevel)
Scott Baker2d897982019-09-24 11:50:08 -07003556{
3557 size_t result;
3558 ZSTD_CCtx ctxBody;
Scott Baker8487c5d2019-10-18 12:49:46 -07003559 ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
Scott Baker2d897982019-09-24 11:50:08 -07003560 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
Scott Baker8487c5d2019-10-18 12:49:46 -07003561 ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */
Scott Baker2d897982019-09-24 11:50:08 -07003562 return result;
3563}
3564
3565
3566/* ===== Dictionary API ===== */
3567
3568/*! ZSTD_estimateCDictSize_advanced() :
3569 * Estimate amount of memory that will be needed to create a dictionary with following arguments */
3570size_t ZSTD_estimateCDictSize_advanced(
3571 size_t dictSize, ZSTD_compressionParameters cParams,
3572 ZSTD_dictLoadMethod_e dictLoadMethod)
3573{
Scott Baker8487c5d2019-10-18 12:49:46 -07003574 DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
Scott Baker2d897982019-09-24 11:50:08 -07003575 return sizeof(ZSTD_CDict) + HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
3576 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
3577}
3578
3579size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel)
3580{
3581 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3582 return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy);
3583}
3584
3585size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
3586{
3587 if (cdict==NULL) return 0; /* support sizeof on NULL */
Scott Baker8487c5d2019-10-18 12:49:46 -07003588 DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict));
Scott Baker2d897982019-09-24 11:50:08 -07003589 return cdict->workspaceSize + (cdict->dictBuffer ? cdict->dictContentSize : 0) + sizeof(*cdict);
3590}
3591
3592static size_t ZSTD_initCDict_internal(
3593 ZSTD_CDict* cdict,
3594 const void* dictBuffer, size_t dictSize,
3595 ZSTD_dictLoadMethod_e dictLoadMethod,
3596 ZSTD_dictContentType_e dictContentType,
3597 ZSTD_compressionParameters cParams)
3598{
Scott Baker8487c5d2019-10-18 12:49:46 -07003599 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType);
Scott Baker2d897982019-09-24 11:50:08 -07003600 assert(!ZSTD_checkCParams(cParams));
Scott Baker8487c5d2019-10-18 12:49:46 -07003601 cdict->matchState.cParams = cParams;
Scott Baker2d897982019-09-24 11:50:08 -07003602 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
3603 cdict->dictBuffer = NULL;
3604 cdict->dictContent = dictBuffer;
3605 } else {
3606 void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem);
3607 cdict->dictBuffer = internalBuffer;
3608 cdict->dictContent = internalBuffer;
Scott Baker8487c5d2019-10-18 12:49:46 -07003609 RETURN_ERROR_IF(!internalBuffer, memory_allocation);
Scott Baker2d897982019-09-24 11:50:08 -07003610 memcpy(internalBuffer, dictBuffer, dictSize);
3611 }
3612 cdict->dictContentSize = dictSize;
3613
3614 /* Reset the state to no dictionary */
3615 ZSTD_reset_compressedBlockState(&cdict->cBlockState);
Scott Baker8487c5d2019-10-18 12:49:46 -07003616 { void* const end = ZSTD_reset_matchState(&cdict->matchState,
3617 (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32,
3618 &cParams,
3619 ZSTDcrp_continue, ZSTD_resetTarget_CDict);
Scott Baker2d897982019-09-24 11:50:08 -07003620 assert(end == (char*)cdict->workspace + cdict->workspaceSize);
3621 (void)end;
3622 }
3623 /* (Maybe) load the dictionary
3624 * Skips loading the dictionary if it is <= 8 bytes.
3625 */
3626 { ZSTD_CCtx_params params;
3627 memset(&params, 0, sizeof(params));
3628 params.compressionLevel = ZSTD_CLEVEL_DEFAULT;
3629 params.fParams.contentSizeFlag = 1;
3630 params.cParams = cParams;
3631 { size_t const dictID = ZSTD_compress_insertDictionary(
3632 &cdict->cBlockState, &cdict->matchState, &params,
3633 cdict->dictContent, cdict->dictContentSize,
Scott Baker8487c5d2019-10-18 12:49:46 -07003634 dictContentType, ZSTD_dtlm_full, cdict->workspace);
3635 FORWARD_IF_ERROR(dictID);
Scott Baker2d897982019-09-24 11:50:08 -07003636 assert(dictID <= (size_t)(U32)-1);
3637 cdict->dictID = (U32)dictID;
3638 }
3639 }
3640
3641 return 0;
3642}
3643
3644ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
3645 ZSTD_dictLoadMethod_e dictLoadMethod,
3646 ZSTD_dictContentType_e dictContentType,
3647 ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
3648{
Scott Baker8487c5d2019-10-18 12:49:46 -07003649 DEBUGLOG(3, "ZSTD_createCDict_advanced, mode %u", (unsigned)dictContentType);
Scott Baker2d897982019-09-24 11:50:08 -07003650 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
3651
3652 { ZSTD_CDict* const cdict = (ZSTD_CDict*)ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
3653 size_t const workspaceSize = HUF_WORKSPACE_SIZE + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
3654 void* const workspace = ZSTD_malloc(workspaceSize, customMem);
3655
3656 if (!cdict || !workspace) {
3657 ZSTD_free(cdict, customMem);
3658 ZSTD_free(workspace, customMem);
3659 return NULL;
3660 }
3661 cdict->customMem = customMem;
3662 cdict->workspace = workspace;
3663 cdict->workspaceSize = workspaceSize;
3664 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3665 dictBuffer, dictSize,
3666 dictLoadMethod, dictContentType,
3667 cParams) )) {
3668 ZSTD_freeCDict(cdict);
3669 return NULL;
3670 }
3671
3672 return cdict;
3673 }
3674}
3675
3676ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
3677{
3678 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3679 return ZSTD_createCDict_advanced(dict, dictSize,
3680 ZSTD_dlm_byCopy, ZSTD_dct_auto,
3681 cParams, ZSTD_defaultCMem);
3682}
3683
3684ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel)
3685{
3686 ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, 0, dictSize);
3687 return ZSTD_createCDict_advanced(dict, dictSize,
3688 ZSTD_dlm_byRef, ZSTD_dct_auto,
3689 cParams, ZSTD_defaultCMem);
3690}
3691
3692size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
3693{
3694 if (cdict==NULL) return 0; /* support free on NULL */
3695 { ZSTD_customMem const cMem = cdict->customMem;
3696 ZSTD_free(cdict->workspace, cMem);
3697 ZSTD_free(cdict->dictBuffer, cMem);
3698 ZSTD_free(cdict, cMem);
3699 return 0;
3700 }
3701}
3702
3703/*! ZSTD_initStaticCDict_advanced() :
3704 * Generate a digested dictionary in provided memory area.
3705 * workspace: The memory area to emplace the dictionary into.
3706 * Provided pointer must 8-bytes aligned.
3707 * It must outlive dictionary usage.
3708 * workspaceSize: Use ZSTD_estimateCDictSize()
3709 * to determine how large workspace must be.
3710 * cParams : use ZSTD_getCParams() to transform a compression level
3711 * into its relevants cParams.
3712 * @return : pointer to ZSTD_CDict*, or NULL if error (size too small)
3713 * Note : there is no corresponding "free" function.
3714 * Since workspace was allocated externally, it must be freed externally.
3715 */
3716const ZSTD_CDict* ZSTD_initStaticCDict(
3717 void* workspace, size_t workspaceSize,
3718 const void* dict, size_t dictSize,
3719 ZSTD_dictLoadMethod_e dictLoadMethod,
3720 ZSTD_dictContentType_e dictContentType,
3721 ZSTD_compressionParameters cParams)
3722{
3723 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
3724 size_t const neededSize = sizeof(ZSTD_CDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize)
3725 + HUF_WORKSPACE_SIZE + matchStateSize;
3726 ZSTD_CDict* const cdict = (ZSTD_CDict*) workspace;
3727 void* ptr;
3728 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
3729 DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u",
Scott Baker8487c5d2019-10-18 12:49:46 -07003730 (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize));
Scott Baker2d897982019-09-24 11:50:08 -07003731 if (workspaceSize < neededSize) return NULL;
3732
3733 if (dictLoadMethod == ZSTD_dlm_byCopy) {
3734 memcpy(cdict+1, dict, dictSize);
3735 dict = cdict+1;
3736 ptr = (char*)workspace + sizeof(ZSTD_CDict) + dictSize;
3737 } else {
3738 ptr = cdict+1;
3739 }
3740 cdict->workspace = ptr;
3741 cdict->workspaceSize = HUF_WORKSPACE_SIZE + matchStateSize;
3742
3743 if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
3744 dict, dictSize,
3745 ZSTD_dlm_byRef, dictContentType,
3746 cParams) ))
3747 return NULL;
3748
3749 return cdict;
3750}
3751
3752ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
3753{
3754 assert(cdict != NULL);
Scott Baker8487c5d2019-10-18 12:49:46 -07003755 return cdict->matchState.cParams;
Scott Baker2d897982019-09-24 11:50:08 -07003756}
3757
3758/* ZSTD_compressBegin_usingCDict_advanced() :
3759 * cdict must be != NULL */
3760size_t ZSTD_compressBegin_usingCDict_advanced(
3761 ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
3762 ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
3763{
3764 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
Scott Baker8487c5d2019-10-18 12:49:46 -07003765 RETURN_ERROR_IF(cdict==NULL, dictionary_wrong);
Scott Baker2d897982019-09-24 11:50:08 -07003766 { ZSTD_CCtx_params params = cctx->requestedParams;
3767 params.cParams = ZSTD_getCParamsFromCDict(cdict);
3768 /* Increase window log to fit the entire dictionary and source if the
3769 * source size is known. Limit the increase to 19, which is the
3770 * window log for compression level 1 with the largest source size.
3771 */
3772 if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) {
3773 U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19);
3774 U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1;
3775 params.cParams.windowLog = MAX(params.cParams.windowLog, limitedSrcLog);
3776 }
3777 params.fParams = fParams;
3778 return ZSTD_compressBegin_internal(cctx,
Scott Baker8487c5d2019-10-18 12:49:46 -07003779 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
Scott Baker2d897982019-09-24 11:50:08 -07003780 cdict,
3781 params, pledgedSrcSize,
3782 ZSTDb_not_buffered);
3783 }
3784}
3785
3786/* ZSTD_compressBegin_usingCDict() :
3787 * pledgedSrcSize=0 means "unknown"
3788 * if pledgedSrcSize>0, it will enable contentSizeFlag */
3789size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
3790{
3791 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
3792 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
Scott Baker8487c5d2019-10-18 12:49:46 -07003793 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
Scott Baker2d897982019-09-24 11:50:08 -07003794}
3795
3796size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
3797 void* dst, size_t dstCapacity,
3798 const void* src, size_t srcSize,
3799 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
3800{
Scott Baker8487c5d2019-10-18 12:49:46 -07003801 FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */
Scott Baker2d897982019-09-24 11:50:08 -07003802 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
3803}
3804
3805/*! ZSTD_compress_usingCDict() :
3806 * Compression using a digested Dictionary.
3807 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
3808 * Note that compression parameters are decided at CDict creation time
3809 * while frame parameters are hardcoded */
3810size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
3811 void* dst, size_t dstCapacity,
3812 const void* src, size_t srcSize,
3813 const ZSTD_CDict* cdict)
3814{
3815 ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
3816 return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
3817}
3818
3819
3820
3821/* ******************************************************************
3822* Streaming
3823********************************************************************/
3824
3825ZSTD_CStream* ZSTD_createCStream(void)
3826{
3827 DEBUGLOG(3, "ZSTD_createCStream");
3828 return ZSTD_createCStream_advanced(ZSTD_defaultCMem);
3829}
3830
3831ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize)
3832{
3833 return ZSTD_initStaticCCtx(workspace, workspaceSize);
3834}
3835
3836ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
3837{ /* CStream and CCtx are now same object */
3838 return ZSTD_createCCtx_advanced(customMem);
3839}
3840
3841size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
3842{
3843 return ZSTD_freeCCtx(zcs); /* same object */
3844}
3845
3846
3847
3848/*====== Initialization ======*/
3849
3850size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; }
3851
3852size_t ZSTD_CStreamOutSize(void)
3853{
3854 return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ;
3855}
3856
3857static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
3858 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
3859 const ZSTD_CDict* const cdict,
Scott Baker8487c5d2019-10-18 12:49:46 -07003860 ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
Scott Baker2d897982019-09-24 11:50:08 -07003861{
Scott Baker8487c5d2019-10-18 12:49:46 -07003862 DEBUGLOG(4, "ZSTD_resetCStream_internal");
3863 /* Finalize the compression parameters */
3864 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
Scott Baker2d897982019-09-24 11:50:08 -07003865 /* params are supposed to be fully validated at this point */
3866 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
3867 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3868
Scott Baker8487c5d2019-10-18 12:49:46 -07003869 FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx,
3870 dict, dictSize, dictContentType, ZSTD_dtlm_fast,
Scott Baker2d897982019-09-24 11:50:08 -07003871 cdict,
3872 params, pledgedSrcSize,
3873 ZSTDb_buffered) );
3874
3875 cctx->inToCompress = 0;
3876 cctx->inBuffPos = 0;
3877 cctx->inBuffTarget = cctx->blockSize
3878 + (cctx->blockSize == pledgedSrcSize); /* for small input: avoid automatic flush on reaching end of block, since it would require to add a 3-bytes null block to end frame */
3879 cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0;
3880 cctx->streamStage = zcss_load;
3881 cctx->frameEnded = 0;
3882 return 0; /* ready to go */
3883}
3884
3885/* ZSTD_resetCStream():
3886 * pledgedSrcSize == 0 means "unknown" */
Scott Baker8487c5d2019-10-18 12:49:46 -07003887size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss)
Scott Baker2d897982019-09-24 11:50:08 -07003888{
Scott Baker8487c5d2019-10-18 12:49:46 -07003889 /* temporary : 0 interpreted as "unknown" during transition period.
3890 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
3891 * 0 will be interpreted as "empty" in the future.
3892 */
3893 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
3894 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize);
3895 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3896 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3897 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003898}
3899
3900/*! ZSTD_initCStream_internal() :
3901 * Note : for lib/compress only. Used by zstdmt_compress.c.
3902 * Assumption 1 : params are valid
3903 * Assumption 2 : either dict, or cdict, is defined, not both */
3904size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
3905 const void* dict, size_t dictSize, const ZSTD_CDict* cdict,
3906 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
3907{
3908 DEBUGLOG(4, "ZSTD_initCStream_internal");
Scott Baker8487c5d2019-10-18 12:49:46 -07003909 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3910 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
Scott Baker2d897982019-09-24 11:50:08 -07003911 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
Scott Baker8487c5d2019-10-18 12:49:46 -07003912 zcs->requestedParams = params;
Scott Baker2d897982019-09-24 11:50:08 -07003913 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
Scott Baker8487c5d2019-10-18 12:49:46 -07003914 if (dict) {
3915 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
Scott Baker2d897982019-09-24 11:50:08 -07003916 } else {
Scott Baker8487c5d2019-10-18 12:49:46 -07003917 /* Dictionary is cleared if !cdict */
3918 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
Scott Baker2d897982019-09-24 11:50:08 -07003919 }
Scott Baker8487c5d2019-10-18 12:49:46 -07003920 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003921}
3922
3923/* ZSTD_initCStream_usingCDict_advanced() :
3924 * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */
3925size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
3926 const ZSTD_CDict* cdict,
3927 ZSTD_frameParameters fParams,
3928 unsigned long long pledgedSrcSize)
3929{
3930 DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced");
Scott Baker8487c5d2019-10-18 12:49:46 -07003931 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3932 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3933 zcs->requestedParams.fParams = fParams;
3934 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
3935 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003936}
3937
3938/* note : cdict must outlive compression session */
3939size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
3940{
Scott Baker2d897982019-09-24 11:50:08 -07003941 DEBUGLOG(4, "ZSTD_initCStream_usingCDict");
Scott Baker8487c5d2019-10-18 12:49:46 -07003942 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3943 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) );
3944 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003945}
3946
3947
3948/* ZSTD_initCStream_advanced() :
3949 * pledgedSrcSize must be exact.
3950 * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
3951 * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */
3952size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
3953 const void* dict, size_t dictSize,
Scott Baker8487c5d2019-10-18 12:49:46 -07003954 ZSTD_parameters params, unsigned long long pss)
Scott Baker2d897982019-09-24 11:50:08 -07003955{
Scott Baker8487c5d2019-10-18 12:49:46 -07003956 /* for compatibility with older programs relying on this behavior.
3957 * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN.
3958 * This line will be removed in the future.
3959 */
3960 U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
3961 DEBUGLOG(4, "ZSTD_initCStream_advanced");
3962 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3963 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3964 FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) );
3965 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3966 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
3967 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003968}
3969
3970size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3971{
Scott Baker8487c5d2019-10-18 12:49:46 -07003972 DEBUGLOG(4, "ZSTD_initCStream_usingDict");
3973 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3974 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
3975 FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) );
3976 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003977}
3978
3979size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
3980{
Scott Baker8487c5d2019-10-18 12:49:46 -07003981 /* temporary : 0 interpreted as "unknown" during transition period.
3982 * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
3983 * 0 will be interpreted as "empty" in the future.
3984 */
3985 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss;
3986 DEBUGLOG(4, "ZSTD_initCStream_srcSize");
3987 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3988 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
3989 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
3990 FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) );
3991 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07003992}
3993
3994size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3995{
3996 DEBUGLOG(4, "ZSTD_initCStream");
Scott Baker8487c5d2019-10-18 12:49:46 -07003997 FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) );
3998 FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) );
3999 FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) );
4000 return 0;
Scott Baker2d897982019-09-24 11:50:08 -07004001}
4002
4003/*====== Compression ======*/
4004
Scott Baker8487c5d2019-10-18 12:49:46 -07004005static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx)
4006{
4007 size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos;
4008 if (hintInSize==0) hintInSize = cctx->blockSize;
4009 return hintInSize;
4010}
4011
/* copy as much of src as fits into dst; @return : nb of bytes copied */
static size_t ZSTD_limitCopy(void* dst, size_t dstCapacity,
                       const void* src, size_t srcSize)
{
    size_t const length = (dstCapacity < srcSize) ? dstCapacity : srcSize;
    if (length > 0) memcpy(dst, src, length);   /* skip memcpy for length 0 (pointers may be NULL) */
    return length;
}
4019
4020/** ZSTD_compressStream_generic():
Scott Baker8487c5d2019-10-18 12:49:46 -07004021 * internal function for all *compressStream*() variants
Scott Baker2d897982019-09-24 11:50:08 -07004022 * non-static, because can be called from zstdmt_compress.c
4023 * @return : hint size for next input */
Scott Baker8487c5d2019-10-18 12:49:46 -07004024static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
4025 ZSTD_outBuffer* output,
4026 ZSTD_inBuffer* input,
4027 ZSTD_EndDirective const flushMode)
Scott Baker2d897982019-09-24 11:50:08 -07004028{
4029 const char* const istart = (const char*)input->src;
4030 const char* const iend = istart + input->size;
4031 const char* ip = istart + input->pos;
4032 char* const ostart = (char*)output->dst;
4033 char* const oend = ostart + output->size;
4034 char* op = ostart + output->pos;
4035 U32 someMoreWork = 1;
4036
4037 /* check expectations */
Scott Baker8487c5d2019-10-18 12:49:46 -07004038 DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode);
Scott Baker2d897982019-09-24 11:50:08 -07004039 assert(zcs->inBuff != NULL);
4040 assert(zcs->inBuffSize > 0);
4041 assert(zcs->outBuff != NULL);
4042 assert(zcs->outBuffSize > 0);
4043 assert(output->pos <= output->size);
4044 assert(input->pos <= input->size);
4045
4046 while (someMoreWork) {
4047 switch(zcs->streamStage)
4048 {
4049 case zcss_init:
Scott Baker8487c5d2019-10-18 12:49:46 -07004050 RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!");
Scott Baker2d897982019-09-24 11:50:08 -07004051
4052 case zcss_load:
4053 if ( (flushMode == ZSTD_e_end)
4054 && ((size_t)(oend-op) >= ZSTD_compressBound(iend-ip)) /* enough dstCapacity */
4055 && (zcs->inBuffPos == 0) ) {
4056 /* shortcut to compression pass directly into output buffer */
4057 size_t const cSize = ZSTD_compressEnd(zcs,
4058 op, oend-op, ip, iend-ip);
Scott Baker8487c5d2019-10-18 12:49:46 -07004059 DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize);
4060 FORWARD_IF_ERROR(cSize);
Scott Baker2d897982019-09-24 11:50:08 -07004061 ip = iend;
4062 op += cSize;
4063 zcs->frameEnded = 1;
Scott Baker8487c5d2019-10-18 12:49:46 -07004064 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
Scott Baker2d897982019-09-24 11:50:08 -07004065 someMoreWork = 0; break;
4066 }
4067 /* complete loading into inBuffer */
4068 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
4069 size_t const loaded = ZSTD_limitCopy(
4070 zcs->inBuff + zcs->inBuffPos, toLoad,
4071 ip, iend-ip);
4072 zcs->inBuffPos += loaded;
4073 ip += loaded;
4074 if ( (flushMode == ZSTD_e_continue)
4075 && (zcs->inBuffPos < zcs->inBuffTarget) ) {
4076 /* not enough input to fill full block : stop here */
4077 someMoreWork = 0; break;
4078 }
4079 if ( (flushMode == ZSTD_e_flush)
4080 && (zcs->inBuffPos == zcs->inToCompress) ) {
4081 /* empty */
4082 someMoreWork = 0; break;
4083 }
4084 }
4085 /* compress current block (note : this stage cannot be stopped in the middle) */
4086 DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode);
4087 { void* cDst;
4088 size_t cSize;
4089 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
4090 size_t oSize = oend-op;
4091 unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend);
4092 if (oSize >= ZSTD_compressBound(iSize))
4093 cDst = op; /* compress into output buffer, to skip flush stage */
4094 else
4095 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
4096 cSize = lastBlock ?
4097 ZSTD_compressEnd(zcs, cDst, oSize,
4098 zcs->inBuff + zcs->inToCompress, iSize) :
4099 ZSTD_compressContinue(zcs, cDst, oSize,
4100 zcs->inBuff + zcs->inToCompress, iSize);
Scott Baker8487c5d2019-10-18 12:49:46 -07004101 FORWARD_IF_ERROR(cSize);
Scott Baker2d897982019-09-24 11:50:08 -07004102 zcs->frameEnded = lastBlock;
4103 /* prepare next block */
4104 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
4105 if (zcs->inBuffTarget > zcs->inBuffSize)
4106 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize;
4107 DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u",
Scott Baker8487c5d2019-10-18 12:49:46 -07004108 (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize);
Scott Baker2d897982019-09-24 11:50:08 -07004109 if (!lastBlock)
4110 assert(zcs->inBuffTarget <= zcs->inBuffSize);
4111 zcs->inToCompress = zcs->inBuffPos;
4112 if (cDst == op) { /* no need to flush */
4113 op += cSize;
4114 if (zcs->frameEnded) {
4115 DEBUGLOG(5, "Frame completed directly in outBuffer");
4116 someMoreWork = 0;
Scott Baker8487c5d2019-10-18 12:49:46 -07004117 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
Scott Baker2d897982019-09-24 11:50:08 -07004118 }
4119 break;
4120 }
4121 zcs->outBuffContentSize = cSize;
4122 zcs->outBuffFlushedSize = 0;
4123 zcs->streamStage = zcss_flush; /* pass-through to flush stage */
4124 }
4125 /* fall-through */
4126 case zcss_flush:
4127 DEBUGLOG(5, "flush stage");
4128 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
Scott Baker8487c5d2019-10-18 12:49:46 -07004129 size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op),
Scott Baker2d897982019-09-24 11:50:08 -07004130 zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
4131 DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u",
Scott Baker8487c5d2019-10-18 12:49:46 -07004132 (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed);
Scott Baker2d897982019-09-24 11:50:08 -07004133 op += flushed;
4134 zcs->outBuffFlushedSize += flushed;
4135 if (toFlush!=flushed) {
4136 /* flush not fully completed, presumably because dst is too small */
4137 assert(op==oend);
4138 someMoreWork = 0;
4139 break;
4140 }
4141 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
4142 if (zcs->frameEnded) {
4143 DEBUGLOG(5, "Frame completed on flush");
4144 someMoreWork = 0;
Scott Baker8487c5d2019-10-18 12:49:46 -07004145 ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
Scott Baker2d897982019-09-24 11:50:08 -07004146 break;
4147 }
4148 zcs->streamStage = zcss_load;
4149 break;
4150 }
4151
4152 default: /* impossible */
4153 assert(0);
4154 }
4155 }
4156
4157 input->pos = ip - istart;
4158 output->pos = op - ostart;
4159 if (zcs->frameEnded) return 0;
Scott Baker8487c5d2019-10-18 12:49:46 -07004160 return ZSTD_nextInputSizeHint(zcs);
4161}
4162
4163static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx)
4164{
4165#ifdef ZSTD_MULTITHREAD
4166 if (cctx->appliedParams.nbWorkers >= 1) {
4167 assert(cctx->mtctx != NULL);
4168 return ZSTDMT_nextInputSizeHint(cctx->mtctx);
Scott Baker2d897982019-09-24 11:50:08 -07004169 }
Scott Baker8487c5d2019-10-18 12:49:46 -07004170#endif
4171 return ZSTD_nextInputSizeHint(cctx);
4172
Scott Baker2d897982019-09-24 11:50:08 -07004173}
4174
4175size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
4176{
Scott Baker8487c5d2019-10-18 12:49:46 -07004177 FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) );
4178 return ZSTD_nextInputSizeHint_MTorST(zcs);
Scott Baker2d897982019-09-24 11:50:08 -07004179}
4180
4181
/*! ZSTD_compressStream2() :
 * Main streaming compression entry point.
 * Performs a transparent (lazy) initialization on first use, then either
 * delegates to the multi-threaded engine or runs the single-threaded path.
 * @return : 0 when a frame is fully flushed, a "remaining to flush" hint,
 *           or an error code (testable with ZSTD_isError()). */
size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                             ZSTD_outBuffer* output,
                             ZSTD_inBuffer* input,
                             ZSTD_EndDirective endOp)
{
    DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp);
    /* check conditions : pos must not exceed size in either buffer */
    RETURN_ERROR_IF(output->pos > output->size, GENERIC);
    RETURN_ERROR_IF(input->pos > input->size, GENERIC);
    assert(cctx!=NULL);

    /* transparent initialization stage */
    if (cctx->streamStage == zcss_init) {
        ZSTD_CCtx_params params = cctx->requestedParams;
        ZSTD_prefixDict const prefixDict = cctx->prefixDict;
        FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */
        memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
        assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
        DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
        if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = input->size + 1;  /* auto-fix pledgedSrcSize */
        params.cParams = ZSTD_getCParamsFromCCtxParams(
                &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);


#ifdef ZSTD_MULTITHREAD
        if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
            params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
        }
        if (params.nbWorkers > 0) {
            /* mt context creation : created once, then reused across sessions */
            if (cctx->mtctx == NULL) {
                DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u",
                            params.nbWorkers);
                cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
                RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation);
            }
            /* mt compression */
            DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers);
            FORWARD_IF_ERROR( ZSTDMT_initCStream_internal(
                        cctx->mtctx,
                        prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent,
                        cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) );
            cctx->streamStage = zcss_load;
            cctx->appliedParams.nbWorkers = params.nbWorkers;
        } else
#endif
        {   FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx,
                            prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType,
                            cctx->cdict,
                            params, cctx->pledgedSrcSizePlusOne-1) );
            assert(cctx->streamStage == zcss_load);
            assert(cctx->appliedParams.nbWorkers == 0);
    }   }
    /* end of transparent initialization stage */

    /* compression stage */
#ifdef ZSTD_MULTITHREAD
    if (cctx->appliedParams.nbWorkers > 0) {
        int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end);
        size_t flushMin;
        assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */);
        if (cctx->cParamsChanged) {
            ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams);
            cctx->cParamsChanged = 0;
        }
        do {
            flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
            if ( ZSTD_isError(flushMin)
              || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
                /* reset session even on error, before forwarding the error code */
                ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
            }
            FORWARD_IF_ERROR(flushMin);
        } while (forceMaxProgress && flushMin != 0 && output->pos < output->size);
        DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic");
        /* Either we don't require maximum forward progress, we've finished the
         * flush, or we are out of output space.
         */
        assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size);
        return flushMin;
    }
#endif
    FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) );
    DEBUGLOG(5, "completed ZSTD_compressStream2");
    return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */
}
4267
Scott Baker8487c5d2019-10-18 12:49:46 -07004268size_t ZSTD_compressStream2_simpleArgs (
Scott Baker2d897982019-09-24 11:50:08 -07004269 ZSTD_CCtx* cctx,
4270 void* dst, size_t dstCapacity, size_t* dstPos,
4271 const void* src, size_t srcSize, size_t* srcPos,
4272 ZSTD_EndDirective endOp)
4273{
4274 ZSTD_outBuffer output = { dst, dstCapacity, *dstPos };
4275 ZSTD_inBuffer input = { src, srcSize, *srcPos };
Scott Baker8487c5d2019-10-18 12:49:46 -07004276 /* ZSTD_compressStream2() will check validity of dstPos and srcPos */
4277 size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp);
Scott Baker2d897982019-09-24 11:50:08 -07004278 *dstPos = output.pos;
4279 *srcPos = input.pos;
4280 return cErr;
4281}
4282
Scott Baker8487c5d2019-10-18 12:49:46 -07004283size_t ZSTD_compress2(ZSTD_CCtx* cctx,
4284 void* dst, size_t dstCapacity,
4285 const void* src, size_t srcSize)
4286{
4287 ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
4288 { size_t oPos = 0;
4289 size_t iPos = 0;
4290 size_t const result = ZSTD_compressStream2_simpleArgs(cctx,
4291 dst, dstCapacity, &oPos,
4292 src, srcSize, &iPos,
4293 ZSTD_e_end);
4294 FORWARD_IF_ERROR(result);
4295 if (result != 0) { /* compression not completed, due to lack of output space */
4296 assert(oPos == dstCapacity);
4297 RETURN_ERROR(dstSize_tooSmall);
4298 }
4299 assert(iPos == srcSize); /* all input is expected consumed */
4300 return oPos;
4301 }
4302}
Scott Baker2d897982019-09-24 11:50:08 -07004303
4304/*====== Finalize ======*/
4305
4306/*! ZSTD_flushStream() :
4307 * @return : amount of data remaining to flush */
4308size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
4309{
4310 ZSTD_inBuffer input = { NULL, 0, 0 };
Scott Baker8487c5d2019-10-18 12:49:46 -07004311 return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush);
Scott Baker2d897982019-09-24 11:50:08 -07004312}
4313
4314
4315size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
4316{
4317 ZSTD_inBuffer input = { NULL, 0, 0 };
Scott Baker8487c5d2019-10-18 12:49:46 -07004318 size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end);
4319 FORWARD_IF_ERROR( remainingToFlush );
4320 if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */
4321 /* single thread mode : attempt to calculate remaining to flush more precisely */
Scott Baker2d897982019-09-24 11:50:08 -07004322 { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE;
Scott Baker8487c5d2019-10-18 12:49:46 -07004323 size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4);
4324 size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize;
4325 DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush);
Scott Baker2d897982019-09-24 11:50:08 -07004326 return toFlush;
4327 }
4328}
4329
4330
4331/*-===== Pre-defined compression levels =====-*/
4332
/* Highest advertised compression level (also the last row of the tables below). */
#define ZSTD_MAX_CLEVEL     22
int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
/* Lowest (negative == fastest) level : bounded by the acceleration-factor range. */
int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
Scott Baker2d897982019-09-24 11:50:08 -07004336
/* Default compression parameters, indexed by [srcSize class][compression level].
 * Column legend (fields of ZSTD_compressionParameters, in order) :
 *   W = windowLog, C = chainLog, H = hashLog, S = searchLog,
 *   L = minMatch, TL/T = targetLength, strat = strategy.
 * Rows marked with a trailing '.' in their comment were hand-tuned for that size class. */
static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
{   /* "default" - for any srcSize > 256 KB */
    /* W,  C,  H,  S,  L, TL, strat */
    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
    { 21, 16, 17,  1,  5,  1, ZSTD_dfast   },  /* level  3 */
    { 21, 18, 18,  1,  5,  1, ZSTD_dfast   },  /* level  4 */
    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
},
{   /* for srcSize <= 256 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 18, 14, 14,  1,  5,  1, ZSTD_dfast   },  /* level  2 */
    { 18, 16, 16,  1,  4,  1, ZSTD_dfast   },  /* level  3 */
    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 128 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
    { 17, 15, 16,  2,  5,  1, ZSTD_dfast   },  /* level  3 */
    { 17, 17, 17,  2,  4,  1, ZSTD_dfast   },  /* level  4 */
    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
},
{   /* for srcSize <= 16 KB */
    /* W,  C,  H,  S,  L,  T, strat */
    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
    { 14, 14, 15,  2,  4,  1, ZSTD_dfast   },  /* level  3 */
    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
},
};
4443
4444/*! ZSTD_getCParams() :
Scott Baker8487c5d2019-10-18 12:49:46 -07004445 * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
4446 * Size values are optional, provide 0 if not known or unused */
Scott Baker2d897982019-09-24 11:50:08 -07004447ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
4448{
4449 size_t const addedSize = srcSizeHint ? 0 : 500;
Scott Baker8487c5d2019-10-18 12:49:46 -07004450 U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */
4451 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
Scott Baker2d897982019-09-24 11:50:08 -07004452 int row = compressionLevel;
4453 DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel);
4454 if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */
4455 if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */
4456 if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
4457 { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
4458 if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */
Scott Baker8487c5d2019-10-18 12:49:46 -07004459 return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */
4460 }
Scott Baker2d897982019-09-24 11:50:08 -07004461}
4462
4463/*! ZSTD_getParams() :
Scott Baker8487c5d2019-10-18 12:49:46 -07004464 * same idea as ZSTD_getCParams()
4465 * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
4466 * Fields of `ZSTD_frameParameters` are set to default values */
Scott Baker2d897982019-09-24 11:50:08 -07004467ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) {
4468 ZSTD_parameters params;
4469 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize);
4470 DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
4471 memset(&params, 0, sizeof(params));
4472 params.cParams = cParams;
4473 params.fParams.contentSizeFlag = 1;
4474 return params;
4475}