Blame - vendor/github.com/DataDog/zstd/zstd_ldm.c - voltha-simonu-adapter

blob: 3dcf86e6e8a32ac75399fc3bbe2442f86d76f980 [file] [log] [blame]

Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	1	/*
				2	* Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
				3	* All rights reserved.
				4	*
				5	* This source code is licensed under both the BSD-style license (found in the
				6	* LICENSE file in the root directory of this source tree) and the GPLv2 (found
				7	* in the COPYING file in the root directory of this source tree).
				8	*/
				9
				10	#include "zstd_ldm.h"
				11
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	12	#include "debug.h"
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	13	#include "zstd_fast.h" /* ZSTD_fillHashTable() */
				14	#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
				15
				16	#define LDM_BUCKET_SIZE_LOG 3
				17	#define LDM_MIN_MATCH_LENGTH 64
				18	#define LDM_HASH_RLOG 7
				19	#define LDM_HASH_CHAR_OFFSET 10
				20
				21	void ZSTD_ldm_adjustParameters(ldmParams_t* params,
				22	ZSTD_compressionParameters const* cParams)
				23	{
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	24	params->windowLog = cParams->windowLog;
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	25	ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
				26	DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
				27	if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
				28	if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
				29	if (cParams->strategy >= ZSTD_btopt) {
				30	/* Get out of the way of the optimal parser */
				31	U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
				32	assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
				33	assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
				34	params->minMatchLength = minMatch;
				35	}
				36	if (params->hashLog == 0) {
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	37	params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	38	assert(params->hashLog <= ZSTD_HASHLOG_MAX);
				39	}
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	40	if (params->hashRateLog == 0) {
				41	params->hashRateLog = params->windowLog < params->hashLog
				42	? 0
				43	: params->windowLog - params->hashLog;
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	44	}
				45	params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
				46	}
				47
				48	size_t ZSTD_ldm_getTableSize(ldmParams_t params)
				49	{
				50	size_t const ldmHSize = ((size_t)1) << params.hashLog;
				51	size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
				52	size_t const ldmBucketSize =
				53	((size_t)1) << (params.hashLog - ldmBucketSizeLog);
				54	size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
				55	return params.enableLdm ? totalSize : 0;
				56	}
				57
				58	size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
				59	{
				60	return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
				61	}
				62
				63	/** ZSTD_ldm_getSmallHash() :
				64	* numBits should be <= 32
				65	* If numBits==0, returns 0.
				66	* @return : the most significant numBits of value. */
				67	static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
				68	{
				69	assert(numBits <= 32);
				70	return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
				71	}
				72
				73	/** ZSTD_ldm_getChecksum() :
				74	* numBitsToDiscard should be <= 32
				75	* @return : the next most significant 32 bits after numBitsToDiscard */
				76	static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
				77	{
				78	assert(numBitsToDiscard <= 32);
				79	return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
				80	}
				81
				82	/** ZSTD_ldm_getTag() ;
				83	* Given the hash, returns the most significant numTagBits bits
				84	* after (32 + hbits) bits.
				85	*
				86	* If there are not enough bits remaining, return the last
				87	* numTagBits bits. */
				88	static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
				89	{
				90	assert(numTagBits < 32 && hbits <= 32);
				91	if (32 - hbits < numTagBits) {
				92	return hash & (((U32)1 << numTagBits) - 1);
				93	} else {
				94	return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
				95	}
				96	}
				97
				98	/** ZSTD_ldm_getBucket() :
				99	* Returns a pointer to the start of the bucket associated with hash. */
				100	static ldmEntry_t* ZSTD_ldm_getBucket(
				101	ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
				102	{
				103	return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
				104	}
				105
				106	/** ZSTD_ldm_insertEntry() :
				107	* Insert the entry with corresponding hash into the hash table */
				108	static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
				109	size_t const hash, const ldmEntry_t entry,
				110	ldmParams_t const ldmParams)
				111	{
				112	BYTE* const bucketOffsets = ldmState->bucketOffsets;
				113	*(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
				114	bucketOffsets[hash]++;
				115	bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
				116	}
				117
				118	/** ZSTD_ldm_makeEntryAndInsertByTag() :
				119	*
				120	* Gets the small hash, checksum, and tag from the rollingHash.
				121	*
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	122	* If the tag matches (1 << ldmParams.hashRateLog)-1, then
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	123	* creates an ldmEntry from the offset, and inserts it into the hash table.
				124	*
				125	* hBits is the length of the small hash, which is the most significant hBits
				126	* of rollingHash. The checksum is the next 32 most significant bits, followed
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	127	* by ldmParams.hashRateLog bits that make up the tag. */
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	128	static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
				129	U64 const rollingHash,
				130	U32 const hBits,
				131	U32 const offset,
				132	ldmParams_t const ldmParams)
				133	{
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	134	U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
				135	U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	136	if (tag == tagMask) {
				137	U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
				138	U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
				139	ldmEntry_t entry;
				140	entry.offset = offset;
				141	entry.checksum = checksum;
				142	ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
				143	}
				144	}
				145
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	146	/** ZSTD_ldm_countBackwardsMatch() :
				147	* Returns the number of bytes that match backwards before pIn and pMatch.
				148	*
				149	* We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
				150	static size_t ZSTD_ldm_countBackwardsMatch(
				151	const BYTE* pIn, const BYTE* pAnchor,
				152	const BYTE* pMatch, const BYTE* pBase)
				153	{
				154	size_t matchLength = 0;
				155	while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
				156	pIn--;
				157	pMatch--;
				158	matchLength++;
				159	}
				160	return matchLength;
				161	}
				162
				163	/** ZSTD_ldm_fillFastTables() :
				164	*
				165	* Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
				166	* This is similar to ZSTD_loadDictionaryContent.
				167	*
				168	* The tables for the other strategies are filled within their
				169	* block compressors. */
				170	static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	171	void const* end)
				172	{
				173	const BYTE* const iend = (const BYTE*)end;
				174
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	175	switch(ms->cParams.strategy)
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	176	{
				177	case ZSTD_fast:
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	178	ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	179	break;
				180
				181	case ZSTD_dfast:
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	182	ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	183	break;
				184
				185	case ZSTD_greedy:
				186	case ZSTD_lazy:
				187	case ZSTD_lazy2:
				188	case ZSTD_btlazy2:
				189	case ZSTD_btopt:
				190	case ZSTD_btultra:
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	191	case ZSTD_btultra2:
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	192	break;
				193	default:
				194	assert(0); /* not possible : not a valid strategy id */
				195	}
				196
				197	return 0;
				198	}
				199
				200	/** ZSTD_ldm_fillLdmHashTable() :
				201	*
				202	* Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
				203	* lastHash is the rolling hash that corresponds to lastHashed.
				204	*
				205	* Returns the rolling hash corresponding to position iend-1. */
				206	static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
				207	U64 lastHash, const BYTE* lastHashed,
				208	const BYTE* iend, const BYTE* base,
				209	U32 hBits, ldmParams_t const ldmParams)
				210	{
				211	U64 rollingHash = lastHash;
				212	const BYTE* cur = lastHashed + 1;
				213
				214	while (cur < iend) {
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	215	rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
				216	cur[ldmParams.minMatchLength-1],
				217	state->hashPower);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	218	ZSTD_ldm_makeEntryAndInsertByTag(state,
				219	rollingHash, hBits,
				220	(U32)(cur - base), ldmParams);
				221	++cur;
				222	}
				223	return rollingHash;
				224	}
				225
				226
				227	/** ZSTD_ldm_limitTableUpdate() :
				228	*
				229	* Sets cctx->nextToUpdate to a position corresponding closer to anchor
				230	* if it is far way
				231	* (after a long match, only update tables a limited amount). */
				232	static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
				233	{
				234	U32 const current = (U32)(anchor - ms->window.base);
				235	if (current > ms->nextToUpdate + 1024) {
				236	ms->nextToUpdate =
				237	current - MIN(512, current - ms->nextToUpdate - 1024);
				238	}
				239	}
				240
				241	static size_t ZSTD_ldm_generateSequences_internal(
				242	ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
				243	ldmParams_t const* params, void const* src, size_t srcSize)
				244	{
				245	/* LDM parameters */
				246	int const extDict = ZSTD_window_hasExtDict(ldmState->window);
				247	U32 const minMatchLength = params->minMatchLength;
				248	U64 const hashPower = ldmState->hashPower;
				249	U32 const hBits = params->hashLog - params->bucketSizeLog;
				250	U32 const ldmBucketSize = 1U << params->bucketSizeLog;
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	251	U32 const hashRateLog = params->hashRateLog;
				252	U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	253	/* Prefix and extDict parameters */
				254	U32 const dictLimit = ldmState->window.dictLimit;
				255	U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
				256	BYTE const* const base = ldmState->window.base;
				257	BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
				258	BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
				259	BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
				260	BYTE const* const lowPrefixPtr = base + dictLimit;
				261	/* Input bounds */
				262	BYTE const* const istart = (BYTE const*)src;
				263	BYTE const* const iend = istart + srcSize;
				264	BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
				265	/* Input positions */
				266	BYTE const* anchor = istart;
				267	BYTE const* ip = istart;
				268	/* Rolling hash */
				269	BYTE const* lastHashed = NULL;
				270	U64 rollingHash = 0;
				271
				272	while (ip <= ilimit) {
				273	size_t mLength;
				274	U32 const current = (U32)(ip - base);
				275	size_t forwardMatchLength = 0, backwardMatchLength = 0;
				276	ldmEntry_t* bestEntry = NULL;
				277	if (ip != istart) {
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	278	rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
				279	lastHashed[minMatchLength],
				280	hashPower);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	281	} else {
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	282	rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	283	}
				284	lastHashed = ip;
				285
				286	/* Do not insert and do not look for a match */
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	287	if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	288	ip++;
				289	continue;
				290	}
				291
				292	/* Get the best entry and compute the match lengths */
				293	{
				294	ldmEntry_t* const bucket =
				295	ZSTD_ldm_getBucket(ldmState,
				296	ZSTD_ldm_getSmallHash(rollingHash, hBits),
				297	*params);
				298	ldmEntry_t* cur;
				299	size_t bestMatchLength = 0;
				300	U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
				301
				302	for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
				303	size_t curForwardMatchLength, curBackwardMatchLength,
				304	curTotalMatchLength;
				305	if (cur->checksum != checksum \|\| cur->offset <= lowestIndex) {
				306	continue;
				307	}
				308	if (extDict) {
				309	BYTE const* const curMatchBase =
				310	cur->offset < dictLimit ? dictBase : base;
				311	BYTE const* const pMatch = curMatchBase + cur->offset;
				312	BYTE const* const matchEnd =
				313	cur->offset < dictLimit ? dictEnd : iend;
				314	BYTE const* const lowMatchPtr =
				315	cur->offset < dictLimit ? dictStart : lowPrefixPtr;
				316
				317	curForwardMatchLength = ZSTD_count_2segments(
				318	ip, pMatch, iend,
				319	matchEnd, lowPrefixPtr);
				320	if (curForwardMatchLength < minMatchLength) {
				321	continue;
				322	}
				323	curBackwardMatchLength =
				324	ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
				325	lowMatchPtr);
				326	curTotalMatchLength = curForwardMatchLength +
				327	curBackwardMatchLength;
				328	} else { /* !extDict */
				329	BYTE const* const pMatch = base + cur->offset;
				330	curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
				331	if (curForwardMatchLength < minMatchLength) {
				332	continue;
				333	}
				334	curBackwardMatchLength =
				335	ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
				336	lowPrefixPtr);
				337	curTotalMatchLength = curForwardMatchLength +
				338	curBackwardMatchLength;
				339	}
				340
				341	if (curTotalMatchLength > bestMatchLength) {
				342	bestMatchLength = curTotalMatchLength;
				343	forwardMatchLength = curForwardMatchLength;
				344	backwardMatchLength = curBackwardMatchLength;
				345	bestEntry = cur;
				346	}
				347	}
				348	}
				349
				350	/* No match found -- continue searching */
				351	if (bestEntry == NULL) {
				352	ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
				353	hBits, current,
				354	*params);
				355	ip++;
				356	continue;
				357	}
				358
				359	/* Match found */
				360	mLength = forwardMatchLength + backwardMatchLength;
				361	ip -= backwardMatchLength;
				362
				363	{
				364	/* Store the sequence:
				365	* ip = current - backwardMatchLength
				366	* The match is at (bestEntry->offset - backwardMatchLength)
				367	*/
				368	U32 const matchIndex = bestEntry->offset;
				369	U32 const offset = current - matchIndex;
				370	rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
				371
				372	/* Out of sequence storage */
				373	if (rawSeqStore->size == rawSeqStore->capacity)
				374	return ERROR(dstSize_tooSmall);
				375	seq->litLength = (U32)(ip - anchor);
				376	seq->matchLength = (U32)mLength;
				377	seq->offset = offset;
				378	rawSeqStore->size++;
				379	}
				380
				381	/* Insert the current entry into the hash table */
				382	ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
				383	(U32)(lastHashed - base),
				384	*params);
				385
				386	assert(ip + backwardMatchLength == lastHashed);
				387
				388	/* Fill the hash table from lastHashed+1 to ip+mLength*/
				389	/* Heuristic: don't need to fill the entire table at end of block */
				390	if (ip + mLength <= ilimit) {
				391	rollingHash = ZSTD_ldm_fillLdmHashTable(
				392	ldmState, rollingHash, lastHashed,
				393	ip + mLength, base, hBits, *params);
				394	lastHashed = ip + mLength - 1;
				395	}
				396	ip += mLength;
				397	anchor = ip;
				398	}
				399	return iend - anchor;
				400	}
				401
				402	/*! ZSTD_ldm_reduceTable() :
				403	* reduce table indexes by `reducerValue` */
				404	static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
				405	U32 const reducerValue)
				406	{
				407	U32 u;
				408	for (u = 0; u < size; u++) {
				409	if (table[u].offset < reducerValue) table[u].offset = 0;
				410	else table[u].offset -= reducerValue;
				411	}
				412	}
				413
				414	size_t ZSTD_ldm_generateSequences(
				415	ldmState_t* ldmState, rawSeqStore_t* sequences,
				416	ldmParams_t const* params, void const* src, size_t srcSize)
				417	{
				418	U32 const maxDist = 1U << params->windowLog;
				419	BYTE const* const istart = (BYTE const*)src;
				420	BYTE const* const iend = istart + srcSize;
				421	size_t const kMaxChunkSize = 1 << 20;
				422	size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
				423	size_t chunk;
				424	size_t leftoverSize = 0;
				425
				426	assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
				427	/* Check that ZSTD_window_update() has been called for this chunk prior
				428	* to passing it to this function.
				429	*/
				430	assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
				431	/* The input could be very large (in zstdmt), so it must be broken up into
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	432	* chunks to enforce the maximum distance and handle overflow correction.
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	433	*/
				434	assert(sequences->pos <= sequences->size);
				435	assert(sequences->size <= sequences->capacity);
				436	for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
				437	BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
				438	size_t const remaining = (size_t)(iend - chunkStart);
				439	BYTE const *const chunkEnd =
				440	(remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
				441	size_t const chunkSize = chunkEnd - chunkStart;
				442	size_t newLeftoverSize;
				443	size_t const prevSize = sequences->size;
				444
				445	assert(chunkStart < iend);
				446	/* 1. Perform overflow correction if necessary. */
				447	if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
				448	U32 const ldmHSize = 1U << params->hashLog;
				449	U32 const correction = ZSTD_window_correctOverflow(
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	450	&ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	451	ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
				452	}
				453	/* 2. We enforce the maximum offset allowed.
				454	*
				455	* kMaxChunkSize should be small enough that we don't lose too much of
				456	* the window through early invalidation.
				457	* TODO: * Test the chunk size.
				458	* * Try invalidation after the sequence generation and test the
				459	* the offset against maxDist directly.
				460	*/
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	461	ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	462	/* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
				463	newLeftoverSize = ZSTD_ldm_generateSequences_internal(
				464	ldmState, sequences, params, chunkStart, chunkSize);
				465	if (ZSTD_isError(newLeftoverSize))
				466	return newLeftoverSize;
				467	/* 4. We add the leftover literals from previous iterations to the first
				468	* newly generated sequence, or add the `newLeftoverSize` if none are
				469	* generated.
				470	*/
				471	/* Prepend the leftover literals from the last call */
				472	if (prevSize < sequences->size) {
				473	sequences->seq[prevSize].litLength += (U32)leftoverSize;
				474	leftoverSize = newLeftoverSize;
				475	} else {
				476	assert(newLeftoverSize == chunkSize);
				477	leftoverSize += chunkSize;
				478	}
				479	}
				480	return 0;
				481	}
				482
				483	void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
				484	while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
				485	rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
				486	if (srcSize <= seq->litLength) {
				487	/* Skip past srcSize literals */
				488	seq->litLength -= (U32)srcSize;
				489	return;
				490	}
				491	srcSize -= seq->litLength;
				492	seq->litLength = 0;
				493	if (srcSize < seq->matchLength) {
				494	/* Skip past the first srcSize of the match */
				495	seq->matchLength -= (U32)srcSize;
				496	if (seq->matchLength < minMatch) {
				497	/* The match is too short, omit it */
				498	if (rawSeqStore->pos + 1 < rawSeqStore->size) {
				499	seq[1].litLength += seq[0].matchLength;
				500	}
				501	rawSeqStore->pos++;
				502	}
				503	return;
				504	}
				505	srcSize -= seq->matchLength;
				506	seq->matchLength = 0;
				507	rawSeqStore->pos++;
				508	}
				509	}
				510
				511	/**
				512	* If the sequence length is longer than remaining then the sequence is split
				513	* between this block and the next.
				514	*
				515	* Returns the current sequence to handle, or if the rest of the block should
				516	* be literals, it returns a sequence with offset == 0.
				517	*/
				518	static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
				519	U32 const remaining, U32 const minMatch)
				520	{
				521	rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
				522	assert(sequence.offset > 0);
				523	/* Likely: No partial sequence */
				524	if (remaining >= sequence.litLength + sequence.matchLength) {
				525	rawSeqStore->pos++;
				526	return sequence;
				527	}
				528	/* Cut the sequence short (offset == 0 ==> rest is literals). */
				529	if (remaining <= sequence.litLength) {
				530	sequence.offset = 0;
				531	} else if (remaining < sequence.litLength + sequence.matchLength) {
				532	sequence.matchLength = remaining - sequence.litLength;
				533	if (sequence.matchLength < minMatch) {
				534	sequence.offset = 0;
				535	}
				536	}
				537	/* Skip past `remaining` bytes for the future sequences. */
				538	ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
				539	return sequence;
				540	}
				541
				542	size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
				543	ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	544	void const* src, size_t srcSize)
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	545	{
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	546	const ZSTD_compressionParameters* const cParams = &ms->cParams;
				547	unsigned const minMatch = cParams->minMatch;
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	548	ZSTD_blockCompressor const blockCompressor =
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	549	ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	550	/* Input bounds */
				551	BYTE const* const istart = (BYTE const*)src;
				552	BYTE const* const iend = istart + srcSize;
				553	/* Input positions */
				554	BYTE const* ip = istart;
				555
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	556	DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	557	assert(rawSeqStore->pos <= rawSeqStore->size);
				558	assert(rawSeqStore->size <= rawSeqStore->capacity);
				559	/* Loop through each sequence and apply the block compressor to the lits */
				560	while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
				561	/* maybeSplitSequence updates rawSeqStore->pos */
				562	rawSeq const sequence = maybeSplitSequence(rawSeqStore,
				563	(U32)(iend - ip), minMatch);
				564	int i;
				565	/* End signal */
				566	if (sequence.offset == 0)
				567	break;
				568
				569	assert(sequence.offset <= (1U << cParams->windowLog));
				570	assert(ip + sequence.litLength + sequence.matchLength <= iend);
				571
				572	/* Fill tables for block compressor */
				573	ZSTD_ldm_limitTableUpdate(ms, ip);
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	574	ZSTD_ldm_fillFastTables(ms, ip);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	575	/* Run the block compressor */
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	576	DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	577	{
				578	size_t const newLitLength =
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	579	blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	580	ip += sequence.litLength;
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	581	/* Update the repcodes */
				582	for (i = ZSTD_REP_NUM - 1; i > 0; i--)
				583	rep[i] = rep[i-1];
				584	rep[0] = sequence.offset;
				585	/* Store the sequence */
				586	ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
				587	sequence.offset + ZSTD_REP_MOVE,
				588	sequence.matchLength - MINMATCH);
				589	ip += sequence.matchLength;
				590	}
				591	}
				592	/* Fill the tables for the block compressor */
				593	ZSTD_ldm_limitTableUpdate(ms, ip);
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	594	ZSTD_ldm_fillFastTables(ms, ip);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	595	/* Compress the last literals */
Scott Baker	611f6bd	2019-10-18 13:45:19 -0700	[diff] [blame^]	596	return blockCompressor(ms, seqStore, rep, ip, iend - ip);
Scott Baker	eee8dd8	2019-09-24 12:52:34 -0700	[diff] [blame]	597	}