/* Source blob: efb46807c7851064583658d482e610ffb96c1780 */
#include <stdio.h> /* fprintf */
#include <stdlib.h> /* malloc, free, qsort */
#include <string.h> /* memset */
#include <time.h> /* clock */
#include "mem.h" /* read */
#include "pool.h"
#include "threading.h"
#include "zstd_internal.h" /* includes zstd.h */
#include "zdict.h"
* COVER_best_t is used for two purposes:
* 1. Synchronizing threads.
* 2. Saving the best parameters and dictionary.
* All of the methods except COVER_best_init() are thread safe if zstd is
* compiled with multithreaded support.
typedef struct COVER_best_s {
ZSTD_pthread_mutex_t mutex;
ZSTD_pthread_cond_t cond;
size_t liveJobs;
void *dict;
size_t dictSize;
ZDICT_cover_params_t parameters;
size_t compressedSize;
} COVER_best_t;
* A segment is a range in the source as well as the score of the segment.
typedef struct {
U32 begin;
U32 end;
U32 score;
} COVER_segment_t;
*Number of epochs and size of each epoch.
typedef struct {
U32 num;
U32 size;
} COVER_epoch_info_t;
* Computes the number of epochs and the size of each epoch.
* We will make sure that each epoch gets at least 10 * k bytes.
* The COVER algorithms divide the data up into epochs of equal size and
* select one segment from each epoch.
* @param maxDictSize The maximum allowed dictionary size.
* @param nbDmers The number of dmers we are training on.
* @param k The parameter k (segment size).
* @param passes The target number of passes over the dmer corpus.
* More passes means a better dictionary.
COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers,
U32 k, U32 passes);
/**
 * Warns the user when their corpus is too small.
 */
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers,
                             int displayLevel);
* Checks total compressed size of a dictionary
size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
const size_t *samplesSizes, const BYTE *samples,
size_t *offsets,
size_t nbTrainSamples, size_t nbSamples,
BYTE *const dict, size_t dictBufferCapacity);
/**
 * Returns the sum of the sample sizes.
 */
size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples);
* Initialize the `COVER_best_t`.
void COVER_best_init(COVER_best_t *best);
* Wait until liveJobs == 0.
void COVER_best_wait(COVER_best_t *best);
* Call COVER_best_wait() and then destroy the COVER_best_t.
void COVER_best_destroy(COVER_best_t *best);
* Called when a thread is about to be launched.
* Increments liveJobs.
void COVER_best_start(COVER_best_t *best);
* Called when a thread finishes executing, both on error or success.
* Decrements liveJobs and signals any waiting threads if liveJobs == 0.
* If this dictionary is the best so far save it and its parameters.
void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
ZDICT_cover_params_t parameters, void *dict,
size_t dictSize);