Scott Baker | 611f6bd | 2019-10-18 13:45:19 -0700 | [diff] [blame] | 1 | /*-************************************* |
| 2 | * Dependencies |
| 3 | ***************************************/ |
| 4 | #include <stdio.h> /* fprintf */ |
| 5 | #include <stdlib.h> /* malloc, free, qsort */ |
| 6 | #include <string.h> /* memset */ |
| 7 | #include <time.h> /* clock */ |
| 8 | |
| 9 | #include "mem.h" /* read */ |
| 10 | #include "pool.h" |
| 11 | #include "threading.h" |
| 12 | #include "cover.h" |
| 13 | #include "zstd_internal.h" /* includes zstd.h */ |
| 14 | #ifndef ZDICT_STATIC_LINKING_ONLY |
| 15 | #define ZDICT_STATIC_LINKING_ONLY |
| 16 | #endif |
| 17 | #include "zdict.h" |
| 18 | |
| 19 | |
| 20 | /*-************************************* |
| 21 | * Constants |
| 22 | ***************************************/ |
| 23 | #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) |
| 24 | #define FASTCOVER_MAX_F 31 |
| 25 | #define FASTCOVER_MAX_ACCEL 10 |
| 26 | #define DEFAULT_SPLITPOINT 0.75 |
| 27 | #define DEFAULT_F 20 |
| 28 | #define DEFAULT_ACCEL 1 |
| 29 | |
| 30 | |
| 31 | /*-************************************* |
| 32 | * Console display |
| 33 | ***************************************/ |
| 34 | static int g_displayLevel = 2; |
| 35 | #define DISPLAY(...) \ |
| 36 | { \ |
| 37 | fprintf(stderr, __VA_ARGS__); \ |
| 38 | fflush(stderr); \ |
| 39 | } |
| 40 | #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ |
| 41 | if (displayLevel >= l) { \ |
| 42 | DISPLAY(__VA_ARGS__); \ |
| 43 | } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ |
| 44 | #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) |
| 45 | |
| 46 | #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ |
| 47 | if (displayLevel >= l) { \ |
| 48 | if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \ |
| 49 | g_time = clock(); \ |
| 50 | DISPLAY(__VA_ARGS__); \ |
| 51 | } \ |
| 52 | } |
| 53 | #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) |
| 54 | static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; |
| 55 | static clock_t g_time = 0; |
| 56 | |
| 57 | |
| 58 | /*-************************************* |
| 59 | * Hash Functions |
| 60 | ***************************************/ |
| 61 | static const U64 prime6bytes = 227718039650203ULL; |
| 62 | static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } |
| 63 | static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } |
| 64 | |
| 65 | static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; |
| 66 | static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } |
| 67 | static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } |
| 68 | |
| 69 | |
| 70 | /** |
| 71 | * Hash the d-byte value pointed to by p and mod 2^f |
| 72 | */ |
| 73 | static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) { |
| 74 | if (d == 6) { |
| 75 | return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1); |
| 76 | } |
| 77 | return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1); |
| 78 | } |
| 79 | |
| 80 | |
| 81 | /*-************************************* |
| 82 | * Acceleration |
| 83 | ***************************************/ |
| 84 | typedef struct { |
| 85 | unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */ |
| 86 | unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */ |
| 87 | } FASTCOVER_accel_t; |
| 88 | |
| 89 | |
| 90 | static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { |
| 91 | { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ |
| 92 | { 100, 0 }, /* accel = 1 */ |
| 93 | { 50, 1 }, /* accel = 2 */ |
| 94 | { 34, 2 }, /* accel = 3 */ |
| 95 | { 25, 3 }, /* accel = 4 */ |
| 96 | { 20, 4 }, /* accel = 5 */ |
| 97 | { 17, 5 }, /* accel = 6 */ |
| 98 | { 14, 6 }, /* accel = 7 */ |
| 99 | { 13, 7 }, /* accel = 8 */ |
| 100 | { 11, 8 }, /* accel = 9 */ |
| 101 | { 10, 9 }, /* accel = 10 */ |
| 102 | }; |
| 103 | |
| 104 | |
| 105 | /*-************************************* |
| 106 | * Context |
| 107 | ***************************************/ |
| 108 | typedef struct { |
| 109 | const BYTE *samples; |
| 110 | size_t *offsets; |
| 111 | const size_t *samplesSizes; |
| 112 | size_t nbSamples; |
| 113 | size_t nbTrainSamples; |
| 114 | size_t nbTestSamples; |
| 115 | size_t nbDmers; |
| 116 | U32 *freqs; |
| 117 | unsigned d; |
| 118 | unsigned f; |
| 119 | FASTCOVER_accel_t accelParams; |
| 120 | } FASTCOVER_ctx_t; |
| 121 | |
| 122 | |
| 123 | /*-************************************* |
| 124 | * Helper functions |
| 125 | ***************************************/ |
| 126 | /** |
| 127 | * Selects the best segment in an epoch. |
| 128 | * Segments of are scored according to the function: |
| 129 | * |
| 130 | * Let F(d) be the frequency of all dmers with hash value d. |
| 131 | * Let S_i be hash value of the dmer at position i of segment S which has length k. |
| 132 | * |
| 133 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) |
| 134 | * |
| 135 | * Once the dmer with hash value d is in the dictionary we set F(d) = 0. |
| 136 | */ |
| 137 | static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, |
| 138 | U32 *freqs, U32 begin, U32 end, |
| 139 | ZDICT_cover_params_t parameters, |
| 140 | U16* segmentFreqs) { |
| 141 | /* Constants */ |
| 142 | const U32 k = parameters.k; |
| 143 | const U32 d = parameters.d; |
| 144 | const U32 f = ctx->f; |
| 145 | const U32 dmersInK = k - d + 1; |
| 146 | |
| 147 | /* Try each segment (activeSegment) and save the best (bestSegment) */ |
| 148 | COVER_segment_t bestSegment = {0, 0, 0}; |
| 149 | COVER_segment_t activeSegment; |
| 150 | |
| 151 | /* Reset the activeDmers in the segment */ |
| 152 | /* The activeSegment starts at the beginning of the epoch. */ |
| 153 | activeSegment.begin = begin; |
| 154 | activeSegment.end = begin; |
| 155 | activeSegment.score = 0; |
| 156 | |
| 157 | /* Slide the activeSegment through the whole epoch. |
| 158 | * Save the best segment in bestSegment. |
| 159 | */ |
| 160 | while (activeSegment.end < end) { |
| 161 | /* Get hash value of current dmer */ |
| 162 | const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); |
| 163 | |
| 164 | /* Add frequency of this index to score if this is the first occurrence of index in active segment */ |
| 165 | if (segmentFreqs[idx] == 0) { |
| 166 | activeSegment.score += freqs[idx]; |
| 167 | } |
| 168 | /* Increment end of segment and segmentFreqs*/ |
| 169 | activeSegment.end += 1; |
| 170 | segmentFreqs[idx] += 1; |
| 171 | /* If the window is now too large, drop the first position */ |
| 172 | if (activeSegment.end - activeSegment.begin == dmersInK + 1) { |
| 173 | /* Get hash value of the dmer to be eliminated from active segment */ |
| 174 | const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); |
| 175 | segmentFreqs[delIndex] -= 1; |
| 176 | /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ |
| 177 | if (segmentFreqs[delIndex] == 0) { |
| 178 | activeSegment.score -= freqs[delIndex]; |
| 179 | } |
| 180 | /* Increment start of segment */ |
| 181 | activeSegment.begin += 1; |
| 182 | } |
| 183 | |
| 184 | /* If this segment is the best so far save it */ |
| 185 | if (activeSegment.score > bestSegment.score) { |
| 186 | bestSegment = activeSegment; |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | /* Zero out rest of segmentFreqs array */ |
| 191 | while (activeSegment.begin < end) { |
| 192 | const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); |
| 193 | segmentFreqs[delIndex] -= 1; |
| 194 | activeSegment.begin += 1; |
| 195 | } |
| 196 | |
| 197 | { |
| 198 | /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ |
| 199 | U32 pos; |
| 200 | for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { |
| 201 | const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d); |
| 202 | freqs[i] = 0; |
| 203 | } |
| 204 | } |
| 205 | |
| 206 | return bestSegment; |
| 207 | } |
| 208 | |
| 209 | |
| 210 | static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, |
| 211 | size_t maxDictSize, unsigned f, |
| 212 | unsigned accel) { |
| 213 | /* k, d, and f are required parameters */ |
| 214 | if (parameters.d == 0 || parameters.k == 0) { |
| 215 | return 0; |
| 216 | } |
| 217 | /* d has to be 6 or 8 */ |
| 218 | if (parameters.d != 6 && parameters.d != 8) { |
| 219 | return 0; |
| 220 | } |
| 221 | /* k <= maxDictSize */ |
| 222 | if (parameters.k > maxDictSize) { |
| 223 | return 0; |
| 224 | } |
| 225 | /* d <= k */ |
| 226 | if (parameters.d > parameters.k) { |
| 227 | return 0; |
| 228 | } |
| 229 | /* 0 < f <= FASTCOVER_MAX_F*/ |
| 230 | if (f > FASTCOVER_MAX_F || f == 0) { |
| 231 | return 0; |
| 232 | } |
| 233 | /* 0 < splitPoint <= 1 */ |
| 234 | if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { |
| 235 | return 0; |
| 236 | } |
| 237 | /* 0 < accel <= 10 */ |
| 238 | if (accel > 10 || accel == 0) { |
| 239 | return 0; |
| 240 | } |
| 241 | return 1; |
| 242 | } |
| 243 | |
| 244 | |
| 245 | /** |
| 246 | * Clean up a context initialized with `FASTCOVER_ctx_init()`. |
| 247 | */ |
| 248 | static void |
| 249 | FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx) |
| 250 | { |
| 251 | if (!ctx) return; |
| 252 | |
| 253 | free(ctx->freqs); |
| 254 | ctx->freqs = NULL; |
| 255 | |
| 256 | free(ctx->offsets); |
| 257 | ctx->offsets = NULL; |
| 258 | } |
| 259 | |
| 260 | |
| 261 | /** |
| 262 | * Calculate for frequency of hash value of each dmer in ctx->samples |
| 263 | */ |
| 264 | static void |
| 265 | FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) |
| 266 | { |
| 267 | const unsigned f = ctx->f; |
| 268 | const unsigned d = ctx->d; |
| 269 | const unsigned skip = ctx->accelParams.skip; |
| 270 | const unsigned readLength = MAX(d, 8); |
| 271 | size_t i; |
| 272 | assert(ctx->nbTrainSamples >= 5); |
| 273 | assert(ctx->nbTrainSamples <= ctx->nbSamples); |
| 274 | for (i = 0; i < ctx->nbTrainSamples; i++) { |
| 275 | size_t start = ctx->offsets[i]; /* start of current dmer */ |
| 276 | size_t const currSampleEnd = ctx->offsets[i+1]; |
| 277 | while (start + readLength <= currSampleEnd) { |
| 278 | const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d); |
| 279 | freqs[dmerIndex]++; |
| 280 | start = start + skip + 1; |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | |
| 286 | /** |
| 287 | * Prepare a context for dictionary building. |
| 288 | * The context is only dependent on the parameter `d` and can used multiple |
| 289 | * times. |
| 290 | * Returns 0 on success or error code on error. |
| 291 | * The context must be destroyed with `FASTCOVER_ctx_destroy()`. |
| 292 | */ |
| 293 | static size_t |
| 294 | FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, |
| 295 | const void* samplesBuffer, |
| 296 | const size_t* samplesSizes, unsigned nbSamples, |
| 297 | unsigned d, double splitPoint, unsigned f, |
| 298 | FASTCOVER_accel_t accelParams) |
| 299 | { |
| 300 | const BYTE* const samples = (const BYTE*)samplesBuffer; |
| 301 | const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); |
| 302 | /* Split samples into testing and training sets */ |
| 303 | const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; |
| 304 | const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; |
| 305 | const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; |
| 306 | const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; |
| 307 | |
| 308 | /* Checks */ |
| 309 | if (totalSamplesSize < MAX(d, sizeof(U64)) || |
| 310 | totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { |
| 311 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", |
| 312 | (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); |
| 313 | return ERROR(srcSize_wrong); |
| 314 | } |
| 315 | |
| 316 | /* Check if there are at least 5 training samples */ |
| 317 | if (nbTrainSamples < 5) { |
| 318 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); |
| 319 | return ERROR(srcSize_wrong); |
| 320 | } |
| 321 | |
| 322 | /* Check if there's testing sample */ |
| 323 | if (nbTestSamples < 1) { |
| 324 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); |
| 325 | return ERROR(srcSize_wrong); |
| 326 | } |
| 327 | |
| 328 | /* Zero the context */ |
| 329 | memset(ctx, 0, sizeof(*ctx)); |
| 330 | DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, |
| 331 | (unsigned)trainingSamplesSize); |
| 332 | DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, |
| 333 | (unsigned)testSamplesSize); |
| 334 | |
| 335 | ctx->samples = samples; |
| 336 | ctx->samplesSizes = samplesSizes; |
| 337 | ctx->nbSamples = nbSamples; |
| 338 | ctx->nbTrainSamples = nbTrainSamples; |
| 339 | ctx->nbTestSamples = nbTestSamples; |
| 340 | ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; |
| 341 | ctx->d = d; |
| 342 | ctx->f = f; |
| 343 | ctx->accelParams = accelParams; |
| 344 | |
| 345 | /* The offsets of each file */ |
| 346 | ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t)); |
| 347 | if (ctx->offsets == NULL) { |
| 348 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); |
| 349 | FASTCOVER_ctx_destroy(ctx); |
| 350 | return ERROR(memory_allocation); |
| 351 | } |
| 352 | |
| 353 | /* Fill offsets from the samplesSizes */ |
| 354 | { U32 i; |
| 355 | ctx->offsets[0] = 0; |
| 356 | assert(nbSamples >= 5); |
| 357 | for (i = 1; i <= nbSamples; ++i) { |
| 358 | ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; |
| 359 | } |
| 360 | } |
| 361 | |
| 362 | /* Initialize frequency array of size 2^f */ |
| 363 | ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32)); |
| 364 | if (ctx->freqs == NULL) { |
| 365 | DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); |
| 366 | FASTCOVER_ctx_destroy(ctx); |
| 367 | return ERROR(memory_allocation); |
| 368 | } |
| 369 | |
| 370 | DISPLAYLEVEL(2, "Computing frequencies\n"); |
| 371 | FASTCOVER_computeFrequency(ctx->freqs, ctx); |
| 372 | |
| 373 | return 0; |
| 374 | } |
| 375 | |
| 376 | |
| 377 | /** |
| 378 | * Given the prepared context build the dictionary. |
| 379 | */ |
| 380 | static size_t |
| 381 | FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, |
| 382 | U32* freqs, |
| 383 | void* dictBuffer, size_t dictBufferCapacity, |
| 384 | ZDICT_cover_params_t parameters, |
| 385 | U16* segmentFreqs) |
| 386 | { |
| 387 | BYTE *const dict = (BYTE *)dictBuffer; |
| 388 | size_t tail = dictBufferCapacity; |
| 389 | /* Divide the data into epochs. We will select one segment from each epoch. */ |
| 390 | const COVER_epoch_info_t epochs = COVER_computeEpochs( |
| 391 | (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); |
| 392 | const size_t maxZeroScoreRun = 10; |
| 393 | size_t zeroScoreRun = 0; |
| 394 | size_t epoch; |
| 395 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", |
| 396 | (U32)epochs.num, (U32)epochs.size); |
| 397 | /* Loop through the epochs until there are no more segments or the dictionary |
| 398 | * is full. |
| 399 | */ |
| 400 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { |
| 401 | const U32 epochBegin = (U32)(epoch * epochs.size); |
| 402 | const U32 epochEnd = epochBegin + epochs.size; |
| 403 | size_t segmentSize; |
| 404 | /* Select a segment */ |
| 405 | COVER_segment_t segment = FASTCOVER_selectSegment( |
| 406 | ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); |
| 407 | |
| 408 | /* If the segment covers no dmers, then we are out of content. |
| 409 | * There may be new content in other epochs, for continue for some time. |
| 410 | */ |
| 411 | if (segment.score == 0) { |
| 412 | if (++zeroScoreRun >= maxZeroScoreRun) { |
| 413 | break; |
| 414 | } |
| 415 | continue; |
| 416 | } |
| 417 | zeroScoreRun = 0; |
| 418 | |
| 419 | /* Trim the segment if necessary and if it is too small then we are done */ |
| 420 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); |
| 421 | if (segmentSize < parameters.d) { |
| 422 | break; |
| 423 | } |
| 424 | |
| 425 | /* We fill the dictionary from the back to allow the best segments to be |
| 426 | * referenced with the smallest offsets. |
| 427 | */ |
| 428 | tail -= segmentSize; |
| 429 | memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); |
| 430 | DISPLAYUPDATE( |
| 431 | 2, "\r%u%% ", |
| 432 | (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); |
| 433 | } |
| 434 | DISPLAYLEVEL(2, "\r%79s\r", ""); |
| 435 | return tail; |
| 436 | } |
| 437 | |
| 438 | /** |
| 439 | * Parameters for FASTCOVER_tryParameters(). |
| 440 | */ |
| 441 | typedef struct FASTCOVER_tryParameters_data_s { |
| 442 | const FASTCOVER_ctx_t* ctx; |
| 443 | COVER_best_t* best; |
| 444 | size_t dictBufferCapacity; |
| 445 | ZDICT_cover_params_t parameters; |
| 446 | } FASTCOVER_tryParameters_data_t; |
| 447 | |
| 448 | |
| 449 | /** |
| 450 | * Tries a set of parameters and updates the COVER_best_t with the results. |
| 451 | * This function is thread safe if zstd is compiled with multithreaded support. |
| 452 | * It takes its parameters as an *OWNING* opaque pointer to support threading. |
| 453 | */ |
| 454 | static void FASTCOVER_tryParameters(void *opaque) |
| 455 | { |
| 456 | /* Save parameters as local variables */ |
| 457 | FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque; |
| 458 | const FASTCOVER_ctx_t *const ctx = data->ctx; |
| 459 | const ZDICT_cover_params_t parameters = data->parameters; |
| 460 | size_t dictBufferCapacity = data->dictBufferCapacity; |
| 461 | size_t totalCompressedSize = ERROR(GENERIC); |
| 462 | /* Initialize array to keep track of frequency of dmer within activeSegment */ |
| 463 | U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); |
| 464 | /* Allocate space for hash table, dict, and freqs */ |
| 465 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); |
| 466 | COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); |
| 467 | U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); |
| 468 | if (!segmentFreqs || !dict || !freqs) { |
| 469 | DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); |
| 470 | goto _cleanup; |
| 471 | } |
| 472 | /* Copy the frequencies because we need to modify them */ |
| 473 | memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); |
| 474 | /* Build the dictionary */ |
| 475 | { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, |
| 476 | parameters, segmentFreqs); |
| 477 | |
| 478 | const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); |
| 479 | selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, |
| 480 | ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, |
| 481 | totalCompressedSize); |
| 482 | |
| 483 | if (COVER_dictSelectionIsError(selection)) { |
| 484 | DISPLAYLEVEL(1, "Failed to select dictionary\n"); |
| 485 | goto _cleanup; |
| 486 | } |
| 487 | } |
| 488 | _cleanup: |
| 489 | free(dict); |
| 490 | COVER_best_finish(data->best, parameters, selection); |
| 491 | free(data); |
| 492 | free(segmentFreqs); |
| 493 | COVER_dictSelectionFree(selection); |
| 494 | free(freqs); |
| 495 | } |
| 496 | |
| 497 | |
| 498 | static void |
| 499 | FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, |
| 500 | ZDICT_cover_params_t* coverParams) |
| 501 | { |
| 502 | coverParams->k = fastCoverParams.k; |
| 503 | coverParams->d = fastCoverParams.d; |
| 504 | coverParams->steps = fastCoverParams.steps; |
| 505 | coverParams->nbThreads = fastCoverParams.nbThreads; |
| 506 | coverParams->splitPoint = fastCoverParams.splitPoint; |
| 507 | coverParams->zParams = fastCoverParams.zParams; |
| 508 | coverParams->shrinkDict = fastCoverParams.shrinkDict; |
| 509 | } |
| 510 | |
| 511 | |
| 512 | static void |
| 513 | FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, |
| 514 | ZDICT_fastCover_params_t* fastCoverParams, |
| 515 | unsigned f, unsigned accel) |
| 516 | { |
| 517 | fastCoverParams->k = coverParams.k; |
| 518 | fastCoverParams->d = coverParams.d; |
| 519 | fastCoverParams->steps = coverParams.steps; |
| 520 | fastCoverParams->nbThreads = coverParams.nbThreads; |
| 521 | fastCoverParams->splitPoint = coverParams.splitPoint; |
| 522 | fastCoverParams->f = f; |
| 523 | fastCoverParams->accel = accel; |
| 524 | fastCoverParams->zParams = coverParams.zParams; |
| 525 | fastCoverParams->shrinkDict = coverParams.shrinkDict; |
| 526 | } |
| 527 | |
| 528 | |
| 529 | ZDICTLIB_API size_t |
| 530 | ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, |
| 531 | const void* samplesBuffer, |
| 532 | const size_t* samplesSizes, unsigned nbSamples, |
| 533 | ZDICT_fastCover_params_t parameters) |
| 534 | { |
| 535 | BYTE* const dict = (BYTE*)dictBuffer; |
| 536 | FASTCOVER_ctx_t ctx; |
| 537 | ZDICT_cover_params_t coverParams; |
| 538 | FASTCOVER_accel_t accelParams; |
| 539 | /* Initialize global data */ |
| 540 | g_displayLevel = parameters.zParams.notificationLevel; |
| 541 | /* Assign splitPoint and f if not provided */ |
| 542 | parameters.splitPoint = 1.0; |
| 543 | parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; |
| 544 | parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel; |
| 545 | /* Convert to cover parameter */ |
| 546 | memset(&coverParams, 0 , sizeof(coverParams)); |
| 547 | FASTCOVER_convertToCoverParams(parameters, &coverParams); |
| 548 | /* Checks */ |
| 549 | if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, |
| 550 | parameters.accel)) { |
| 551 | DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); |
| 552 | return ERROR(parameter_outOfBound); |
| 553 | } |
| 554 | if (nbSamples == 0) { |
| 555 | DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); |
| 556 | return ERROR(srcSize_wrong); |
| 557 | } |
| 558 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
| 559 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
| 560 | ZDICT_DICTSIZE_MIN); |
| 561 | return ERROR(dstSize_tooSmall); |
| 562 | } |
| 563 | /* Assign corresponding FASTCOVER_accel_t to accelParams*/ |
| 564 | accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; |
| 565 | /* Initialize context */ |
| 566 | { |
| 567 | size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, |
| 568 | coverParams.d, parameters.splitPoint, parameters.f, |
| 569 | accelParams); |
| 570 | if (ZSTD_isError(initVal)) { |
| 571 | DISPLAYLEVEL(1, "Failed to initialize context\n"); |
| 572 | return initVal; |
| 573 | } |
| 574 | } |
| 575 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); |
| 576 | /* Build the dictionary */ |
| 577 | DISPLAYLEVEL(2, "Building dictionary\n"); |
| 578 | { |
| 579 | /* Initialize array to keep track of frequency of dmer within activeSegment */ |
| 580 | U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); |
| 581 | const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, |
| 582 | dictBufferCapacity, coverParams, segmentFreqs); |
| 583 | const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); |
| 584 | const size_t dictionarySize = ZDICT_finalizeDictionary( |
| 585 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, |
| 586 | samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); |
| 587 | if (!ZSTD_isError(dictionarySize)) { |
| 588 | DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", |
| 589 | (unsigned)dictionarySize); |
| 590 | } |
| 591 | FASTCOVER_ctx_destroy(&ctx); |
| 592 | free(segmentFreqs); |
| 593 | return dictionarySize; |
| 594 | } |
| 595 | } |
| 596 | |
| 597 | |
| 598 | ZDICTLIB_API size_t |
| 599 | ZDICT_optimizeTrainFromBuffer_fastCover( |
| 600 | void* dictBuffer, size_t dictBufferCapacity, |
| 601 | const void* samplesBuffer, |
| 602 | const size_t* samplesSizes, unsigned nbSamples, |
| 603 | ZDICT_fastCover_params_t* parameters) |
| 604 | { |
| 605 | ZDICT_cover_params_t coverParams; |
| 606 | FASTCOVER_accel_t accelParams; |
| 607 | /* constants */ |
| 608 | const unsigned nbThreads = parameters->nbThreads; |
| 609 | const double splitPoint = |
| 610 | parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint; |
| 611 | const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; |
| 612 | const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; |
| 613 | const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; |
| 614 | const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; |
| 615 | const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; |
| 616 | const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); |
| 617 | const unsigned kIterations = |
| 618 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); |
| 619 | const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; |
| 620 | const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel; |
| 621 | const unsigned shrinkDict = 0; |
| 622 | /* Local variables */ |
| 623 | const int displayLevel = parameters->zParams.notificationLevel; |
| 624 | unsigned iteration = 1; |
| 625 | unsigned d; |
| 626 | unsigned k; |
| 627 | COVER_best_t best; |
| 628 | POOL_ctx *pool = NULL; |
| 629 | int warned = 0; |
| 630 | /* Checks */ |
| 631 | if (splitPoint <= 0 || splitPoint > 1) { |
| 632 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); |
| 633 | return ERROR(parameter_outOfBound); |
| 634 | } |
| 635 | if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { |
| 636 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); |
| 637 | return ERROR(parameter_outOfBound); |
| 638 | } |
| 639 | if (kMinK < kMaxD || kMaxK < kMinK) { |
| 640 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); |
| 641 | return ERROR(parameter_outOfBound); |
| 642 | } |
| 643 | if (nbSamples == 0) { |
| 644 | LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); |
| 645 | return ERROR(srcSize_wrong); |
| 646 | } |
| 647 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
| 648 | LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", |
| 649 | ZDICT_DICTSIZE_MIN); |
| 650 | return ERROR(dstSize_tooSmall); |
| 651 | } |
| 652 | if (nbThreads > 1) { |
| 653 | pool = POOL_create(nbThreads, 1); |
| 654 | if (!pool) { |
| 655 | return ERROR(memory_allocation); |
| 656 | } |
| 657 | } |
| 658 | /* Initialization */ |
| 659 | COVER_best_init(&best); |
| 660 | memset(&coverParams, 0 , sizeof(coverParams)); |
| 661 | FASTCOVER_convertToCoverParams(*parameters, &coverParams); |
| 662 | accelParams = FASTCOVER_defaultAccelParameters[accel]; |
| 663 | /* Turn down global display level to clean up display at level 2 and below */ |
| 664 | g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; |
| 665 | /* Loop through d first because each new value needs a new context */ |
| 666 | LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", |
| 667 | kIterations); |
| 668 | for (d = kMinD; d <= kMaxD; d += 2) { |
| 669 | /* Initialize the context for this value of d */ |
| 670 | FASTCOVER_ctx_t ctx; |
| 671 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); |
| 672 | { |
| 673 | size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); |
| 674 | if (ZSTD_isError(initVal)) { |
| 675 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); |
| 676 | COVER_best_destroy(&best); |
| 677 | POOL_free(pool); |
| 678 | return initVal; |
| 679 | } |
| 680 | } |
| 681 | if (!warned) { |
| 682 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); |
| 683 | warned = 1; |
| 684 | } |
| 685 | /* Loop through k reusing the same context */ |
| 686 | for (k = kMinK; k <= kMaxK; k += kStepSize) { |
| 687 | /* Prepare the arguments */ |
| 688 | FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( |
| 689 | sizeof(FASTCOVER_tryParameters_data_t)); |
| 690 | LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); |
| 691 | if (!data) { |
| 692 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); |
| 693 | COVER_best_destroy(&best); |
| 694 | FASTCOVER_ctx_destroy(&ctx); |
| 695 | POOL_free(pool); |
| 696 | return ERROR(memory_allocation); |
| 697 | } |
| 698 | data->ctx = &ctx; |
| 699 | data->best = &best; |
| 700 | data->dictBufferCapacity = dictBufferCapacity; |
| 701 | data->parameters = coverParams; |
| 702 | data->parameters.k = k; |
| 703 | data->parameters.d = d; |
| 704 | data->parameters.splitPoint = splitPoint; |
| 705 | data->parameters.steps = kSteps; |
| 706 | data->parameters.shrinkDict = shrinkDict; |
| 707 | data->parameters.zParams.notificationLevel = g_displayLevel; |
| 708 | /* Check the parameters */ |
| 709 | if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, |
| 710 | data->ctx->f, accel)) { |
| 711 | DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); |
| 712 | free(data); |
| 713 | continue; |
| 714 | } |
| 715 | /* Call the function and pass ownership of data to it */ |
| 716 | COVER_best_start(&best); |
| 717 | if (pool) { |
| 718 | POOL_add(pool, &FASTCOVER_tryParameters, data); |
| 719 | } else { |
| 720 | FASTCOVER_tryParameters(data); |
| 721 | } |
| 722 | /* Print status */ |
| 723 | LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", |
| 724 | (unsigned)((iteration * 100) / kIterations)); |
| 725 | ++iteration; |
| 726 | } |
| 727 | COVER_best_wait(&best); |
| 728 | FASTCOVER_ctx_destroy(&ctx); |
| 729 | } |
| 730 | LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); |
| 731 | /* Fill the output buffer and parameters with output of the best parameters */ |
| 732 | { |
| 733 | const size_t dictSize = best.dictSize; |
| 734 | if (ZSTD_isError(best.compressedSize)) { |
| 735 | const size_t compressedSize = best.compressedSize; |
| 736 | COVER_best_destroy(&best); |
| 737 | POOL_free(pool); |
| 738 | return compressedSize; |
| 739 | } |
| 740 | FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel); |
| 741 | memcpy(dictBuffer, best.dict, dictSize); |
| 742 | COVER_best_destroy(&best); |
| 743 | POOL_free(pool); |
| 744 | return dictSize; |
| 745 | } |
| 746 | |
| 747 | } |