377
377
create_delta_index(const struct source_info *src,
378
378
struct delta_index *old,
379
struct delta_index **fresh)
379
struct delta_index **fresh,
380
int max_bytes_to_index)
381
382
unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
382
unsigned int total_num_entries;
383
unsigned int total_num_entries, stride, max_entries;
383
384
const unsigned char *data, *buffer;
384
385
struct delta_index *index;
385
386
struct unpacked_index_entry *entry, **hash;
391
392
buffer = src->buf;
393
394
/* Determine index hash size. Note that indexing skips the
394
first byte to allow for optimizing the Rabin's polynomial
395
initialization in create_delta(). */
395
first byte so we subtract 1 to get the edge cases right.
397
stride = RABIN_WINDOW;
396
398
num_entries = (src->size - 1) / RABIN_WINDOW;
399
if (max_bytes_to_index > 0) {
400
max_entries = (unsigned int) (max_bytes_to_index / RABIN_WINDOW);
401
if (num_entries > max_entries) {
402
/* Limit the max number of matching entries. This reduces the 'best'
403
* possible match, but means we don't consume all of ram.
405
num_entries = max_entries;
406
stride = (src->size - 1) / num_entries;
398
410
total_num_entries = num_entries + old->num_entries;
429
441
/* then populate the index for the new data */
431
for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
443
for (data = buffer + num_entries * stride - RABIN_WINDOW;
433
data -= RABIN_WINDOW) {
434
446
unsigned int val = 0;
435
447
for (i = 1; i <= RABIN_WINDOW; i++)
436
448
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
476
488
unsigned int hsize)
478
490
unsigned int hash_offset, hmask, memsize;
479
struct index_entry *entry, *last_entry;
491
struct index_entry *entry;
480
492
struct index_entry_linked_list *out_entry, **hash;
496
508
/* We know that entries are in the order we want in the output, but they
497
509
* aren't "grouped" by hash bucket yet.
499
last_entry = entries + num_entries;
500
511
for (entry = entries + num_entries - 1; entry >= entries; --entry) {
501
512
hash_offset = entry->val & hmask;
502
513
out_entry->p_entry = entry;
521
532
unsigned int i, j, hsize, hmask, total_num_entries;
522
533
struct delta_index *index;
523
534
struct index_entry *entry, *packed_entry, **packed_hash;
524
struct index_entry *last_entry, null_entry = {0};
535
struct index_entry null_entry = {0};
526
537
unsigned long memsize;
527
538
struct index_entry_linked_list *unpacked_entry, **mini_hash;
1100
1115
return DELTA_OK;
1120
get_entry_summary(const struct delta_index *index, int pos,
1121
unsigned int *text_offset, unsigned int *hash_val)
1124
const struct index_entry *entry;
1125
const struct index_entry *start_of_entries;
1126
unsigned int offset;
1127
if (pos < 0 || text_offset == NULL || hash_val == NULL
1132
hsize = index->hash_mask + 1;
1133
start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1134
entry = start_of_entries + pos;
1135
if (entry > index->last_entry) {
1138
if (entry->ptr == NULL) {
1142
offset = entry->src->agg_offset;
1143
offset += (entry->ptr - ((unsigned char *)entry->src->buf));
1144
*text_offset = offset;
1145
*hash_val = entry->val;
1152
get_hash_offset(const struct delta_index *index, int pos,
1153
unsigned int *entry_offset)
1156
const struct index_entry *entry;
1157
const struct index_entry *start_of_entries;
1158
if (pos < 0 || index == NULL || entry_offset == NULL)
1162
hsize = index->hash_mask + 1;
1163
start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1167
entry = index->hash[pos];
1168
if (entry == NULL) {
1171
*entry_offset = (entry - start_of_entries);
1178
rabin_hash(const unsigned char *data)
1181
unsigned int val = 0;
1182
for (i = 0; i < RABIN_WINDOW; i++)
1183
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
1103
1187
/* vim: et ts=4 sw=4 sts=4