125
125
struct delta_index {
126
unsigned long memsize;
128
unsigned long src_size;
129
unsigned int hash_mask;
126
unsigned long memsize; /* Total bytes pointed to by this index */
127
const void *src_buf; /* Pointer to the beginning of source data */
128
unsigned long src_size; /* Total length of source data */
129
unsigned long agg_src_offset; /* Start of source data as part of the
131
unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
130
133
struct index_entry *hash[];
133
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
136
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize,
137
unsigned long agg_src_offset)
135
139
unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
136
140
const unsigned char *data, *buffer = buf;
222
226
* Assume that this loop is gone through exactly
223
227
* HASH_LIMIT times and is entered and left with
224
* acc==0. So the first statement in the loop
228
* acc==0. So the first statement in the loop
225
229
* contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
226
230
* to the accumulator, and the inner loop consequently
227
231
* is run (hash_count[i]-HASH_LIMIT) times, removing
308
313
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
311
create_delta(const struct delta_index *index,
312
const void *trg_buf, unsigned long trg_size,
313
unsigned long *delta_size, unsigned long max_size)
316
create_delta(struct delta_index **indexes,
317
unsigned int num_indexes,
318
const void *trg_buf, unsigned long trg_size,
319
unsigned long *delta_size, unsigned long max_size)
315
unsigned int i, outpos, outsize, moff, msize, val;
321
unsigned int i, j, outpos, outsize, moff, msize, val;
322
const struct delta_index *index, *mindex;
317
324
const unsigned char *ref_data, *ref_top, *data, *top;
318
325
unsigned char *out;
345
356
out[outpos++] = i;
347
ref_data = index->src_buf;
348
ref_top = ref_data + index->src_size;
350
359
top = (const unsigned char *) trg_buf + trg_size;
361
/* Start the matching by filling out with a simple 'insert' instruction, of
362
* the first RABIN_WINDOW bytes of the input.
364
outpos++; /* leave a byte for the insert command */
354
366
for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
355
367
out[outpos++] = *data;
356
368
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
370
/* we are now setup with an insert of 'i' bytes and val contains the RABIN
371
* hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
362
379
while (data < top) {
363
380
if (msize < 4096) {
381
/* we don't have a 'worthy enough' match yet, so let's look for
364
384
struct index_entry *entry;
385
/* Shift the window by one byte. */
365
386
val ^= U[data[-RABIN_WINDOW]];
366
387
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
367
i = val & index->hash_mask;
368
for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
369
const unsigned char *ref = entry->ptr;
370
const unsigned char *src = data;
371
unsigned int ref_size = ref_top - ref;
372
if (entry->val != val)
374
if (ref_size > top - src)
375
ref_size = top - src;
376
if (ref_size <= msize)
378
while (ref_size-- && *src++ == *ref)
380
if (msize < ref - entry->ptr) {
381
/* this is our best match so far */
382
msize = ref - entry->ptr;
383
moff = entry->ptr - ref_data;
384
if (msize >= 4096) /* good enough */
388
for (j = 0; j < num_indexes; ++j) {
390
i = val & index->hash_mask;
391
ref_top = index->src_buf + index->src_size;
392
ref_data = index->src_buf;
393
for (entry = index->hash[i]; entry < index->hash[i+1];
395
const unsigned char *ref = entry->ptr;
396
const unsigned char *src = data;
397
unsigned int ref_size = ref_top - ref;
398
if (entry->val != val)
400
/* ref_size is the longest possible match that we could make
401
* here. If ref_size <= msize, then we know that we cannot
402
* match more bytes with this location that we have already
405
if (ref_size > top - src)
406
ref_size = top - src;
407
if (ref_size <= msize)
409
/* See how many bytes actually match at this location. */
410
while (ref_size-- && *src++ == *ref)
412
if (msize < ref - entry->ptr) {
413
/* this is our best match so far */
414
msize = ref - entry->ptr;
415
moff = entry->ptr - ref_data;
417
if (msize >= 4096) /* good enough */
425
/* The best match right now is less than 4 bytes long. So just add
426
* the current byte to the insert instruction. Increment the insert
427
* counter, and copy the byte of data into the output buffer.
393
431
out[outpos++] = *data++;
395
433
if (inscnt == 0x7f) {
434
/* We have a max length insert instruction, finalize it in the
396
437
out[outpos - inscnt - 1] = inscnt;