/*
 * diff-delta.c: generate a delta between two buffers
 *
 * This code was greatly inspired by parts of LibXDiff from Davide Libenzi
 * http://www.xmailserver.org/xdiff-lib.html
 *
 * Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
 * Adapted for Bazaar by John Arbash Meinel <john@arbash-meinel.com> (C) 2009
 *
 * This code is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
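
/*
 * Include block: the original's is not shown in this excerpt.  A minimal,
 * assumed set that satisfies the code below (malloc/calloc/realloc/free,
 * memcpy/memset, assert), with "delta.h" taken to declare struct
 * source_info, struct delta_index, and get_delta_hdr_size():
 */
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "delta.h"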

/* maximum hash entry list for the same hash bucket */
#define HASH_LIMIT 64

#define RABIN_SHIFT 23
#define RABIN_WINDOW 16

static const unsigned int T[256] = {
    0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
    0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
    0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
    0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
    0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
    0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
    0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
    0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
    0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
    0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
    0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
    0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
    0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
    0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
    0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
    0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
    0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
    0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
    0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
    0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
    0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
    0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
    0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
    0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
    0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
    0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
    0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
    0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
    0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
    0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
    0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
    0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
    0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
    0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
    0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
    0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
    0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
    0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
    0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
    0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
    0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
    0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
    0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
};

static const unsigned int U[256] = {
    0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
    0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
    0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
    0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
    0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
    0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
    0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
    0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
    0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
    0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
    0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
    0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
    0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
    0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
    0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
    0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
    0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
    0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
    0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
    0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
    0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
    0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
    0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
    0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
    0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
    0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
    0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
    0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
    0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
    0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
    0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
    0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
    0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
    0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
    0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
    0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
    0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
    0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
    0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
    0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
    0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
    0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
    0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
};
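
/*
 * Illustrative sketch, not part of the original file: how T and U give a
 * rolling Rabin hash over a RABIN_WINDOW-byte window.  rabin_hash() and
 * rabin_slide() are hypothetical helpers; the indexing and matching code
 * below inlines the same two update expressions.  The invariant is
 *     rabin_hash(buf + 1) == rabin_slide(rabin_hash(buf),
 *                                        buf[0], buf[RABIN_WINDOW])
 * i.e. U cancels the contribution of the byte leaving the window, and T
 * folds the overflowing top RABIN_SHIFT bits back in as a byte enters it.
 */
static unsigned int rabin_hash(const unsigned char *buf)
{
    unsigned int i, val = 0;
    for (i = 0; i < RABIN_WINDOW; i++)
        val = ((val << 8) | buf[i]) ^ T[val >> RABIN_SHIFT];
    return val;
}

static unsigned int rabin_slide(unsigned int val, unsigned char out,
                                unsigned char in)
{
    val ^= U[out];
    return ((val << 8) | in) ^ T[val >> RABIN_SHIFT];
}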

struct index_entry {
    const unsigned char *ptr;
    const struct source_info *src;
    unsigned int val;
};

struct unpacked_index_entry {
    struct index_entry entry;
    struct unpacked_index_entry *next;
};

struct delta_index {
    unsigned long memsize; /* Total bytes pointed to by this index */
    const struct source_info *last_src; /* Information about the referenced source */
    unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
                               entry */
    unsigned int num_entries; /* The total number of entries in this index */
    struct index_entry *last_entry; /* Pointer to the last valid entry */
    struct index_entry *hash[];
};

static unsigned int
limit_hash_buckets(struct unpacked_index_entry **hash,
                   unsigned int *hash_count, unsigned int hsize,
                   unsigned int entries)
{
    struct unpacked_index_entry *entry;
    unsigned int i;
    /*
     * Determine a limit on the number of entries in the same hash
     * bucket. This guards us against pathological data sets causing
     * really bad hash distribution with most entries in the same hash
     * bucket that would bring us to O(m*n) computing costs (m and n
     * corresponding to reference and target buffer sizes).
     *
     * Make sure none of the hash buckets has more entries than
     * we're willing to test. Otherwise we cull the entry list
     * uniformly to still preserve a good repartition across
     * the reference buffer.
     */
    for (i = 0; i < hsize; i++) {
        int acc;

        if (hash_count[i] <= HASH_LIMIT)
            continue;

        /* We leave exactly HASH_LIMIT entries in the bucket */
        entries -= hash_count[i] - HASH_LIMIT;

        entry = hash[i];
        acc = 0;

        /*
         * Assume that this loop is gone through exactly
         * HASH_LIMIT times and is entered and left with
         * acc==0. So the first statement in the loop
         * contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
         * to the accumulator, and the inner loop consequently
         * is run (hash_count[i]-HASH_LIMIT) times, removing
         * one element from the list each time. Since acc
         * balances out to 0 at the final run, the inner loop
         * body can't be left with entry==NULL. So we indeed
         * encounter entry==NULL in the outer loop only.
         */
        do {
            acc += hash_count[i] - HASH_LIMIT;
            if (acc > 0) {
                struct unpacked_index_entry *keep = entry;
                do {
                    entry = entry->next;
                    acc -= HASH_LIMIT;
                } while (acc > 0);
                keep->next = entry->next;
            }
            entry = entry->next;
        } while (entry);
    }
    return entries;
}
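
/*
 * Illustrative check, not in the original: take HASH_LIMIT = 64 and a
 * bucket with hash_count[i] = 128.  Each outer pass adds 64 to acc and
 * each inner step subtracts 64, so exactly one entry is dropped per entry
 * kept: 64 of the 128 survive, spread evenly across the chain rather than
 * clustered at one end.
 */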

static struct delta_index *
pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
                 unsigned int num_entries)
{
    unsigned int i, memsize;
    struct unpacked_index_entry *entry;
    struct delta_index *index;
    struct index_entry *packed_entry, **packed_hash;
    void *mem;
    /*
     * Now create the packed index in array form
     * rather than linked lists.
     */
    memsize = sizeof(*index)
              + sizeof(*packed_hash) * (hsize+1)
              + sizeof(*packed_entry) * num_entries;
    mem = malloc(memsize);
    if (!mem)
        return NULL;

    index = mem;
    index->memsize = memsize;
    index->hash_mask = hsize - 1;
    index->num_entries = num_entries;

    mem = index->hash;
    packed_hash = mem;
    mem = packed_hash + (hsize+1);
    packed_entry = mem;

    for (i = 0; i < hsize; i++) {
        /*
         * Coalesce all entries belonging to one linked list
         * into consecutive array entries.
         */
        packed_hash[i] = packed_entry;
        for (entry = hash[i]; entry; entry = entry->next)
            *packed_entry++ = entry->entry;
    }

    /* Sentinel value to indicate the length of the last hash bucket */
    packed_hash[hsize] = packed_entry;

    assert(packed_entry - (struct index_entry *)mem == num_entries);
    index->last_entry = (packed_entry - 1);
    return index;
}
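
/*
 * Illustrative sketch, not in the original: thanks to the sentinel,
 * bucket i of a packed index is scanned without storing any per-bucket
 * length, which is exactly how create_delta() walks it below:
 *
 *     struct index_entry *e;
 *     for (e = index->hash[i]; e < index->hash[i+1]; e++)
 *         ... e->val, e->ptr, e->src ...
 */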

static void include_entries_from_index(struct unpacked_index_entry **hash,
                                       unsigned int *hash_count,
                                       unsigned int hsize,
                                       struct unpacked_index_entry *entry,
                                       const struct delta_index *old)
{
    unsigned int i, hmask, masked_val;
    struct index_entry *old_entry;

    hmask = hsize - 1;
    /* Iterate backwards through the existing entries
     * This is so that matches early in the file end up being the first pointer
     * in the linked list.
     * TODO: We know that all old entries are going to occur before the new
     *       entries, and that we are going to follow this with a limit and
     *       then pack step. There is probably a way we could optimize this
     *       step, so that we wouldn't have to copy everything into a new
     *       buffer, and then copy everything again into yet another buffer.
     */
    for (old_entry = old->last_entry, i = 0; i < old->num_entries;
         i++, old_entry--) {
        masked_val = old_entry->val & hmask;
        entry->entry = *old_entry;
        entry->next = hash[masked_val];
        hash[masked_val] = entry++;
        hash_count[masked_val]++;
    }
}

static struct delta_index *
create_index_from_old_and_hash(const struct delta_index *old,
                               struct unpacked_index_entry **hash,
                               unsigned int hsize,
                               unsigned int num_entries)
{
    unsigned int i, memsize, total_num_entries;
    struct unpacked_index_entry *entry;
    struct delta_index *index;
    struct index_entry *packed_entry, **packed_hash;
    struct index_entry *copy_from_start, *copy_to_start;
    size_t total_copy, to_copy;
    unsigned int num_ops;
    unsigned int bytes_copied;
    void *mem;
    /*
     * Now create the packed index in array form
     * rather than linked lists.
     */
    total_num_entries = num_entries + old->num_entries;
    memsize = sizeof(*index)
              + sizeof(*packed_hash) * (hsize+1)
              + sizeof(*packed_entry) * total_num_entries;
    mem = malloc(memsize);
    if (!mem)
        return NULL;

    index = mem;
    index->memsize = memsize;
    index->hash_mask = hsize - 1;
    index->num_entries = total_num_entries;

    mem = index->hash;
    packed_hash = mem;
    mem = packed_hash + (hsize+1);
    packed_entry = mem;

    total_copy = 0;
    bytes_copied = 0;
    num_ops = 0;
    copy_from_start = copy_to_start = NULL;
    for (i = 0; i < hsize; i++) {
        /*
         * Coalesce all entries belonging to one linked list
         * into consecutive array entries.
         * The entries in old all come before the entries in hash.
         */
        packed_hash[i] = packed_entry;
        to_copy = (old->hash[i+1] - old->hash[i]);
        if (to_copy > 0) {
            /* This next range can be copied wholesale. However, we will wait
             * until we have to insert something from the new data before we
             * actually do the copy.
             * So for now, just reserve space for the copy.
             */
            if (total_copy == 0) {
                copy_from_start = old->hash[i];
                copy_to_start = packed_entry;
            }
            packed_entry += to_copy;
            total_copy += to_copy;
            assert((packed_entry - copy_to_start) == total_copy);
            assert((old->hash[i+1] - copy_from_start) == total_copy);
        }
        for (entry = hash[i]; entry; entry = entry->next) {
            /* We have an entry to insert, so flush the copy buffer */
            if (total_copy > 0) {
                assert(copy_to_start != NULL);
                assert(copy_from_start != NULL);
                memcpy(copy_to_start, copy_from_start,
                       total_copy * sizeof(struct index_entry));
                bytes_copied += (total_copy * sizeof(struct index_entry));
                num_ops++;
                copy_from_start = copy_to_start = NULL;
                total_copy = 0;
            }
            *packed_entry++ = entry->entry;
        }
    }
    if (total_copy > 0) {
        assert(copy_to_start != NULL);
        assert(copy_from_start != NULL);
        memcpy(copy_to_start, copy_from_start,
               total_copy * sizeof(struct index_entry));
        bytes_copied += (total_copy * sizeof(struct index_entry));
        num_ops++;
    }

    /* Sentinel value to indicate the length of the last hash bucket */
    packed_hash[hsize] = packed_entry;

    assert(packed_entry - (struct index_entry *)mem == total_num_entries);
    index->last_entry = (packed_entry - 1);
    return index;
}

struct delta_index * create_delta_index(const struct source_info *src,
                                        const struct delta_index *old)
{
    unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
    unsigned int total_num_entries;
    const unsigned char *data, *buffer;
    struct delta_index *index;
    struct unpacked_index_entry *entry, **hash;
    void *mem;
    unsigned long memsize;

    if (!src->buf || !src->size)
        return NULL;
    buffer = src->buf;

    /* Determine index hash size. Note that indexing skips the
       first byte to allow for optimizing the Rabin's polynomial
       initialization in create_delta(). */
    num_entries = (src->size - 1) / RABIN_WINDOW;
    if (old != NULL)
        total_num_entries = num_entries + old->num_entries;
    else
        total_num_entries = num_entries;
    hsize = total_num_entries / 4;
    for (i = 4; (1u << i) < hsize && i < 31; i++);
    hsize = 1 << i;
    hmask = hsize - 1;

    /* allocate lookup index */
    memsize = sizeof(*hash) * hsize +
              sizeof(*entry) * total_num_entries;
    mem = malloc(memsize);
    if (!mem)
        return NULL;
    hash = mem;
    mem = hash + hsize;
    entry = mem;

    memset(hash, 0, hsize * sizeof(*hash));

    /* allocate an array to count hash num_entries */
    hash_count = calloc(hsize, sizeof(*hash_count));
    if (!hash_count) {
        free(hash);
        return NULL;
    }

    /* then populate the index for the new data */
    prev_val = ~0;
    for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
         data >= buffer;
         data -= RABIN_WINDOW) {
        unsigned int val = 0;
        for (i = 1; i <= RABIN_WINDOW; i++)
            val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
        if (val == prev_val) {
            /* keep the lowest of consecutive identical blocks */
            entry[-1].entry.ptr = data + RABIN_WINDOW;
            --num_entries;
            --total_num_entries;
        } else {
            prev_val = val;
            i = val & hmask;
            entry->entry.ptr = data + RABIN_WINDOW;
            entry->entry.val = val;
            entry->entry.src = src;
            entry->next = hash[i];
            hash[i] = entry++;
            hash_count[i]++;
        }
    }

    /* If we have an existing delta_index, we want to bring its info into the
     * new structure. We assume that the existing structure's objects all occur
     * before the new source, so they get first precedence in the index.
     */
    if (old != NULL)
        include_entries_from_index(hash, hash_count, hsize, entry, old);

    total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
                                           total_num_entries);
    free(hash_count);
    index = pack_delta_index(hash, hsize, total_num_entries);
    free(hash);
    if (!index)
        return NULL;
    index->last_src = src;
    return index;
}
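
/*
 * Illustrative usage, not in the original: chaining a second source onto
 * an existing index.  Field names follow their use in this file; the
 * surrounding buffers are assumed.
 *
 *     struct source_info base, more;
 *     struct delta_index *idx, *idx2;
 *     base.buf = base_data;  base.size = base_len;  base.agg_offset = 0;
 *     idx = create_delta_index(&base, NULL);
 *     more.buf = more_data;  more.size = more_len;
 *     more.agg_offset = base.size;   // global offset of this source
 *     idx2 = create_delta_index(&more, idx);
 */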

struct delta_index *
create_delta_index_from_delta(const struct source_info *src,
                              const struct delta_index *old)
{
    unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
    unsigned int total_num_entries;
    const unsigned char *data, *buffer, *top;
    unsigned char cmd;
    struct delta_index *index;
    struct unpacked_index_entry *entry, **hash;
    void *mem;
    unsigned long memsize;

    if (!src->buf || !src->size)
        return NULL;
    buffer = src->buf;
    top = buffer + src->size;

    /* Determine index hash size. Note that indexing skips the
       first byte to allow for optimizing the Rabin's polynomial
       initialization in create_delta().
       This computes the maximum number of entries that could be held. The
       actual number will be recomputed during processing.
       */
    num_entries = (src->size - 1) / RABIN_WINDOW;
    if (old != NULL)
        total_num_entries = num_entries + old->num_entries;
    else
        total_num_entries = num_entries;
    hsize = total_num_entries / 4;
    for (i = 4; (1u << i) < hsize && i < 31; i++);
    hsize = 1 << i;
    hmask = hsize - 1;

    /* allocate lookup index */
    memsize = sizeof(*hash) * hsize +
              sizeof(*entry) * total_num_entries;
    mem = malloc(memsize);
    if (!mem)
        return NULL;
    hash = mem;
    mem = hash + hsize;
    entry = mem;

    memset(hash, 0, hsize * sizeof(*hash));

    /* allocate an array to count hash num_entries */
    hash_count = calloc(hsize, sizeof(*hash_count));
    if (!hash_count) {
        free(hash);
        return NULL;
    }

    /* then populate the index for the new data */
    /* The create_delta_index code starts at the end and works backward,
     * because it is easier to update the entry pointers (you always update the
     * head, rather than updating the tail).
     * However, we can't walk the delta structure that way.
     */
    prev_val = ~0;
    data = buffer;
    /* source size */
    get_delta_hdr_size(&data, top);
    /* target size */
    get_delta_hdr_size(&data, top);
    num_entries = 0; /* calculate the real number of entries */
    while (data < top) {
        cmd = *data++;
        if (cmd & 0x80) {
            /* Copy instruction, skip it */
            if (cmd & 0x01) data++;
            if (cmd & 0x02) data++;
            if (cmd & 0x04) data++;
            if (cmd & 0x08) data++;
            if (cmd & 0x10) data++;
            if (cmd & 0x20) data++;
            if (cmd & 0x40) data++;
        } else if (cmd) {
            /* Insert instruction, we want to index these bytes */
            if (data + cmd > top) {
                /* Invalid insert, not enough bytes in the delta */
                break;
            }
            for (; cmd > RABIN_WINDOW; cmd -= RABIN_WINDOW,
                 data += RABIN_WINDOW) {
                unsigned int val = 0;
                for (i = 1; i <= RABIN_WINDOW; i++)
                    val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
                if (val != prev_val) {
                    /* Only keep the first of consecutive data */
                    prev_val = val;
                    num_entries++;
                    entry->entry.ptr = data + RABIN_WINDOW;
                    entry->entry.val = val;
                    entry->entry.src = src;
                    entry->next = NULL;
                    /* Now we have to insert this entry at the end of the hash
                     * map
                     */
                    i = val & hmask;
                    if (hash[i] == NULL) {
                        hash[i] = entry;
                    } else {
                        struct unpacked_index_entry *this;
                        for (this = hash[i]; this->next != NULL;
                             this = this->next) { /* No action */ }
                        this->next = entry;
                    }
                    hash_count[i]++;
                    entry++;
                }
            }
            /* Move the data pointer by whatever remainder is left */
            data += cmd;
        } else {
            /*
             * cmd == 0 is reserved for future encoding
             * extensions. In the mean time we must fail when
             * encountering them (might be data corruption).
             */
            break;
        }
    }
    if (data != top) {
        /* Something was wrong with this delta */
        free(hash);
        free(hash_count);
        return NULL;
    }
    if (num_entries == 0) {
        /** Nothing to index **/
        free(hash);
        free(hash_count);
        return NULL;
    }
    if (old != NULL) {
        if (hmask == old->hash_mask) {
            /* The total hash table size didn't change, which means that
             * entries will end up in the same pages. We can do bulk-copying to
             * get the final output
             */
            index = create_index_from_old_and_hash(old, hash, hsize,
                                                   num_entries);
            free(hash_count);
            free(hash);
            if (!index)
                return NULL;
            index->last_src = src;
            return index;
        } else {
            total_num_entries = num_entries + old->num_entries;
            include_entries_from_index(hash, hash_count, hsize, entry, old);
        }
    } else {
        total_num_entries = num_entries;
    }

    total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
                                           total_num_entries);
    free(hash_count);
    index = pack_delta_index(hash, hsize, total_num_entries);
    free(hash);
    if (!index)
        return NULL;
    index->last_src = src;
    return index;
}
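
/*
 * Illustrative sketch, not in the original: the stream walked above is
 * GIT's pack-style delta encoding.  After the two varint size headers, a
 * command byte with the high bit clear means "insert the next cmd literal
 * bytes"; with the high bit set, bits 0x01..0x08 select offset bytes and
 * 0x10..0x40 select size bytes of a copy.  For example:
 *
 *     0x03 'a' 'b' 'c'    insert 3 literal bytes
 *     0x91 0x10 0x05      0x91 = 0x80|0x01|0x10: one offset byte (0x10)
 *                         and one size byte (0x05) follow, i.e. copy 5
 *                         bytes from source offset 16
 */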

void free_delta_index(struct delta_index *index)
{
    free(index);
}

unsigned long sizeof_delta_index(struct delta_index *index)
{
    if (index)
        return index->memsize;
    else
        return 0;
}

/*
 * The maximum size for any opcode sequence, including the initial header
 * plus Rabin window plus biggest copy.
 */
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
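
/*
 * Breakdown (added note, inferred from the encoder below): 5 + 5 bytes
 * for the two varint size headers, 1 insert count byte, RABIN_WINDOW
 * literal bytes for the priming insert, and 7 bytes for the largest copy
 * op (1 command + 4 offset + 2 size bytes).
 */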

void *
create_delta(const struct delta_index *index,
             const void *trg_buf, unsigned long trg_size,
             unsigned long *delta_size, unsigned long max_size)
{
    unsigned int i, outpos, outsize, moff, msize, val;
    int inscnt;
    const struct source_info *msource;
    const unsigned char *ref_data, *ref_top, *data, *top;
    unsigned char *out;
    unsigned long source_size;

    if (!trg_buf || !trg_size)
        return NULL;
    if (index == NULL)
        return NULL;

    outpos = 0;
    outsize = 8192;
    if (max_size && outsize >= max_size)
        outsize = max_size + MAX_OP_SIZE + 1;
    out = malloc(outsize);
    if (!out)
        return NULL;

    /* store reference buffer size */
    source_size = index->last_src->size + index->last_src->agg_offset;
    i = source_size;
    while (i >= 0x80) {
        out[outpos++] = i | 0x80;
        i >>= 7;
    }
    out[outpos++] = i;

    /* store target buffer size */
    i = trg_size;
    while (i >= 0x80) {
        out[outpos++] = i | 0x80;
        i >>= 7;
    }
    out[outpos++] = i;

    data = trg_buf;
    top = (const unsigned char *) trg_buf + trg_size;

    /* Start the matching by filling out with a simple 'insert' instruction, of
     * the first RABIN_WINDOW bytes of the input.
     */
    outpos++; /* leave a byte for the insert command */
    val = 0;
    for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
        out[outpos++] = *data;
        val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
    }
    /* we are now setup with an insert of 'i' bytes and val contains the RABIN
     * hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
     * the input.
     */
    inscnt = i;

    moff = 0;
    msize = 0;
    msource = NULL;
    while (data < top) {
        if (msize < 4096) {
            /* we don't have a 'worthy enough' match yet, so let's look for
             * one.
             */
            struct index_entry *entry;
            /* Shift the window by one byte. */
            val ^= U[data[-RABIN_WINDOW]];
            val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
            i = val & index->hash_mask;
            /* TODO: When using multiple indexes like this, the hash tables
             *       mapping val => index_entry become less efficient.
             *       You end up getting a lot more collisions in the hash,
             *       which doesn't actually lead to a entry->val match.
             */
            for (entry = index->hash[i]; entry < index->hash[i+1];
                 entry++) {
                const unsigned char *ref;
                const unsigned char *src;
                unsigned int ref_size;
                if (entry->val != val)
                    continue;

                ref = entry->ptr;
                src = data;
                ref_data = entry->src->buf;
                ref_top = ref_data + entry->src->size;
                ref_size = ref_top - ref;
                /* ref_size is the longest possible match that we could make
                 * here. If ref_size <= msize, then we know that we cannot
                 * match more bytes with this location that we have already
                 * matched.
                 */
                if (ref_size > top - src)
                    ref_size = top - src;
                if (ref_size <= msize)
                    break;
                /* See how many bytes actually match at this location. */
                while (ref_size-- && *src++ == *ref)
                    ref++;
                if (msize < ref - entry->ptr) {
                    /* this is our best match so far */
                    msize = ref - entry->ptr;
                    msource = entry->src;
                    moff = entry->ptr - ref_data;
                    if (msize >= 4096) /* good enough */
                        break;
                }
            }
        }

        if (msize < 4) {
            /* The best match right now is less than 4 bytes long. So just add
             * the current byte to the insert instruction. Increment the insert
             * counter, and copy the byte of data into the output buffer.
             */
            if (!inscnt)
                outpos++;
            out[outpos++] = *data++;
            inscnt++;
            if (inscnt == 0x7f) {
                /* We have a max length insert instruction, finalize it in the
                 * output.
                 */
                out[outpos - inscnt - 1] = inscnt;
                inscnt = 0;
            }
            msize = 0;
        } else {
            unsigned int left;
            unsigned char *op;

            if (inscnt) {
                ref_data = msource->buf;
                while (moff && ref_data[moff-1] == data[-1]) {
                    /* we can match one byte back */
                    msize++;
                    moff--;
                    data--;
                    outpos--;
                    if (--inscnt)
                        continue;
                    outpos--; /* remove count slot */
                    inscnt--; /* make it -1 */
                    break;
                }
                out[outpos - inscnt - 1] = inscnt;
                inscnt = 0;
            }

            /* A copy op is currently limited to 64KB (pack v2) */
            left = (msize < 0x10000) ? 0 : (msize - 0x10000);
            msize -= left;

            op = out + outpos++;
            i = 0x80;

            /* moff is the offset in the local structure, for encoding, we need
             * to push it into the global offset
             */
            assert(moff < msource->size);
            moff += msource->agg_offset;
            assert(moff + msize <= source_size);
            if (moff & 0x000000ff)
                out[outpos++] = moff >> 0, i |= 0x01;
            if (moff & 0x0000ff00)
                out[outpos++] = moff >> 8, i |= 0x02;
            if (moff & 0x00ff0000)
                out[outpos++] = moff >> 16, i |= 0x04;
            if (moff & 0xff000000)
                out[outpos++] = moff >> 24, i |= 0x08;
            /* Put it back into local coordinates, in case we have multiple
             * copies in a row.
             */
            moff -= msource->agg_offset;

            if (msize & 0x00ff)
                out[outpos++] = msize >> 0, i |= 0x10;
            if (msize & 0xff00)
                out[outpos++] = msize >> 8, i |= 0x20;

            *op = i;

            data += msize;
            moff += msize;
            msize = left;

            if (msize < 4096) {
                int j;
                val = 0;
                for (j = -RABIN_WINDOW; j < 0; j++)
                    val = ((val << 8) | data[j])
                          ^ T[val >> RABIN_SHIFT];
            }
        }

        if (outpos >= outsize - MAX_OP_SIZE) {
            void *tmp = out;
            outsize = outsize * 3 / 2;
            if (max_size && outsize >= max_size)
                outsize = max_size + MAX_OP_SIZE + 1;
            if (max_size && outpos > max_size)
                break;
            out = realloc(out, outsize);
            if (!out) {
                free(tmp);
                return NULL;
            }
        }
    }

    if (inscnt)
        out[outpos - inscnt - 1] = inscnt;

    if (max_size && outpos > max_size) {
        free(out);
        return NULL;
    }

    *delta_size = outpos;
    return out;
}

/* vim: et ts=4 sw=4 sts=4 */
/* vim: et ts=4 sw=4 sts=4