2
* diff-delta.c: generate a delta between two buffers
4
* This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
* http://www.xmailserver.org/xdiff-lib.html
7
* Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
9
* This code is free software; you can redistribute it and/or modify
10
* it under the terms of the GNU General Public License version 2 as
11
* published by the Free Software Foundation.
14
#include "git-compat-util.h"
17
/* maximum hash entry list for the same hash bucket */
20
#define RABIN_SHIFT 23
21
#define RABIN_WINDOW 16
23
static const unsigned int T[256] = {
24
0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
25
0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
26
0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
27
0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
28
0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
29
0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
30
0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
31
0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
32
0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
33
0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
34
0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
35
0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
36
0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
37
0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
38
0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
39
0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
40
0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
41
0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
42
0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
43
0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
44
0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
45
0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
46
0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
47
0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
48
0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
49
0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
50
0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
51
0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
52
0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
53
0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
54
0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
55
0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
56
0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
57
0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
58
0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
59
0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
60
0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
61
0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
62
0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
63
0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
64
0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
65
0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
66
0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
69
static const unsigned int U[256] = {
70
0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
71
0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
72
0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
73
0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
74
0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
75
0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
76
0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
77
0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
78
0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
79
0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
80
0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
81
0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
82
0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
83
0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
84
0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
85
0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
86
0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
87
0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
88
0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
89
0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
90
0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
91
0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
92
0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
93
0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
94
0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
95
0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
96
0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
97
0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
98
0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
99
0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
100
0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
101
0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
102
0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
103
0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
104
0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
105
0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
106
0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
107
0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
108
0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
109
0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
110
0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
111
0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
112
0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
116
const unsigned char *ptr;
120
struct unpacked_index_entry {
121
struct index_entry entry;
122
struct unpacked_index_entry *next;
126
unsigned long memsize;
128
unsigned long src_size;
129
unsigned int hash_mask;
130
struct index_entry *hash[FLEX_ARRAY];
133
struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
135
unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
136
const unsigned char *data, *buffer = buf;
137
struct delta_index *index;
138
struct unpacked_index_entry *entry, **hash;
139
struct index_entry *packed_entry, **packed_hash;
141
unsigned long memsize;
143
if (!buf || !bufsize)
146
/* Determine index hash size. Note that indexing skips the
147
first byte to allow for optimizing the Rabin's polynomial
148
initialization in create_delta(). */
149
entries = (bufsize - 1) / RABIN_WINDOW;
151
for (i = 4; (1u << i) < hsize && i < 31; i++);
155
/* allocate lookup index */
156
memsize = sizeof(*hash) * hsize +
157
sizeof(*entry) * entries;
158
mem = malloc(memsize);
165
memset(hash, 0, hsize * sizeof(*hash));
167
/* allocate an array to count hash entries */
168
hash_count = calloc(hsize, sizeof(*hash_count));
174
/* then populate the index */
176
for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
178
data -= RABIN_WINDOW) {
179
unsigned int val = 0;
180
for (i = 1; i <= RABIN_WINDOW; i++)
181
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
182
if (val == prev_val) {
183
/* keep the lowest of consecutive identical blocks */
184
entry[-1].entry.ptr = data + RABIN_WINDOW;
189
entry->entry.ptr = data + RABIN_WINDOW;
190
entry->entry.val = val;
191
entry->next = hash[i];
198
* Determine a limit on the number of entries in the same hash
199
* bucket. This guards us against pathological data sets causing
200
* really bad hash distribution with most entries in the same hash
201
* bucket that would bring us to O(m*n) computing costs (m and n
202
* corresponding to reference and target buffer sizes).
204
* Make sure none of the hash buckets has more entries than
205
* we're willing to test. Otherwise we cull the entry list
206
* uniformly to still preserve a good repartition across
207
* the reference buffer.
209
for (i = 0; i < hsize; i++) {
212
if (hash_count[i] <= HASH_LIMIT)
215
/* We leave exactly HASH_LIMIT entries in the bucket */
216
entries -= hash_count[i] - HASH_LIMIT;
222
* Assume that this loop is gone through exactly
223
* HASH_LIMIT times and is entered and left with
224
* acc==0. So the first statement in the loop
225
* contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
226
* to the accumulator, and the inner loop consequently
227
* is run (hash_count[i]-HASH_LIMIT) times, removing
228
* one element from the list each time. Since acc
229
* balances out to 0 at the final run, the inner loop
230
* body can't be left with entry==NULL. So we indeed
231
* encounter entry==NULL in the outer loop only.
234
acc += hash_count[i] - HASH_LIMIT;
236
struct unpacked_index_entry *keep = entry;
241
keep->next = entry->next;
249
* Now create the packed index in array form
250
* rather than linked lists.
252
memsize = sizeof(*index)
253
+ sizeof(*packed_hash) * (hsize+1)
254
+ sizeof(*packed_entry) * entries;
255
mem = malloc(memsize);
262
index->memsize = memsize;
263
index->src_buf = buf;
264
index->src_size = bufsize;
265
index->hash_mask = hmask;
269
mem = packed_hash + (hsize+1);
272
for (i = 0; i < hsize; i++) {
274
* Coalesce all entries belonging to one linked list
275
* into consecutive array entries.
277
packed_hash[i] = packed_entry;
278
for (entry = hash[i]; entry; entry = entry->next)
279
*packed_entry++ = entry->entry;
282
/* Sentinel value to indicate the length of the last hash bucket */
283
packed_hash[hsize] = packed_entry;
285
assert(packed_entry - (struct index_entry *)mem == entries);
291
void free_delta_index(struct delta_index *index)
296
unsigned long sizeof_delta_index(struct delta_index *index)
299
return index->memsize;
305
* The maximum size for any opcode sequence, including the initial header
306
* plus Rabin window plus biggest copy.
308
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
311
create_delta(const struct delta_index *index,
312
const void *trg_buf, unsigned long trg_size,
313
unsigned long *delta_size, unsigned long max_size)
315
unsigned int i, outpos, outsize, moff, msize, val;
317
const unsigned char *ref_data, *ref_top, *data, *top;
320
if (!trg_buf || !trg_size)
325
if (max_size && outsize >= max_size)
326
outsize = max_size + MAX_OP_SIZE + 1;
327
out = malloc(outsize);
331
/* store reference buffer size */
334
out[outpos++] = i | 0x80;
339
/* store target buffer size */
342
out[outpos++] = i | 0x80;
347
ref_data = index->src_buf;
348
ref_top = ref_data + index->src_size;
350
top = (const unsigned char *) trg_buf + trg_size;
354
for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
355
out[outpos++] = *data;
356
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
364
struct index_entry *entry;
365
val ^= U[data[-RABIN_WINDOW]];
366
val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
367
i = val & index->hash_mask;
368
for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
369
const unsigned char *ref = entry->ptr;
370
const unsigned char *src = data;
371
unsigned int ref_size = ref_top - ref;
372
if (entry->val != val)
374
if (ref_size > top - src)
375
ref_size = top - src;
376
if (ref_size <= msize)
378
while (ref_size-- && *src++ == *ref)
380
if (msize < ref - entry->ptr) {
381
/* this is our best match so far */
382
msize = ref - entry->ptr;
383
moff = entry->ptr - ref_data;
384
if (msize >= 4096) /* good enough */
393
out[outpos++] = *data++;
395
if (inscnt == 0x7f) {
396
out[outpos - inscnt - 1] = inscnt;
405
while (moff && ref_data[moff-1] == data[-1]) {
406
/* we can match one byte back */
413
outpos--; /* remove count slot */
414
inscnt--; /* make it -1 */
417
out[outpos - inscnt - 1] = inscnt;
421
/* A copy op is currently limited to 64KB (pack v2) */
422
left = (msize < 0x10000) ? 0 : (msize - 0x10000);
428
if (moff & 0x000000ff)
429
out[outpos++] = moff >> 0, i |= 0x01;
430
if (moff & 0x0000ff00)
431
out[outpos++] = moff >> 8, i |= 0x02;
432
if (moff & 0x00ff0000)
433
out[outpos++] = moff >> 16, i |= 0x04;
434
if (moff & 0xff000000)
435
out[outpos++] = moff >> 24, i |= 0x08;
438
out[outpos++] = msize >> 0, i |= 0x10;
440
out[outpos++] = msize >> 8, i |= 0x20;
451
for (j = -RABIN_WINDOW; j < 0; j++)
452
val = ((val << 8) | data[j])
453
^ T[val >> RABIN_SHIFT];
457
if (outpos >= outsize - MAX_OP_SIZE) {
459
outsize = outsize * 3 / 2;
460
if (max_size && outsize >= max_size)
461
outsize = max_size + MAX_OP_SIZE + 1;
462
if (max_size && outpos > max_size)
464
out = realloc(out, outsize);
473
out[outpos - inscnt - 1] = inscnt;
475
if (max_size && outpos > max_size) {
480
*delta_size = outpos;