4
4
* This code was greatly inspired by parts of LibXDiff from Davide Libenzi
5
5
* http://www.xmailserver.org/xdiff-lib.html
7
* Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
7
* Rewritten for GIT by Nicolas Pitre <nico@fluxnic.net>, (C) 2005-2007
8
8
* Adapted for Bazaar by John Arbash Meinel <john@arbash-meinel.com> (C) 2009
10
* This code is free software; you can redistribute it and/or modify
11
* it under the terms of the GNU General Public License version 2 as
12
* published by the Free Software Foundation.
10
* This program is free software; you can redistribute it and/or modify
11
* it under the terms of the GNU General Public License as published by
12
* the Free Software Foundation; either version 2 of the License, or
13
* (at your option) any later version.
15
* NB: The version in GIT is 'version 2 of the Licence only', however Nicolas
16
* has granted permission for use under 'version 2 or later' in private email
17
* to Robert Collins and Karl Fogel on the 6th April 2009.
275
280
if (fit_in_old) {
276
281
// fprintf(stderr, "Fit all %d entries into old index\n",
277
282
// copied_count);
278
/* No need to allocate a new buffer */
284
* No need to allocate a new buffer, but return old_index ptr so
285
* callers can distinguish this from an OOM failure.
281
289
// fprintf(stderr, "Fit only %d entries into old index,"
282
290
// " reallocating\n", copied_count);
369
377
create_delta_index(const struct source_info *src,
370
struct delta_index *old)
378
struct delta_index *old,
379
struct delta_index **fresh,
380
int max_bytes_to_index)
372
382
unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
373
unsigned int total_num_entries;
383
unsigned int total_num_entries, stride, max_entries;
374
384
const unsigned char *data, *buffer;
375
385
struct delta_index *index;
376
386
struct unpacked_index_entry *entry, **hash;
378
388
unsigned long memsize;
380
390
if (!src->buf || !src->size)
391
return DELTA_SOURCE_EMPTY;
382
392
buffer = src->buf;
384
394
/* Determine index hash size. Note that indexing skips the
385
first byte to allow for optimizing the Rabin's polynomial
386
initialization in create_delta(). */
395
first byte so we subtract 1 to get the edge cases right.
397
stride = RABIN_WINDOW;
387
398
num_entries = (src->size - 1) / RABIN_WINDOW;
399
if (max_bytes_to_index > 0) {
400
max_entries = (unsigned int) (max_bytes_to_index / RABIN_WINDOW);
401
if (num_entries > max_entries) {
402
/* Limit the max number of matching entries. This reduces the 'best'
403
* possible match, but means we don't consume all of ram.
405
num_entries = max_entries;
406
stride = (src->size - 1) / num_entries;
389
410
total_num_entries = num_entries + old->num_entries;
414
435
hash_count = calloc(hsize, sizeof(*hash_count));
415
436
if (!hash_count) {
438
return DELTA_OUT_OF_MEMORY;
420
441
/* then populate the index for the new data */
422
for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
443
for (data = buffer + num_entries * stride - RABIN_WINDOW;
424
data -= RABIN_WINDOW) {
425
446
unsigned int val = 0;
426
447
for (i = 1; i <= RABIN_WINDOW; i++)
427
448
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
445
466
total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
446
467
total_num_entries);
447
468
free(hash_count);
451
469
index = pack_delta_index(hash, hsize, total_num_entries, old);
471
/* pack_delta_index only returns NULL on malloc failure */
473
return DELTA_OUT_OF_MEMORY;
456
475
index->last_src = src;
460
480
/* Take some entries, and put them into a custom hash.
468
488
unsigned int hsize)
470
490
unsigned int hash_offset, hmask, memsize;
471
struct index_entry *entry, *last_entry;
491
struct index_entry *entry;
472
492
struct index_entry_linked_list *out_entry, **hash;
488
508
/* We know that entries are in the order we want in the output, but they
489
509
* aren't "grouped" by hash bucket yet.
491
last_entry = entries + num_entries;
492
511
for (entry = entries + num_entries - 1; entry >= entries; --entry) {
493
512
hash_offset = entry->val & hmask;
494
513
out_entry->p_entry = entry;
513
532
unsigned int i, j, hsize, hmask, total_num_entries;
514
533
struct delta_index *index;
515
534
struct index_entry *entry, *packed_entry, **packed_hash;
516
struct index_entry *last_entry, null_entry = {0};
535
struct index_entry null_entry = {0};
518
537
unsigned long memsize;
519
538
struct index_entry_linked_list *unpacked_entry, **mini_hash;
678
696
create_delta_index_from_delta(const struct source_info *src,
679
struct delta_index *old_index)
697
struct delta_index *old_index,
698
struct delta_index **fresh)
681
700
unsigned int i, num_entries, max_num_entries, prev_val, num_inserted;
682
701
unsigned int hash_offset;
683
702
const unsigned char *data, *buffer, *top;
684
703
unsigned char cmd;
685
704
struct delta_index *new_index;
686
struct index_entry *entry, *entries, *old_entry;
705
struct index_entry *entry, *entries;
708
return DELTA_INDEX_NEEDED;
688
709
if (!src->buf || !src->size)
710
return DELTA_SOURCE_EMPTY;
690
711
buffer = src->buf;
691
712
top = buffer + src->size;
700
721
max_num_entries = (src->size - 1) / RABIN_WINDOW;
723
if (!max_num_entries) {
702
728
/* allocate an array to hold whatever entries we find */
703
729
entries = malloc(sizeof(*entry) * max_num_entries);
704
730
if (!entries) /* malloc failure */
731
return DELTA_OUT_OF_MEMORY;
707
733
/* then populate the index for the new data */
710
736
/* target size */
711
get_delta_hdr_size(&data, top);
737
/* get_delta_hdr_size doesn't mutate the content, just moves the
738
* start-of-data pointer, so it is safe to do the cast.
740
get_delta_hdr_size((unsigned char**)&data, top);
712
741
entry = entries; /* start at the first slot */
713
742
num_entries = 0; /* calculate the real number of entries */
714
743
while (data < top) {
768
797
if (data != top) {
769
/* Something was wrong with this delta */
798
/* The source_info data passed was corrupted or otherwise invalid */
800
return DELTA_SOURCE_BAD;
773
802
if (num_entries == 0) {
774
803
/** Nothing to index **/
778
assert(old_index != NULL);
779
808
old_index->last_src = src;
780
809
/* See if we can fill in these values into the holes in the array */
782
811
num_inserted = 0;
783
812
for (; num_entries > 0; --num_entries, ++entry) {
813
struct index_entry *next_bucket_entry, *cur_entry, *bucket_first_entry;
784
814
hash_offset = (entry->val & old_index->hash_mask);
785
815
/* The basic structure is a hash => packed_entries that fit in that
786
816
* hash bucket. Things are structured such that the hash-pointers are
789
819
* forward. If there are no NULL targets, then we know because
790
820
* entry->ptr will not be NULL.
792
old_entry = old_index->hash[hash_offset + 1];
794
while (old_entry->ptr == NULL
795
&& old_entry >= old_index->hash[hash_offset]) {
822
// The start of the next bucket, this may point past the end of the
823
// entry table if hash_offset is the last bucket.
824
next_bucket_entry = old_index->hash[hash_offset + 1];
825
// First entry in this bucket
826
bucket_first_entry = old_index->hash[hash_offset];
827
cur_entry = next_bucket_entry - 1;
828
while (cur_entry->ptr == NULL && cur_entry >= bucket_first_entry) {
799
if (old_entry->ptr != NULL
800
|| old_entry >= old_index->hash[hash_offset + 1]) {
831
// cur_entry now either points at the first NULL, or it points to
832
// next_bucket_entry if there were no blank spots.
834
if (cur_entry >= next_bucket_entry || cur_entry->ptr != NULL) {
801
835
/* There is no room for this entry, we have to resize */
802
836
// char buff[128];
803
837
// get_text(buff, entry->ptr);
828
862
new_index = create_index_from_old_and_new_entries(old_index,
829
863
entry, num_entries);
865
new_index = old_index;
832
866
// fprintf(stderr, "inserted %d without resizing\n", num_inserted);
869
/* create_index_from_old_and_new_entries returns NULL on malloc failure */
871
return DELTA_OUT_OF_MEMORY;
838
876
void free_delta_index(struct delta_index *index)
855
894
#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
858
897
create_delta(const struct delta_index *index,
859
898
const void *trg_buf, unsigned long trg_size,
860
unsigned long *delta_size, unsigned long max_size)
899
unsigned long *delta_size, unsigned long max_size,
862
unsigned int i, outpos, outsize, moff, msize, val;
902
unsigned int i, outpos, outsize, moff, val;
863
904
const struct source_info *msource;
865
906
const unsigned char *ref_data, *ref_top, *data, *top;
943
984
* match more bytes with this location that we have already
946
if (ref_size > top - src)
987
if (ref_size > (top - src))
947
988
ref_size = top - src;
948
989
if (ref_size <= msize)
950
991
/* See how many bytes actually match at this location. */
951
992
while (ref_size-- && *src++ == *ref)
953
if (msize < ref - entry->ptr) {
994
if (msize < (ref - entry->ptr)) {
954
995
/* this is our best match so far */
955
996
msize = ref - entry->ptr;
956
997
msource = entry->src;
1067
1108
if (max_size && outpos > max_size) {
1110
return DELTA_SIZE_TOO_BIG;
1072
1113
*delta_size = outpos;
1120
get_entry_summary(const struct delta_index *index, int pos,
1121
unsigned int *text_offset, unsigned int *hash_val)
1124
const struct index_entry *entry;
1125
const struct index_entry *start_of_entries;
1126
unsigned int offset;
1127
if (pos < 0 || text_offset == NULL || hash_val == NULL
1132
hsize = index->hash_mask + 1;
1133
start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1134
entry = start_of_entries + pos;
1135
if (entry > index->last_entry) {
1138
if (entry->ptr == NULL) {
1142
offset = entry->src->agg_offset;
1143
offset += (entry->ptr - ((unsigned char *)entry->src->buf));
1144
*text_offset = offset;
1145
*hash_val = entry->val;
1152
get_hash_offset(const struct delta_index *index, int pos,
1153
unsigned int *entry_offset)
1156
const struct index_entry *entry;
1157
const struct index_entry *start_of_entries;
1158
if (pos < 0 || index == NULL || entry_offset == NULL)
1162
hsize = index->hash_mask + 1;
1163
start_of_entries = (struct index_entry *)(((struct index_entry **)index->hash) + (hsize + 1));
1167
entry = index->hash[pos];
1168
if (entry == NULL) {
1171
*entry_offset = (entry - start_of_entries);
1178
rabin_hash(const unsigned char *data)
1181
unsigned int val = 0;
1182
for (i = 0; i < RABIN_WINDOW; i++)
1183
val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
1076
1187
/* vim: et ts=4 sw=4 sts=4