~bzr-pqm/bzr/bzr.dev : contents of diff-delta.c at revision 0.17.44

~bzr-pqm/bzr/bzr.dev : (revision 0.17.44)

0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	1	/*
	2	* diff-delta.c: generate a delta between two buffers
	3	*
	4	* This code was greatly inspired by parts of LibXDiff from Davide Libenzi
	5	* http://www.xmailserver.org/xdiff-lib.html
	6	*
	7	* Rewritten for GIT by Nicolas Pitre <nico@cam.org>, (C) 2005-2007
0.17.31 by John Arbash Meinel Bring in the 'rabin' experiment.	8	* Adapted for Bazaar by John Arbash Meinel <john@arbash-meinel.com> (C) 2009
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	9	*
	10	* This code is free software; you can redistribute it and/or modify
	11	* it under the terms of the GNU General Public License version 2 as
	12	* published by the Free Software Foundation.
	13	*/
	14
	15	#include "delta.h"
0.17.31 by John Arbash Meinel Bring in the 'rabin' experiment.	16	#include <stdlib.h>
	17	#include <string.h>
0.23.5 by John Arbash Meinel Minor changes to get diff-delta.c and patch-delta.c to compile.	18	#include <assert.h>
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	19
	20	/* maximum hash entry list for the same hash bucket */
	21	#define HASH_LIMIT 64
	22
	23	#define RABIN_SHIFT 23
	24	#define RABIN_WINDOW 16
	25
	26	static const unsigned int T[256] = {
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	27	0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
	28	0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
	29	0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
	30	0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
	31	0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
	32	0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
	33	0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
	34	0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
	35	0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
	36	0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
	37	0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
	38	0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
	39	0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
	40	0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
	41	0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
	42	0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
	43	0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
	44	0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
	45	0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
	46	0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
	47	0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
	48	0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
	49	0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
	50	0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
	51	0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
	52	0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
	53	0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
	54	0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
	55	0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
	56	0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
	57	0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
	58	0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
	59	0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
	60	0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
	61	0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
	62	0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
	63	0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
	64	0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
	65	0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
	66	0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
	67	0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
	68	0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
	69	0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	70	};
	71
	72	static const unsigned int U[256] = {
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	73	0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
	74	0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
	75	0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
	76	0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
	77	0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
	78	0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
	79	0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
	80	0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
	81	0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
	82	0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
	83	0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
	84	0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
	85	0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
	86	0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
	87	0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
	88	0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
	89	0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
	90	0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
	91	0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
	92	0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
	93	0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
	94	0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
	95	0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
	96	0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
	97	0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
	98	0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
	99	0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
	100	0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
	101	0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
	102	0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
	103	0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
	104	0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
	105	0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
	106	0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
	107	0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
	108	0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
	109	0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
	110	0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
	111	0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
	112	0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
	113	0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
	114	0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
	115	0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	116	};
	117
	118	struct index_entry {
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	119	const unsigned char *ptr;
	120	const struct source_info *src;
	121	unsigned int val;
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	122	};
	123
	124	struct unpacked_index_entry {
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	125	struct index_entry entry;
	126	struct unpacked_index_entry *next;
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	127	};
	128
	129	struct delta_index {
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	130	unsigned long memsize; /* Total bytes pointed to by this index */
	131	const struct source_info last_src; / Information about the referenced source */
	132	unsigned int hash_mask; /* val & hash_mask gives the hash index for a given
	133	entry */
	134	unsigned int num_entries; /* The total number of entries in this index */
	135	struct index_entry last_entry; / Pointer to the last valid entry */
	136	struct index_entry *hash[];
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	137	};
	138
0.23.32 by John Arbash Meinel Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.	139	static unsigned int
	140	limit_hash_buckets(struct unpacked_index_entry **hash,
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	141	unsigned int *hash_count, unsigned int hsize,
	142	unsigned int entries)
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	143	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	144	struct unpacked_index_entry *entry;
	145	unsigned int i;
	146	/*
	147	* Determine a limit on the number of entries in the same hash
	148	* bucket. This guards us against pathological data sets causing
	149	* really bad hash distribution with most entries in the same hash
	150	* bucket that would bring us to O(m*n) computing costs (m and n
	151	* corresponding to reference and target buffer sizes).
	152	*
	153	* Make sure none of the hash buckets has more entries than
	154	* we're willing to test. Otherwise we cull the entry list
	155	* uniformly to still preserve a good repartition across
	156	* the reference buffer.
	157	*/
	158	for (i = 0; i < hsize; i++) {
	159	int acc;
	160
	161	if (hash_count[i] <= HASH_LIMIT)
	162	continue;
	163
	164	/* We leave exactly HASH_LIMIT entries in the bucket */
	165	entries -= hash_count[i] - HASH_LIMIT;
	166
	167	entry = hash[i];
	168	acc = 0;
	169
	170	/*
	171	* Assume that this loop is gone through exactly
	172	* HASH_LIMIT times and is entered and left with
	173	* acc==0. So the first statement in the loop
	174	* contributes (hash_count[i]-HASH_LIMIT)*HASH_LIMIT
	175	* to the accumulator, and the inner loop consequently
	176	* is run (hash_count[i]-HASH_LIMIT) times, removing
	177	* one element from the list each time. Since acc
	178	* balances out to 0 at the final run, the inner loop
	179	* body can't be left with entry==NULL. So we indeed
	180	* encounter entry==NULL in the outer loop only.
	181	*/
	182	do {
	183	acc += hash_count[i] - HASH_LIMIT;
	184	if (acc > 0) {
	185	struct unpacked_index_entry *keep = entry;
	186	do {
	187	entry = entry->next;
	188	acc -= HASH_LIMIT;
	189	} while (acc > 0);
	190	keep->next = entry->next;
	191	}
	192	entry = entry->next;
	193	} while (entry);
	194	}
	195	return entries;
0.23.32 by John Arbash Meinel Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.	196	}
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	197
0.23.32 by John Arbash Meinel Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.	198	static struct delta_index *
	199	pack_delta_index(struct unpacked_index_entry **hash, unsigned int hsize,
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	200	unsigned int num_entries)
0.23.32 by John Arbash Meinel Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.	201	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	202	unsigned int i, memsize;
	203	struct unpacked_index_entry *entry;
	204	struct delta_index *index;
	205	struct index_entry packed_entry, *packed_hash;
	206	void *mem;
	207	/*
	208	* Now create the packed index in array form
	209	* rather than linked lists.
	210	*/
	211	memsize = sizeof(*index)
	212	+ sizeof(packed_hash) (hsize+1)
	213	+ sizeof(packed_entry) num_entries;
	214	mem = malloc(memsize);
	215	if (!mem) {
	216	return NULL;
	217	}
	218
	219	index = mem;
	220	index->memsize = memsize;
	221	index->hash_mask = hsize - 1;
	222	index->num_entries = num_entries;
	223
	224	mem = index->hash;
	225	packed_hash = mem;
	226	mem = packed_hash + (hsize+1);
	227	packed_entry = mem;
	228
	229	for (i = 0; i < hsize; i++) {
	230	/*
	231	* Coalesce all entries belonging to one linked list
	232	* into consecutive array entries.
	233	*/
	234	packed_hash[i] = packed_entry;
	235	for (entry = hash[i]; entry; entry = entry->next)
	236	*packed_entry++ = entry->entry;
	237	}
	238
	239	/* Sentinel value to indicate the length of the last hash bucket */
	240	packed_hash[hsize] = packed_entry;
	241
	242	assert(packed_entry - (struct index_entry *)mem == num_entries);
	243	index->last_entry = (packed_entry - 1);
	244	return index;
0.23.32 by John Arbash Meinel Refactor the code a bit, so that I can re-use bits for a create_delta_index_from_delta.	245	}
	246
0.23.43 by John Arbash Meinel Change the internals to allow delta indexes to be expanded with new source data.	247	void include_entries_from_index(struct unpacked_index_entry **hash,
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	248	unsigned int *hash_count,
	249	unsigned int hsize,
	250	struct unpacked_index_entry *entry,
	251	const struct delta_index *old)
0.23.43 by John Arbash Meinel Change the internals to allow delta indexes to be expanded with new source data.	252	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	253	unsigned int i, hmask, masked_val;
	254	struct index_entry *old_entry;
0.23.43 by John Arbash Meinel Change the internals to allow delta indexes to be expanded with new source data.	255
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	256	hmask = hsize - 1;
	257	/* Iterate backwards through the existing entries
	258	* This is so that matches early in the file end up being the first pointer
	259	* in the linked list.
	260	* TODO: We know that all old entries are going to occur before the new
	261	* entries, and that we are going to follow this with a limit and
	262	* then pack step. There is probably a way we could optimize this
	263	* step, so that we wouldn't have to copy everything into a new
	264	* buffer, and then copy everything again into yet another buffer.
	265	*/
	266	for (old_entry = old->last_entry, i = 0; i < old->num_entries;
	267	i++, old_entry--) {
	268	masked_val = old_entry->val & hmask;
	269	entry->entry = *old_entry;
	270	entry->next = hash[masked_val];
	271	hash[masked_val] = entry++;
	272	hash_count[masked_val]++;
	273	}
0.23.43 by John Arbash Meinel Change the internals to allow delta indexes to be expanded with new source data.	274	}
	275
0.23.49 by John Arbash Meinel When adding new entries to the delta index, use memcpy	276	struct delta_index *
	277	create_index_from_old_and_hash(const struct delta_index *old,
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	278	struct unpacked_index_entry **hash,
	279	unsigned int hsize,
	280	unsigned int num_entries)
0.23.49 by John Arbash Meinel When adding new entries to the delta index, use memcpy	281	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	282	unsigned int i, memsize, total_num_entries;
	283	struct unpacked_index_entry *entry;
	284	struct delta_index *index;
	285	struct index_entry packed_entry, *packed_hash;
	286	struct index_entry copy_from_start, copy_to_start;
	287	size_t total_copy, to_copy;
	288	unsigned int num_ops;
	289	unsigned int bytes_copied;
	290	void *mem;
	291	/*
	292	* Now create the packed index in array form
	293	* rather than linked lists.
	294	*/
	295	total_num_entries = num_entries + old->num_entries;
	296	memsize = sizeof(*index)
	297	+ sizeof(packed_hash) (hsize+1)
	298	+ sizeof(packed_entry) total_num_entries;
	299	mem = malloc(memsize);
	300	if (!mem) {
	301	return NULL;
	302	}
	303
	304	index = mem;
	305	index->memsize = memsize;
	306	index->hash_mask = hsize - 1;
	307	index->num_entries = total_num_entries;
	308
	309	mem = index->hash;
	310	packed_hash = mem;
	311	mem = packed_hash + (hsize+1);
	312	packed_entry = mem;
	313
	314	total_copy = 0;
	315	bytes_copied = 0;
	316	num_ops = 0;
	317	copy_from_start = copy_to_start = NULL;
	318	for (i = 0; i < hsize; i++) {
	319	/*
	320	* Coalesce all entries belonging to one linked list
	321	* into consecutive array entries.
	322	* The entries in old all come before the entries in hash.
	323	*/
	324	packed_hash[i] = packed_entry;
	325	to_copy = (old->hash[i+1] - old->hash[i]);
	326	if (to_copy > 0) {
	327	/* This next range can be copied wholesale. However, we will wait
	328	* until we have to insert something from the new data before we
	329	* copy everything.
	330	* So for now, just reserve space for the copy.
	331	*/
	332	if (total_copy == 0) {
	333	copy_from_start = old->hash[i];
	334	copy_to_start = packed_entry;
	335	}
	336	packed_entry += to_copy;
	337	total_copy += to_copy;
	338	assert((packed_entry - copy_to_start) == total_copy);
	339	assert((old->hash[i+1] - copy_from_start) == total_copy);
	340	}
	341	for (entry = hash[i]; entry; entry = entry->next) {
	342	/* We have an entry to insert, so flush the copy buffer */
	343	if (total_copy > 0) {
	344	assert(copy_to_start != NULL);
	345	assert(copy_from_start != NULL);
346	memcpy(copy_to_start, copy_from_start,
347	total_copy * sizeof(struct index_entry));
348	bytes_copied += (total_copy * sizeof(struct index_entry));
349	total_copy = 0;
350	copy_from_start = copy_to_start = NULL;
351	num_ops += 1;
352	}
353	*packed_entry++ = entry->entry;
354	}
355	}
356	if (total_copy > 0) {
357	assert(copy_to_start != NULL);
358	assert(copy_from_start != NULL);
359	memcpy(copy_to_start, copy_from_start,
360	total_copy * sizeof(struct index_entry));
361	bytes_copied += (total_copy * sizeof(struct index_entry));
362	num_ops += 1;
363	}
364
365	/* Sentinel value to indicate the length of the last hash bucket */
366	packed_hash[hsize] = packed_entry;
367
368	assert(packed_entry - (struct index_entry *)mem == total_num_entries);
369	index->last_entry = (packed_entry - 1);
370	return index;
0.23.49 by John Arbash Meinel When adding new entries to the delta index, use memcpy	371	}
	372
0.23.43 by John Arbash Meinel Change the internals to allow delta indexes to be expanded with new source data.	373
	374	struct delta_index * create_delta_index(const struct source_info *src,
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	375	const struct delta_index *old)
0.23.43 by John Arbash Meinel Change the internals to allow delta indexes to be expanded with new source data.	376	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	377	unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
	378	unsigned int total_num_entries;
	379	const unsigned char data, buffer;
	380	struct delta_index *index;
	381	struct unpacked_index_entry entry, *hash;
	382	void *mem;
	383	unsigned long memsize;
	384
	385	if (!src->buf \|\| !src->size)
	386	return NULL;
	387	buffer = src->buf;
	388
	389	/* Determine index hash size. Note that indexing skips the
	390	first byte to allow for optimizing the Rabin's polynomial
	391	initialization in create_delta(). */
	392	num_entries = (src->size - 1) / RABIN_WINDOW;
	393	if (old != NULL)
	394	total_num_entries = num_entries + old->num_entries;
	395	else
	396	total_num_entries = num_entries;
	397	hsize = total_num_entries / 4;
	398	for (i = 4; (1u << i) < hsize && i < 31; i++);
	399	hsize = 1 << i;
	400	hmask = hsize - 1;
	401
	402	/* allocate lookup index */
	403	memsize = sizeof(hash) hsize +
	404	sizeof(entry) total_num_entries;
	405	mem = malloc(memsize);
	406	if (!mem)
	407	return NULL;
	408	hash = mem;
	409	mem = hash + hsize;
	410	entry = mem;
	411
	412	memset(hash, 0, hsize * sizeof(*hash));
	413
	414	/* allocate an array to count hash num_entries */
	415	hash_count = calloc(hsize, sizeof(*hash_count));
	416	if (!hash_count) {
	417	free(hash);
	418	return NULL;
	419	}
	420
	421	/* then populate the index for the new data */
	422	prev_val = ~0;
	423	for (data = buffer + num_entries * RABIN_WINDOW - RABIN_WINDOW;
	424	data >= buffer;
	425	data -= RABIN_WINDOW) {
	426	unsigned int val = 0;
	427	for (i = 1; i <= RABIN_WINDOW; i++)
	428	val = ((val << 8) \| data[i]) ^ T[val >> RABIN_SHIFT];
	429	if (val == prev_val) {
	430	/* keep the lowest of consecutive identical blocks */
	431	entry[-1].entry.ptr = data + RABIN_WINDOW;
	432	--num_entries;
	433	--total_num_entries;
	434	} else {
	435	prev_val = val;
	436	i = val & hmask;
	437	entry->entry.ptr = data + RABIN_WINDOW;
	438	entry->entry.val = val;
	439	entry->entry.src = src;
	440	entry->next = hash[i];
441	hash[i] = entry++;
442	hash_count[i]++;
443	}
444	}
445	/* If we have an existing delta_index, we want to bring its info into the
446	* new structure. We assume that the existing structure's objects all occur
447	* before the new source, so they get first precedence in the index.
448	*/
449	if (old != NULL)
450	include_entries_from_index(hash, hash_count, hsize, entry, old);
451
452	total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
453	total_num_entries);
454	free(hash_count);
455	index = pack_delta_index(hash, hsize, total_num_entries);
456	index->last_src = src;
457	free(hash);
458	if (!index) {
459	return NULL;
460	}
461	return index;
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	462	}
	463
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	464	struct delta_index *
	465	create_delta_index_from_delta(const struct source_info *src,
	466	const struct delta_index *old)
0.23.45 by John Arbash Meinel Add a function that updates the index for delta bytes.	467	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	468	unsigned int i, hsize, hmask, num_entries, prev_val, *hash_count;
	469	unsigned int total_num_entries;
	470	const unsigned char data, buffer, *top;
	471	unsigned char cmd;
	472	struct delta_index *index;
	473	struct unpacked_index_entry entry, *hash;
	474	void *mem;
	475	unsigned long memsize;
	476
	477	if (!src->buf \|\| !src->size)
	478	return NULL;
	479	buffer = src->buf;
	480	top = buffer + src->size;
	481
	482	/* Determine index hash size. Note that indexing skips the
	483	first byte to allow for optimizing the Rabin's polynomial
	484	initialization in create_delta().
	485	This computes the maximum number of entries that could be held. The
	486	actual number will be recomputed during processing.
	487	*/
	488
	489	num_entries = (src->size - 1) / RABIN_WINDOW;
	490	if (old != NULL)
	491	total_num_entries = num_entries + old->num_entries;
	492	else
	493	total_num_entries = num_entries;
	494	hsize = total_num_entries / 4;
	495	for (i = 4; (1u << i) < hsize && i < 31; i++);
	496	hsize = 1 << i;
	497	hmask = hsize - 1;
	498
	499	/* allocate lookup index */
	500	memsize = sizeof(hash) hsize +
	501	sizeof(entry) total_num_entries;
	502	mem = malloc(memsize);
	503	if (!mem)
	504	return NULL;
	505	hash = mem;
	506	mem = hash + hsize;
	507	entry = mem;
	508
	509	memset(hash, 0, hsize * sizeof(*hash));
	510
	511	/* allocate an array to count hash num_entries */
	512	hash_count = calloc(hsize, sizeof(*hash_count));
	513	if (!hash_count) {
	514	free(hash);
	515	return NULL;
	516	}
	517
	518	/* then populate the index for the new data */
	519	/* The create_delta_index code starts at the end and works backward,
	520	* because it is easier to update the entry pointers (you always update the
	521	* head, rather than updating the tail).
	522	* However, we can't walk the delta structure that way.
	523	*/
	524	prev_val = ~0;
	525	data = buffer;
	526	/* source size */
	527	get_delta_hdr_size(&data, top);
	528	/* target size */
	529	get_delta_hdr_size(&data, top);
	530	num_entries = 0; /* calculate the real number of entries */
	531	while (data < top) {
532	cmd = *data++;
533	if (cmd & 0x80) {
534	/* Copy instruction, skip it */
535	if (cmd & 0x01) data++;
536	if (cmd & 0x02) data++;
537	if (cmd & 0x04) data++;
538	if (cmd & 0x08) data++;
539	if (cmd & 0x10) data++;
540	if (cmd & 0x20) data++;
541	if (cmd & 0x40) data++;
542	} else if (cmd) {
543	/* Insert instruction, we want to index these bytes */
544	if (data + cmd > top) {
545	/* Invalid insert, not enough bytes in the delta */
546	break;
547	}
548	for (; cmd > RABIN_WINDOW; cmd -= RABIN_WINDOW,
549	data += RABIN_WINDOW) {
550	unsigned int val = 0;
551	for (i = 1; i <= RABIN_WINDOW; i++)
552	val = ((val << 8) \| data[i]) ^ T[val >> RABIN_SHIFT];
553	if (val != prev_val) {
554	/* Only keep the first of consecutive data */
555	prev_val = val;
556	i = val & hmask;
557	entry->entry.ptr = data + RABIN_WINDOW;
558	entry->entry.val = val;
559	entry->entry.src = src;
560	entry->next = NULL;
561	/* Now we have to insert this entry at the end of the hash
562	* map
563	*/
564	if (hash[i] == NULL) {
565	hash[i] = entry;
566	} else {
567	struct unpacked_index_entry *this;
568	for (this = hash[i];
569	this->next != NULL;
570	this = this->next) { /* No action */ }
571	this->next = entry;
572	this = entry;
573	}
574	hash_count[i]++;
575	entry++;
576	num_entries++;
577	}
578	}
579	/* Move the data pointer by whatever remainder is left */
580	data += cmd;
581	} else {
582	/*
583	* cmd == 0 is reserved for future encoding
584	* extensions. In the mean time we must fail when
585	* encountering them (might be data corruption).
586	*/
587	break;
588	}
589	}
590	if (data != top) {
591	/* Something was wrong with this delta */
592	free(hash);
593	free(hash_count);
594	return NULL;
595	}
596	if (num_entries == 0) {
597	/ Nothing to index /
598	free(hash);
599	free(hash_count);
600	return NULL;
601	}
602	if (old != NULL) {
603	if (hmask == old->hash_mask) {
604	/* The total hash table size didn't change, which means that
605	* entries will end up in the same pages. We can do bulk-copying to
606	* get the final output
607	*/
608	index = create_index_from_old_and_hash(old, hash, hsize,
609	num_entries);
610	free(hash_count);
611	free(hash);
612	if (!index) {
613	return NULL;
614	}
615	index->last_src = src;
616	return index;
617	} else {
618	total_num_entries = num_entries + old->num_entries;
619	include_entries_from_index(hash, hash_count, hsize, entry, old);
620	}
621	} else {
622	total_num_entries = num_entries;
623	}
624
625	total_num_entries = limit_hash_buckets(hash, hash_count, hsize,
626	total_num_entries);
627	free(hash_count);
628	index = pack_delta_index(hash, hsize, total_num_entries);
629	free(hash);
630	if (!index) {
631	return NULL;
632	}
633	index->last_src = src;
634	return index;
0.23.45 by John Arbash Meinel Add a function that updates the index for delta bytes.	635	}
	636
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	637	void free_delta_index(struct delta_index *index)
	638	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	639	free(index);
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	640	}
	641
	642	unsigned long sizeof_delta_index(struct delta_index *index)
	643	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	644	if (index)
	645	return index->memsize;
	646	else
	647	return 0;
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	648	}
	649
	650	/*
	651	* The maximum size for any opcode sequence, including the initial header
	652	* plus Rabin window plus biggest copy.
	653	*/
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	654	#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	655
	656	void *
0.23.44 by John Arbash Meinel Remove the multi-index handling now that we have index combining instead.	657	create_delta(const struct delta_index *index,
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	658	const void *trg_buf, unsigned long trg_size,
	659	unsigned long *delta_size, unsigned long max_size)
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	660	{
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	661	unsigned int i, outpos, outsize, moff, msize, val;
	662	const struct source_info *msource;
	663	int inscnt;
	664	const unsigned char ref_data, ref_top, data, top;
	665	unsigned char *out;
	666	unsigned long source_size;
	667
	668	if (!trg_buf \|\| !trg_size)
	669	return NULL;
	670	if (index == NULL)
	671	return NULL;
	672
	673	outpos = 0;
	674	outsize = 8192;
	675	if (max_size && outsize >= max_size)
	676	outsize = max_size + MAX_OP_SIZE + 1;
	677	out = malloc(outsize);
	678	if (!out)
	679	return NULL;
	680
	681	/* store reference buffer size */
	682	source_size = index->last_src->size + index->last_src->agg_offset;
	683	i = source_size;
	684	while (i >= 0x80) {
	685	out[outpos++] = i \| 0x80;
	686	i >>= 7;
	687	}
	688	out[outpos++] = i;
	689
	690	/* store target buffer size */
	691	i = trg_size;
	692	while (i >= 0x80) {
	693	out[outpos++] = i \| 0x80;
	694	i >>= 7;
	695	}
	696	out[outpos++] = i;
	697
	698	data = trg_buf;
	699	top = (const unsigned char *) trg_buf + trg_size;
	700
	701	/* Start the matching by filling out with a simple 'insert' instruction, of
	702	* the first RABIN_WINDOW bytes of the input.
	703	*/
	704	outpos++; /* leave a byte for the insert command */
	705	val = 0;
	706	for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
	707	out[outpos++] = *data;
	708	val = ((val << 8) \| *data) ^ T[val >> RABIN_SHIFT];
	709	}
	710	/* we are now setup with an insert of 'i' bytes and val contains the RABIN
	711	* hash for those bytes, and data points to the RABIN_WINDOW+1 byte of
	712	* input.
	713	*/
	714	inscnt = i;
	715
	716	moff = 0;
	717	msize = 0;
	718	msource = NULL;
	719	while (data < top) {
	720	if (msize < 4096) {
	721	/* we don't have a 'worthy enough' match yet, so let's look for
	722	* one.
	723	*/
	724	struct index_entry *entry;
725	/* Shift the window by one byte. */
726	val ^= U[data[-RABIN_WINDOW]];
727	val = ((val << 8) \| *data) ^ T[val >> RABIN_SHIFT];
728	i = val & index->hash_mask;
729	/* TODO: When using multiple indexes like this, the hash tables
730	* mapping val => index_entry become less efficient.
731	* You end up getting a lot more collisions in the hash,
732	* which doesn't actually lead to a entry->val match.
733	*/
734	for (entry = index->hash[i]; entry < index->hash[i+1];
735	entry++) {
736	const unsigned char *ref;
737	const unsigned char *src;
738	unsigned int ref_size;
739	if (entry->val != val)
740	continue;
741	ref = entry->ptr;
742	src = data;
743	ref_data = entry->src->buf;
744	ref_top = ref_data + entry->src->size;
745	ref_size = ref_top - ref;
746	/* ref_size is the longest possible match that we could make
747	* here. If ref_size <= msize, then we know that we cannot
748	* match more bytes with this location that we have already
749	* matched.
750	*/
751	if (ref_size > top - src)
752	ref_size = top - src;
753	if (ref_size <= msize)
754	break;
755	/* See how many bytes actually match at this location. */
756	while (ref_size-- && src++ == ref)
757	ref++;
758	if (msize < ref - entry->ptr) {
759	/* this is our best match so far */
760	msize = ref - entry->ptr;
761	msource = entry->src;
762	moff = entry->ptr - ref_data;
763	if (msize >= 4096) /* good enough */
764	break;
765	}
766	}
767	}
768
769	if (msize < 4) {
770	/* The best match right now is less than 4 bytes long. So just add
771	* the current byte to the insert instruction. Increment the insert
772	* counter, and copy the byte of data into the output buffer.
773	*/
774	if (!inscnt)
775	outpos++;
776	out[outpos++] = *data++;
777	inscnt++;
778	if (inscnt == 0x7f) {
779	/* We have a max length insert instruction, finalize it in the
780	* output.
781	*/
782	out[outpos - inscnt - 1] = inscnt;
783	inscnt = 0;
784	}
785	msize = 0;
786	} else {
787	unsigned int left;
788	unsigned char *op;
789
790	if (inscnt) {
791	ref_data = msource->buf;
792	while (moff && ref_data[moff-1] == data[-1]) {
793	/* we can match one byte back */
794	msize++;
795	moff--;
796	data--;
797	outpos--;
798	if (--inscnt)
799	continue;
800	outpos--; /* remove count slot */
801	inscnt--; /* make it -1 */
802	break;
803	}
804	out[outpos - inscnt - 1] = inscnt;
805	inscnt = 0;
806	}
807
808	/* A copy op is currently limited to 64KB (pack v2) */
809	left = (msize < 0x10000) ? 0 : (msize - 0x10000);
810	msize -= left;
811
812	op = out + outpos++;
813	i = 0x80;
814
815	/* moff is the offset in the local structure, for encoding, we need
816	* to push it into the global offset
817	*/
818	assert(moff < msource->size);
819	moff += msource->agg_offset;
820	assert(moff + msize <= source_size);
821	if (moff & 0x000000ff)
822	out[outpos++] = moff >> 0, i \|= 0x01;
823	if (moff & 0x0000ff00)
824	out[outpos++] = moff >> 8, i \|= 0x02;
825	if (moff & 0x00ff0000)
826	out[outpos++] = moff >> 16, i \|= 0x04;
827	if (moff & 0xff000000)
828	out[outpos++] = moff >> 24, i \|= 0x08;
829	/* Put it back into local coordinates, in case we have multiple
830	* copies in a row.
831	*/
832	moff -= msource->agg_offset;
833
834	if (msize & 0x00ff)
835	out[outpos++] = msize >> 0, i \|= 0x10;
836	if (msize & 0xff00)
837	out[outpos++] = msize >> 8, i \|= 0x20;
838
839	*op = i;
840
841	data += msize;
842	moff += msize;
843	msize = left;
844
845	if (msize < 4096) {
846	int j;
847	val = 0;
848	for (j = -RABIN_WINDOW; j < 0; j++)
849	val = ((val << 8) \| data[j])
850	^ T[val >> RABIN_SHIFT];
851	}
852	}
853
854	if (outpos >= outsize - MAX_OP_SIZE) {
855	void *tmp = out;
856	outsize = outsize * 3 / 2;
857	if (max_size && outsize >= max_size)
858	outsize = max_size + MAX_OP_SIZE + 1;
859	if (max_size && outpos > max_size)
860	break;
861	out = realloc(out, outsize);
862	if (!out) {
863	free(tmp);
864	return NULL;
865	}
866	}
867	}
868
869	if (inscnt)
870	out[outpos - inscnt - 1] = inscnt;
871
872	if (max_size && outpos > max_size) {
873	free(out);
874	return NULL;
875	}
876
877	*delta_size = outpos;
878	return out;
0.23.2 by Nicolas Pitre Add the diff-delta.c and patch-delta.c files.	879	}
0.23.24 by John Arbash Meinel Change the code so that we can pass in multiple sources to match against.	880
0.23.57 by John Arbash Meinel Change the formatting, replace \t with spaces to be consistent with bzr coding.	881	/* vim: et ts=4 sw=4 sts=4
0.23.36 by John Arbash Meinel Track down a memory leak in the refactored diff-delta.c code.	882	*/