~bzr-pqm/bzr/bzr.dev

0.17.31 by John Arbash Meinel
Bring in the 'rabin' experiment.
1
/*
2
 * delta.h: headers for delta functionality
3
 *
4
 * Adapted from GIT for Bazaar by
5
 *   John Arbash Meinel <john@arbash-meinel.com> (C) 2009
6
 *
7
 * This code is free software; you can redistribute it and/or modify
8
 * it under the terms of the GNU General Public License version 2 as
9
 * published by the Free Software Foundation.
10
 */
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
11
#ifndef DELTA_H
12
#define DELTA_H
13
14
/* opaque object for delta index */
15
struct delta_index;
16
0.23.42 by John Arbash Meinel
Change the code around again.
17
/* Describes one buffer of source data and where that buffer starts within
 * the aggregate of all sources. */
struct source_info {
0.23.57 by John Arbash Meinel
Change the formatting, replace \t with spaces to be consistent with bzr coding.
18
    const void *buf; /* Pointer to the beginning of source data */
19
    unsigned long size; /* Total length of source data */
20
    unsigned long agg_offset; /* Start of source data as part of the
21
                                 aggregate source */
0.23.42 by John Arbash Meinel
Change the code around again.
22
};
23
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
24
/* result type for functions that have multiple failure modes */
25
typedef enum {
26
    DELTA_OK,             /* Success; first enumerator, so its value is 0 */
27
    DELTA_OUT_OF_MEMORY,  /* Could not allocate required memory */
28
    DELTA_INDEX_NEEDED,   /* A delta_index must be passed */
29
    DELTA_SOURCE_EMPTY,   /* A source_info had no content */
30
    DELTA_SOURCE_BAD,     /* A source_info had invalid or corrupt content */
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
31
    DELTA_BUFFER_EMPTY,   /* A buffer pointer and size gave no data (presumably NULL or zero length -- TODO confirm) */
5698.2.7 by Martin
Non-code fixes noticed looking at full diff
32
    DELTA_SIZE_TOO_BIG,   /* Delta data is larger than the max requested */
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
33
} delta_result;
34
35
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
36
/*
37
 * create_delta_index: compute index data from given buffer
38
 *
5698.2.8 by Martin
Tweak function descriptions in delta header for clarity
39
 * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
40
 * delta_index that should be passed to subsequent create_delta() calls, or to
5698.2.8 by Martin
Tweak function descriptions in delta header for clarity
41
 * free_delta_index().  Other values are a failure, and *fresh is unset.
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
42
 * The given buffer must not be freed nor altered before free_delta_index() is
43
 * called. The resultant struct must be freed using free_delta_index().
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
44
 *
5755.2.8 by John Arbash Meinel
Do a lot of renaming.
45
 * :param max_bytes_to_index: Limit the number of regions to sample to this
46
 *      amount of text. We will store at most max_bytes_to_index / RABIN_WINDOW
47
 *      pointers into the source text.  Useful if src can be unbounded in size,
48
 *      and you are willing to trade match accuracy for peak memory.
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
49
 */
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
50
extern delta_result
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
51
create_delta_index(const struct source_info *src,
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
52
                   struct delta_index *old,
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
53
                   struct delta_index **fresh,
5755.2.10 by John Arbash Meinel
Merge Martin gz's tweaks for signed vs unsigned, but tweak them a bit further.
54
                   int max_bytes_to_index);
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
55
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
56
57
/*
58
 * create_delta_index_from_delta: compute index data from given buffer
59
 *
5698.2.8 by Martin
Tweak function descriptions in delta header for clarity
60
 * Returns a delta_result status, when DELTA_OK then *fresh is set to a struct
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
61
 * delta_index that should be passed to subsequent create_delta() calls, or to
5698.2.8 by Martin
Tweak function descriptions in delta header for clarity
62
 * free_delta_index().  Other values are a failure, and *fresh is unset.
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
63
 * The bytes must be in the form of a delta structure, as generated by
64
 * create_delta(). The generated index will only index the insert bytes, and
65
 * not any of the control structures.
66
 */
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
67
extern delta_result
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
68
create_delta_index_from_delta(const struct source_info *delta,
5698.2.5 by Martin
Switch approach to delta function interfaces and use a return code and outparam
69
                              struct delta_index *old,
70
                              struct delta_index **fresh);
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
71
/*
72
 * free_delta_index: free the index created by create_delta_index()
73
 *
74
 * Given pointer must be what create_delta_index() set *fresh to, or NULL.
75
 */
76
extern void free_delta_index(struct delta_index *index);
77
78
/*
79
 * sizeof_delta_index: returns memory usage of delta index
80
 *
81
 * Given pointer must be what create_delta_index() set *fresh to, or NULL.
82
 */
83
extern unsigned long sizeof_delta_index(struct delta_index *index);
84
85
/*
86
 * create_delta: create a delta from given index for the given buffer
87
 *
88
 * This function may be called multiple times with different buffers using
89
 * the same delta_index pointer.  If max_delta_size is non-zero and the
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
90
 * resulting delta is to be larger than max_delta_size then DELTA_SIZE_TOO_BIG
5698.2.8 by Martin
Tweak function descriptions in delta header for clarity
91
 * is returned.  Otherwise on success, DELTA_OK is returned and *delta_data is
92
 * set to a new buffer with the delta data and *delta_size is updated with its
93
 * size.  That buffer must be freed by the caller.
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
94
 */
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
95
extern delta_result
0.23.44 by John Arbash Meinel
Remove the multi-index handling now that we have index combining instead.
96
create_delta(const struct delta_index *index,
5698.2.6 by Martin
Also adapt create_delta to the return code interface as it uses malloc
97
             const void *buf, unsigned long bufsize,
98
             unsigned long *delta_size, unsigned long max_delta_size,
99
             void **delta_data);
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
100
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
101
/* the smallest possible delta size is 3 bytes
102
 * Target size, Copy command, Copy length
103
 */
104
#define DELTA_SIZE_MIN  3
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
105
106
/*
 * get_delta_hdr_size: decode one variable-length size from a delta header
 *
 * The size is stored least-significant group first, 7 bits per byte; a set
 * high bit (0x80) on a byte means that another byte follows.
 *
 * :param datap: in/out cursor into the delta data; on return it points just
 *      past the last byte consumed.
 * :param top: one past the last readable byte; nothing at or beyond it is
 *      read.
 * :return: the decoded size. When no byte is available (*datap >= top) the
 *      result is 0 and *datap is left untouched. Bits that do not fit in an
 *      unsigned long are discarded, but their bytes are still consumed.
 *
 * NOTE(review): an earlier description said this must be called twice on the
 * delta data (source size, then target size), while the DELTA_SIZE_MIN
 * comment lists only a target size -- confirm against the current format.
 */
static unsigned long
get_delta_hdr_size(unsigned char **datap, const unsigned char *top)
{
    /* CHAR_BIT is at least 8, so this never overstates the usable width. */
    const unsigned int size_bits = 8 * (unsigned int)sizeof(unsigned long);
    unsigned char *cursor = *datap;
    unsigned long size = 0;
    unsigned int shift = 0;

    /* Test the bound before every read, including the first one. */
    while (cursor < top) {
        const unsigned char byte = *cursor++;

        /* Widen before shifting: shifting the int-promoted byte would
         * overflow int from the fifth byte on (undefined behaviour), and a
         * shift count >= the type width is undefined as well. */
        if (shift < size_bits) {
            size |= (unsigned long)(byte & 0x7f) << shift;
            shift += 7;
        }
        if (!(byte & 0x80)) {
            break;
        }
    }
    *datap = cursor;
    return size;
}
125
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
126
/*
127
 * Return the basic information about a given delta index.
128
 * :param index: The delta_index object
129
 * :param pos: The offset in the entry list. Start at 0, and walk until you get
130
 *      0 as a return code.
131
 * :param text_offset: return value, distance to the beginning of all sources
132
 * :param hash_val: return value, the RABIN hash associated with this pointer
133
 * :param hash_offset: Location for this entry in the hash array.
134
 * :return: 1 if pos != -1 (there was data produced)
135
 */
136
extern int
137
get_entry_summary(const struct delta_index *index, int pos,
138
                  unsigned int *text_offset, unsigned int *hash_val);
139
140
/*
141
 * Determine what entry index->hash[X] points to.
142
 */
143
extern int
144
get_hash_offset(const struct delta_index *index, int pos,
145
                unsigned int *entry_offset);
146
147
/*
148
 * Compute the rabin_hash of the given data, it is assumed the data is at least
149
 * RABIN_WINDOW wide (16 bytes).
150
 */
151
extern unsigned int
5755.2.10 by John Arbash Meinel
Merge Martin gz's tweaks for signed vs unsigned, but tweak them a bit further.
152
rabin_hash(const unsigned char *data);
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
153
0.23.5 by John Arbash Meinel
Minor changes to get diff-delta.c and patch-delta.c to compile.
154
#endif