86
cdef class EquivalenceTable:
87
"""This tracks equivalencies between lists of hashable objects.
89
:ivar lines: The 'static' lines that will be preserved between runs.
92
cdef readonly object lines
93
cdef readonly object _right_lines
94
cdef Py_ssize_t _hashtable_size
95
cdef Py_ssize_t _hashtable_bitmask
96
cdef _hash_bucket *_hashtable
97
cdef _raw_line *_raw_lines
98
cdef Py_ssize_t _len_lines
100
def __init__(self, lines):
101
self.lines = list(lines)
102
self._right_lines = None
103
self._hashtable_size = 0
104
self._hashtable = NULL
105
self._raw_lines = NULL
107
self._lines_to_raw_lines(lines)
108
self._build_hash_table()
77
def make_delta_index(source):
78
return DeltaIndex(source)
81
cdef class DeltaIndex:
83
# We need Pyrex 0.9.8+ to understand a 'list' definition, and this object
84
# isn't performance critical
85
# cdef readonly list _sources
86
cdef readonly object _sources
87
cdef source_info *_source_infos
88
cdef delta_index *_index
89
cdef readonly unsigned int _max_num_sources
90
cdef public unsigned long _source_offset
93
return '%s(%d, %d)' % (self.__class__.__name__,
94
len(self._sources), self._source_offset)
96
def __init__(self, source=None):
99
self._max_num_sources = 65000
100
self._source_infos = <source_info *>safe_malloc(sizeof(source_info)
101
* self._max_num_sources)
102
self._source_offset = 0
104
if source is not None:
105
self.add_source(source, 0)
110
107
def __dealloc__(self):
111
safe_free(<void**>&self._hashtable)
112
safe_free(<void**>&self._raw_lines)
114
cdef int _line_to_raw_line(self, PyObject *line, _raw_line *raw_line) except -1:
115
"""Convert a single PyObject into a raw line."""
116
raw_line.hash = PyObject_Hash(line)
117
raw_line.next_line_index = SENTINEL
118
raw_line.hash_offset = SENTINEL
119
raw_line.flags = INDEXED
122
cdef int _lines_to_raw_lines(self, object lines) except -1:
123
"""Load a sequence of objects into the _raw_line format"""
124
cdef Py_ssize_t count, i
129
# Do we want to use PySequence_Fast, or just assume that it is a list
130
# Also, do we need to decref the return value?
131
# http://www.python.org/doc/current/api/sequence.html
132
seq = PySequence_Fast(lines, "expected a sequence")
134
count = PySequence_Fast_GET_SIZE(seq)
137
raw = <_raw_line*>safe_malloc(count * sizeof(_raw_line))
138
safe_free(<void**>&self._raw_lines)
139
self._raw_lines = raw
140
self._len_lines = count
142
for i from 0 <= i < count:
143
item = PySequence_Fast_GET_ITEM(seq, i)
144
# NB: We don't Py_INCREF the data pointer, because we know we
145
# maintain a pointer to the item in self.lines or
147
# TODO: Do we even need to track a data pointer here? It would
148
# be less memory, and we *could* just look it up in the
149
# appropriate line list.
150
self._line_to_raw_line(item, &raw[i])
152
# TODO: Unfortunately, try/finally always generates a compiler
153
# warning about a possibly unused variable :(
157
cdef Py_ssize_t _compute_minimum_hash_size(self, Py_ssize_t needed):
158
"""Determine the smallest hash size that can reasonably fit the data.
160
:param needed: The number of entries we might be inserting into
161
the hash (assuming there are no duplicate lines.)
162
:return: The number of hash table entries to use. This will always be a
165
cdef Py_ssize_t hash_size
166
cdef Py_ssize_t min_size
168
# TODO: Another alternative would be to actually count how full the
169
# hash-table is, and decide if we need to grow it based on
170
# density. That can take into account duplicated lines. Though if
171
# we compress well, there should be a minimal amount of
172
# duplicated lines in the output.
174
# At the bare minimum, we could fit all entries into a 'needed'
175
# size hash table. However, any collision would then have a long way to
176
# traverse before it could find a 'free' slot.
177
# So we set the minimum size to give us 33% empty slots.
178
min_size = <Py_ssize_t>(needed * 2)
180
while hash_size < min_size:
181
hash_size = hash_size << 1
184
def _py_compute_minimum_hash_size(self, needed):
185
"""Expose _compute_minimum_hash_size to python for testing."""
186
return self._compute_minimum_hash_size(needed)
188
cdef Py_ssize_t _compute_recommended_hash_size(self, Py_ssize_t needed):
189
"""Determine a reasonable hash size, assuming some room for growth.
191
:param needed: The number of entries we might be inserting into
192
the hash (assuming there are no duplicate lines.)
193
:return: The number of hash table entries to use. This will always be a
196
cdef Py_ssize_t hash_size
197
cdef Py_ssize_t min_size
199
# We start off with a 8k hash (after doubling), because there isn't a
200
# lot of reason to go smaller than that (this class isn't one you'll be
201
# instantiating thousands of, and you are always likely to grow here.)
203
while hash_size < needed:
204
hash_size = hash_size << 1
205
# And we always give at least 4x blank space
206
hash_size = hash_size << 2
209
def _py_compute_recommended_hash_size(self, needed):
210
"""Expose _compute_recommended_hash_size to python for testing."""
211
return self._compute_recommended_hash_size(needed)
213
cdef int _build_hash_table(self) except -1:
214
"""Build the hash table 'from scratch'."""
215
cdef Py_ssize_t hash_size
216
cdef Py_ssize_t hash_bitmask
218
cdef _raw_line *cur_line
219
cdef Py_ssize_t hash_offset
220
cdef _hash_bucket *cur_bucket
221
cdef _hash_bucket *new_hashtable
223
# Hash size is a power of 2
224
hash_size = self._compute_recommended_hash_size(self._len_lines)
226
new_hashtable = <_hash_bucket*>safe_malloc(sizeof(_hash_bucket) *
228
safe_free(<void**>&self._hashtable)
229
self._hashtable = new_hashtable
231
self._hashtable_size = hash_size
232
for i from 0 <= i < hash_size:
233
self._hashtable[i].line_index = SENTINEL
234
self._hashtable[i].count = 0
236
# Turn the hash size into a bitmask
237
self._hashtable_bitmask = hash_size - 1
239
# Iterate backwards, because it makes it easier to insert items into
240
# the hash (you just change the head pointer, and everything else keeps
241
# pointing to the same location).
242
for i from self._len_lines > i >= 0:
243
cur_line = self._raw_lines + i
244
if not (cur_line.flags & INDEXED == INDEXED):
246
hash_offset = self._find_hash_position(cur_line)
248
# Point this line to the location in the hash table
249
cur_line.hash_offset = hash_offset
250
# And make this line the head of the hash table
251
cur_bucket = self._hashtable + hash_offset
252
cur_line.next_line_index = cur_bucket.line_index
253
cur_bucket.line_index = i
254
cur_bucket.count = cur_bucket.count + 1
256
cdef int _extend_raw_lines(self, object index) except -1:
257
"""Add the last N lines from self.lines into the raw_lines array."""
258
cdef Py_ssize_t new_count
259
cdef Py_ssize_t new_total_len
260
cdef Py_ssize_t old_len
262
cdef PyObject *should_index
264
cdef Py_ssize_t line_index
265
cdef _raw_line *cur_line
266
cdef PyObject *seq_index
267
cdef Py_ssize_t actual_new_len
269
seq_index = PySequence_Fast(index, "expected a sequence for index")
271
old_len = self._len_lines
272
new_count = PySequence_Fast_GET_SIZE(seq_index)
273
new_total_len = new_count + self._len_lines
275
while actual_new_len < new_total_len:
276
actual_new_len = actual_new_len << 1
277
self._raw_lines = <_raw_line*>safe_realloc(<void*>self._raw_lines,
278
actual_new_len * sizeof(_raw_line))
279
self._len_lines = new_total_len
280
# Now that we have enough space, start adding the new lines
281
# into the array. These are done in forward order.
282
for i from 0 <= i < new_count:
283
line_index = i + old_len
284
cur_line = self._raw_lines + line_index
285
item = PyList_GET_ITEM(self.lines, line_index)
286
self._line_to_raw_line(item, cur_line)
287
should_index = PySequence_Fast_GET_ITEM(seq_index, i)
288
if PyObject_Not(should_index):
289
cur_line.flags = cur_line.flags & ~(<int>INDEXED)
293
cdef int _extend_hash_table(self, Py_ssize_t count) except -1:
294
"""Add the last N entries in self.lines to the hash table.
296
:param index: A sequence that declares whether each node should be
299
cdef Py_ssize_t line_index
300
cdef Py_ssize_t start
301
cdef _raw_line *cur_line
302
cdef _raw_line *next_line
303
cdef Py_ssize_t hash_offset
304
cdef _hash_bucket *cur_bucket
306
start = self._len_lines - count
308
for line_index from start <= line_index < self._len_lines:
309
cur_line = self._raw_lines + line_index
310
if cur_line.flags & INDEXED != INDEXED:
312
hash_offset = self._find_hash_position(cur_line)
314
# Point this line to the location in the hash table
315
cur_line.hash_offset = hash_offset
317
# Make this line the tail of the hash table
318
cur_bucket = self._hashtable + hash_offset
319
cur_bucket.count = cur_bucket.count + 1
320
if cur_bucket.line_index == SENTINEL:
321
cur_bucket.line_index = line_index
323
# We need to track through the pointers and insert this at
325
next_line = self._raw_lines + cur_bucket.line_index
326
while next_line.next_line_index != SENTINEL:
327
next_line = self._raw_lines + next_line.next_line_index
328
next_line.next_line_index = line_index
330
cdef Py_ssize_t _find_hash_position(self, _raw_line *line) except -1:
331
"""Find the node in the hash which defines this line.
333
Each bucket in the hash table points at exactly 1 equivalent line. If 2
334
objects would collide, we just increment to the next bucket until we
335
get to an empty bucket that is either empty or exactly matches this
338
:return: The offset in the hash table for this entry
340
cdef Py_ssize_t location
341
cdef _raw_line *ref_line
342
cdef Py_ssize_t ref_index
343
cdef int compare_result
345
location = line.hash & self._hashtable_bitmask
346
ref_index = self._hashtable[location].line_index
347
while ref_index != SENTINEL:
348
ref_line = self._raw_lines + ref_index
349
if (ref_line.hash == line.hash):
350
PyObject_Cmp(ref_line.data, line.data, &compare_result)
351
if compare_result == 0:
353
location = (location + 1) & self._hashtable_bitmask
354
ref_index = self._hashtable[location].line_index
357
def _py_find_hash_position(self, line):
358
"""Used only for testing.
360
Return the location where this fits in the hash table
362
cdef _raw_line raw_line
364
self._line_to_raw_line(<PyObject *>line, &raw_line)
365
return self._find_hash_position(&raw_line)
367
def _inspect_left_lines(self):
368
"""Used only for testing.
370
:return: None if _raw_lines is NULL,
371
else [(object, hash_val, hash_loc, next_val)] for each node in raw
376
if self._raw_lines == NULL:
108
if self._index != NULL:
109
free_delta_index(self._index)
111
safe_free(<void **>&self._source_infos)
113
def add_delta_source(self, delta, unadded_bytes):
114
"""Add a new delta to the source texts.
116
:param delta: The text of the delta, this must be a byte string.
117
:param unadded_bytes: Number of bytes that were added to the source
118
that were not indexed.
121
cdef Py_ssize_t c_delta_size
122
cdef delta_index *index
123
cdef unsigned int source_location
124
cdef source_info *src
125
cdef unsigned int num_indexes
127
if not PyString_CheckExact(delta):
128
raise TypeError('delta is not a str')
130
source_location = len(self._sources)
131
if source_location >= self._max_num_sources:
132
self._expand_sources()
133
self._sources.append(delta)
134
c_delta = PyString_AS_STRING(delta)
135
c_delta_size = PyString_GET_SIZE(delta)
136
src = self._source_infos + source_location
138
src.size = c_delta_size
139
src.agg_offset = self._source_offset + unadded_bytes
140
index = create_delta_index_from_delta(src, self._index)
141
self._source_offset = src.agg_offset + src.size
143
free_delta_index(self._index)
146
def add_source(self, source, unadded_bytes):
147
"""Add a new bit of source text to the delta indexes.
149
:param source: The text in question, this must be a byte string
150
:param unadded_bytes: Assume there are this many bytes that didn't get
151
added between this source and the end of the previous source.
154
cdef Py_ssize_t c_source_size
155
cdef delta_index *index
156
cdef unsigned int source_location
157
cdef source_info *src
158
cdef unsigned int num_indexes
160
if not PyString_CheckExact(source):
161
raise TypeError('source is not a str')
163
source_location = len(self._sources)
164
if source_location >= self._max_num_sources:
165
self._expand_sources()
166
self._sources.append(source)
167
c_source = PyString_AS_STRING(source)
168
c_source_size = PyString_GET_SIZE(source)
169
src = self._source_infos + source_location
171
src.size = c_source_size
173
src.agg_offset = self._source_offset + unadded_bytes
174
index = create_delta_index(src, self._index)
175
self._source_offset = src.agg_offset + src.size
177
free_delta_index(self._index)
180
cdef _expand_sources(self):
181
raise RuntimeError('if we move self._source_infos, then we need to'
182
' change all of the index pointers as well.')
183
self._max_num_sources = self._max_num_sources * 2
184
self._source_infos = <source_info *>safe_realloc(self._source_infos,
186
* self._max_num_sources)
188
def make_delta(self, target_bytes, max_delta_size=0):
189
"""Create a delta from the current source to the target bytes."""
191
cdef Py_ssize_t target_size
193
cdef unsigned long delta_size
195
if self._index == NULL:
380
for i from 0 <= i < self._len_lines:
381
PyList_Append(result,
382
(<object>self._raw_lines[i].data,
383
self._raw_lines[i].hash,
384
self._raw_lines[i].hash_offset,
385
self._raw_lines[i].next_line_index,
389
def _inspect_hash_table(self):
390
"""Used only for testing.
392
This iterates the hash table, and returns 'interesting' entries.
393
:return: (total_size, [(offset, line_index, count)] for entries that
399
for i from 0 <= i < self._hashtable_size:
400
if self._hashtable[i].line_index != SENTINEL:
401
PyList_Append(interesting,
402
(i, self._hashtable[i].line_index,
403
self._hashtable[i].count))
404
return (self._hashtable_size, interesting)
406
def get_matches(self, line):
407
"""Return the lines which match the line in right."""
408
cdef Py_ssize_t count
410
cdef Py_ssize_t *matches
415
count = self.get_raw_matches(<PyObject *>line, &matches)
419
for i from 0 <= i < count:
420
PyList_Append(result, matches[i])
422
safe_free(<void**>&matches)
425
cdef Py_ssize_t get_raw_matches(self, PyObject *line,
426
Py_ssize_t **matches) except -1:
427
"""Get all the possible entries that match the given location.
429
:param line: A Python line which we want to match
430
:param pos: The index position in _right_lines
431
:param matches: A variable to hold an array containing all the matches.
432
This will be allocated using malloc, and the caller is responsible
433
to free it with free(). If there are no matches, no memory will be
435
:retrun: The number of matched positions
437
cdef Py_ssize_t hash_offset
438
cdef _raw_line raw_line
439
cdef _hash_bucket cur_bucket
440
cdef Py_ssize_t cur_line_idx
441
cdef Py_ssize_t count
442
cdef Py_ssize_t *local_matches
444
self._line_to_raw_line(line, &raw_line)
445
hash_offset = self._find_hash_position(&raw_line)
446
cur_bucket = self._hashtable[hash_offset]
447
cur_line_idx = cur_bucket.line_index
448
if cur_line_idx == SENTINEL:
450
local_matches = <Py_ssize_t*>safe_malloc(sizeof(Py_ssize_t) *
451
(cur_bucket.count + 1))
452
matches[0] = local_matches
453
for count from 0 <= count < cur_bucket.count:
454
assert cur_line_idx != SENTINEL
455
local_matches[count] = cur_line_idx
456
cur_line_idx = self._raw_lines[cur_line_idx].next_line_index
457
return cur_bucket.count
459
cdef Py_ssize_t get_next_matches(self, PyObject *line,
461
Py_ssize_t tip_count,
462
int *did_match) except -1:
463
"""Find matches that are interesting for the next step.
465
This finds offsets which match the current search tips. It is slightly
466
different from get_raw_matches, in that it intersects matching lines
467
against the existing tips, and then returns the *next* line.
469
TODO: It might be nice to know if there were 0 matches because the line
470
didn't match anything, or if it was just because it didn't follow
473
:param line: A line to match against
474
:param tips: A list of matches that we already have. This list
475
will be updated "in place". If there are no new matches, the list
476
will go unmodified, and the returned count will be 0.
477
:param tip_count: The current length of tips
478
:param did_match: Will be set to True if the line had matches, else 0
479
:return: The new number of matched lines.
481
cdef Py_ssize_t hash_offset
482
cdef _raw_line raw_line
483
cdef _hash_bucket cur_bucket
484
cdef Py_ssize_t cur_line_idx
485
cdef Py_ssize_t count
486
cdef Py_ssize_t *cur_tip
487
cdef Py_ssize_t *end_tip
488
cdef Py_ssize_t *cur_out
490
self._line_to_raw_line(line, &raw_line)
491
hash_offset = self._find_hash_position(&raw_line)
492
cur_bucket = self._hashtable[hash_offset]
493
cur_line_idx = cur_bucket.line_index
494
if cur_line_idx == SENTINEL:
501
end_tip = tips + tip_count
504
while cur_tip < end_tip and cur_line_idx != SENTINEL:
505
if cur_line_idx == cur_tip[0]:
506
# These match, so put a new entry in the output
507
# We put the *next* line, so that this is ready to pass back in
508
# for the next search.
509
cur_out[0] = (cur_line_idx + 1)
511
cur_out = cur_out + 1
512
cur_tip = cur_tip + 1
513
cur_line_idx = self._raw_lines[cur_line_idx].next_line_index
514
elif cur_line_idx < cur_tip[0]:
515
# cur_line_idx is "behind"
516
cur_line_idx = self._raw_lines[cur_line_idx].next_line_index
518
cur_tip = cur_tip + 1
521
def _get_matching_lines(self):
522
"""Return a dictionary showing matching lines."""
524
for line in self.lines:
525
matching[line] = self.get_matches(line)
528
def get_idx_matches(self, right_idx):
529
"""Return the left lines matching the right line at the given offset."""
530
line = self._right_lines[right_idx]
531
return self.get_matches(line)
533
def extend_lines(self, lines, index):
534
"""Add more lines to the left-lines list.
536
:param lines: A list of lines to add
537
:param index: A True/False for each node to define if it should be
540
cdef Py_ssize_t orig_len
541
cdef Py_ssize_t min_new_hash_size
542
assert len(lines) == len(index)
543
min_new_hash_size = self._compute_minimum_hash_size(len(self.lines) +
545
orig_len = len(self.lines)
546
self.lines.extend(lines)
547
self._extend_raw_lines(index)
548
if self._hashtable_size >= min_new_hash_size:
549
# Just add the new lines, don't bother resizing the hash table
550
self._extend_hash_table(len(index))
552
self._build_hash_table()
554
def set_right_lines(self, lines):
555
"""Set the lines we will be matching against."""
556
self._right_lines = lines
559
cdef Py_ssize_t intersect_sorted(Py_ssize_t *base, Py_ssize_t base_count,
560
Py_ssize_t *new_values, Py_ssize_t new_count) except -1:
561
"""Intersect the base values with the new values.
563
It is assumed that both base and new_values are sorted in ascending order,
566
The values in 'base' will be modified to only contain the entries that are
567
in both base and new. If nothing matches, 'base' will not be modified
569
:param base: An array of base values
570
:param base_count: The length of the base array
571
:param new_values: An array of values to compare to base, will be modified
572
to only contain matching entries
573
:param new_count: The number of new entries
574
:return: The count of matching values
576
cdef Py_ssize_t *cur_base
577
cdef Py_ssize_t *cur_new
578
cdef Py_ssize_t *cur_out
579
cdef Py_ssize_t count
580
cdef Py_ssize_t *base_end
581
cdef Py_ssize_t *new_end
586
base_end = base + base_count
587
new_end = new_values + new_count
590
while cur_base < base_end and cur_new < new_end:
591
if cur_base[0] == cur_new[0]:
592
# This may be assigning to self.
593
cur_out[0] = cur_base[0]
595
cur_out = cur_out + 1
596
# We matched, so move both pointers
597
cur_base = cur_base + 1
598
cur_new = cur_new + 1
599
elif cur_base[0] < cur_new[0]:
600
# cur_base is "behind" so increment it
601
cur_base = cur_base + 1
603
# cur_new must be "behind" move its pointer
604
cur_new = cur_new + 1
608
cdef object array_as_list(Py_ssize_t *array, Py_ssize_t count):
611
for i from 0 <= i < count:
612
PyList_Append(lst, array[i])
616
cdef Py_ssize_t list_as_array(object lst, Py_ssize_t **array) except -1:
617
"""Convert a Python sequence to an integer array.
619
:return: The size of the output, -1 on error
198
if not PyString_CheckExact(target_bytes):
199
raise TypeError('target is not a str')
201
target = PyString_AS_STRING(target_bytes)
202
target_size = PyString_GET_SIZE(target_bytes)
204
# TODO: inline some of create_delta so we at least don't have to double
205
# malloc, and can instead use PyString_FromStringAndSize, to
206
# allocate the bytes into the final string
207
delta = create_delta(self._index,
209
&delta_size, max_delta_size)
212
result = PyString_FromStringAndSize(<char *>delta, delta_size)
217
def make_delta(source_bytes, target_bytes):
218
"""Create a delta, this is a wrapper around DeltaIndex.make_delta."""
219
di = DeltaIndex(source_bytes)
220
return di.make_delta(target_bytes)
223
def apply_delta(source_bytes, delta_bytes):
224
"""Apply a delta generated by make_delta to source_bytes."""
226
cdef Py_ssize_t source_size
228
cdef Py_ssize_t delta_size
229
cdef unsigned char *data, *top
230
cdef unsigned char *dst_buf, *out, cmd
623
231
cdef Py_ssize_t size
625
seq = PySequence_Fast(lst, "expected a sequence for lst")
627
size = PySequence_Fast_GET_SIZE(seq)
628
array[0] = <Py_ssize_t*>safe_malloc(sizeof(Py_ssize_t) * size)
629
for i from 0 <= i < size:
630
array[0][i] = <object>PySequence_Fast_GET_ITEM(seq, i)
636
def _get_longest_match(equivalence_table, pos, max_pos, locations):
637
"""Get the longest possible match for the current position."""
638
cdef EquivalenceTable eq
640
cdef Py_ssize_t cmax_pos
641
cdef Py_ssize_t *matches
642
cdef Py_ssize_t match_count
644
cdef Py_ssize_t *copy_ends
645
cdef Py_ssize_t copy_count
646
cdef Py_ssize_t range_len
647
cdef Py_ssize_t in_start
649
cdef Py_ssize_t intersect_count
652
eq = equivalence_table
662
# TODO: Handle when locations is not None
663
# if locations is not None:
664
# copy_count = list_as_array(locations, ©_ends)
665
# # We know the match for this position
668
while cpos < cmax_pos:
669
# TODO: Instead of grabbing the full list of matches and then doing an
670
# intersection, use a helper that does the intersection
672
line = PyList_GET_ITEM(eq._right_lines, cpos)
673
if copy_ends == NULL:
674
copy_count = eq.get_raw_matches(line, ©_ends)
676
# No matches for this line
679
# point at the next location, for the next round trip
681
for i from 0 <= i < copy_count:
682
copy_ends[i] = copy_ends[i] + 1
232
cdef unsigned long cp_off, cp_size
234
if not PyString_CheckExact(source_bytes):
235
raise TypeError('source is not a str')
236
if not PyString_CheckExact(delta_bytes):
237
raise TypeError('delta is not a str')
239
source = PyString_AS_STRING(source_bytes)
240
source_size = PyString_GET_SIZE(source_bytes)
241
delta = PyString_AS_STRING(delta_bytes)
242
delta_size = PyString_GET_SIZE(delta_bytes)
244
# Code taken from patch-delta.c, only brought here to give better error
245
# handling, and to avoid double allocating memory
246
if (delta_size < DELTA_SIZE_MIN):
247
# XXX: Invalid delta block
248
raise RuntimeError('delta_size %d smaller than min delta size %d'
249
% (delta_size, DELTA_SIZE_MIN))
251
data = <unsigned char *>delta
252
top = data + delta_size
254
# make sure the orig file size matches what we expect
255
# XXX: gcc warns because data isn't defined as 'const'
256
size = get_delta_hdr_size(&data, top)
257
if (size > source_size):
258
# XXX: mismatched source size
259
raise RuntimeError('source size %d < expected source size %d'
260
% (source_size, size))
263
# now the result size
264
size = get_delta_hdr_size(&data, top)
265
result = PyString_FromStringAndSize(NULL, size)
266
dst_buf = <unsigned char*>PyString_AS_STRING(result)
267
# XXX: The original code added a trailing null here, but this shouldn't be
268
# necessary when using PyString_FromStringAndSize
281
cp_off = cp_off | (data[0] << 8)
284
cp_off = cp_off | (data[0] << 16)
287
cp_off = cp_off | (data[0] << 24)
293
cp_size = cp_size | (data[0] << 8)
296
cp_size = cp_size | (data[0] << 16)
300
if (cp_off + cp_size < cp_size or
301
cp_off + cp_size > source_size or
303
raise RuntimeError('Something wrong with:'
304
' cp_off = %s, cp_size = %s'
305
' source_size = %s, size = %s'
306
% (cp_off, cp_size, source_size, size))
307
memcpy(out, source + cp_off, cp_size)
309
size = size - cp_size
312
raise RuntimeError('Insert instruction longer than remaining'
313
' bytes: %d > %d' % (cmd, size))
314
memcpy(out, data, cmd)
684
# We are in the middle of a match
685
intersect_count = eq.get_next_matches(line, copy_ends, copy_count,
687
if intersect_count == 0:
690
# because the next line is not interesting
694
copy_count = intersect_count
695
range_len = range_len + 1
697
if copy_ends == NULL or copy_count == 0:
698
# We never had a match
699
return None, cpos, None
700
# Because copy_ends is always sorted, we know the 'min' is just the first
702
in_start = copy_ends[0]
703
safe_free(<void**>©_ends)
704
return ((in_start - range_len), range_start, range_len), cpos, None
320
# * cmd == 0 is reserved for future encoding
321
# * extensions. In the mean time we must fail when
322
# * encountering them (might be data corruption).
324
## /* XXX: error("unexpected delta opcode 0"); */
325
raise RuntimeError('Got delta opcode: 0, not supported')
328
if (data != top or size != 0):
329
## /* XXX: error("delta replay has gone wild"); */
330
raise RuntimeError('Did not extract the number of bytes we expected'
331
' we were left with %d bytes in "size", and top - data = %d'
332
% (size, <int>(top - data)))
335
# *dst_size = out - dst_buf;
336
assert (out - dst_buf) == PyString_GET_SIZE(result)