4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
1 |
# Copyright (C) 2009 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
17 |
"""Python version of compiled extensions for doing compression.
|
|
18 |
||
19 |
We separate the implementation from the groupcompress.py to avoid importing
|
|
20 |
useless stuff.
|
|
21 |
"""
|
|
22 |
||
3735.40.7
by John Arbash Meinel
Move even more functionality into EquivalenceTable. |
23 |
from bzrlib import osutils |
24 |
||
25 |
||
3735.40.13
by John Arbash Meinel
Rename EquivalenceTable to LinesDeltaIndex. |
26 |
class LinesDeltaIndex(object): |
27 |
"""This class indexes matches between strings.
|
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
28 |
|
29 |
:ivar lines: The 'static' lines that will be preserved between runs.
|
|
3735.40.13
by John Arbash Meinel
Rename EquivalenceTable to LinesDeltaIndex. |
30 |
:ivar _matching_lines: A dict of {line:[matching offsets]}
|
31 |
:ivar line_offsets: The byte offset for the end of each line, used to
|
|
32 |
quickly map between a matching line number and the byte location
|
|
33 |
:ivar endpoint: The total number of bytes in self.line_offsets
|
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
34 |
"""
|
35 |
||
36 |
def __init__(self, lines): |
|
3735.40.13
by John Arbash Meinel
Rename EquivalenceTable to LinesDeltaIndex. |
37 |
self.lines = [] |
3735.40.6
by John Arbash Meinel
Move more information down into EquivalenceTable. |
38 |
self.line_offsets = [] |
3735.40.13
by John Arbash Meinel
Rename EquivalenceTable to LinesDeltaIndex. |
39 |
self.endpoint = 0 |
40 |
self._matching_lines = {} |
|
41 |
self.extend_lines(lines, [True]*len(lines)) |
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
42 |
|
43 |
def _update_matching_lines(self, new_lines, index): |
|
44 |
matches = self._matching_lines |
|
45 |
start_idx = len(self.lines) |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
46 |
if len(new_lines) != len(index): |
47 |
raise AssertionError('The number of lines to be indexed does' |
|
48 |
' not match the index/don\'t index flags: %d != %d' |
|
49 |
% (len(new_lines), len(index))) |
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
50 |
for idx, do_index in enumerate(index): |
51 |
if not do_index: |
|
52 |
continue
|
|
53 |
matches.setdefault(new_lines[idx], []).append(start_idx + idx) |
|
54 |
||
55 |
def get_matches(self, line): |
|
56 |
"""Return the lines which match the line in right."""
|
|
57 |
try: |
|
58 |
return self._matching_lines[line] |
|
59 |
except KeyError: |
|
60 |
return None |
|
61 |
||
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
62 |
def _get_longest_match(self, lines, pos, locations): |
63 |
"""Look at all matches for the current line, return the longest.
|
|
64 |
||
65 |
:param lines: The lines we are matching against
|
|
66 |
:param pos: The current location we care about
|
|
67 |
:param locations: A list of lines that matched the current location.
|
|
68 |
This may be None, but often we'll have already found matches for
|
|
69 |
this line.
|
|
70 |
:return: (start_in_self, start_in_lines, num_lines)
|
|
71 |
All values are the offset in the list (aka the line number)
|
|
72 |
If start_in_self is None, then we have no matches, and this line
|
|
73 |
should be inserted in the target.
|
|
74 |
"""
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
75 |
range_start = pos |
76 |
range_len = 0 |
|
77 |
copy_ends = None |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
78 |
max_pos = len(lines) |
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
79 |
while pos < max_pos: |
80 |
if locations is None: |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
81 |
# TODO: is try/except better than get(..., None)?
|
3735.40.14
by John Arbash Meinel
Get rid of the self._right_lines state. It doesn't matter anymore. |
82 |
try: |
83 |
locations = self._matching_lines[lines[pos]] |
|
84 |
except KeyError: |
|
85 |
locations = None |
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
86 |
if locations is None: |
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
87 |
# No more matches, just return whatever we have, but we know
|
88 |
# that this last position is not going to match anything
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
89 |
pos += 1 |
90 |
break
|
|
91 |
else: |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
92 |
# We have a match
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
93 |
if copy_ends is None: |
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
94 |
# This is the first match in a range
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
95 |
copy_ends = [loc + 1 for loc in locations] |
96 |
range_len = 1 |
|
97 |
locations = None # Consumed |
|
98 |
else: |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
99 |
# We have a match started, compare to see if any of the
|
100 |
# current matches can be continued
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
101 |
next_locations = set(copy_ends).intersection(locations) |
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
102 |
if next_locations: |
103 |
# At least one of the regions continues to match
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
104 |
copy_ends = [loc + 1 for loc in next_locations] |
105 |
range_len += 1 |
|
106 |
locations = None # Consumed |
|
107 |
else: |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
108 |
# All current regions no longer match.
|
109 |
# This line does still match something, just not at the
|
|
110 |
# end of the previous matches. We will return locations
|
|
111 |
# so that we can avoid another _matching_lines lookup.
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
112 |
break
|
113 |
pos += 1 |
|
114 |
if copy_ends is None: |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
115 |
# We have no matches, this is a pure insert
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
116 |
return None, pos, locations |
117 |
return (((min(copy_ends) - range_len, range_start, range_len)), |
|
118 |
pos, locations) |
|
119 |
||
120 |
def get_matching_blocks(self, lines, soft=False): |
|
121 |
"""Return the ranges in lines which match self.lines.
|
|
122 |
||
123 |
:param lines: lines to compress
|
|
124 |
:return: A list of (old_start, new_start, length) tuples which reflect
|
|
125 |
a region in self.lines that is present in lines. The last element
|
|
126 |
of the list is always (old_len, new_len, 0) to provide a end point
|
|
127 |
for generating instructions from the matching blocks list.
|
|
128 |
"""
|
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
129 |
# In this code, we iterate over multiple _get_longest_match calls, to
|
130 |
# find the next longest copy, and possible insert regions. We then
|
|
131 |
# convert that to the simple matching_blocks representation, since
|
|
132 |
# otherwise inserting 10 lines in a row would show up as 10
|
|
133 |
# instructions.
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
134 |
result = [] |
135 |
pos = 0 |
|
136 |
locations = None |
|
137 |
max_pos = len(lines) |
|
138 |
result_append = result.append |
|
139 |
min_match_bytes = 10 |
|
140 |
if soft: |
|
141 |
min_match_bytes = 200 |
|
142 |
while pos < max_pos: |
|
3735.40.14
by John Arbash Meinel
Get rid of the self._right_lines state. It doesn't matter anymore. |
143 |
block, pos, locations = self._get_longest_match(lines, pos, |
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
144 |
locations) |
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
145 |
if block is not None: |
3735.40.14
by John Arbash Meinel
Get rid of the self._right_lines state. It doesn't matter anymore. |
146 |
# Check to see if we match fewer than min_match_bytes. As we
|
147 |
# will turn this into a pure 'insert', rather than a copy.
|
|
148 |
# block[-1] is the number of lines. A quick check says if we
|
|
149 |
# have more lines than min_match_bytes, then we know we have
|
|
150 |
# enough bytes.
|
|
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
151 |
if block[-1] < min_match_bytes: |
152 |
# This block may be a 'short' block, check
|
|
153 |
old_start, new_start, range_len = block |
|
154 |
matched_bytes = sum(map(len, |
|
155 |
lines[new_start:new_start + range_len])) |
|
156 |
if matched_bytes < min_match_bytes: |
|
157 |
block = None |
|
158 |
if block is not None: |
|
159 |
result_append(block) |
|
160 |
result_append((len(self.lines), len(lines), 0)) |
|
161 |
return result |
|
162 |
||
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
163 |
def extend_lines(self, lines, index): |
164 |
"""Add more lines to the left-lines list.
|
|
165 |
||
166 |
:param lines: A list of lines to add
|
|
167 |
:param index: A True/False for each node to define if it should be
|
|
168 |
indexed.
|
|
169 |
"""
|
|
170 |
self._update_matching_lines(lines, index) |
|
171 |
self.lines.extend(lines) |
|
3735.40.6
by John Arbash Meinel
Move more information down into EquivalenceTable. |
172 |
endpoint = self.endpoint |
173 |
for line in lines: |
|
174 |
endpoint += len(line) |
|
175 |
self.line_offsets.append(endpoint) |
|
3735.40.15
by John Arbash Meinel
Some cleanup passes over the LinesDeltaIndex code. |
176 |
if len(self.line_offsets) != len(self.lines): |
177 |
raise AssertionError('Somehow the line offset indicator' |
|
178 |
' got out of sync with the line counter.') |
|
3735.40.6
by John Arbash Meinel
Move more information down into EquivalenceTable. |
179 |
self.endpoint = endpoint |
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
180 |
|
3735.40.7
by John Arbash Meinel
Move even more functionality into EquivalenceTable. |
181 |
def _flush_insert(self, start_linenum, end_linenum, |
182 |
new_lines, out_lines, index_lines): |
|
183 |
"""Add an 'insert' request to the data stream."""
|
|
184 |
bytes_to_insert = ''.join(new_lines[start_linenum:end_linenum]) |
|
185 |
insert_length = len(bytes_to_insert) |
|
186 |
# Each insert instruction is at most 127 bytes long
|
|
187 |
for start_byte in xrange(0, insert_length, 127): |
|
188 |
insert_count = min(insert_length - start_byte, 127) |
|
189 |
out_lines.append(chr(insert_count)) |
|
190 |
# Don't index the 'insert' instruction
|
|
191 |
index_lines.append(False) |
|
192 |
insert = bytes_to_insert[start_byte:start_byte+insert_count] |
|
193 |
as_lines = osutils.split_lines(insert) |
|
194 |
out_lines.extend(as_lines) |
|
195 |
index_lines.extend([True]*len(as_lines)) |
|
196 |
||
197 |
def _flush_copy(self, old_start_linenum, num_lines, |
|
198 |
out_lines, index_lines): |
|
199 |
if old_start_linenum == 0: |
|
200 |
first_byte = 0 |
|
201 |
else: |
|
202 |
first_byte = self.line_offsets[old_start_linenum - 1] |
|
203 |
stop_byte = self.line_offsets[old_start_linenum + num_lines - 1] |
|
204 |
num_bytes = stop_byte - first_byte |
|
205 |
# The data stream allows >64kB in a copy, but to match the compiled
|
|
206 |
# code, we will also limit it to a 64kB copy
|
|
207 |
for start_byte in xrange(first_byte, stop_byte, 64*1024): |
|
208 |
num_bytes = min(64*1024, stop_byte - first_byte) |
|
209 |
copy_bytes = encode_copy_instruction(start_byte, num_bytes) |
|
210 |
out_lines.append(copy_bytes) |
|
211 |
index_lines.append(False) |
|
212 |
||
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
213 |
def make_delta(self, new_lines, bytes_length=None, soft=False): |
3735.40.7
by John Arbash Meinel
Move even more functionality into EquivalenceTable. |
214 |
"""Compute the delta for this content versus the original content."""
|
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
215 |
if bytes_length is None: |
216 |
bytes_length = sum(map(len, new_lines)) |
|
217 |
# reserved for content type, content length
|
|
218 |
out_lines = ['', '', encode_base128_int(bytes_length)] |
|
3735.40.10
by John Arbash Meinel
Merge in the new delta format code. |
219 |
index_lines = [False, False, False] |
3735.40.7
by John Arbash Meinel
Move even more functionality into EquivalenceTable. |
220 |
blocks = self.get_matching_blocks(new_lines, soft=soft) |
221 |
current_line_num = 0 |
|
222 |
# We either copy a range (while there are reusable lines) or we
|
|
223 |
# insert new lines. To find reusable lines we traverse
|
|
224 |
for old_start, new_start, range_len in blocks: |
|
225 |
if new_start != current_line_num: |
|
226 |
# non-matching region, insert the content
|
|
227 |
self._flush_insert(current_line_num, new_start, |
|
228 |
new_lines, out_lines, index_lines) |
|
229 |
current_line_num = new_start + range_len |
|
230 |
if range_len: |
|
231 |
self._flush_copy(old_start, range_len, out_lines, index_lines) |
|
232 |
return out_lines, index_lines |
|
233 |
||
234 |
||
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
235 |
def encode_base128_int(val): |
236 |
"""Convert an integer into a 7-bit lsb encoding."""
|
|
237 |
bytes = [] |
|
238 |
count = 0 |
|
239 |
while val >= 0x80: |
|
240 |
bytes.append(chr((val | 0x80) & 0xFF)) |
|
241 |
val >>= 7 |
|
242 |
bytes.append(chr(val)) |
|
243 |
return ''.join(bytes) |
|
244 |
||
245 |
||
246 |
def decode_base128_int(bytes): |
|
247 |
"""Decode an integer from a 7-bit lsb encoding."""
|
|
248 |
offset = 0 |
|
249 |
val = 0 |
|
250 |
shift = 0 |
|
251 |
bval = ord(bytes[offset]) |
|
252 |
while bval >= 0x80: |
|
253 |
val |= (bval & 0x7F) << shift |
|
254 |
shift += 7 |
|
255 |
offset += 1 |
|
256 |
bval = ord(bytes[offset]) |
|
257 |
val |= bval << shift |
|
258 |
offset += 1 |
|
259 |
return val, offset |
|
260 |
||
261 |
||
3735.40.7
by John Arbash Meinel
Move even more functionality into EquivalenceTable. |
262 |
def encode_copy_instruction(offset, length): |
263 |
"""Convert this offset into a control code and bytes."""
|
|
264 |
copy_command = 0x80 |
|
265 |
copy_bytes = [None] |
|
266 |
||
267 |
for copy_bit in (0x01, 0x02, 0x04, 0x08): |
|
268 |
base_byte = offset & 0xff |
|
269 |
if base_byte: |
|
270 |
copy_command |= copy_bit |
|
271 |
copy_bytes.append(chr(base_byte)) |
|
272 |
offset >>= 8 |
|
273 |
if length is None: |
|
274 |
# None is used by the test suite
|
|
275 |
copy_bytes[0] = chr(copy_command) |
|
276 |
return ''.join(copy_bytes) |
|
277 |
if length > 0x10000: |
|
278 |
raise ValueError("we don't emit copy records for lengths > 64KiB") |
|
279 |
if length == 0: |
|
280 |
raise ValueError("We cannot emit a copy of length 0") |
|
281 |
if length != 0x10000: |
|
282 |
# A copy of length exactly 64*1024 == 0x10000 is sent as a length of 0,
|
|
283 |
# since that saves bytes for large chained copies
|
|
284 |
for copy_bit in (0x10, 0x20): |
|
285 |
base_byte = length & 0xff |
|
286 |
if base_byte: |
|
287 |
copy_command |= copy_bit |
|
288 |
copy_bytes.append(chr(base_byte)) |
|
289 |
length >>= 8 |
|
290 |
copy_bytes[0] = chr(copy_command) |
|
291 |
return ''.join(copy_bytes) |
|
292 |
||
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
293 |
|
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
294 |
def decode_copy_instruction(bytes, cmd, pos): |
295 |
"""Decode a copy instruction from the next few bytes.
|
|
296 |
||
297 |
A copy instruction is a variable number of bytes, so we will parse the
|
|
298 |
bytes we care about, and return the new position, as well as the offset and
|
|
299 |
length referred to in the bytes.
|
|
300 |
||
301 |
:param bytes: A string of bytes
|
|
302 |
:param cmd: The command code
|
|
303 |
:param pos: The position in bytes right after the copy command
|
|
304 |
:return: (offset, length, newpos)
|
|
305 |
The offset of the copy start, the number of bytes to copy, and the
|
|
306 |
position after the last byte of the copy
|
|
307 |
"""
|
|
308 |
if cmd & 0x80 != 0x80: |
|
309 |
raise ValueError('copy instructions must have bit 0x80 set') |
|
310 |
offset = 0 |
|
311 |
length = 0 |
|
312 |
if (cmd & 0x01): |
|
313 |
offset = ord(bytes[pos]) |
|
314 |
pos += 1 |
|
315 |
if (cmd & 0x02): |
|
316 |
offset = offset | (ord(bytes[pos]) << 8) |
|
317 |
pos += 1 |
|
318 |
if (cmd & 0x04): |
|
319 |
offset = offset | (ord(bytes[pos]) << 16) |
|
320 |
pos += 1 |
|
321 |
if (cmd & 0x08): |
|
322 |
offset = offset | (ord(bytes[pos]) << 24) |
|
323 |
pos += 1 |
|
324 |
if (cmd & 0x10): |
|
325 |
length = ord(bytes[pos]) |
|
326 |
pos += 1 |
|
327 |
if (cmd & 0x20): |
|
328 |
length = length | (ord(bytes[pos]) << 8) |
|
329 |
pos += 1 |
|
330 |
if (cmd & 0x40): |
|
331 |
length = length | (ord(bytes[pos]) << 16) |
|
332 |
pos += 1 |
|
333 |
if length == 0: |
|
334 |
length = 65536 |
|
335 |
return (offset, length, pos) |
|
336 |
||
3735.40.5
by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx |
337 |
|
338 |
def make_delta(source_bytes, target_bytes): |
|
339 |
"""Create a delta from source to target."""
|
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
340 |
# TODO: The checks below may not be a the right place yet.
|
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
341 |
if type(source_bytes) is not str: |
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
342 |
raise TypeError('source is not a str') |
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
343 |
if type(target_bytes) is not str: |
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
344 |
raise TypeError('target is not a str') |
3735.40.13
by John Arbash Meinel
Rename EquivalenceTable to LinesDeltaIndex. |
345 |
line_locations = LinesDeltaIndex(osutils.split_lines(source_bytes)) |
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
346 |
delta, _ = line_locations.make_delta(osutils.split_lines(target_bytes), |
347 |
bytes_length=len(target_bytes)) |
|
348 |
return ''.join(delta) |
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
349 |
|
350 |
||
351 |
def apply_delta(basis, delta): |
|
352 |
"""Apply delta to this object to become new_version_id."""
|
|
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
353 |
if type(basis) is not str: |
354 |
raise TypeError('basis is not a str') |
|
355 |
if type(delta) is not str: |
|
356 |
raise TypeError('delta is not a str') |
|
357 |
target_length, pos = decode_base128_int(delta) |
|
4241.6.6
by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core. |
358 |
lines = [] |
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
359 |
len_delta = len(delta) |
360 |
while pos < len_delta: |
|
361 |
cmd = ord(delta[pos]) |
|
362 |
pos += 1 |
|
363 |
if cmd & 0x80: |
|
364 |
offset, length, pos = decode_copy_instruction(delta, cmd, pos) |
|
3735.40.19
by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string. |
365 |
last = offset + length |
366 |
if last > len(basis): |
|
367 |
raise ValueError('data would copy bytes past the' |
|
368 |
'end of source') |
|
369 |
lines.append(basis[offset:last]) |
|
3735.40.11
by John Arbash Meinel
Implement make_delta and apply_delta. |
370 |
else: # Insert of 'cmd' bytes |
371 |
if cmd == 0: |
|
372 |
raise ValueError('Command == 0 not supported yet') |
|
373 |
lines.append(delta[pos:pos+cmd]) |
|
374 |
pos += cmd |
|
375 |
bytes = ''.join(lines) |
|
376 |
if len(bytes) != target_length: |
|
377 |
raise ValueError('Delta claimed to be %d long, but ended up' |
|
378 |
' %d long' % (target_length, len(bytes))) |
|
379 |
return bytes |
|
3735.40.19
by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string. |
380 |
|
381 |
||
382 |
def apply_delta_to_source(source, delta_start, delta_end): |
|
383 |
"""Extract a delta from source bytes, and apply it."""
|
|
384 |
source_size = len(source) |
|
385 |
if delta_start >= source_size: |
|
386 |
raise ValueError('delta starts after source') |
|
387 |
if delta_end > source_size: |
|
388 |
raise ValueError('delta ends after source') |
|
389 |
if delta_start >= delta_end: |
|
390 |
raise ValueError('delta starts after it ends') |
|
391 |
delta_bytes = source[delta_start:delta_end] |
|
392 |
return apply_delta(source, delta_bytes) |