113
_equivalence_table_class = equivalence_table.EquivalenceTable
112
115
def __init__(self, delta=True):
113
116
"""Create a GroupCompressor.
115
118
:param delta: If False, do not compress records.
117
120
self._delta = delta
119
121
self.line_offsets = []
120
122
self.endpoint = 0
121
123
self.input_bytes = 0
122
# line: set(locations it appears at), set(N+1 for N in locations)
123
self.line_locations = {}
124
self.line_locations = self._equivalence_table_class([])
125
self.lines = self.line_locations.lines
124
126
self.labels_deltas = {}
126
128
def get_matching_blocks(self, lines):
132
134
of the list is always (old_len, new_len, 0) to provide an end point
133
135
for generating instructions from the matching blocks list.
137
140
line_locations = self.line_locations
141
line_locations.set_right_lines(lines)
141
142
# We either copy a range (while there are reusable lines) or we
142
143
# insert new lines. To find reusable lines we traverse
143
while pos < len(lines):
145
if line not in line_locations:
147
result.append((min(copy_ends) - range_len, range_start, range_len))
154
locations = line_locations[line]
156
next_locations = locations.intersection(copy_ends)
157
if len(next_locations):
160
copy_ends = set(loc + 1 for loc in next_locations)
164
result.append((min(copy_ends) - range_len, range_start, range_len))
166
copy_ends = set(loc + 1 for loc in locations)
170
result.append((min(copy_ends) - range_len, range_start, range_len))
151
block, next_pos, locations = _get_longest_match(line_locations, pos,
153
tdelta = timer() - tstart
154
if tdelta > max_time:
156
max_info = tdelta, pos, block, next_pos, locations
159
if block is not None:
171
161
result.append((len(self.lines), len(lines), 0))
162
# if max_time > 0.01:
163
# print max_info[:-1]
164
# import pdb; pdb.set_trace()
174
167
def compress(self, key, lines, expected_sha):
267
260
:param index_lines: A boolean flag for each line - when True, index
263
# indexed_newlines = [idx for idx, val in enumerate(index_lines)
264
# if val and new_lines[idx] == '\n']
265
# if indexed_newlines:
266
# import pdb; pdb.set_trace()
270
267
endpoint = self.endpoint
271
offset = len(self.lines)
272
line_append = self.line_offsets.append
273
setdefault = self.line_locations.setdefault
274
for (pos, line), index in izip(enumerate(new_lines), index_lines):
268
self.line_locations.extend_lines(new_lines, index_lines)
269
for line in new_lines:
275
270
endpoint += len(line)
276
line_append(endpoint)
278
indices = setdefault(line, set())
279
indices.add(pos + offset)
280
self.lines.extend(new_lines)
271
self.line_offsets.append(endpoint)
281
272
self.endpoint = endpoint
837
828
basis_end = int(bits[2])
838
829
delta_end = int(bits[3])
839
830
return node[0], start, stop, basis_end, delta_end
833
def _get_longest_match(equivalence_table, pos, max_pos, locations):
834
"""Get the longest possible match for the current position."""
839
if locations is None:
840
locations = equivalence_table.get_idx_matches(pos)
841
if locations is None:
842
# No more matches, just return whatever we have, but we know that
843
# this last position is not going to match anything
847
if copy_ends is None:
848
# We are starting a new range
849
copy_ends = [loc + 1 for loc in locations]
851
locations = None # Consumed
853
# We are currently in the middle of a match
854
next_locations = set(copy_ends).intersection(locations)
855
if len(next_locations):
857
copy_ends = [loc + 1 for loc in next_locations]
859
locations = None # Consumed
861
# But we are done with this match, we should be
862
# starting a new one, though. We will pass back 'locations'
863
# so that we don't have to do another lookup.
866
if copy_ends is None:
867
return None, pos, locations
868
return ((min(copy_ends) - range_len, range_start, range_len)), pos, locations
872
from bzrlib.plugins.groupcompress import _groupcompress_c
876
GroupCompressor._equivalence_table_class = _groupcompress_c.EquivalenceTable
877
_get_longest_match = _groupcompress_c._get_longest_match