28
28
# making _extract build and return a list, rather than being a generator
31
# with python -O, r923 does 2000 versions in 36.87s
33
# with optimizations to avoid mutating lists - 35.75! I guess copying
34
# all the elements every time costs more than the small manipulations.
35
# a surprisingly small change.
37
# r931, which avoids using a generator for extract, does 36.98s
39
# with memoized inclusions, takes 41.49s; not very good
41
# with slots, takes 37.35s; without takes 39.16, a bit surprising
43
# with the delta calculation mixed in with the add method, rather than
44
# separated, takes 36.78s
46
# with delta folded in and mutation of the list, 36.13s
48
# with all this and simplification of add code, 33s
31
51
# TODO: Perhaps have copy method for Weave instances?
33
53
# XXX: If we do weaves this way, will a merge still behave the same
177
200
Sequence of lines to be added in the new version."""
178
## self._check_versions(parents)
202
self._check_versions(parents)
179
203
## self._check_lines(text)
204
new_version = len(self._v)
186
209
sha1 = s.hexdigest()
189
# TODO: It'd probably be faster to append things on to a new
190
# list rather than modifying the existing one, which is likely
191
# to cause a lot of copying.
194
ancestors = self.inclusions(parents)
195
delta = self._delta(ancestors, text)
197
# offset gives the number of lines that have been inserted
198
# into the weave up to the current point; if the original edit instruction
199
# says to change line A then we actually change (A+offset)
202
for i1, i2, newlines in delta:
205
assert i2 <= len(self._l)
207
# the deletion and insertion are handled separately.
208
# first delete the region.
210
self._l.insert(i1+offset, ('[', idx))
211
self._l.insert(i2+offset+1, (']', idx))
216
# there may have been a deletion spanning up to
217
# i2; we want to insert after this region to make sure
218
# we don't destroy ourselves
220
self._l[i:i] = [('{', idx)] \
223
offset += 2 + len(newlines)
225
self._addversion(parents)
227
# special case; adding with no parents revision; can do this
228
# more quickly by just appending unconditionally
229
self._l.append(('{', idx))
231
self._l.append(('}', idx))
233
self._addversion(None)
212
# if we abort after here the weave will be corrupt
213
self._addversion(parents)
235
214
self._sha1s.append(sha1)
218
# special case; adding with no parents revision; can do
219
# this more quickly by just appending unconditionally.
220
# even more specially, if we're adding an empty text we
221
# need do nothing at all.
223
self._l.append(('{', new_version))
225
self._l.append(('}', new_version))
229
if len(parents) == 1 and sha1 == self._sha1s[parents[0]]:
230
# special case: same as the single parent
234
ancestors = self.inclusions(parents)
238
# basis a list of (origin, lineno, line)
241
for origin, lineno, line in self._extract(ancestors):
242
basis_lineno.append(lineno)
243
basis_lines.append(line)
245
# another small special case: a merge, producing the same text as auto-merge
246
if text == basis_lines:
249
# add a sentinal, because we can also match against the final line
250
basis_lineno.append(len(self._l))
252
# XXX: which line of the weave should we really consider
253
# matches the end of the file? the current code says it's the
254
# last line of the weave?
256
#print 'basis_lines:', basis_lines
257
#print 'new_lines: ', lines
259
from difflib import SequenceMatcher
260
s = SequenceMatcher(None, basis_lines, text)
262
# offset gives the number of lines that have been inserted
263
# into the weave up to the current point; if the original edit instruction
264
# says to change line A then we actually change (A+offset)
267
for tag, i1, i2, j1, j2 in s.get_opcodes():
268
# i1,i2 are given in offsets within basis_lines; we need to map them
269
# back to offsets within the entire weave
270
#print 'raw match', tag, i1, i2, j1, j2
274
i1 = basis_lineno[i1]
275
i2 = basis_lineno[i2]
277
assert 0 <= i1 <= i2 <= len(old_l)
278
assert 0 <= j1 <= j2 <= len(text)
280
#print tag, i1, i2, j1, j2
282
# the deletion and insertion are handled separately.
283
# first delete the region.
285
self._l.insert(i1+offset, ('[', new_version))
286
self._l.insert(i2+offset+1, (']', new_version))
290
# there may have been a deletion spanning up to
291
# i2; we want to insert after this region to make sure
292
# we don't destroy ourselves
294
self._l[i:i] = ([('{', new_version)]
296
+ [('}', new_version)])
297
offset += 2 + (j2 - j1)
240
302
def inclusions(self, versions):
510
572
If line1=line2, this is a pure insert; if newlines=[] this is a
511
573
pure delete. (Similar to difflib.)
513
# basis a list of (origin, lineno, line)
516
for origin, lineno, line in self._extract(included):
517
basis_lineno.append(lineno)
518
basis_lines.append(line)
520
# add a sentinal, because we can also match against the final line
521
basis_lineno.append(len(self._l))
523
# XXX: which line of the weave should we really consider
524
# matches the end of the file? the current code says it's the
525
# last line of the weave?
527
from difflib import SequenceMatcher
528
s = SequenceMatcher(None, basis_lines, lines)
530
# TODO: Perhaps return line numbers from composed weave as well?
532
for tag, i1, i2, j1, j2 in s.get_opcodes():
533
##print tag, i1, i2, j1, j2
538
# i1,i2 are given in offsets within basis_lines; we need to map them
539
# back to offsets within the entire weave
540
real_i1 = basis_lineno[i1]
541
real_i2 = basis_lineno[i2]
545
assert j2 <= len(lines)
547
yield real_i1, real_i2, lines[j1:j2]