1
from sets import Set as set
3
from bisect import bisect
6
# set index[line in a] = position of line in a unless
7
# unless a is a duplicate, in which case it's set to None
9
for i in xrange(len(a)):
15
# make btoa[i] = position of line i in a, unless
16
# that line doesn't occur exactly once in both,
17
# in which case it's set to None
18
btoa = [None] * len(b)
20
for pos, line in enumerate(b):
21
next = index.get(line)
24
# unset the previous mapping, which we now know to
25
# be invalid because the line isn't unique
26
btoa[index2[line]] = None
31
# this is the Patience sorting algorithm
32
# see http://en.wikipedia.org/wiki/Patience_sorting
33
backpointers = [None] * len(b)
37
for bpos, apos in enumerate(btoa):
40
# as an optimization, check if the next line comes at the end,
41
# because it usually does
42
if stacks and stacks[-1] < apos:
44
# as an optimization, check if the next line comes right after
45
# the previous line, because usually it does
46
elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos):
49
k = bisect(stacks, apos)
51
backpointers[bpos] = lasts[k-1]
63
result.append((btoa[k], k))
68
assert unique_lcs('', '') == []
69
assert unique_lcs('a', 'a') == [(0, 0)]
70
assert unique_lcs('a', 'b') == []
71
assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]
72
assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]
73
assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]
74
assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]
75
assert unique_lcs('acbac', 'abc') == [(2, 1)]
77
def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
80
# this will never happen normally, this check is to prevent DOS attacks
82
oldlength = len(answer)
89
if alo == ahi or blo == bhi:
91
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
92
# recurse between lines which are unique in each file and match
95
recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
96
answer.append((apos, bpos))
97
if len(answer) > oldlength:
98
# find matches between the last match and the end
99
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
100
elif a[alo] == b[blo]:
101
# find matching lines at the very beginning
102
while alo < ahi and blo < bhi and a[alo] == b[blo]:
103
answer.append((alo, blo))
106
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
107
elif a[ahi - 1] == b[bhi - 1]:
108
# find matching lines at the very end
111
while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
114
recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
115
for i in xrange(ahi - nahi):
116
answer.append((nahi + i, nbhi + i))
119
recurse_matches(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'], 5, 5, a1, 10)
120
assert a1 == [(0, 0), (2, 2), (4, 4)]
122
recurse_matches(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], 5, 3, a2, 10)
123
assert a2 == [(0, 0), (2, 1), (4, 2)]
131
# {revid: [(lineid, state)]}
132
# states are integers
133
# each line's state starts at 0, then goes to 1, 2, etc.
134
# odd states are when the line is present, even are when it is not
135
# the merge between two states is the greater of the two values
136
self.newedgestates = {}
138
def add_revision(self, revid, lines, parents):
139
assert revid not in self.parents
141
assert p in self.parents
142
self.parents[revid] = copy(parents)
143
# match against the weave
145
lines2 = [line for (lineid, line) in self.weave]
146
recurse_matches(lines, lines2, len(lines), len(lines2), matches, 10)
151
matches.append((len(lines), len(lines2)))
155
# add current weave lines to the new weave
156
newweave.extend(self.weave[weavepos + 1:b])
158
# add lines which have never appeared before to the weave
159
for i in xrange(revpos + 1, a):
161
currentlines.append(lineid)
162
newweave.append((lineid, lines[i]))
164
newweave.append(self.weave[b])
165
currentlines.append(self.weave[b][0])
168
self.weave = newweave
169
# calculate which lines had their states changed in this revision
171
if len(currentlines) > 0:
172
for i in xrange(len(currentlines) - 1):
173
currentedges.add((currentlines[i], currentlines[i+1]))
174
currentedges.add((None, currentlines[0]))
175
currentedges.add((currentlines[-1], None))
177
currentedges.add((None, None))
179
vals = self._make_vals(revid)
180
for edge in currentedges:
182
newedgevals.append((edge, 1))
183
for edge, state in vals.items():
184
if (state & 1 == 1) != (edge in currentedges):
185
newedgevals.append((edge, state + 1))
186
if len(newedgevals) > 0:
187
self.newedgestates[revid] = newedgevals
189
def _make_vals(self, revid):
190
# return {lineid: state} for the given revision
196
unused.extend(self.parents[next])
200
for p, q in self.newedgestates.get(n, []):
201
v[p] = max(v.get(p, 0), q)
204
def _lineids(self, revid):
206
for (ida, idb), state in self._make_vals(revid).items():
214
def retrieve_revision(self, revid):
215
# returns a list of strings
216
return [line for (lineid, line) in self.weave if lineid in self._lineids(revid)]
218
def merge(self, reva, revb):
220
# non-conflict lines are strings, conflict sections are
221
# ([linesa], [linesb])
222
edgesa = self._make_vals(reva)
223
alines = self._lineids(reva)
224
edgesb = self._make_vals(revb)
225
blines = self._lineids(revb)
226
# figure out which side wins at each line
228
for pos, (lineid, line) in enumerate(self.weave):
229
weavepositions[lineid] = pos
230
awins = [False] * len(self.weave)
231
bwins = [False] * len(self.weave)
232
for edge in edgesa.keys() + edgesb.keys():
233
statea = edgesa.get(edge, 0)
234
stateb = edgesb.get(edge, 0)
244
start = weavepositions[begin]
245
if begin not in alines or begin not in blines:
250
stop = weavepositions[end]
251
if end not in alines or end not in blines:
253
for i in xrange(start + 1, stop):
255
# pull out matched lines and winning sections
261
for pos, (lineid, line) in enumerate(self.weave):
262
if not awins[pos] and not bwins[pos]:
264
if amustwin and bmustwin:
265
if unmatcheda or unmatchedb:
266
result.append((unmatcheda, unmatchedb))
270
result.extend(unmatcheda)
272
result.extend(unmatchedb)
282
unmatcheda.append(line)
284
unmatchedb.append(line)
285
if amustwin and bmustwin:
286
if unmatcheda or unmatchedb:
287
result.append((unmatcheda, unmatchedb))
289
result.extend(unmatcheda)
291
result.extend(unmatchedb)
295
w.add_revision(1, ['a', 'b'], [])
296
assert w.retrieve_revision(1) == ['a', 'b']
297
w.add_revision(2, ['a', 'x', 'b'], [1])
298
assert w.retrieve_revision(2) == ['a', 'x', 'b']
299
w.add_revision(3, ['a', 'y', 'b'], [1])
300
assert w.retrieve_revision(3) == ['a', 'y', 'b']
301
assert w.merge(2, 3) == ['a', (['x'], ['y']), 'b']
302
w.add_revision(4, ['a', 'x', 'b'], [1])
303
w.add_revision(5, ['a', 'z', 'b'], [4])
304
assert w.merge(2, 5) == ['a', 'z', 'b']
306
w.add_revision(1, ['b'], [])
307
assert w.retrieve_revision(1) == ['b']
308
w.add_revision(2, ['x', 'b'], [1])
309
assert w.retrieve_revision(2) == ['x', 'b']
310
w.add_revision(3, ['y', 'b'], [1])
311
assert w.retrieve_revision(3) == ['y', 'b']
312
assert w.merge(2, 3) == [(['x'], ['y']), 'b']
313
w.add_revision(4, ['x', 'b'], [1])
314
w.add_revision(5, ['z', 'b'], [4])
315
assert w.merge(2, 5) == ['z', 'b']
317
w.add_revision(1, ['a'], [])
318
assert w.retrieve_revision(1) == ['a']
319
w.add_revision(2, ['a', 'x'], [1])
320
assert w.retrieve_revision(2) == ['a', 'x']
321
w.add_revision(3, ['a', 'y'], [1])
322
assert w.retrieve_revision(3) == ['a', 'y']
323
assert w.merge(2, 3) == ['a', (['x'], ['y'])]
324
w.add_revision(4, ['a', 'x'], [1])
325
w.add_revision(5, ['a', 'z'], [4])
326
assert w.merge(2, 5) == ['a', 'z']
328
w.add_revision(1, [], [])
329
assert w.retrieve_revision(1) == []
330
w.add_revision(2, ['x'], [1])
331
assert w.retrieve_revision(2) == ['x']
332
w.add_revision(3, ['y'], [1])
333
assert w.retrieve_revision(3) == ['y']
334
assert w.merge(2, 3) == [(['x'], ['y'])]
335
w.add_revision(4, ['x'], [1])
336
w.add_revision(5, ['z'], [4])
337
assert w.merge(2, 5) == ['z']
340
w.add_revision(1, ['a', 'b'], [])
341
w.add_revision(2, ['a', 'c', 'b'], [1])
342
w.add_revision(3, ['a', 'b'], [2])
343
w.add_revision(4, ['a', 'd', 'b'], [1])
344
assert w.merge(2, 4) == ['a', (['c'], ['d']), 'b']
345
assert w.merge(3, 4) == ['a', ([], ['d']), 'b']
346
w.add_revision(5, ['a', 'b'], [4])
347
assert w.merge(4, 5) == ['a', 'b']
349
w.add_revision(1, ['b'], [])
350
w.add_revision(2, ['c', 'b'], [1])
351
w.add_revision(3, ['b'], [2])
352
w.add_revision(4, ['d', 'b'], [1])
353
assert w.merge(2, 4) == [(['c'], ['d']), 'b']
354
assert w.merge(3, 4) == [([], ['d']), 'b']
355
w.add_revision(5, ['b'], [4])
356
assert w.merge(4, 5) == ['b']
358
w.add_revision(1, ['a'], [])
359
w.add_revision(2, ['a', 'c'], [1])
360
w.add_revision(3, ['a'], [2])
361
w.add_revision(4, ['a', 'd'], [1])
362
assert w.merge(2, 4) == ['a', (['c'], ['d'])]
363
assert w.merge(3, 4) == ['a', ([], ['d'])]
364
w.add_revision(5, ['a'], [4])
365
assert w.merge(4, 5) == ['a']
367
w.add_revision(1, [], [])
368
w.add_revision(2, ['c'], [1])
369
w.add_revision(3, [], [2])
370
w.add_revision(4, ['d'], [1])
371
assert w.merge(2, 4) == [(['c'], ['d'])]
372
assert w.merge(3, 4) == [([], ['d'])]
373
w.add_revision(5, [], [4])
374
assert w.merge(4, 5) == []
377
w.add_revision(1, ['a', 'b', 'c', 'd', 'e'], [])
378
w.add_revision(2, ['a', 'x', 'c', 'd', 'e'], [1])
379
w.add_revision(3, ['a', 'e'], [1])
380
w.add_revision(4, ['a', 'b', 'c', 'd', 'e'], [3])
381
assert w.merge(2, 4) == ['a', (['x', 'c', 'd'], ['b', 'c', 'd']), 'e']
383
w.add_revision(1, ['b', 'c', 'd', 'e'], [])
384
w.add_revision(2, ['x', 'c', 'd', 'e'], [1])
385
w.add_revision(3, ['e'], [1])
386
w.add_revision(4, ['b', 'c', 'd', 'e'], [3])
387
assert w.merge(2, 4) == [(['x', 'c', 'd'], ['b', 'c', 'd']), 'e']
389
w.add_revision(1, ['a', 'b', 'c', 'd'], [])
390
w.add_revision(2, ['a', 'x', 'c', 'd'], [1])
391
w.add_revision(3, ['a'], [1])
392
w.add_revision(4, ['a', 'b', 'c', 'd'], [3])
393
assert w.merge(2, 4) == ['a', (['x', 'c', 'd'], ['b', 'c', 'd'])]
395
w.add_revision(1, ['b', 'c', 'd'], [])
396
w.add_revision(2, ['x', 'c', 'd'], [1])
397
w.add_revision(3, [], [1])
398
w.add_revision(4, ['b', 'c', 'd'], [3])
399
assert w.merge(2, 4) == [(['x', 'c', 'd'], ['b', 'c', 'd'])]
402
w.add_revision(1, ['a', 'b'], [])
403
w.add_revision(2, ['a', 'c', 'b'], [1])
404
w.add_revision(3, ['a', 'd', 'b'], [1])
405
w.add_revision(4, ['a', 'c', 'd', 'b'], [2, 3])
406
w.add_revision(5, ['a', 'd', 'c', 'b'], [2, 3])
407
assert w.merge(4, 5) == ['a', (['c'], []), 'd', 'c', 'b']
409
w.add_revision(1, ['b'], [])
410
w.add_revision(2, ['c', 'b'], [1])
411
w.add_revision(3, ['d', 'b'], [1])
412
w.add_revision(4, ['c', 'd', 'b'], [2, 3])
413
w.add_revision(5, ['d', 'c', 'b'], [2, 3])
414
assert w.merge(4, 5) == [(['c'], []), 'd', 'c', 'b']
416
w.add_revision(1, ['a'], [])
417
w.add_revision(2, ['a', 'c'], [1])
418
w.add_revision(3, ['a', 'd'], [1])
419
w.add_revision(4, ['a', 'c', 'd'], [2, 3])
420
w.add_revision(5, ['a', 'd', 'c'], [2, 3])
421
assert w.merge(4, 5) == ['a', (['c'], []), 'd', 'c']
423
w.add_revision(1, [], [])
424
w.add_revision(2, ['c'], [1])
425
w.add_revision(3, ['d'], [1])
426
w.add_revision(4, ['c', 'd'], [2, 3])
427
w.add_revision(5, ['d', 'c'], [2, 3])
428
assert w.merge(4, 5) == [(['c'], []), 'd', 'c']