1
from sets import Set as set
3
from bisect import bisect
6
# set index[line in a] = position of line in a unless
7
# unless a is a duplicate, in which case it's set to None
9
for i in xrange(len(a)):
15
# make btoa[i] = position of line i in a, unless
16
# that line doesn't occur exactly once in both,
17
# in which case it's set to None
18
btoa = [None] * len(b)
20
for pos, line in enumerate(b):
21
next = index.get(line)
24
# unset the previous mapping, which we now know to
25
# be invalid because the line isn't unique
26
btoa[index2[line]] = None
31
# this is the Patience sorting algorithm
32
# see http://en.wikipedia.org/wiki/Patience_sorting
33
backpointers = [None] * len(b)
37
for bpos, apos in enumerate(btoa):
40
# as an optimization, check if the next line comes at the end,
41
# because it usually does
42
if stacks and stacks[-1] < apos:
44
# as an optimization, check if the next line comes right after
45
# the previous line, because usually it does
46
elif stacks and stacks[k] < apos and (k == len(stacks) - 1 or stacks[k+1] > apos):
49
k = bisect(stacks, apos)
51
backpointers[bpos] = lasts[k-1]
63
result.append((btoa[k], k))
68
assert unique_lcs('', '') == []
69
assert unique_lcs('a', 'a') == [(0, 0)]
70
assert unique_lcs('a', 'b') == []
71
assert unique_lcs('ab', 'ab') == [(0, 0), (1, 1)]
72
assert unique_lcs('abcde', 'cdeab') == [(2, 0), (3, 1), (4, 2)]
73
assert unique_lcs('cdeab', 'abcde') == [(0, 2), (1, 3), (2, 4)]
74
assert unique_lcs('abXde', 'abYde') == [(0, 0), (1, 1), (3, 3), (4, 4)]
75
assert unique_lcs('acbac', 'abc') == [(2, 1)]
77
def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
80
# this will never happen normally, this check is to prevent DOS attacks
82
oldlength = len(answer)
89
if alo == ahi or blo == bhi:
91
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
92
# recurse between lines which are unique in each file and match
95
recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
96
answer.append((apos, bpos))
97
if len(answer) > oldlength:
98
# find matches between the last match and the end
99
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
100
elif a[alo] == b[blo]:
101
# find matching lines at the very beginning
102
while alo < ahi and blo < bhi and a[alo] == b[blo]:
103
answer.append((alo, blo))
106
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
107
elif a[ahi - 1] == b[bhi - 1]:
108
# find matching lines at the very end
111
while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
114
recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
115
for i in xrange(ahi - nahi):
116
answer.append((nahi + i, nbhi + i))
119
recurse_matches(['a', None, 'b', None, 'c'], ['a', 'a', 'b', 'c', 'c'], 5, 5, a1, 10)
120
assert a1 == [(0, 0), (2, 2), (4, 4)]
122
recurse_matches(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'], 5, 3, a2, 10)
123
assert a2 == [(0, 0), (2, 1), (4, 2)]
131
# {revid: [(lineid, state)]}
132
# states are integers
133
# each line's state starts at 0, then goes to 1, 2, etc.
134
# odd states are when the line is present, even are when it is not
135
# the merge between two states is the greater of the two values
138
def add_revision(self, revid, lines, parents):
139
assert revid not in self.parents
141
assert p in self.parents
142
self.parents[revid] = copy(parents)
144
# match against the weave
145
lines2 = [line for (lineid, line) in self.weave]
146
recurse_matches(lines, lines2, len(lines), len(lines2), matches, 10)
149
s.add(self.weave[b][0])
150
vs = [self._make_vals(p) for p in parents]
151
# calculate which lines had their states changed in this revision
154
for lineid, line in self.weave:
155
state = max([v.get(lineid, 0) for v in vs])
156
if (state & 1 == 1) != (lineid in s):
157
newvals.append((lineid, state + 1))
159
for lineid, line in self.weave:
160
newvals.append((lineid, 1))
165
matches.append((len(lines), len(lines2)))
168
# add current weave lines to the new weave
169
newweave.extend(self.weave[weavepos + 1:b])
171
# add lines which have never appeared before to the weave
172
for i in xrange(revpos + 1, a):
174
newweave.append((lineid, lines[i]))
175
newvals.append((lineid, 1))
177
newweave.append(self.weave[b])
180
self.newstates[revid] = newvals
181
self.weave = newweave
183
def _parents(self, revid):
188
if next not in result:
189
unused.extend(self.parents[next])
193
def _make_vals(self, revid):
194
# return {lineid: state} for the given revision
195
s = self._parents(revid)
198
for p, q in self.newstates[n]:
199
v[p] = max(v.get(p, 0), q)
202
def retrieve_revision(self, revid):
203
# returns a list of strings
204
v = self._make_vals(revid)
205
return [line for (lineid, line) in self.weave if (v.get(lineid, 0) & 1)]
207
def annotate(self, revid):
208
# returns [(line, whether present, [perpetrator])]
209
ps = self._parents(revid)
210
# {lineid: [(parent, state)]}
213
for lineid, state in self.newstates[parent]:
214
byline.setdefault(lineid, []).append((parent, state))
216
for (lineid, line) in self.weave:
219
for (parent, state) in byline.get(lineid, []):
223
elif state == maxstate:
226
result.append((line, (maxstate & 1) == 1, perps))
229
def merge_revisions(self, reva, revb):
231
# non-conflict lines are strings, conflict sections are
232
# ([linesa], [linesb])
233
va = self._make_vals(reva)
234
vb = self._make_vals(revb)
236
awins, bwins = False, False
237
alines, blines = [], []
238
for lineid, line in self.weave:
239
aval, bval = va.get(lineid, 0), vb.get(lineid, 0)
240
if aval & 1 and bval & 1:
241
# append a matched line and the section prior to it
244
r.append((alines, blines))
250
awins, bwins = False, False
251
alines, blines = [], []
252
elif aval & 1 or bval & 1:
253
# extend either side of the potential conflict
254
# section with a non-matching line
263
# add the potential conflict section at the end
265
r.append((alines, blines))
273
w.add_revision(1, ['a', 'b'], [])
274
assert w.retrieve_revision(1) == ['a', 'b']
275
w.add_revision(2, ['a', 'x', 'b'], [1])
276
assert w.retrieve_revision(2) == ['a', 'x', 'b']
277
w.add_revision(3, ['a', 'y', 'b'], [1])
278
assert w.retrieve_revision(3) == ['a', 'y', 'b']
279
assert w.merge_revisions(2, 3) == ['a', (['x'], ['y']), 'b']
280
w.add_revision(4, ['a', 'x', 'b'], [1])
281
w.add_revision(5, ['a', 'z', 'b'], [4])
282
assert w.merge_revisions(2, 5) == ['a', 'z', 'b']
284
w.add_revision('p', ['a', 'b'], [])
285
w.add_revision('q', ['a', 'c'], ['p'])
286
w.add_revision('r', ['a'], ['p'])
287
assert w.annotate('r') == [('a', True, ['p']), ('b', False, ['r'])]