1
# Copyright (C) 2004, 2005 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
23
def intersect(ra, rb):
24
"""Given two ranges return the range where they intersect or None.
26
>>> intersect((0, 10), (0, 6))
28
>>> intersect((0, 10), (5, 15))
30
>>> intersect((0, 10), (10, 15))
31
>>> intersect((0, 9), (10, 15))
32
>>> intersect((0, 9), (7, 15))
38
sa = max(ra[0], rb[0])
39
sb = min(ra[1], rb[1])
46
def compare_range(a, astart, aend, b, bstart, bend):
47
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
49
if (aend-astart) != (bend-bstart):
51
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
61
"""3-way merge of texts.
63
Given BASE, OTHER, THIS, tries to produce a combined text
64
incorporating the changes from both BASE->OTHER and BASE->THIS.
65
All three will typically be sequences of lines."""
66
def __init__(self, base, a, b):
70
from difflib import SequenceMatcher
71
self.a_ops = SequenceMatcher(None, base, a).get_opcodes()
72
self.b_ops = SequenceMatcher(None, base, b).get_opcodes()
79
start_marker='<<<<<<<<',
80
mid_marker='========',
81
end_marker='>>>>>>>>',
83
"""Return merge in cvs-like form.
86
start_marker = start_marker + ' ' + name_a
88
end_marker = end_marker + ' ' + name_b
90
for t in self.merge_regions():
92
if what == 'unchanged':
93
for i in range(t[1], t[2]):
95
elif what == 'a' or what == 'same':
96
for i in range(t[1], t[2]):
99
for i in range(t[1], t[2]):
101
elif what == 'conflict':
102
yield start_marker + '\n'
103
for i in range(t[3], t[4]):
105
yield mid_marker + '\n'
106
for i in range(t[5], t[6]):
108
yield end_marker + '\n'
110
raise ValueError(what)
116
def merge_annotated(self):
117
"""Return merge with conflicts, showing origin of lines.
119
Most useful for debugging merge.
121
for t in self.merge_regions():
123
if what == 'unchanged':
124
for i in range(t[1], t[2]):
125
yield 'u | ' + self.base[i]
126
elif what == 'a' or what == 'same':
127
for i in range(t[1], t[2]):
128
yield what[0] + ' | ' + self.a[i]
130
for i in range(t[1], t[2]):
131
yield 'b | ' + self.b[i]
132
elif what == 'conflict':
134
for i in range(t[3], t[4]):
135
yield 'A | ' + self.a[i]
137
for i in range(t[5], t[6]):
138
yield 'B | ' + self.b[i]
141
raise ValueError(what)
147
def merge_groups(self):
148
"""Yield sequence of line groups. Each one is a tuple:
151
Lines unchanged from base
157
Lines taken from a (and equal to b)
162
'conflict', base_lines, a_lines, b_lines
163
Lines from base were changed to either a or b and conflict.
165
for t in self.merge_regions():
167
if what == 'unchanged':
168
yield what, self.base[t[1]:t[2]]
169
elif what == 'a' or what == 'same':
170
yield what, self.a[t[1]:t[2]]
172
yield what, self.b[t[1]:t[2]]
173
elif what == 'conflict':
175
self.base[t[1]:t[2]],
179
raise ValueError(what)
182
def merge_regions(self):
183
"""Return sequences of matching and conflicting regions.
185
This returns tuples, where the first value says what kind we
188
'unchanged', start, end
189
Take a region of base[start:end]
192
b and a are different from base but give the same result
195
Non-clashing insertion from a[start:end]
197
Method is as follows:
199
The two sequences align only on regions which match the base
200
and both descendents. These are found by doing a two-way diff
201
of each one against the base, and then finding the
202
intersections between those regions. These "sync regions"
203
are by definition unchanged in both and easily dealt with.
205
The regions in between can be in any of three cases:
206
conflicted, or changed on only one side.
209
# section a[0:ia] has been disposed of, etc
212
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
213
#print 'match base [%d:%d]' % (zmatch, zend)
215
matchlen = zend - zmatch
217
assert matchlen == (aend - amatch)
218
assert matchlen == (bend - bmatch)
222
len_base = zmatch - iz
227
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
230
# try to avoid actually slicing the lists
231
equal_a = compare_range(self.a, ia, amatch,
232
self.base, iz, zmatch)
233
equal_b = compare_range(self.b, ib, bmatch,
234
self.base, iz, zmatch)
235
same = compare_range(self.a, ia, amatch,
239
yield 'same', ia, amatch
240
elif equal_a and not equal_b:
241
yield 'b', ib, bmatch
242
elif equal_b and not equal_a:
243
yield 'a', ia, amatch
244
elif not equal_a and not equal_b:
245
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
247
raise AssertionError("can't handle a=b=base but unmatched")
253
# if the same part of the base was deleted on both sides
254
# that's OK, we can just skip it.
262
yield 'unchanged', zmatch, zend
269
def find_sync_regions(self):
270
"""Return a list of sync regions, where both descendents match the base.
272
Generates a list of (base1, base2, a1, a2, b1, b2). There is
273
always a zero-length sync region at the end of all the files.
275
from difflib import SequenceMatcher
278
amatches = SequenceMatcher(None, self.base, self.a).get_matching_blocks()
279
bmatches = SequenceMatcher(None, self.base, self.b).get_matching_blocks()
280
len_a = len(amatches)
281
len_b = len(bmatches)
285
while ia < len_a and ib < len_b:
286
abase, amatch, alen = amatches[ia]
287
bbase, bmatch, blen = bmatches[ib]
289
# there is an unconflicted block at i; how long does it
290
# extend? until whichever one ends earlier.
291
i = intersect((abase, abase+alen), (bbase, bbase+blen))
295
intlen = intend - intbase
297
# found a match of base[i[0], i[1]]; this may be less than
298
# the region that matches in either one
299
assert intlen <= alen
300
assert intlen <= blen
301
assert abase <= intbase
302
assert bbase <= intbase
304
asub = amatch + (intbase - abase)
305
bsub = bmatch + (intbase - bbase)
309
assert self.base[intbase:intend] == self.a[asub:aend], \
310
(self.base[intbase:intend], self.a[asub:aend])
312
assert self.base[intbase:intend] == self.b[bsub:bend]
314
sl.append((intbase, intend,
318
# advance whichever one ends first in the base text
319
if (abase + alen) < (bbase + blen):
324
intbase = len(self.base)
327
sl.append((intbase, intbase, abase, abase, bbase, bbase))
333
def find_unconflicted(self):
334
"""Return a list of ranges in base that are not conflicted."""
335
from difflib import SequenceMatcher
339
# don't sync-up on lines containing only blanks or pounds
340
junk_re = re.compile(r'^[ \t#]*$')
342
am = SequenceMatcher(junk_re.match, self.base, self.a).get_matching_blocks()
343
bm = SequenceMatcher(junk_re.match, self.base, self.b).get_matching_blocks()
348
# there is an unconflicted block at i; how long does it
349
# extend? until whichever one ends earlier.
354
i = intersect((a1, a2), (b1, b2))
367
# as for diff3 and meld the syntax is "MINE BASE OTHER"
368
a = file(argv[1], 'rt').readlines()
369
base = file(argv[2], 'rt').readlines()
370
b = file(argv[3], 'rt').readlines()
372
m3 = Merge3(base, a, b)
374
#for sr in m3.find_sync_regions():
377
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
378
sys.stdout.writelines(m3.merge_annotated())
381
if __name__ == '__main__':
383
sys.exit(main(sys.argv))