1
# Copyright (C) 2004, 2005 by Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
# mbp: "you know that thing where cvs gives you conflict markers?"
22
from difflib import SequenceMatcher
23
from bzrlib.errors import CantReprocessAndShowBase
25
def intersect(ra, rb):
26
"""Given two ranges return the range where they intersect or None.
28
>>> intersect((0, 10), (0, 6))
30
>>> intersect((0, 10), (5, 15))
32
>>> intersect((0, 10), (10, 15))
33
>>> intersect((0, 9), (10, 15))
34
>>> intersect((0, 9), (7, 15))
40
sa = max(ra[0], rb[0])
41
sb = min(ra[1], rb[1])
48
def compare_range(a, astart, aend, b, bstart, bend):
49
"""Compare a[astart:aend] == b[bstart:bend], without slicing.
51
if (aend-astart) != (bend-bstart):
53
for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
63
"""3-way merge of texts.
65
Given BASE, OTHER, THIS, tries to produce a combined text
66
incorporating the changes from both BASE->OTHER and BASE->THIS.
67
All three will typically be sequences of lines."""
68
def __init__(self, base, a, b):
79
start_marker='<<<<<<<',
84
"""Return merge in cvs-like form.
86
if base_marker and reprocess:
87
raise CantReprocessAndShowBase()
89
start_marker = start_marker + ' ' + name_a
91
end_marker = end_marker + ' ' + name_b
92
if name_base and base_marker:
93
base_marker = base_marker + ' ' + name_base
94
merge_regions = self.merge_regions()
96
merge_regions = self.reprocess_merge_regions(merge_regions)
97
for t in merge_regions:
99
if what == 'unchanged':
100
for i in range(t[1], t[2]):
102
elif what == 'a' or what == 'same':
103
for i in range(t[1], t[2]):
106
for i in range(t[1], t[2]):
108
elif what == 'conflict':
109
yield start_marker + '\n'
110
for i in range(t[3], t[4]):
112
if base_marker is not None:
113
yield base_marker + '\n'
114
for i in range(t[1], t[2]):
116
yield mid_marker + '\n'
117
for i in range(t[5], t[6]):
119
yield end_marker + '\n'
121
raise ValueError(what)
127
def merge_annotated(self):
128
"""Return merge with conflicts, showing origin of lines.
130
Most useful for debugging merge.
132
for t in self.merge_regions():
134
if what == 'unchanged':
135
for i in range(t[1], t[2]):
136
yield 'u | ' + self.base[i]
137
elif what == 'a' or what == 'same':
138
for i in range(t[1], t[2]):
139
yield what[0] + ' | ' + self.a[i]
141
for i in range(t[1], t[2]):
142
yield 'b | ' + self.b[i]
143
elif what == 'conflict':
145
for i in range(t[3], t[4]):
146
yield 'A | ' + self.a[i]
148
for i in range(t[5], t[6]):
149
yield 'B | ' + self.b[i]
152
raise ValueError(what)
158
def merge_groups(self):
159
"""Yield sequence of line groups. Each one is a tuple:
162
Lines unchanged from base
168
Lines taken from a (and equal to b)
173
'conflict', base_lines, a_lines, b_lines
174
Lines from base were changed to either a or b and conflict.
176
for t in self.merge_regions():
178
if what == 'unchanged':
179
yield what, self.base[t[1]:t[2]]
180
elif what == 'a' or what == 'same':
181
yield what, self.a[t[1]:t[2]]
183
yield what, self.b[t[1]:t[2]]
184
elif what == 'conflict':
186
self.base[t[1]:t[2]],
190
raise ValueError(what)
193
def merge_regions(self):
194
"""Return sequences of matching and conflicting regions.
196
This returns tuples, where the first value says what kind we
199
'unchanged', start, end
200
Take a region of base[start:end]
203
b and a are different from base but give the same result
206
Non-clashing insertion from a[start:end]
208
Method is as follows:
210
The two sequences align only on regions which match the base
211
and both descendents. These are found by doing a two-way diff
212
of each one against the base, and then finding the
213
intersections between those regions. These "sync regions"
214
are by definition unchanged in both and easily dealt with.
216
The regions in between can be in any of three cases:
217
conflicted, or changed on only one side.
220
# section a[0:ia] has been disposed of, etc
223
for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
224
#print 'match base [%d:%d]' % (zmatch, zend)
226
matchlen = zend - zmatch
228
assert matchlen == (aend - amatch)
229
assert matchlen == (bend - bmatch)
233
len_base = zmatch - iz
238
#print 'unmatched a=%d, b=%d' % (len_a, len_b)
241
# try to avoid actually slicing the lists
242
equal_a = compare_range(self.a, ia, amatch,
243
self.base, iz, zmatch)
244
equal_b = compare_range(self.b, ib, bmatch,
245
self.base, iz, zmatch)
246
same = compare_range(self.a, ia, amatch,
250
yield 'same', ia, amatch
251
elif equal_a and not equal_b:
252
yield 'b', ib, bmatch
253
elif equal_b and not equal_a:
254
yield 'a', ia, amatch
255
elif not equal_a and not equal_b:
256
yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
258
raise AssertionError("can't handle a=b=base but unmatched")
264
# if the same part of the base was deleted on both sides
265
# that's OK, we can just skip it.
273
yield 'unchanged', zmatch, zend
279
def reprocess_merge_regions(self, merge_regions):
280
"""Where there are conflict regions, remove the agreed lines.
282
Lines where both A and B have made the same changes are
285
for region in merge_regions:
286
if region[0] != "conflict":
289
type, iz, zmatch, ia, amatch, ib, bmatch = region
290
a_region = self.a[ia:amatch]
291
b_region = self.b[ib:bmatch]
292
matches = SequenceMatcher(None, a_region,
293
b_region).get_matching_blocks()
296
for region_ia, region_ib, region_len in matches[:-1]:
299
reg = self.mismatch_region(next_a, region_ia, next_b,
303
yield 'same', region_ia, region_len+region_ia
304
next_a = region_ia + region_len
305
next_b = region_ib + region_len
306
reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
312
def mismatch_region(next_a, region_ia, next_b, region_ib):
313
if next_a < region_ia or next_b < region_ib:
314
return 'conflict', None, None, next_a, region_ia, next_b, region_ib
317
def find_sync_regions(self):
318
"""Return a list of sync regions, where both descendents match the base.
320
Generates a list of (base1, base2, a1, a2, b1, b2). There is
321
always a zero-length sync region at the end of all the files.
325
amatches = SequenceMatcher(None, self.base, self.a).get_matching_blocks()
326
bmatches = SequenceMatcher(None, self.base, self.b).get_matching_blocks()
327
len_a = len(amatches)
328
len_b = len(bmatches)
332
while ia < len_a and ib < len_b:
333
abase, amatch, alen = amatches[ia]
334
bbase, bmatch, blen = bmatches[ib]
336
# there is an unconflicted block at i; how long does it
337
# extend? until whichever one ends earlier.
338
i = intersect((abase, abase+alen), (bbase, bbase+blen))
342
intlen = intend - intbase
344
# found a match of base[i[0], i[1]]; this may be less than
345
# the region that matches in either one
346
assert intlen <= alen
347
assert intlen <= blen
348
assert abase <= intbase
349
assert bbase <= intbase
351
asub = amatch + (intbase - abase)
352
bsub = bmatch + (intbase - bbase)
356
assert self.base[intbase:intend] == self.a[asub:aend], \
357
(self.base[intbase:intend], self.a[asub:aend])
359
assert self.base[intbase:intend] == self.b[bsub:bend]
361
sl.append((intbase, intend,
365
# advance whichever one ends first in the base text
366
if (abase + alen) < (bbase + blen):
371
intbase = len(self.base)
374
sl.append((intbase, intbase, abase, abase, bbase, bbase))
380
def find_unconflicted(self):
381
"""Return a list of ranges in base that are not conflicted."""
385
# don't sync-up on lines containing only blanks or pounds
386
junk_re = re.compile(r'^[ \t#]*$')
388
am = SequenceMatcher(junk_re.match, self.base, self.a).get_matching_blocks()
389
bm = SequenceMatcher(junk_re.match, self.base, self.b).get_matching_blocks()
394
# there is an unconflicted block at i; how long does it
395
# extend? until whichever one ends earlier.
400
i = intersect((a1, a2), (b1, b2))
413
# as for diff3 and meld the syntax is "MINE BASE OTHER"
414
a = file(argv[1], 'rt').readlines()
415
base = file(argv[2], 'rt').readlines()
416
b = file(argv[3], 'rt').readlines()
418
m3 = Merge3(base, a, b)
420
#for sr in m3.find_sync_regions():
423
# sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
424
sys.stdout.writelines(m3.merge_annotated())
427
if __name__ == '__main__':
429
sys.exit(main(sys.argv))