~bzr-pqm/bzr/bzr.dev

2052.3.2 by John Arbash Meinel
Change Copyright .. by Canonical to Copyright ... Canonical
1
# Copyright (C) 2004, 2005 Canonical Ltd
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
2
#
821 by Martin Pool
- start code for built-in diff3-style resolve
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
7
#
821 by Martin Pool
- start code for built-in diff3-style resolve
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
1887.1.1 by Adeodato Simó
Do not separate paragraphs in the copyright statement with blank lines,
12
#
821 by Martin Pool
- start code for built-in diff3-style resolve
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16
17
823 by Martin Pool
quote
18
# mbp: "you know that thing where cvs gives you conflict markers?"
19
# s: "i hate that."
20
21
1185.24.1 by Aaron Bentley
Got reprocessing working
22
from bzrlib.errors import CantReprocessAndShowBase
1711.2.23 by John Arbash Meinel
Late bind to PatienceSequenceMatcher in merge3.py
23
import bzrlib.patiencediff
1558.15.11 by Aaron Bentley
Apply merge review suggestions
24
from bzrlib.textfile import check_text_lines
821 by Martin Pool
- start code for built-in diff3-style resolve
25
1711.2.11 by John Arbash Meinel
Rename patiencediff.SequenceMatcher => PatienceSequenceMatcher and knit.SequenceMatcher => KnitSequenceMatcher
26
821 by Martin Pool
- start code for built-in diff3-style resolve
27
def intersect(ra, rb):
28
    """Given two ranges return the range where they intersect or None.
29
30
    >>> intersect((0, 10), (0, 6))
31
    (0, 6)
32
    >>> intersect((0, 10), (5, 15))
33
    (5, 10)
34
    >>> intersect((0, 10), (10, 15))
35
    >>> intersect((0, 9), (10, 15))
36
    >>> intersect((0, 9), (7, 15))
37
    (7, 9)
38
    """
39
    assert ra[0] <= ra[1]
40
    assert rb[0] <= rb[1]
41
    
42
    sa = max(ra[0], rb[0])
43
    sb = min(ra[1], rb[1])
44
    if sa < sb:
45
        return sa, sb
46
    else:
47
        return None
48
49
839 by Martin Pool
- avoid copying string lists when handling unmatched regions
50
def compare_range(a, astart, aend, b, bstart, bend):
51
    """Compare a[astart:aend] == b[bstart:bend], without slicing.
52
    """
53
    if (aend-astart) != (bend-bstart):
54
        return False
55
    for ia, ib in zip(xrange(astart, aend), xrange(bstart, bend)):
56
        if a[ia] != b[ib]:
57
            return False
58
    else:
59
        return True
60
        
61
62
822 by Martin Pool
- Renamed merge3 test suite for easier access.
63
821 by Martin Pool
- start code for built-in diff3-style resolve
64
class Merge3(object):
65
    """3-way merge of texts.
66
67
    Given BASE, OTHER, THIS, tries to produce a combined text
68
    incorporating the changes from both BASE->OTHER and BASE->THIS.
69
    All three will typically be sequences of lines."""
70
    def __init__(self, base, a, b):
1558.15.4 by Aaron Bentley
Fixed binary handling in Merge3
71
        check_text_lines(base)
72
        check_text_lines(a)
73
        check_text_lines(b)
821 by Martin Pool
- start code for built-in diff3-style resolve
74
        self.base = base
75
        self.a = a
76
        self.b = b
822 by Martin Pool
- Renamed merge3 test suite for easier access.
77
78
827 by Martin Pool
- new Merge3.merge_groups feeds back the merged lines
79
828 by Martin Pool
- code to represent merges in regular text conflict form
80
    def merge_lines(self,
829 by Martin Pool
- More merge3 cvs-form stuff
81
                    name_a=None,
82
                    name_b=None,
1185.18.1 by Aaron Bentley
Added --show-base to merge
83
                    name_base=None,
1148 by Martin Pool
- change conflict markers to suit smerge, etc
84
                    start_marker='<<<<<<<',
85
                    mid_marker='=======',
86
                    end_marker='>>>>>>>',
1185.24.1 by Aaron Bentley
Got reprocessing working
87
                    base_marker=None,
88
                    reprocess=False):
828 by Martin Pool
- code to represent merges in regular text conflict form
89
        """Return merge in cvs-like form.
90
        """
1551.10.38 by Aaron Bentley
merge3 auto-detects line endings for conflict markers
91
        newline = '\n'
92
        if len(self.a) > 0:
93
            if self.a[0].endswith('\r\n'):
94
                newline = '\r\n'
95
            elif self.a[0].endswith('\r'):
96
                newline = '\r'
1185.24.1 by Aaron Bentley
Got reprocessing working
97
        if base_marker and reprocess:
98
            raise CantReprocessAndShowBase()
829 by Martin Pool
- More merge3 cvs-form stuff
99
        if name_a:
100
            start_marker = start_marker + ' ' + name_a
101
        if name_b:
102
            end_marker = end_marker + ' ' + name_b
1185.18.1 by Aaron Bentley
Added --show-base to merge
103
        if name_base and base_marker:
104
            base_marker = base_marker + ' ' + name_base
1185.24.1 by Aaron Bentley
Got reprocessing working
105
        merge_regions = self.merge_regions()
106
        if reprocess is True:
107
            merge_regions = self.reprocess_merge_regions(merge_regions)
108
        for t in merge_regions:
828 by Martin Pool
- code to represent merges in regular text conflict form
109
            what = t[0]
110
            if what == 'unchanged':
111
                for i in range(t[1], t[2]):
112
                    yield self.base[i]
830 by Martin Pool
- handle chunks which differ from the base but agree
113
            elif what == 'a' or what == 'same':
828 by Martin Pool
- code to represent merges in regular text conflict form
114
                for i in range(t[1], t[2]):
115
                    yield self.a[i]
116
            elif what == 'b':
117
                for i in range(t[1], t[2]):
118
                    yield self.b[i]
119
            elif what == 'conflict':
1551.10.38 by Aaron Bentley
merge3 auto-detects line endings for conflict markers
120
                yield start_marker + newline
828 by Martin Pool
- code to represent merges in regular text conflict form
121
                for i in range(t[3], t[4]):
122
                    yield self.a[i]
1185.18.1 by Aaron Bentley
Added --show-base to merge
123
                if base_marker is not None:
1551.10.38 by Aaron Bentley
merge3 auto-detects line endings for conflict markers
124
                    yield base_marker + newline
1185.18.1 by Aaron Bentley
Added --show-base to merge
125
                    for i in range(t[1], t[2]):
126
                        yield self.base[i]
1551.10.38 by Aaron Bentley
merge3 auto-detects line endings for conflict markers
127
                yield mid_marker + newline
828 by Martin Pool
- code to represent merges in regular text conflict form
128
                for i in range(t[5], t[6]):
129
                    yield self.b[i]
1551.10.38 by Aaron Bentley
merge3 auto-detects line endings for conflict markers
130
                yield end_marker + newline
828 by Martin Pool
- code to represent merges in regular text conflict form
131
            else:
132
                raise ValueError(what)
133
        
134
        
135
136
137
832 by Martin Pool
- New Merge3.merge_annotated method for debugging.
138
    def merge_annotated(self):
139
        """Return merge with conflicts, showing origin of lines.
140
141
        Most useful for debugging merge.        
142
        """
143
        for t in self.merge_regions():
144
            what = t[0]
145
            if what == 'unchanged':
146
                for i in range(t[1], t[2]):
147
                    yield 'u | ' + self.base[i]
148
            elif what == 'a' or what == 'same':
149
                for i in range(t[1], t[2]):
150
                    yield what[0] + ' | ' + self.a[i]
151
            elif what == 'b':
152
                for i in range(t[1], t[2]):
153
                    yield 'b | ' + self.b[i]
154
            elif what == 'conflict':
155
                yield '<<<<\n'
156
                for i in range(t[3], t[4]):
157
                    yield 'A | ' + self.a[i]
158
                yield '----\n'
159
                for i in range(t[5], t[6]):
160
                    yield 'B | ' + self.b[i]
161
                yield '>>>>\n'
162
            else:
163
                raise ValueError(what)
164
        
165
        
166
167
168
827 by Martin Pool
- new Merge3.merge_groups feeds back the merged lines
169
    def merge_groups(self):
170
        """Yield sequence of line groups.  Each one is a tuple:
171
172
        'unchanged', lines
173
             Lines unchanged from base
174
175
        'a', lines
176
             Lines taken from a
177
830 by Martin Pool
- handle chunks which differ from the base but agree
178
        'same', lines
179
             Lines taken from a (and equal to b)
180
827 by Martin Pool
- new Merge3.merge_groups feeds back the merged lines
181
        'b', lines
182
             Lines taken from b
183
184
        'conflict', base_lines, a_lines, b_lines
185
             Lines from base were changed to either a or b and conflict.
186
        """
187
        for t in self.merge_regions():
188
            what = t[0]
189
            if what == 'unchanged':
190
                yield what, self.base[t[1]:t[2]]
830 by Martin Pool
- handle chunks which differ from the base but agree
191
            elif what == 'a' or what == 'same':
827 by Martin Pool
- new Merge3.merge_groups feeds back the merged lines
192
                yield what, self.a[t[1]:t[2]]
193
            elif what == 'b':
194
                yield what, self.b[t[1]:t[2]]
195
            elif what == 'conflict':
196
                yield (what,
197
                       self.base[t[1]:t[2]],
198
                       self.a[t[3]:t[4]],
199
                       self.b[t[5]:t[6]])
200
            else:
201
                raise ValueError(what)
202
203
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
204
    def merge_regions(self):
822 by Martin Pool
- Renamed merge3 test suite for easier access.
205
        """Return sequences of matching and conflicting regions.
206
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
207
        This returns tuples, where the first value says what kind we
208
        have:
209
210
        'unchanged', start, end
211
             Take a region of base[start:end]
212
830 by Martin Pool
- handle chunks which differ from the base but agree
213
        'same', astart, aend
214
             b and a are different from base but give the same result
215
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
216
        'a', start, end
217
             Non-clashing insertion from a[start:end]
218
822 by Martin Pool
- Renamed merge3 test suite for easier access.
219
        Method is as follows:
220
221
        The two sequences align only on regions which match the base
222
        and both descendents.  These are found by doing a two-way diff
223
        of each one against the base, and then finding the
224
        intersections between those regions.  These "sync regions"
225
        are by definition unchanged in both and easily dealt with.
226
227
        The regions in between can be in any of three cases:
228
        conflicted, or changed on only one side.
229
        """
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
230
231
        # section a[0:ia] has been disposed of, etc
232
        iz = ia = ib = 0
233
        
234
        for zmatch, zend, amatch, aend, bmatch, bend in self.find_sync_regions():
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
235
            #print 'match base [%d:%d]' % (zmatch, zend)
236
            
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
237
            matchlen = zend - zmatch
238
            assert matchlen >= 0
239
            assert matchlen == (aend - amatch)
240
            assert matchlen == (bend - bmatch)
241
            
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
242
            len_a = amatch - ia
243
            len_b = bmatch - ib
244
            len_base = zmatch - iz
245
            assert len_a >= 0
246
            assert len_b >= 0
247
            assert len_base >= 0
248
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
249
            #print 'unmatched a=%d, b=%d' % (len_a, len_b)
250
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
251
            if len_a or len_b:
839 by Martin Pool
- avoid copying string lists when handling unmatched regions
252
                # try to avoid actually slicing the lists
253
                equal_a = compare_range(self.a, ia, amatch,
254
                                        self.base, iz, zmatch)
255
                equal_b = compare_range(self.b, ib, bmatch,
256
                                        self.base, iz, zmatch)
257
                same = compare_range(self.a, ia, amatch,
258
                                     self.b, ib, bmatch)
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
259
830 by Martin Pool
- handle chunks which differ from the base but agree
260
                if same:
261
                    yield 'same', ia, amatch
262
                elif equal_a and not equal_b:
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
263
                    yield 'b', ib, bmatch
264
                elif equal_b and not equal_a:
265
                    yield 'a', ia, amatch
266
                elif not equal_a and not equal_b:
827 by Martin Pool
- new Merge3.merge_groups feeds back the merged lines
267
                    yield 'conflict', iz, zmatch, ia, amatch, ib, bmatch
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
268
                else:
839 by Martin Pool
- avoid copying string lists when handling unmatched regions
269
                    raise AssertionError("can't handle a=b=base but unmatched")
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
270
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
271
                ia = amatch
826 by Martin Pool
- Actually merge unsynchronized regions. Woot!
272
                ib = bmatch
273
            iz = zmatch
274
275
            # if the same part of the base was deleted on both sides
276
            # that's OK, we can just skip it.
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
277
278
                
279
            if matchlen > 0:
280
                assert ia == amatch
281
                assert ib == bmatch
282
                assert iz == zmatch
283
                
284
                yield 'unchanged', zmatch, zend
285
                iz = zend
286
                ia = aend
287
                ib = bend
1185.24.1 by Aaron Bentley
Got reprocessing working
288
    
289
290
    def reprocess_merge_regions(self, merge_regions):
1185.33.62 by Martin Pool
doc
291
        """Where there are conflict regions, remove the agreed lines.
292
293
        Lines where both A and B have made the same changes are 
294
        eliminated.
295
        """
1185.24.1 by Aaron Bentley
Got reprocessing working
296
        for region in merge_regions:
297
            if region[0] != "conflict":
298
                yield region
299
                continue
300
            type, iz, zmatch, ia, amatch, ib, bmatch = region
301
            a_region = self.a[ia:amatch]
302
            b_region = self.b[ib:bmatch]
1711.2.23 by John Arbash Meinel
Late bind to PatienceSequenceMatcher in merge3.py
303
            matches = bzrlib.patiencediff.PatienceSequenceMatcher(
304
                    None, a_region, b_region).get_matching_blocks()
1185.24.1 by Aaron Bentley
Got reprocessing working
305
            next_a = ia
306
            next_b = ib
307
            for region_ia, region_ib, region_len in matches[:-1]:
308
                region_ia += ia
309
                region_ib += ib
310
                reg = self.mismatch_region(next_a, region_ia, next_b,
311
                                           region_ib)
312
                if reg is not None:
313
                    yield reg
314
                yield 'same', region_ia, region_len+region_ia
315
                next_a = region_ia + region_len
316
                next_b = region_ib + region_len
317
            reg = self.mismatch_region(next_a, amatch, next_b, bmatch)
318
            if reg is not None:
319
                yield reg
320
321
322
    @staticmethod
323
    def mismatch_region(next_a, region_ia,  next_b, region_ib):
324
        if next_a < region_ia or next_b < region_ib:
325
            return 'conflict', None, None, next_a, region_ia, next_b, region_ib
326
            
327
822 by Martin Pool
- Renamed merge3 test suite for easier access.
328
    def find_sync_regions(self):
329
        """Return a list of sync regions, where both descendents match the base.
330
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
331
        Generates a list of (base1, base2, a1, a2, b1, b2).  There is
332
        always a zero-length sync region at the end of all the files.
821 by Martin Pool
- start code for built-in diff3-style resolve
333
        """
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
334
335
        ia = ib = 0
1711.2.23 by John Arbash Meinel
Late bind to PatienceSequenceMatcher in merge3.py
336
        amatches = bzrlib.patiencediff.PatienceSequenceMatcher(
337
                None, self.base, self.a).get_matching_blocks()
338
        bmatches = bzrlib.patiencediff.PatienceSequenceMatcher(
339
                None, self.base, self.b).get_matching_blocks()
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
340
        len_a = len(amatches)
341
        len_b = len(bmatches)
342
343
        sl = []
344
345
        while ia < len_a and ib < len_b:
346
            abase, amatch, alen = amatches[ia]
347
            bbase, bmatch, blen = bmatches[ib]
348
822 by Martin Pool
- Renamed merge3 test suite for easier access.
349
            # there is an unconflicted block at i; how long does it
350
            # extend?  until whichever one ends earlier.
351
            i = intersect((abase, abase+alen), (bbase, bbase+blen))
352
            if i:
353
                intbase = i[0]
354
                intend = i[1]
355
                intlen = intend - intbase
356
357
                # found a match of base[i[0], i[1]]; this may be less than
358
                # the region that matches in either one
359
                assert intlen <= alen
360
                assert intlen <= blen
361
                assert abase <= intbase
362
                assert bbase <= intbase
363
364
                asub = amatch + (intbase - abase)
365
                bsub = bmatch + (intbase - bbase)
366
                aend = asub + intlen
367
                bend = bsub + intlen
368
369
                assert self.base[intbase:intend] == self.a[asub:aend], \
370
                       (self.base[intbase:intend], self.a[asub:aend])
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
371
822 by Martin Pool
- Renamed merge3 test suite for easier access.
372
                assert self.base[intbase:intend] == self.b[bsub:bend]
373
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
374
                sl.append((intbase, intend,
375
                           asub, aend,
376
                           bsub, bend))
822 by Martin Pool
- Renamed merge3 test suite for easier access.
377
378
            # advance whichever one ends first in the base text
379
            if (abase + alen) < (bbase + blen):
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
380
                ia += 1
822 by Martin Pool
- Renamed merge3 test suite for easier access.
381
            else:
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
382
                ib += 1
383
            
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
384
        intbase = len(self.base)
385
        abase = len(self.a)
386
        bbase = len(self.b)
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
387
        sl.append((intbase, intbase, abase, abase, bbase, bbase))
388
389
        return sl
825 by Martin Pool
- Merge3.find_sync_regions always returns a zero-length sentinal at the end to
390
821 by Martin Pool
- start code for built-in diff3-style resolve
391
392
393
    def find_unconflicted(self):
394
        """Return a list of ranges in base that are not conflicted."""
1711.2.23 by John Arbash Meinel
Late bind to PatienceSequenceMatcher in merge3.py
395
        am = bzrlib.patiencediff.PatienceSequenceMatcher(
396
                None, self.base, self.a).get_matching_blocks()
397
        bm = bzrlib.patiencediff.PatienceSequenceMatcher(
398
                None, self.base, self.b).get_matching_blocks()
821 by Martin Pool
- start code for built-in diff3-style resolve
399
400
        unc = []
401
402
        while am and bm:
403
            # there is an unconflicted block at i; how long does it
404
            # extend?  until whichever one ends earlier.
405
            a1 = am[0][0]
406
            a2 = a1 + am[0][2]
407
            b1 = bm[0][0]
408
            b2 = b1 + bm[0][2]
409
            i = intersect((a1, a2), (b1, b2))
410
            if i:
411
                unc.append(i)
412
413
            if a2 < b2:
414
                del am[0]
415
            else:
416
                del bm[0]
417
                
418
        return unc
829 by Martin Pool
- More merge3 cvs-form stuff
419
420
421
def main(argv):
830 by Martin Pool
- handle chunks which differ from the base but agree
422
    # as for diff3 and meld the syntax is "MINE BASE OTHER"
423
    a = file(argv[1], 'rt').readlines()
424
    base = file(argv[2], 'rt').readlines()
829 by Martin Pool
- More merge3 cvs-form stuff
425
    b = file(argv[3], 'rt').readlines()
426
427
    m3 = Merge3(base, a, b)
428
838 by Martin Pool
- Merge3.find_sync_regions() - avoid problems with iters on python2.3 by
429
    #for sr in m3.find_sync_regions():
430
    #    print sr
431
832 by Martin Pool
- New Merge3.merge_annotated method for debugging.
432
    # sys.stdout.writelines(m3.merge_lines(name_a=argv[1], name_b=argv[3]))
433
    sys.stdout.writelines(m3.merge_annotated())
829 by Martin Pool
- More merge3 cvs-form stuff
434
435
436
if __name__ == '__main__':
437
    import sys
438
    sys.exit(main(sys.argv))