1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
# Copyright (C) 2005, 2006 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Author: Martin Pool <mbp@canonical.com>
# Aaron Bentley <aaron.bentley@utoronto.ca>
from difflib import SequenceMatcher
class TextMerge(object):
    """Base class for text-mergers.

    Subclasses must implement _merge_struct.

    Many methods produce or consume structured merge information.
    This is an iterable of tuples of lists of lines.
    Each tuple may have a length of 1 - 3, depending on whether the region it
    represents is conflicted.

    Unconflicted region tuples have length 1.
    Conflicted region tuples have length 2 or 3.  Index 0 is text_a, e.g.
    THIS.  Index 1 is text_b, e.g. OTHER.  Index 2 is optional.  If present,
    it represents BASE.
    """
    # TODO: Show some version information (e.g. author, date) on conflicted
    # regions.
    A_MARKER = '<<<<<<< \n'
    B_MARKER = '>>>>>>> \n'
    SPLIT_MARKER = '=======\n'

    def __init__(self, a_marker=A_MARKER, b_marker=B_MARKER,
                 split_marker=SPLIT_MARKER):
        """Store the marker lines emitted around conflicted regions.

        :param a_marker: line emitted before the text_a side of a conflict
        :param b_marker: line emitted after the text_b side of a conflict
        :param split_marker: line emitted between the two sides
        """
        self.a_marker = a_marker
        self.b_marker = b_marker
        self.split_marker = split_marker

    def _merge_struct(self):
        """Return structured merge info.  Must be implemented by subclasses.

        See TextMerge docstring for details on the format.
        """
        raise NotImplementedError('_merge_struct is abstract')

    def struct_to_lines(self, struct_iter):
        """Convert merge result tuples to lines.

        Length-1 tuples are emitted verbatim.  Any longer tuple is emitted as
        a conflict: a_marker, the index-0 lines, split_marker, the index-1
        lines, b_marker.  An index-2 (BASE) entry, if present, is not
        emitted.
        """
        for group in struct_iter:
            if len(group) == 1:
                for line in group[0]:
                    yield line
                continue
            yield self.a_marker
            for line in group[0]:
                yield line
            yield self.split_marker
            for line in group[1]:
                yield line
            yield self.b_marker

    def iter_useful(self, struct_iter):
        """Iterate through input tuples, skipping empty ones.

        A tuple is kept when its index-0 entry or, if present, its index-1
        entry is non-empty.  An index-2 (BASE) entry is never consulted.
        """
        for group in struct_iter:
            # Only the first two entries (text_a, text_b) decide usefulness.
            if any(len(side) > 0 for side in group[:2]):
                yield group

    def merge_lines(self, reprocess=False):
        """Produce an iterable of lines, suitable for writing to a file.

        Returns a tuple of (line iterable, conflict indicator).
        If reprocess is true, a two-way merge will be performed on the
        intermediate structure, to reduce conflict regions.
        """
        # The structure is materialised so the conflict flag is known before
        # the (lazy) line iterable is handed back to the caller.
        struct = list(self.merge_struct(reprocess))
        conflicts = any(len(group) > 1 for group in struct)
        return self.struct_to_lines(struct), conflicts

    def merge_struct(self, reprocess=False):
        """Produce structured merge info, with empty groups filtered out.

        If reprocess is true, conflicted groups are additionally passed
        through reprocess_struct.
        """
        struct_iter = self.iter_useful(self._merge_struct())
        # Truth test rather than "is True", so truthy flags such as 1 are
        # honoured instead of being silently ignored.
        if reprocess:
            return self.reprocess_struct(struct_iter)
        return struct_iter

    @staticmethod
    def reprocess_struct(struct_iter):
        """Perform a two-way merge on structural merge info.

        This reduces the size of conflict regions, but breaks the connection
        between the BASE text and the conflict region.

        This process may split a single conflict region into several smaller
        ones, but will not introduce new conflicts.
        """
        for group in struct_iter:
            if len(group) == 1:
                yield group
            else:
                # Re-merge only the two sides; any BASE entry is dropped.
                for newgroup in Merge2(group[0], group[1]).merge_struct():
                    yield newgroup
class Merge2(TextMerge):
    """Two-way merge.

    In a two way merge, common regions are shown as unconflicting, and
    uncommon regions produce conflicts.
    """

    def __init__(self, lines_a, lines_b, a_marker=TextMerge.A_MARKER,
                 b_marker=TextMerge.B_MARKER,
                 split_marker=TextMerge.SPLIT_MARKER):
        """Remember the two texts to merge and the conflict markers.

        :param lines_a: sequence of lines for side A
        :param lines_b: sequence of lines for side B
        :param a_marker: line emitted before the A side of a conflict
        :param b_marker: line emitted after the B side of a conflict
        :param split_marker: line emitted between the two sides
        """
        TextMerge.__init__(self, a_marker, b_marker, split_marker)
        self.lines_a = lines_a
        self.lines_b = lines_b

    def _merge_struct(self):
        """Return structured merge info.

        See TextMerge docstring.

        For every matching block reported by SequenceMatcher this yields a
        2-tuple of the differing lines that precede it, then a 1-tuple of the
        matching lines.  Either may be empty; merge_struct() filters those
        out.
        """
        sm = SequenceMatcher(None, self.lines_a, self.lines_b)
        pos_a = 0
        pos_b = 0
        for ai, bi, size in sm.get_matching_blocks():
            # non-matching lines
            yield (self.lines_a[pos_a:ai], self.lines_b[pos_b:bi])
            # matching lines
            yield (self.lines_a[ai:ai + size],)
            pos_a = ai + size
            pos_b = bi + size
        # final non-matching lines.  get_matching_blocks() ends with a
        # (len(a), len(b), 0) sentinel, so this is normally empty; the slices
        # are open-ended so that no trailing line could ever be dropped.
        yield (self.lines_a[pos_a:], self.lines_b[pos_b:])
|