109
def recurse_matches(a, b, alo, blo, ahi, bhi, answer, maxrecursion):
108
def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
110
109
"""Find all of the matching text in the lines of a and b.
112
111
:param a: A sequence
113
112
:param b: Another sequence
114
:param alo: The start location of a to check, typically 0
115
:param ahi: The start location of b to check, typically 0
116
113
:param ahi: The maximum length of a to check, typically len(a)
117
114
:param bhi: The maximum length of b to check, typically len(b)
118
115
:param answer: The return array. Will be filled with tuples
119
indicating [(line_in_a, line_in_b)]
116
indicating [(line_in_a), (line_in_b)]
120
117
:param maxrecursion: The maximum depth to recurse.
121
118
Must be a positive integer.
122
119
:return: None, the return value is in the parameter answer, which
126
124
if maxrecursion < 0:
127
mutter('max recursion depth reached')
128
125
# this will never happen normally, this check is to prevent DOS attacks
130
127
oldlength = len(answer)
131
alo, blo = answer[-1]
131
134
if alo == ahi or blo == bhi:
135
136
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
136
137
# recurse between lines which are unique in each file and match
139
# Most of the time, you will have a sequence of similar entries
140
if last_a_pos+1 != apos or last_b_pos+1 != bpos:
141
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
142
apos, bpos, answer, maxrecursion - 1)
140
recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
145
141
answer.append((apos, bpos))
146
142
if len(answer) > oldlength:
147
143
# find matches between the last match and the end
148
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
149
ahi, bhi, answer, maxrecursion - 1)
144
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
150
145
elif a[alo] == b[blo]:
151
146
# find matching lines at the very beginning
152
147
while alo < ahi and blo < bhi and a[alo] == b[blo]:
153
148
answer.append((alo, blo))
156
recurse_matches(a, b, alo, blo,
157
ahi, bhi, answer, maxrecursion - 1)
151
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
158
152
elif a[ahi - 1] == b[bhi - 1]:
159
153
# find matching lines at the very end
162
156
while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
165
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
166
nahi, nbhi, answer, maxrecursion - 1)
159
recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
167
160
for i in xrange(ahi - nahi):
168
161
answer.append((nahi + i, nbhi + i))
171
def _collapse_sequences(matches):
172
"""Find sequences of lines.
174
Given a sequence of [(line_in_a, line_in_b),]
175
find regions where they both increment at the same time
178
start_a = start_b = None
180
for i_a, i_b in matches:
181
if (start_a is not None
182
and (i_a == start_a + length)
183
and (i_b == start_b + length)):
186
if start_a is not None:
187
answer.append((start_a, start_b, length))
193
answer.append((start_a, start_b, length))
198
def _check_consistency(answer):
199
# For consistency sake, make sure all matches are only increasing
202
for a,b,match_len in answer:
203
assert a >= next_a, 'Non increasing matches for a'
204
assert b >= next_b, 'Not increasing matches for b'
205
next_a = a + match_len
206
next_b = b + match_len
209
class PatienceSequenceMatcher(difflib.SequenceMatcher):
164
class SequenceMatcher(difflib.SequenceMatcher):
210
165
"""Compare a pair of sequences using longest common subset."""
212
_do_check_consistency = True
214
167
def __init__(self, isjunk=None, a='', b=''):
215
168
if isjunk is not None:
216
169
raise NotImplementedError('Currently we do not support'
217
170
' isjunk for sequence matching')
218
171
difflib.SequenceMatcher.__init__(self, isjunk, a, b)
220
def get_matching_blocks(self):
221
"""Return list of triples describing matching subsequences.
223
Each triple is of the form (i, j, n), and means that
224
a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
227
The last triple is a dummy, (len(a), len(b), 0), and is the only
230
>>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
231
>>> s.get_matching_blocks()
232
[(0, 0, 2), (3, 2, 2), (5, 4, 0)]
173
def _check_with_diff(self, alo, ahi, blo, bhi, answer):
174
"""Use the original diff algorithm on an unmatched section.
176
This will check to make sure the range is worth checking,
177
before doing any work.
179
:param alo: The last line that actually matched
180
:param ahi: The next line that actually matches
181
:param blo: Same as alo, only for the 'b' set
182
:param bhi: Same as ahi
183
:param answer: An array which will have the new ranges appended to it
234
# jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
235
# implementation which uses __helper. 2.4.3 got rid of helper for
236
# doing it inline with a queue.
237
# We should consider doing the same for recurse_matches
239
if self.matching_blocks is not None:
240
return self.matching_blocks
187
# recurse_matches has an implementation design
188
# which does not match non-unique lines in the
189
# if they do not touch matching unique lines
190
# so we rerun the regular diff algorithm
191
# if find a large enough chunk.
193
# recurse_matches already looked at the direct
194
# neighbors, so we only need to run if there is
195
# enough space to do so
196
if ahi - alo > 2 and bhi - blo > 2:
197
a = self.a[alo+1:ahi-1]
198
b = self.b[blo+1:bhi-1]
199
m = difflib.SequenceMatcher(None, a, b)
200
new_blocks = m.get_matching_blocks()
201
# difflib always adds a final match
203
for blk in new_blocks:
204
answer.append((blk[0]+alo+1,
208
def __helper(self, alo, ahi, blo, bhi, answer):
243
recurse_matches(self.a, self.b, 0, 0,
244
len(self.a), len(self.b), matches, 10)
212
recurse_matches(a, b, len(a), len(b), matches, 10)
245
213
# Matches now has individual line pairs of
246
214
# line A matches line B, at the given offsets
247
self.matching_blocks = _collapse_sequences(matches)
248
self.matching_blocks.append( (len(self.a), len(self.b), 0) )
249
if PatienceSequenceMatcher._do_check_consistency:
251
_check_consistency(self.matching_blocks)
253
return self.matching_blocks
216
start_a = start_b = None
218
for i_a, i_b in matches:
219
if (start_a is not None
220
and (i_a == start_a + length)
221
and (i_b == start_b + length)):
226
# We need to check from 0,0 until the current match
227
self._check_with_diff(alo-1, i_a+alo, blo-1, i_b+blo,
230
answer.append((start_a+alo, start_b+blo, length))
231
self._check_with_diff(start_a+alo+length, i_a+alo,
232
start_b+blo+length, i_b+blo,
240
answer.append((start_a+alo, start_b+blo, length))
241
self._check_with_diff(start_a+alo+length, ahi+1,
242
start_b+blo+length, bhi+1,
245
# Nothing matched, so we need to send the complete text
246
self._check_with_diff(alo-1, ahi+1, blo-1, bhi+1, answer)
248
# For consistency sake, make sure all matches are only increasing
252
for a,b,match_len in answer:
253
assert a >= next_a, 'Non increasing matches for a'
254
assert b >= next_b, 'Not increasing matches for b'
255
next_a = a + match_len
256
next_b = b + match_len
256
259
# This is a version of unified_diff which only adds a factory parameter