108
def recurse_matches(a, b, ahi, bhi, answer, maxrecursion):
110
def recurse_matches(a, b, alo, blo, ahi, bhi, answer, maxrecursion):
109
111
"""Find all of the matching text in the lines of a and b.
111
113
:param a: A sequence
112
114
:param b: Another sequence
115
:param alo: The start location of a to check, typically 0
116
:param ahi: The start location of b to check, typically 0
113
117
:param ahi: The maximum length of a to check, typically len(a)
114
118
:param bhi: The maximum length of b to check, typically len(b)
115
119
:param answer: The return array. Will be filled with tuples
116
indicating [(line_in_a), (line_in_b)]
120
indicating [(line_in_a, line_in_b)]
117
121
:param maxrecursion: The maximum depth to recurse.
118
122
Must be a positive integer.
119
123
:return: None, the return value is in the parameter answer, which
124
127
if maxrecursion < 0:
128
mutter('max recursion depth reached')
125
129
# this will never happen normally, this check is to prevent DOS attacks
127
131
oldlength = len(answer)
131
alo, blo = answer[-1]
134
132
if alo == ahi or blo == bhi:
136
136
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
137
137
# recurse between lines which are unique in each file and match
140
recurse_matches(a, b, apos, bpos, answer, maxrecursion - 1)
140
# Most of the time, you will have a sequence of similar entries
141
if last_a_pos+1 != apos or last_b_pos+1 != bpos:
142
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
143
apos, bpos, answer, maxrecursion - 1)
141
146
answer.append((apos, bpos))
142
147
if len(answer) > oldlength:
143
148
# find matches between the last match and the end
144
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
149
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
150
ahi, bhi, answer, maxrecursion - 1)
145
151
elif a[alo] == b[blo]:
146
152
# find matching lines at the very beginning
147
153
while alo < ahi and blo < bhi and a[alo] == b[blo]:
148
154
answer.append((alo, blo))
151
recurse_matches(a, b, ahi, bhi, answer, maxrecursion - 1)
157
recurse_matches(a, b, alo, blo,
158
ahi, bhi, answer, maxrecursion - 1)
152
159
elif a[ahi - 1] == b[bhi - 1]:
153
160
# find matching lines at the very end
156
163
while nahi > alo and nbhi > blo and a[nahi - 1] == b[nbhi - 1]:
159
recurse_matches(a, b, nahi, nbhi, answer, maxrecursion - 1)
166
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
167
nahi, nbhi, answer, maxrecursion - 1)
160
168
for i in xrange(ahi - nahi):
161
169
answer.append((nahi + i, nbhi + i))
164
class SequenceMatcher(difflib.SequenceMatcher):
172
def _collapse_sequences(matches):
173
"""Find sequences of lines.
175
Given a sequence of [(line_in_a, line_in_b),]
176
find regions where they both increment at the same time
179
start_a = start_b = None
181
for i_a, i_b in matches:
182
if (start_a is not None
183
and (i_a == start_a + length)
184
and (i_b == start_b + length)):
187
if start_a is not None:
188
answer.append((start_a, start_b, length))
194
answer.append((start_a, start_b, length))
199
def _check_consistency(answer):
200
# For consistency sake, make sure all matches are only increasing
203
for a,b,match_len in answer:
204
assert a >= next_a, 'Non increasing matches for a'
205
assert b >= next_b, 'Not increasing matches for b'
206
next_a = a + match_len
207
next_b = b + match_len
210
class PatienceSequenceMatcher(difflib.SequenceMatcher):
165
211
"""Compare a pair of sequences using longest common subset."""
213
_do_check_consistency = True
167
215
def __init__(self, isjunk=None, a='', b=''):
168
216
if isjunk is not None:
169
217
raise NotImplementedError('Currently we do not support'
170
218
' isjunk for sequence matching')
171
219
difflib.SequenceMatcher.__init__(self, isjunk, a, b)
173
def _check_with_diff(self, alo, ahi, blo, bhi, answer):
174
"""Use the original diff algorithm on an unmatched section.
176
This will check to make sure the range is worth checking,
177
before doing any work.
179
:param alo: The last line that actually matched
180
:param ahi: The next line that actually matches
181
:param blo: Same as alo, only for the 'b' set
182
:param bhi: Same as ahi
183
:param answer: An array which will have the new ranges appended to it
221
def get_matching_blocks(self):
222
"""Return list of triples describing matching subsequences.
224
Each triple is of the form (i, j, n), and means that
225
a[i:i+n] == b[j:j+n]. The triples are monotonically increasing in
228
The last triple is a dummy, (len(a), len(b), 0), and is the only
231
>>> s = PatienceSequenceMatcher(None, "abxcd", "abcd")
232
>>> s.get_matching_blocks()
233
[(0, 0, 2), (3, 2, 2), (5, 4, 0)]
187
# recurse_matches has an implementation design
188
# which does not match non-unique lines in the
189
# if they do not touch matching unique lines
190
# so we rerun the regular diff algorithm
191
# if find a large enough chunk.
193
# recurse_matches already looked at the direct
194
# neighbors, so we only need to run if there is
195
# enough space to do so
196
if ahi - alo > 2 and bhi - blo > 2:
197
a = self.a[alo+1:ahi-1]
198
b = self.b[blo+1:bhi-1]
199
m = difflib.SequenceMatcher(None, a, b)
200
new_blocks = m.get_matching_blocks()
201
# difflib always adds a final match
203
for blk in new_blocks:
204
answer.append((blk[0]+alo+1,
208
def __helper(self, alo, ahi, blo, bhi, answer):
235
# jam 20060525 This is the python 2.4.1 difflib get_matching_blocks
236
# implementation which uses __helper. 2.4.3 got rid of helper for
237
# doing it inline with a queue.
238
# We should consider doing the same for recurse_matches
240
if self.matching_blocks is not None:
241
return self.matching_blocks
212
recurse_matches(a, b, len(a), len(b), matches, 10)
244
recurse_matches(self.a, self.b, 0, 0,
245
len(self.a), len(self.b), matches, 10)
213
246
# Matches now has individual line pairs of
214
247
# line A matches line B, at the given offsets
216
start_a = start_b = None
218
for i_a, i_b in matches:
219
if (start_a is not None
220
and (i_a == start_a + length)
221
and (i_b == start_b + length)):
226
# We need to check from 0,0 until the current match
227
self._check_with_diff(alo-1, i_a+alo, blo-1, i_b+blo,
230
answer.append((start_a+alo, start_b+blo, length))
231
self._check_with_diff(start_a+alo+length, i_a+alo,
232
start_b+blo+length, i_b+blo,
240
answer.append((start_a+alo, start_b+blo, length))
241
self._check_with_diff(start_a+alo+length, ahi+1,
242
start_b+blo+length, bhi+1,
245
# Nothing matched, so we need to send the complete text
246
self._check_with_diff(alo-1, ahi+1, blo-1, bhi+1, answer)
248
# For consistency sake, make sure all matches are only increasing
252
for a,b,match_len in answer:
253
assert a >= next_a, 'Non increasing matches for a'
254
assert b >= next_b, 'Not increasing matches for b'
255
next_a = a + match_len
256
next_b = b + match_len
248
self.matching_blocks = _collapse_sequences(matches)
249
self.matching_blocks.append( (len(self.a), len(self.b), 0) )
250
if PatienceSequenceMatcher._do_check_consistency:
252
_check_consistency(self.matching_blocks)
254
return self.matching_blocks
259
257
# This is a version of unified_diff which only adds a factory parameter