134
130
last_a_pos = alo-1
135
131
last_b_pos = blo-1
136
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
132
for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]):
137
133
# recurse between lines which are unique in each file and match
140
136
# Most of the time, you will have a sequence of similar entries
141
137
if last_a_pos+1 != apos or last_b_pos+1 != bpos:
142
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
138
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
143
139
apos, bpos, answer, maxrecursion - 1)
144
140
last_a_pos = apos
145
141
last_b_pos = bpos
146
142
answer.append((apos, bpos))
147
143
if len(answer) > oldlength:
148
144
# find matches between the last match and the end
149
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
150
ahi, bhi, answer, maxrecursion - 1)
145
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
146
ahi, bhi, answer, maxrecursion - 1)
151
147
elif a[alo] == b[blo]:
152
148
# find matching lines at the very beginning
153
149
while alo < ahi and blo < bhi and a[alo] == b[blo]:
154
150
answer.append((alo, blo))
157
recurse_matches(a, b, alo, blo,
158
ahi, bhi, answer, maxrecursion - 1)
153
recurse_matches_py(a, b, alo, blo,
154
ahi, bhi, answer, maxrecursion - 1)
159
155
elif a[ahi - 1] == b[bhi - 1]:
160
156
# find matching lines at the very end
241
239
return self.matching_blocks
244
recurse_matches(self.a, self.b, 0, 0,
245
len(self.a), len(self.b), matches, 10)
242
recurse_matches_py(self.a, self.b, 0, 0,
243
len(self.a), len(self.b), matches, 10)
246
244
# Matches now has individual line pairs of
247
245
# line A matches line B, at the given offsets
248
246
self.matching_blocks = _collapse_sequences(matches)
249
247
self.matching_blocks.append( (len(self.a), len(self.b), 0) )
250
if PatienceSequenceMatcher._do_check_consistency:
248
if PatienceSequenceMatcher_py._do_check_consistency:
252
250
_check_consistency(self.matching_blocks)
254
252
return self.matching_blocks
257
# This is a version of unified_diff that only adds a factory parameter
258
# (sequencematcher) so that you can override the default SequenceMatcher.
259
# This has been submitted as a patch to Python.
260
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
261
tofiledate='', n=3, lineterm='\n',
262
sequencematcher=None):
264
Compare two sequences of lines; generate the delta as a unified diff.
266
Unified diffs are a compact way of showing line changes and a few
267
lines of context. The number of context lines is set by 'n' which
270
By default, the diff control lines (those with ---, +++, or @@) are
271
created with a trailing newline. This is helpful so that inputs
272
created from file.readlines() result in diffs that are suitable for
273
file.writelines() since both the inputs and outputs have trailing
276
For inputs that do not have trailing newlines, set the lineterm
277
argument to "" so that the output will be uniformly newline free.
279
The unidiff format normally has a header for filenames and modification
280
times. Any or all of these may be specified using strings for
281
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
282
times are normally expressed in the format returned by time.ctime().
286
>>> for line in unified_diff('one two three four'.split(),
287
... 'zero one tree four'.split(), 'Original', 'Current',
288
... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
291
--- Original Sat Jan 26 23:30:50 1991
292
+++ Current Fri Jun 06 10:20:52 2003
301
if sequencematcher is None:
302
sequencematcher = difflib.SequenceMatcher
305
for group in sequencematcher(None,a,b).get_grouped_opcodes(n):
307
yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
308
yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
310
i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
311
yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
312
for tag, i1, i2, j1, j2 in group:
314
for line in a[i1:i2]:
317
if tag == 'replace' or tag == 'delete':
318
for line in a[i1:i2]:
320
if tag == 'replace' or tag == 'insert':
321
for line in b[j1:j2]:
325
def unified_diff_files(a, b, sequencematcher=None):
326
"""Generate the diff for two files.
328
# Should this actually be an error?
335
file_a = open(a, 'rb')
336
time_a = os.stat(a).st_mtime
342
file_b = open(b, 'rb')
343
time_b = os.stat(b).st_mtime
345
# TODO: Include fromfiledate and tofiledate
346
return unified_diff(file_a.readlines(), file_b.readlines(),
347
fromfile=a, tofile=b,
348
sequencematcher=sequencematcher)
353
p = optparse.OptionParser(usage='%prog [options] file_a file_b'
354
'\nFiles can be "-" to read from stdin')
355
p.add_option('--patience', dest='matcher', action='store_const', const='patience',
356
default='patience', help='Use the patience difference algorithm')
357
p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
358
default='patience', help='Use python\'s difflib algorithm')
360
algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}
362
(opts, args) = p.parse_args(args)
363
matcher = algorithms[opts.matcher]
366
print 'You must supply 2 filenames to diff'
369
for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
370
sys.stdout.write(line)
372
if __name__ == '__main__':
373
sys.exit(main(sys.argv[1:]))