133
131
last_a_pos = alo-1
134
132
last_b_pos = blo-1
135
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
133
for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]):
136
134
# recurse between lines which are unique in each file and match
139
137
# Most of the time, you will have a sequence of similar entries
140
138
if last_a_pos+1 != apos or last_b_pos+1 != bpos:
141
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
139
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
142
140
apos, bpos, answer, maxrecursion - 1)
143
141
last_a_pos = apos
144
142
last_b_pos = bpos
145
143
answer.append((apos, bpos))
146
144
if len(answer) > oldlength:
147
145
# find matches between the last match and the end
148
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
149
ahi, bhi, answer, maxrecursion - 1)
146
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
147
ahi, bhi, answer, maxrecursion - 1)
150
148
elif a[alo] == b[blo]:
151
149
# find matching lines at the very beginning
152
150
while alo < ahi and blo < bhi and a[alo] == b[blo]:
153
151
answer.append((alo, blo))
156
recurse_matches(a, b, alo, blo,
157
ahi, bhi, answer, maxrecursion - 1)
154
recurse_matches_py(a, b, alo, blo,
155
ahi, bhi, answer, maxrecursion - 1)
158
156
elif a[ahi - 1] == b[bhi - 1]:
159
157
# find matching lines at the very end
240
240
return self.matching_blocks
243
recurse_matches(self.a, self.b, 0, 0,
244
len(self.a), len(self.b), matches, 10)
243
recurse_matches_py(self.a, self.b, 0, 0,
244
len(self.a), len(self.b), matches, 10)
245
245
# Matches now has individual line pairs of
246
246
# line A matches line B, at the given offsets
247
247
self.matching_blocks = _collapse_sequences(matches)
248
248
self.matching_blocks.append( (len(self.a), len(self.b), 0) )
249
if PatienceSequenceMatcher._do_check_consistency:
249
if PatienceSequenceMatcher_py._do_check_consistency:
251
251
_check_consistency(self.matching_blocks)
253
253
return self.matching_blocks
256
# This is a version of unified_diff which only adds a factory parameter
257
# so that you can override the default SequenceMatcher
258
# this has been submitted as a patch to python
259
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
260
tofiledate='', n=3, lineterm='\n',
261
sequencematcher=None):
263
Compare two sequences of lines; generate the delta as a unified diff.
265
Unified diffs are a compact way of showing line changes and a few
266
lines of context. The number of context lines is set by 'n' which
269
By default, the diff control lines (those with ---, +++, or @@) are
270
created with a trailing newline. This is helpful so that inputs
271
created from file.readlines() result in diffs that are suitable for
272
file.writelines() since both the inputs and outputs have trailing
275
For inputs that do not have trailing newlines, set the lineterm
276
argument to "" so that the output will be uniformly newline free.
278
The unidiff format normally has a header for filenames and modification
279
times. Any or all of these may be specified using strings for
280
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
281
times are normally expressed in the format returned by time.ctime().
285
>>> for line in unified_diff('one two three four'.split(),
286
... 'zero one tree four'.split(), 'Original', 'Current',
287
... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
290
--- Original Sat Jan 26 23:30:50 1991
291
+++ Current Fri Jun 06 10:20:52 2003
300
if sequencematcher is None:
301
sequencematcher = difflib.SequenceMatcher
304
for group in sequencematcher(None,a,b).get_grouped_opcodes(n):
306
yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
307
yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
309
i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
310
yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
311
for tag, i1, i2, j1, j2 in group:
313
for line in a[i1:i2]:
316
if tag == 'replace' or tag == 'delete':
317
for line in a[i1:i2]:
319
if tag == 'replace' or tag == 'insert':
320
for line in b[j1:j2]:
324
def unified_diff_files(a, b, sequencematcher=None):
325
"""Generate the diff for two files.
327
# Should this actually be an error?
334
file_a = open(a, 'rb')
335
time_a = os.stat(a).st_mtime
341
file_b = open(b, 'rb')
342
time_b = os.stat(b).st_mtime
344
# TODO: Include fromfiledate and tofiledate
345
return unified_diff(file_a.readlines(), file_b.readlines(),
346
fromfile=a, tofile=b,
347
sequencematcher=sequencematcher)
352
p = optparse.OptionParser(usage='%prog [options] file_a file_b'
353
'\nFiles can be "-" to read from stdin')
354
p.add_option('--patience', dest='matcher', action='store_const', const='patience',
355
default='patience', help='Use the patience difference algorithm')
356
p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
357
default='patience', help='Use python\'s difflib algorithm')
359
algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}
361
(opts, args) = p.parse_args(args)
362
matcher = algorithms[opts.matcher]
365
print 'You must supply 2 filenames to diff'
368
for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
369
sys.stdout.write(line)
371
if __name__ == '__main__':
372
sys.exit(main(sys.argv[1:]))