133
130
last_a_pos = alo-1
134
131
last_b_pos = blo-1
135
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
132
for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]):
136
133
# recurse between lines which are unique in each file and match
139
136
# Most of the time, you will have a sequence of similar entries
140
137
if last_a_pos+1 != apos or last_b_pos+1 != bpos:
141
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
138
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
142
139
apos, bpos, answer, maxrecursion - 1)
143
140
last_a_pos = apos
144
141
last_b_pos = bpos
145
142
answer.append((apos, bpos))
146
143
if len(answer) > oldlength:
147
144
# find matches between the last match and the end
148
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
149
ahi, bhi, answer, maxrecursion - 1)
145
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
146
ahi, bhi, answer, maxrecursion - 1)
150
147
elif a[alo] == b[blo]:
151
148
# find matching lines at the very beginning
152
149
while alo < ahi and blo < bhi and a[alo] == b[blo]:
153
150
answer.append((alo, blo))
156
recurse_matches(a, b, alo, blo,
157
ahi, bhi, answer, maxrecursion - 1)
153
recurse_matches_py(a, b, alo, blo,
154
ahi, bhi, answer, maxrecursion - 1)
158
155
elif a[ahi - 1] == b[bhi - 1]:
159
156
# find matching lines at the very end
240
239
return self.matching_blocks
243
recurse_matches(self.a, self.b, 0, 0,
244
len(self.a), len(self.b), matches, 10)
242
recurse_matches_py(self.a, self.b, 0, 0,
243
len(self.a), len(self.b), matches, 10)
245
244
# Matches now has individual line pairs of
246
245
# line A matches line B, at the given offsets
247
246
self.matching_blocks = _collapse_sequences(matches)
248
247
self.matching_blocks.append( (len(self.a), len(self.b), 0) )
249
if PatienceSequenceMatcher._do_check_consistency:
248
if PatienceSequenceMatcher_py._do_check_consistency:
251
250
_check_consistency(self.matching_blocks)
253
252
return self.matching_blocks
256
# This is a version of unified_diff which only adds a factory parameter
257
# so that you can override the default SequenceMatcher
258
# this has been submitted as a patch to python
259
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
260
tofiledate='', n=3, lineterm='\n',
261
sequencematcher=None):
263
Compare two sequences of lines; generate the delta as a unified diff.
265
Unified diffs are a compact way of showing line changes and a few
266
lines of context. The number of context lines is set by 'n' which
269
By default, the diff control lines (those with ---, +++, or @@) are
270
created with a trailing newline. This is helpful so that inputs
271
created from file.readlines() result in diffs that are suitable for
272
file.writelines() since both the inputs and outputs have trailing
275
For inputs that do not have trailing newlines, set the lineterm
276
argument to "" so that the output will be uniformly newline free.
278
The unidiff format normally has a header for filenames and modification
279
times. Any or all of these may be specified using strings for
280
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
281
times are normally expressed in the format returned by time.ctime().
285
>>> for line in unified_diff('one two three four'.split(),
286
... 'zero one tree four'.split(), 'Original', 'Current',
287
... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
290
--- Original Sat Jan 26 23:30:50 1991
291
+++ Current Fri Jun 06 10:20:52 2003
300
if sequencematcher is None:
301
sequencematcher = difflib.SequenceMatcher
304
for group in sequencematcher(None,a,b).get_grouped_opcodes(n):
306
yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
307
yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
309
i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
310
yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
311
for tag, i1, i2, j1, j2 in group:
313
for line in a[i1:i2]:
316
if tag == 'replace' or tag == 'delete':
317
for line in a[i1:i2]:
319
if tag == 'replace' or tag == 'insert':
320
for line in b[j1:j2]:
324
def unified_diff_files(a, b, sequencematcher=None):
325
"""Generate the diff for two files.
327
# Should this actually be an error?
334
file_a = open(a, 'rb')
335
time_a = os.stat(a).st_mtime
341
file_b = open(b, 'rb')
342
time_b = os.stat(b).st_mtime
344
# TODO: Include fromfiledate and tofiledate
345
return unified_diff(file_a.readlines(), file_b.readlines(),
346
fromfile=a, tofile=b,
347
sequencematcher=sequencematcher)
352
p = optparse.OptionParser(usage='%prog [options] file_a file_b'
353
'\nFiles can be "-" to read from stdin')
354
p.add_option('--patience', dest='matcher', action='store_const', const='patience',
355
default='patience', help='Use the patience difference algorithm')
356
p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
357
default='patience', help='Use python\'s difflib algorithm')
359
algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}
361
(opts, args) = p.parse_args(args)
362
matcher = algorithms[opts.matcher]
365
print 'You must supply 2 filenames to diff'
368
for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
369
sys.stdout.write(line)
371
if __name__ == '__main__':
372
sys.exit(main(sys.argv[1:]))