130
133
last_a_pos = alo-1
131
134
last_b_pos = blo-1
132
for apos, bpos in unique_lcs_py(a[alo:ahi], b[blo:bhi]):
135
for apos, bpos in unique_lcs(a[alo:ahi], b[blo:bhi]):
133
136
# recurse between lines which are unique in each file and match
136
139
# Most of the time, you will have a sequence of similar entries
137
140
if last_a_pos+1 != apos or last_b_pos+1 != bpos:
138
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
141
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
139
142
apos, bpos, answer, maxrecursion - 1)
140
143
last_a_pos = apos
141
144
last_b_pos = bpos
142
145
answer.append((apos, bpos))
143
146
if len(answer) > oldlength:
144
147
# find matches between the last match and the end
145
recurse_matches_py(a, b, last_a_pos+1, last_b_pos+1,
146
ahi, bhi, answer, maxrecursion - 1)
148
recurse_matches(a, b, last_a_pos+1, last_b_pos+1,
149
ahi, bhi, answer, maxrecursion - 1)
147
150
elif a[alo] == b[blo]:
148
151
# find matching lines at the very beginning
149
152
while alo < ahi and blo < bhi and a[alo] == b[blo]:
150
153
answer.append((alo, blo))
153
recurse_matches_py(a, b, alo, blo,
154
ahi, bhi, answer, maxrecursion - 1)
156
recurse_matches(a, b, alo, blo,
157
ahi, bhi, answer, maxrecursion - 1)
155
158
elif a[ahi - 1] == b[bhi - 1]:
156
159
# find matching lines at the very end
239
240
return self.matching_blocks
242
recurse_matches_py(self.a, self.b, 0, 0,
243
len(self.a), len(self.b), matches, 10)
243
recurse_matches(self.a, self.b, 0, 0,
244
len(self.a), len(self.b), matches, 10)
244
245
# Matches now has individual line pairs of
245
246
# line A matches line B, at the given offsets
246
247
self.matching_blocks = _collapse_sequences(matches)
247
248
self.matching_blocks.append( (len(self.a), len(self.b), 0) )
248
if PatienceSequenceMatcher_py._do_check_consistency:
249
if PatienceSequenceMatcher._do_check_consistency:
250
251
_check_consistency(self.matching_blocks)
252
253
return self.matching_blocks
256
# This is a version of unified_diff which only adds a factory parameter
257
# so that you can override the default SequenceMatcher
258
# this has been submitted as a patch to python
259
def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
260
tofiledate='', n=3, lineterm='\n',
261
sequencematcher=None):
263
Compare two sequences of lines; generate the delta as a unified diff.
265
Unified diffs are a compact way of showing line changes and a few
266
lines of context. The number of context lines is set by 'n' which
269
By default, the diff control lines (those with ---, +++, or @@) are
270
created with a trailing newline. This is helpful so that inputs
271
created from file.readlines() result in diffs that are suitable for
272
file.writelines() since both the inputs and outputs have trailing
275
For inputs that do not have trailing newlines, set the lineterm
276
argument to "" so that the output will be uniformly newline free.
278
The unidiff format normally has a header for filenames and modification
279
times. Any or all of these may be specified using strings for
280
'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'. The modification
281
times are normally expressed in the format returned by time.ctime().
285
>>> for line in unified_diff('one two three four'.split(),
286
... 'zero one tree four'.split(), 'Original', 'Current',
287
... 'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
290
--- Original Sat Jan 26 23:30:50 1991
291
+++ Current Fri Jun 06 10:20:52 2003
300
if sequencematcher is None:
301
sequencematcher = difflib.SequenceMatcher
304
for group in sequencematcher(None,a,b).get_grouped_opcodes(n):
306
yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
307
yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
309
i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
310
yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
311
for tag, i1, i2, j1, j2 in group:
313
for line in a[i1:i2]:
316
if tag == 'replace' or tag == 'delete':
317
for line in a[i1:i2]:
319
if tag == 'replace' or tag == 'insert':
320
for line in b[j1:j2]:
324
def unified_diff_files(a, b, sequencematcher=None):
325
"""Generate the diff for two files.
327
# Should this actually be an error?
334
file_a = open(a, 'rb')
335
time_a = os.stat(a).st_mtime
341
file_b = open(b, 'rb')
342
time_b = os.stat(b).st_mtime
344
# TODO: Include fromfiledate and tofiledate
345
return unified_diff(file_a.readlines(), file_b.readlines(),
346
fromfile=a, tofile=b,
347
sequencematcher=sequencematcher)
352
p = optparse.OptionParser(usage='%prog [options] file_a file_b'
353
'\nFiles can be "-" to read from stdin')
354
p.add_option('--patience', dest='matcher', action='store_const', const='patience',
355
default='patience', help='Use the patience difference algorithm')
356
p.add_option('--difflib', dest='matcher', action='store_const', const='difflib',
357
default='patience', help='Use python\'s difflib algorithm')
359
algorithms = {'patience':PatienceSequenceMatcher, 'difflib':difflib.SequenceMatcher}
361
(opts, args) = p.parse_args(args)
362
matcher = algorithms[opts.matcher]
365
print 'You must supply 2 filenames to diff'
368
for line in unified_diff_files(args[0], args[1], sequencematcher=matcher):
369
sys.stdout.write(line)
371
if __name__ == '__main__':
372
sys.exit(main(sys.argv[1:]))