1
from bzrlib.selftest import TestCase
2
from bzrlib.diff import internal_diff
1
# Copyright (C) 2005, 2006 Canonical Development Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3
18
from cStringIO import StringIO
4
def udiff_lines(old, new):
20
from tempfile import TemporaryFile
22
from bzrlib.diff import internal_diff, external_diff, show_diff_trees
23
from bzrlib.errors import BinaryFile, NoDiff
24
import bzrlib.patiencediff
25
from bzrlib.tests import (TestCase, TestCaseWithTransport,
26
TestCaseInTempDir, TestSkipped)
29
def udiff_lines(old, new, allow_binary=False):
6
internal_diff('old', old, 'new', new, output)
31
internal_diff('old', old, 'new', new, output, allow_binary)
8
33
return output.readlines()
10
def check_patch(lines):
    """Verify that ``lines`` has the basic shape of a unified diff.

    The first two lines must be the ``---`` and ``+++`` file headers, and
    the third line must be a hunk header: it has to start with ``@@`` and
    contain a second ``@@`` after the first two characters.

    :param lines: sequence of patch lines (strings).
    :raises AssertionError: if any structural check fails; the message
        names the failed check and includes the full patch text.
    """
    def require(condition, problem):
        # Raise explicitly instead of using the ``assert`` statement so
        # that the checks are still performed when Python runs with -O.
        if not condition:
            raise AssertionError(
                "%s for patch:\n%s" % (problem, "".join(lines)))

    # The order matters: each length check guards the indexing below it.
    require(len(lines) > 1, "Not enough lines for a file header")
    require(lines[0].startswith('---'), 'No orig line')
    require(lines[1].startswith('+++'), 'No mod line')
    require(len(lines) > 2, "No hunks")
    require(lines[2].startswith('@@'), "No hunk header")
    # Skip the leading '@@' so only the closing marker can match.
    require('@@' in lines[2][2:], "Unterminated hunk header")
36
def external_udiff_lines(old, new, use_stringio=False):
38
# StringIO has no fileno, so it tests a different codepath
41
output = TemporaryFile()
43
external_diff('old', old, 'new', new, output, diff_opts=['-u'])
45
raise TestSkipped('external "diff" not present to test')
47
lines = output.readlines()
24
52
class TestDiff(TestCase):
25
54
def test_add_nl(self):
26
55
"""diff generates a valid diff for patches that add a newline"""
27
56
lines = udiff_lines(['boo'], ['boo\n'])
29
assert lines[4] == '\\ No newline at end of file\n', \
30
"expected no-nl, got %r" % lines[4]
57
self.check_patch(lines)
58
self.assertEquals(lines[4], '\\ No newline at end of file\n')
59
## "expected no-nl, got %r" % lines[4]
32
61
def test_add_nl_2(self):
33
62
"""diff generates a valid diff for patches that change last line and
36
65
lines = udiff_lines(['boo'], ['goo\n'])
38
assert lines[4] == '\\ No newline at end of file\n', \
39
"expected no-nl, got %r" % lines[4]
66
self.check_patch(lines)
67
self.assertEquals(lines[4], '\\ No newline at end of file\n')
68
## "expected no-nl, got %r" % lines[4]
41
70
def test_remove_nl(self):
42
71
"""diff generates a valid diff for patches that change last line and
45
74
lines = udiff_lines(['boo\n'], ['boo'])
47
assert lines[5] == '\\ No newline at end of file\n', \
48
"expected no-nl, got %r" % lines[5]
75
self.check_patch(lines)
76
self.assertEquals(lines[5], '\\ No newline at end of file\n')
77
## "expected no-nl, got %r" % lines[5]
79
def check_patch(self, lines):
    """Assert that ``lines`` has the basic shape of a unified diff.

    The first two lines must be the ``---`` and ``+++`` file headers, and
    the third line must be a hunk header: it has to start with ``@@`` and
    contain a second ``@@`` after the first two characters.

    :param lines: sequence of patch lines (strings).

    Each check reports a message naming what was wrong together with the
    full patch text, so a failure shows the offending diff.
    """
    # Built once; every failure message embeds the whole patch.
    patch_text = "".join(lines)
    # The order matters: each length check guards the indexing below it.
    self.assert_(len(lines) > 1,
        "Not enough lines for a file header for patch:\n%s" % patch_text)
    self.assert_(lines[0].startswith('---'),
        'No orig line for patch:\n%s' % patch_text)
    self.assert_(lines[1].startswith('+++'),
        'No mod line for patch:\n%s' % patch_text)
    self.assert_(len(lines) > 2,
        "No hunks for patch:\n%s" % patch_text)
    self.assert_(lines[2].startswith('@@'),
        "No hunk header for patch:\n%s" % patch_text)
    # Skip the leading '@@' so only the closing marker can match.
    self.assert_('@@' in lines[2][2:],
        "Unterminated hunk header for patch:\n%s" % patch_text)
93
def test_binary_lines(self):
    """A line containing a NUL byte is refused unless allow_binary is set.

    With the default arguments udiff_lines must raise BinaryFile whether
    the NUL-bearing line is on the old or the new side; with
    allow_binary=True the same inputs must go through without raising.
    """
    nul_line = 1023 * 'a' + '\x00'
    # Binary content on the old side, then on the new side.
    self.assertRaises(BinaryFile, udiff_lines, [nul_line], [])
    self.assertRaises(BinaryFile, udiff_lines, [], [nul_line])
    # Only the absence of an exception is checked here.
    for old, new in (([nul_line], []), ([], [nul_line])):
        udiff_lines(old, new, allow_binary=True)
99
def test_external_diff(self):
    """The external diff helper yields a structurally valid patch."""
    # A one-line change is enough to get headers plus a single hunk.
    self.check_patch(external_udiff_lines(['boo\n'], ['goo\n']))
103
def test_external_diff_no_fileno(self):
104
# Make sure that we can handle not having a fileno, even
105
# if the diff is large
106
lines = external_udiff_lines(['boo\n']*10000,
109
self.check_patch(lines)
111
def test_internal_diff_default(self):
112
# Default internal diff encoding is utf8
114
internal_diff(u'old_\xb5', ['old_text\n'],
115
u'new_\xe5', ['new_text\n'], output)
116
lines = output.getvalue().splitlines(True)
117
self.check_patch(lines)
118
self.assertEquals(['--- old_\xc2\xb5\n',
119
'+++ new_\xc3\xa5\n',
127
def test_internal_diff_utf8(self):
129
internal_diff(u'old_\xb5', ['old_text\n'],
130
u'new_\xe5', ['new_text\n'], output,
131
path_encoding='utf8')
132
lines = output.getvalue().splitlines(True)
133
self.check_patch(lines)
134
self.assertEquals(['--- old_\xc2\xb5\n',
135
'+++ new_\xc3\xa5\n',
143
def test_internal_diff_iso_8859_1(self):
145
internal_diff(u'old_\xb5', ['old_text\n'],
146
u'new_\xe5', ['new_text\n'], output,
147
path_encoding='iso-8859-1')
148
lines = output.getvalue().splitlines(True)
149
self.check_patch(lines)
150
self.assertEquals(['--- old_\xb5\n',
159
def test_internal_diff_returns_bytes(self):
161
output = StringIO.StringIO()
162
internal_diff(u'old_\xb5', ['old_text\n'],
163
u'new_\xe5', ['new_text\n'], output)
164
self.failUnless(isinstance(output.getvalue(), str),
165
'internal_diff should return bytestrings')
168
class TestDiffDates(TestCaseWithTransport):
171
super(TestDiffDates, self).setUp()
172
self.wt = self.make_branch_and_tree('.')
173
self.b = self.wt.branch
174
self.build_tree_contents([
175
('file1', 'file1 contents at rev 1\n'),
176
('file2', 'file2 contents at rev 1\n')
178
self.wt.add(['file1', 'file2'])
180
message='Revision 1',
181
timestamp=1143849600, # 2006-04-01 00:00:00 UTC
184
self.build_tree_contents([('file1', 'file1 contents at rev 2\n')])
186
message='Revision 2',
187
timestamp=1143936000, # 2006-04-02 00:00:00 UTC
190
self.build_tree_contents([('file2', 'file2 contents at rev 3\n')])
192
message='Revision 3',
193
timestamp=1144022400, # 2006-04-03 00:00:00 UTC
196
self.wt.remove(['file2'])
198
message='Revision 4',
199
timestamp=1144108800, # 2006-04-04 00:00:00 UTC
202
self.build_tree_contents([
203
('file1', 'file1 contents in working tree\n')
205
# set the date stamps for files in the working tree to known values
206
os.utime('file1', (1144195200, 1144195200)) # 2006-04-05 00:00:00 UTC
208
def get_diff(self, tree1, tree2, specific_files=None, working_tree=None):
210
if working_tree is not None:
211
extra_trees = (working_tree,)
214
show_diff_trees(tree1, tree2, output, specific_files=specific_files,
215
extra_trees=extra_trees, old_label='old/',
217
return output.getvalue()
219
def test_diff_rev_tree_working_tree(self):
220
output = self.get_diff(self.wt.basis_tree(), self.wt)
221
# note that the date for old/file1 is from rev 2 rather than from
222
# the basis revision (rev 4)
223
self.assertEqualDiff(output, '''\
224
=== modified file 'file1'
225
--- old/file1\t2006-04-02 00:00:00 +0000
226
+++ new/file1\t2006-04-05 00:00:00 +0000
228
-file1 contents at rev 2
229
+file1 contents in working tree
233
def test_diff_rev_tree_rev_tree(self):
234
tree1 = self.b.repository.revision_tree('rev-2')
235
tree2 = self.b.repository.revision_tree('rev-3')
236
output = self.get_diff(tree1, tree2)
237
self.assertEqualDiff(output, '''\
238
=== modified file 'file2'
239
--- old/file2\t2006-04-01 00:00:00 +0000
240
+++ new/file2\t2006-04-03 00:00:00 +0000
242
-file2 contents at rev 1
243
+file2 contents at rev 3
247
def test_diff_add_files(self):
248
tree1 = self.b.repository.revision_tree(None)
249
tree2 = self.b.repository.revision_tree('rev-1')
250
output = self.get_diff(tree1, tree2)
251
# the files have the epoch time stamp for the tree in which
253
self.assertEqualDiff(output, '''\
254
=== added file 'file1'
255
--- old/file1\t1970-01-01 00:00:00 +0000
256
+++ new/file1\t2006-04-01 00:00:00 +0000
258
+file1 contents at rev 1
260
=== added file 'file2'
261
--- old/file2\t1970-01-01 00:00:00 +0000
262
+++ new/file2\t2006-04-01 00:00:00 +0000
264
+file2 contents at rev 1
268
def test_diff_remove_files(self):
269
tree1 = self.b.repository.revision_tree('rev-3')
270
tree2 = self.b.repository.revision_tree('rev-4')
271
output = self.get_diff(tree1, tree2)
272
# the file has the epoch time stamp for the tree in which
274
self.assertEqualDiff(output, '''\
275
=== removed file 'file2'
276
--- old/file2\t2006-04-03 00:00:00 +0000
277
+++ new/file2\t1970-01-01 00:00:00 +0000
279
-file2 contents at rev 3
283
def test_show_diff_specified(self):
    """A working tree filename can be used to identify a file"""
    # Rename only in the working tree; both revision trees being
    # compared still know the file as 'file1'.
    self.wt.rename_one('file1', 'file1b')
    rev1_tree, rev4_tree = [self.b.repository.revision_tree(rev_id)
                            for rev_id in ('rev-1', 'rev-4')]
    diff_text = self.get_diff(rev1_tree, rev4_tree,
                              specific_files=['file1b'],
                              working_tree=self.wt)
    # The diff is requested under the new name but must mention 'file1'.
    self.assertContainsRe(diff_text, 'file1\t')
292
def test_recursive_diff(self):
293
"""Children of directories are matched"""
296
self.wt.add(['dir1', 'dir2'])
297
self.wt.rename_one('file1', 'dir1/file1')
298
old_tree = self.b.repository.revision_tree('rev-1')
299
new_tree = self.b.repository.revision_tree('rev-4')
300
out = self.get_diff(old_tree, new_tree, specific_files=['dir1'],
301
working_tree=self.wt)
302
self.assertContainsRe(out, 'file1\t')
303
out = self.get_diff(old_tree, new_tree, specific_files=['dir2'],
304
working_tree=self.wt)
305
self.assertNotContainsRe(out, 'file1\t')
307
class TestPatienceDiffLib(TestCase):
309
def test_unique_lcs(self):
310
unique_lcs = bzrlib.patiencediff.unique_lcs
311
self.assertEquals(unique_lcs('', ''), [])
312
self.assertEquals(unique_lcs('a', 'a'), [(0,0)])
313
self.assertEquals(unique_lcs('a', 'b'), [])
314
self.assertEquals(unique_lcs('ab', 'ab'), [(0,0), (1,1)])
315
self.assertEquals(unique_lcs('abcde', 'cdeab'), [(2,0), (3,1), (4,2)])
316
self.assertEquals(unique_lcs('cdeab', 'abcde'), [(0,2), (1,3), (2,4)])
317
self.assertEquals(unique_lcs('abXde', 'abYde'), [(0,0), (1,1),
319
self.assertEquals(unique_lcs('acbac', 'abc'), [(2,1)])
321
def test_recurse_matches(self):
322
def test_one(a, b, matches):
324
bzrlib.patiencediff.recurse_matches(a, b, 0, 0, len(a), len(b),
326
self.assertEquals(test_matches, matches)
328
test_one(['a', '', 'b', '', 'c'], ['a', 'a', 'b', 'c', 'c'],
329
[(0, 0), (2, 2), (4, 4)])
330
test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'],
331
[(0, 0), (2, 1), (4, 2)])
333
# recurse_matches doesn't match non-unique
334
# lines surrounded by bogus text.
335
# The update has been done in patiencediff.SequenceMatcher instead
337
# This is what it could be
338
#test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)])
340
# This is what it currently gives:
341
test_one('aBccDe', 'abccde', [(0,0), (5,5)])
343
def test_matching_blocks(self):
344
def chk_blocks(a, b, expected_blocks):
345
# difflib always adds a signature of the total
346
# length, with no matching entries at the end
347
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
348
blocks = s.get_matching_blocks()
349
self.assertEquals((len(a), len(b), 0), blocks[-1])
350
self.assertEquals(expected_blocks, blocks[:-1])
352
# Some basic matching tests
353
chk_blocks('', '', [])
354
chk_blocks([], [], [])
355
chk_blocks('abcd', 'abcd', [(0, 0, 4)])
356
chk_blocks('abcd', 'abce', [(0, 0, 3)])
357
chk_blocks('eabc', 'abce', [(1, 0, 3)])
358
chk_blocks('eabce', 'abce', [(1, 0, 4)])
359
chk_blocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)])
360
chk_blocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)])
361
chk_blocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)])
362
# This may check too much, but it checks to see that
363
# a copied block stays attached to the previous section,
365
# difflib would tend to grab the trailing longest match
366
# which would make the diff not look right
367
chk_blocks('abcdefghijklmnop', 'abcdefxydefghijklmnop',
368
[(0, 0, 6), (6, 11, 10)])
370
# make sure it supports passing in lists
374
'how are you today?\n'],
376
'how are you today?\n'],
377
[(0, 0, 1), (2, 1, 1)])
379
# non unique lines surrounded by non-matching lines
381
chk_blocks('aBccDe', 'abccde', [(0,0,1), (5,5,1)])
383
# But they only need to be locally unique
384
chk_blocks('aBcDec', 'abcdec', [(0,0,1), (2,2,1), (4,4,2)])
386
# non unique blocks won't be matched
387
chk_blocks('aBcdEcdFg', 'abcdecdfg', [(0,0,1), (8,8,1)])
389
# but locally unique ones will
390
chk_blocks('aBcdEeXcdFg', 'abcdecdfg', [(0,0,1), (2,2,2),
391
(5,4,1), (7,5,2), (10,8,1)])
393
chk_blocks('abbabbXd', 'cabbabxd', [(7,7,1)])
394
chk_blocks('abbabbbb', 'cabbabbc', [])
395
chk_blocks('bbbbbbbb', 'cbbbbbbc', [])
397
def test_opcodes(self):
398
def chk_ops(a, b, expected_codes):
399
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
400
self.assertEquals(expected_codes, s.get_opcodes())
404
chk_ops('abcd', 'abcd', [('equal', 0,4, 0,4)])
405
chk_ops('abcd', 'abce', [('equal', 0,3, 0,3),
406
('replace', 3,4, 3,4)
408
chk_ops('eabc', 'abce', [('delete', 0,1, 0,0),
412
chk_ops('eabce', 'abce', [('delete', 0,1, 0,0),
415
chk_ops('abcde', 'abXde', [('equal', 0,2, 0,2),
416
('replace', 2,3, 2,3),
419
chk_ops('abcde', 'abXYZde', [('equal', 0,2, 0,2),
420
('replace', 2,3, 2,5),
423
chk_ops('abde', 'abXYZde', [('equal', 0,2, 0,2),
424
('insert', 2,2, 2,5),
427
chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop',
428
[('equal', 0,6, 0,6),
429
('insert', 6,6, 6,11),
430
('equal', 6,16, 11,21)
435
, 'how are you today?\n'],
437
, 'how are you today?\n'],
438
[('equal', 0,1, 0,1),
439
('delete', 1,2, 1,1),
442
chk_ops('aBccDe', 'abccde',
443
[('equal', 0,1, 0,1),
444
('replace', 1,5, 1,5),
447
chk_ops('aBcDec', 'abcdec',
448
[('equal', 0,1, 0,1),
449
('replace', 1,2, 1,2),
451
('replace', 3,4, 3,4),
454
chk_ops('aBcdEcdFg', 'abcdecdfg',
455
[('equal', 0,1, 0,1),
456
('replace', 1,8, 1,8),
459
chk_ops('aBcdEeXcdFg', 'abcdecdfg',
460
[('equal', 0,1, 0,1),
461
('replace', 1,2, 1,2),
463
('delete', 4,5, 4,4),
465
('delete', 6,7, 5,5),
467
('replace', 9,10, 7,8),
468
('equal', 10,11, 8,9)
471
def test_multiple_ranges(self):
472
# There was an earlier bug where we used a bad set of ranges,
473
# this triggers that specific bug, to make sure it doesn't regress
474
def chk_blocks(a, b, expected_blocks):
475
# difflib always adds a signature of the total
476
# length, with no matching entries at the end
477
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
478
blocks = s.get_matching_blocks()
480
self.assertEquals(x, (len(a), len(b), 0))
481
self.assertEquals(expected_blocks, blocks)
483
chk_blocks('abcdefghijklmnop'
484
, 'abcXghiYZQRSTUVWXYZijklmnop'
485
, [(0, 0, 3), (6, 4, 3), (9, 20, 7)])
487
chk_blocks('ABCd efghIjk L'
488
, 'AxyzBCn mo pqrstuvwI1 2 L'
489
, [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
491
# These are rot13 code snippets.
493
trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
495
gnxrf_netf = ['svyr*']
496
gnxrf_bcgvbaf = ['ab-erphefr']
498
qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr):
499
sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy
501
ercbegre = nqq_ercbegre_ahyy
503
ercbegre = nqq_ercbegre_cevag
504
fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre)
507
pynff pzq_zxqve(Pbzznaq):
508
'''.splitlines(True), '''\
509
trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
511
--qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl
514
gnxrf_netf = ['svyr*']
515
gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha']
517
qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr):
522
# Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe
523
npgvba = omeyvo.nqq.nqq_npgvba_ahyy
525
npgvba = omeyvo.nqq.nqq_npgvba_cevag
527
npgvba = omeyvo.nqq.nqq_npgvba_nqq
529
npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag
531
omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba)
534
pynff pzq_zxqve(Pbzznaq):
536
, [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
538
def test_patience_unified_diff(self):
539
txt_a = ['hello there\n',
541
'how are you today?\n']
542
txt_b = ['hello there\n',
543
'how are you today?\n']
544
unified_diff = bzrlib.patiencediff.unified_diff
545
psm = bzrlib.patiencediff.PatienceSequenceMatcher
546
self.assertEquals([ '--- \n',
551
' how are you today?\n'
553
, list(unified_diff(txt_a, txt_b,
554
sequencematcher=psm)))
555
txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
556
txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
557
# This is the result with LongestCommonSubstring matching
558
self.assertEquals(['--- \n',
560
'@@ -1,6 +1,11 @@\n',
572
, list(unified_diff(txt_a, txt_b)))
573
# And the patience diff
574
self.assertEquals(['--- \n',
576
'@@ -4,6 +4,11 @@\n',
589
, list(unified_diff(txt_a, txt_b,
590
sequencematcher=psm)))
593
class TestPatienceDiffLibFiles(TestCaseInTempDir):
595
def test_patience_unified_diff_files(self):
596
txt_a = ['hello there\n',
598
'how are you today?\n']
599
txt_b = ['hello there\n',
600
'how are you today?\n']
601
open('a1', 'wb').writelines(txt_a)
602
open('b1', 'wb').writelines(txt_b)
604
unified_diff_files = bzrlib.patiencediff.unified_diff_files
605
psm = bzrlib.patiencediff.PatienceSequenceMatcher
606
self.assertEquals(['--- a1 \n',
611
' how are you today?\n',
613
, list(unified_diff_files('a1', 'b1',
614
sequencematcher=psm)))
616
txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
617
txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
618
open('a2', 'wb').writelines(txt_a)
619
open('b2', 'wb').writelines(txt_b)
621
# This is the result with LongestCommonSubstring matching
622
self.assertEquals(['--- a2 \n',
624
'@@ -1,6 +1,11 @@\n',
636
, list(unified_diff_files('a2', 'b2')))
638
# And the patience diff
639
self.assertEquals(['--- a2 \n',
641
'@@ -4,6 +4,11 @@\n',
654
, list(unified_diff_files('a2', 'b2',
655
sequencematcher=psm)))