# Copyright (C) 2005, 2006 Canonical Development Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
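
"""Tests for bzrlib's diff machinery: the output of internal_diff and
show_diff_trees, and the patience diff implementation in
bzrlib.patiencediff."""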

import os

from cStringIO import StringIO

from bzrlib.diff import internal_diff, show_diff_trees
from bzrlib.errors import BinaryFile
import bzrlib.patiencediff
from bzrlib.tests import TestCase, TestCaseWithTransport, TestCaseInTempDir


def udiff_lines(old, new, allow_binary=False):
    output = StringIO()
    internal_diff('old', old, 'new', new, output, allow_binary)
    # rewind so readlines() returns the diff that was just written
    output.seek(0, 0)
    return output.readlines()


class TestDiff(TestCase):

    def test_add_nl(self):
        """diff generates a valid diff for patches that add a newline"""
        lines = udiff_lines(['boo'], ['boo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n')
            ## "expected no-nl, got %r" % lines[4]

    def test_add_nl_2(self):
        """diff generates a valid diff for patches that change last line and
        add a newline.
        """
        lines = udiff_lines(['boo'], ['goo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n')
            ## "expected no-nl, got %r" % lines[4]

    def test_remove_nl(self):
        """diff generates a valid diff for patches that change last line and
        remove a newline.
        """
        lines = udiff_lines(['boo\n'], ['boo'])
        self.check_patch(lines)
        self.assertEquals(lines[5], '\\ No newline at end of file\n')
            ## "expected no-nl, got %r" % lines[5]

    def check_patch(self, lines):
        self.assert_(len(lines) > 1)
            ## "Not enough lines for a file header for patch:\n%s" % "".join(lines)
        self.assert_(lines[0].startswith('---'))
            ## 'No orig line for patch:\n%s' % "".join(lines)
        self.assert_(lines[1].startswith('+++'))
            ## 'No mod line for patch:\n%s' % "".join(lines)
        self.assert_(len(lines) > 2)
            ## "No hunks for patch:\n%s" % "".join(lines)
        self.assert_(lines[2].startswith('@@'))
            ## "No hunk header for patch:\n%s" % "".join(lines)
        self.assert_('@@' in lines[2][2:])
            ## "Unterminated hunk header for patch:\n%s" % "".join(lines)

    def test_binary_lines(self):
        self.assertRaises(BinaryFile, udiff_lines, [1023 * 'a' + '\x00'], [])
        self.assertRaises(BinaryFile, udiff_lines, [], [1023 * 'a' + '\x00'])
        udiff_lines([1023 * 'a' + '\x00'], [], allow_binary=True)
        udiff_lines([], [1023 * 'a' + '\x00'], allow_binary=True)
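
    # The next three tests exercise path_encoding: the file *names* in the
    # '---'/'+++' headers are encoded (utf8 by default, or as requested),
    # while the file contents themselves are passed through unchanged.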
    def test_internal_diff_default(self):
        # Default internal diff encoding is utf8
        output = StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output)
        lines = output.getvalue().splitlines(True)
        self.check_patch(lines)
        self.assertEquals(['--- old_\xc2\xb5\n',
                           '+++ new_\xc3\xa5\n',
                           '@@ -1,1 +1,1 @@\n',
                           '-old_text\n',
                           '+new_text\n',
                           '\n',
                          ]
                          , lines)

    def test_internal_diff_utf8(self):
        output = StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output,
                      path_encoding='utf8')
        lines = output.getvalue().splitlines(True)
        self.check_patch(lines)
        self.assertEquals(['--- old_\xc2\xb5\n',
                           '+++ new_\xc3\xa5\n',
                           '@@ -1,1 +1,1 @@\n',
                           '-old_text\n',
                           '+new_text\n',
                           '\n',
                          ]
                          , lines)

    def test_internal_diff_iso_8859_1(self):
        output = StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output,
                      path_encoding='iso-8859-1')
        lines = output.getvalue().splitlines(True)
        self.check_patch(lines)
        self.assertEquals(['--- old_\xb5\n',
                           '+++ new_\xe5\n',
                           '@@ -1,1 +1,1 @@\n',
                           '-old_text\n',
                           '+new_text\n',
                           '\n',
                          ]
                          , lines)

    def test_internal_diff_returns_bytes(self):
        # use the plain StringIO module here, not the cStringIO import above
        import StringIO
        output = StringIO.StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output)
        self.failUnless(isinstance(output.getvalue(), str),
            'internal_diff should return bytestrings')
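

# TestDiffDates builds a branch with four revisions committed at known
# timestamps (2006-04-01 through 2006-04-04) plus an uncommitted change to
# file1 whose mtime is forced to 2006-04-05, so the tests below can assert
# the exact dates that show up in the diff headers.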
class TestDiffDates(TestCaseWithTransport):

    def setUp(self):
        super(TestDiffDates, self).setUp()
        self.wt = self.make_branch_and_tree('.')
        self.b = self.wt.branch
        self.build_tree_contents([
            ('file1', 'file1 contents at rev 1\n'),
            ('file2', 'file2 contents at rev 1\n')
            ])
        self.wt.add(['file1', 'file2'])
        self.wt.commit(
            message='Revision 1',
            timestamp=1143849600, # 2006-04-01 00:00:00 UTC
            timezone=0,
            rev_id='rev-1')
        self.build_tree_contents([('file1', 'file1 contents at rev 2\n')])
        self.wt.commit(
            message='Revision 2',
            timestamp=1143936000, # 2006-04-02 00:00:00 UTC
            timezone=0,
            rev_id='rev-2')
        self.build_tree_contents([('file2', 'file2 contents at rev 3\n')])
        self.wt.commit(
            message='Revision 3',
            timestamp=1144022400, # 2006-04-03 00:00:00 UTC
            timezone=0,
            rev_id='rev-3')
        self.wt.remove(['file2'])
        self.wt.commit(
            message='Revision 4',
            timestamp=1144108800, # 2006-04-04 00:00:00 UTC
            timezone=0,
            rev_id='rev-4')
        self.build_tree_contents([
            ('file1', 'file1 contents in working tree\n')
            ])
        # set the date stamps for files in the working tree to known values
        os.utime('file1', (1144195200, 1144195200)) # 2006-04-05 00:00:00 UTC

    def get_diff(self, tree1, tree2):
        output = StringIO()
        show_diff_trees(tree1, tree2, output,
                        old_label='old/', new_label='new/')
        return output.getvalue()

    def test_diff_rev_tree_working_tree(self):
        output = self.get_diff(self.wt.basis_tree(), self.wt)
        # note that the date for old/file1 is from rev 2 rather than from
        # the basis revision (rev 4)
        self.assertEqualDiff(output, '''\
=== modified file 'file1'
--- old/file1\t2006-04-02 00:00:00 +0000
+++ new/file1\t2006-04-05 00:00:00 +0000
@@ -1,1 +1,1 @@
-file1 contents at rev 2
+file1 contents in working tree

''')

    def test_diff_rev_tree_rev_tree(self):
        tree1 = self.b.repository.revision_tree('rev-2')
        tree2 = self.b.repository.revision_tree('rev-3')
        output = self.get_diff(tree1, tree2)
        self.assertEqualDiff(output, '''\
=== modified file 'file2'
--- old/file2\t2006-04-01 00:00:00 +0000
+++ new/file2\t2006-04-03 00:00:00 +0000
@@ -1,1 +1,1 @@
-file2 contents at rev 1
+file2 contents at rev 3

''')

    def test_diff_add_files(self):
        tree1 = self.b.repository.revision_tree(None)
        tree2 = self.b.repository.revision_tree('rev-1')
        output = self.get_diff(tree1, tree2)
        # the files have the epoch time stamp for the tree in which
        # they don't exist.
        self.assertEqualDiff(output, '''\
=== added file 'file1'
--- old/file1\t1970-01-01 00:00:00 +0000
+++ new/file1\t2006-04-01 00:00:00 +0000
@@ -0,0 +1,1 @@
+file1 contents at rev 1

=== added file 'file2'
--- old/file2\t1970-01-01 00:00:00 +0000
+++ new/file2\t2006-04-01 00:00:00 +0000
@@ -0,0 +1,1 @@
+file2 contents at rev 1

''')

    def test_diff_remove_files(self):
        tree1 = self.b.repository.revision_tree('rev-3')
        tree2 = self.b.repository.revision_tree('rev-4')
        output = self.get_diff(tree1, tree2)
        # the file has the epoch time stamp for the tree in which
        # it doesn't exist.
        self.assertEqualDiff(output, '''\
=== removed file 'file2'
--- old/file2\t2006-04-03 00:00:00 +0000
+++ new/file2\t1970-01-01 00:00:00 +0000
@@ -1,1 +0,0 @@
-file2 contents at rev 3

''')


class TestPatienceDiffLib(TestCase):
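
    # unique_lcs matches only elements that occur exactly once in each input,
    # which is why 'acbac' vs 'abc' below pairs up just the 'b': [(2, 1)].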
    def test_unique_lcs(self):
        unique_lcs = bzrlib.patiencediff.unique_lcs
        self.assertEquals(unique_lcs('', ''), [])
        self.assertEquals(unique_lcs('a', 'a'), [(0,0)])
        self.assertEquals(unique_lcs('a', 'b'), [])
        self.assertEquals(unique_lcs('ab', 'ab'), [(0,0), (1,1)])
        self.assertEquals(unique_lcs('abcde', 'cdeab'), [(2,0), (3,1), (4,2)])
        self.assertEquals(unique_lcs('cdeab', 'abcde'), [(0,2), (1,3), (2,4)])
        self.assertEquals(unique_lcs('abXde', 'abYde'), [(0,0), (1,1),
                                                         (3,3), (4,4)])
        self.assertEquals(unique_lcs('acbac', 'abc'), [(2,1)])

    def test_recurse_matches(self):
        def test_one(a, b, matches):
            test_matches = []
            bzrlib.patiencediff.recurse_matches(a, b, 0, 0, len(a), len(b),
                test_matches, 10)
            self.assertEquals(test_matches, matches)

        test_one(['a', '', 'b', '', 'c'], ['a', 'a', 'b', 'c', 'c'],
                 [(0, 0), (2, 2), (4, 4)])
        test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'],
                 [(0, 0), (2, 1), (4, 2)])

        # recurse_matches doesn't match non-unique
        # lines surrounded by bogus text.
        # The update has been done in patiencediff.SequenceMatcher instead

        # This is what it could be
        #test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)])

        # This is what it currently gives:
        test_one('aBccDe', 'abccde', [(0,0), (5,5)])

    def test_matching_blocks(self):
        def chk_blocks(a, b, expected_blocks):
            # difflib always adds a signature of the total
            # length, with no matching entries at the end
            s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
            blocks = s.get_matching_blocks()
            self.assertEquals((len(a), len(b), 0), blocks[-1])
            self.assertEquals(expected_blocks, blocks[:-1])

        # Some basic matching tests
        chk_blocks('', '', [])
        chk_blocks([], [], [])
        chk_blocks('abcd', 'abcd', [(0, 0, 4)])
        chk_blocks('abcd', 'abce', [(0, 0, 3)])
        chk_blocks('eabc', 'abce', [(1, 0, 3)])
        chk_blocks('eabce', 'abce', [(1, 0, 4)])
        chk_blocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)])
        chk_blocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)])
        chk_blocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)])
        # This may check too much, but it checks to see that
        # a copied block stays attached to the previous section,
        # rather than to the following one.
        # difflib would tend to grab the trailing longest match
        # which would make the diff not look right
        chk_blocks('abcdefghijklmnop', 'abcdefxydefghijklmnop',
                   [(0, 0, 6), (6, 11, 10)])
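        # (For comparison, plain difflib.SequenceMatcher anchors on the long
        # trailing 'defghijklmnop' match here, giving blocks along the lines
        # of (0, 0, 3), (3, 8, 13); the unified-diff test further down shows
        # that as the LongestCommonSubstring result.)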

        # make sure it supports passing in lists
        chk_blocks(
                   ['hello there\n',
                    'world\n',
                    'how are you today?\n'],
                   ['hello there\n',
                    'how are you today?\n'],
                   [(0, 0, 1), (2, 1, 1)])

        # non unique lines surrounded by non-matching lines
        # won't be found
        chk_blocks('aBccDe', 'abccde', [(0,0,1), (5,5,1)])

        # But they only need to be locally unique
        chk_blocks('aBcDec', 'abcdec', [(0,0,1), (2,2,1), (4,4,2)])

        # non unique blocks won't be matched
        chk_blocks('aBcdEcdFg', 'abcdecdfg', [(0,0,1), (8,8,1)])

        # but locally unique ones will
        chk_blocks('aBcdEeXcdFg', 'abcdecdfg', [(0,0,1), (2,2,2),
                                                (5,4,1), (7,5,2), (10,8,1)])

        chk_blocks('abbabbXd', 'cabbabxd', [(7,7,1)])
        chk_blocks('abbabbbb', 'cabbabbc', [])
        chk_blocks('bbbbbbbb', 'cbbbbbbc', [])

    def test_opcodes(self):
        def chk_ops(a, b, expected_codes):
            s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
            self.assertEquals(expected_codes, s.get_opcodes())

        chk_ops('', '', [])
        chk_ops([], [], [])
        chk_ops('abcd', 'abcd', [('equal',   0,4, 0,4)])
        chk_ops('abcd', 'abce', [('equal',   0,3, 0,3),
                                 ('replace', 3,4, 3,4)
                                ])
        chk_ops('eabc', 'abce', [('delete', 0,1, 0,0),
                                 ('equal',  1,4, 0,3),
                                 ('insert', 4,4, 3,4)
                                ])
        chk_ops('eabce', 'abce', [('delete', 0,1, 0,0),
                                  ('equal',  1,5, 0,4)
                                 ])
        chk_ops('abcde', 'abXde', [('equal',   0,2, 0,2),
                                   ('replace', 2,3, 2,3),
                                   ('equal',   3,5, 3,5)
                                  ])
        chk_ops('abcde', 'abXYZde', [('equal',   0,2, 0,2),
                                     ('replace', 2,3, 2,5),
                                     ('equal',   3,5, 5,7)
                                    ])
        chk_ops('abde', 'abXYZde', [('equal',  0,2, 0,2),
                                    ('insert', 2,2, 2,5),
                                    ('equal',  2,4, 5,7)
                                   ])
        chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop',
                [('equal',  0,6,  0,6),
                 ('insert', 6,6,  6,11),
                 ('equal',  6,16, 11,21)
                ])

        chk_ops(['hello there\n',
                 'world\n',
                 'how are you today?\n'],
                ['hello there\n',
                 'how are you today?\n'],
                [('equal',  0,1, 0,1),
                 ('delete', 1,2, 1,1),
                 ('equal',  2,3, 1,2),
                ])
        chk_ops('aBccDe', 'abccde',
                [('equal',   0,1, 0,1),
                 ('replace', 1,5, 1,5),
                 ('equal',   5,6, 5,6),
                ])
        chk_ops('aBcDec', 'abcdec',
                [('equal',   0,1, 0,1),
                 ('replace', 1,2, 1,2),
                 ('equal',   2,3, 2,3),
                 ('replace', 3,4, 3,4),
                 ('equal',   4,6, 4,6),
                ])
        chk_ops('aBcdEcdFg', 'abcdecdfg',
                [('equal',   0,1, 0,1),
                 ('replace', 1,8, 1,8),
                 ('equal',   8,9, 8,9)
                ])
        chk_ops('aBcdEeXcdFg', 'abcdecdfg',
                [('equal',   0,1, 0,1),
                 ('replace', 1,2, 1,2),
                 ('equal',   2,4, 2,4),
                 ('delete',  4,5, 4,4),
                 ('equal',   5,6, 4,5),
                 ('delete',  6,7, 5,5),
                 ('equal',   7,9, 5,7),
                 ('replace', 9,10, 7,8),
                 ('equal',   10,11, 8,9)
                ])

    def test_multiple_ranges(self):
        # There was an earlier bug where we used a bad set of ranges,
        # this triggers that specific bug, to make sure it doesn't regress
        def chk_blocks(a, b, expected_blocks):
            # difflib always adds a signature of the total
            # length, with no matching entries at the end
            s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
            blocks = s.get_matching_blocks()
            x = blocks.pop()
            self.assertEquals(x, (len(a), len(b), 0))
            self.assertEquals(expected_blocks, blocks)

        chk_blocks('abcdefghijklmnop'
                   , 'abcXghiYZQRSTUVWXYZijklmnop'
                   , [(0, 0, 3), (6, 4, 3), (9, 20, 7)])

        chk_blocks('ABCd efghIjk L'
                   , 'AxyzBCn mo pqrstuvwI1 2 L'
                   , [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])

        # These are rot13 code snippets.
        chk_blocks('''\
    trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
    """
    gnxrf_netf = ['svyr*']
    gnxrf_bcgvbaf = ['ab-erphefr']

    qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr):
        sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy
        vs vf_dhvrg():
            ercbegre = nqq_ercbegre_ahyy
        ryfr:
            ercbegre = nqq_ercbegre_cevag
        fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre)


pynff pzq_zxqve(Pbzznaq):
'''.splitlines(True), '''\
    trg nqqrq jura lbh nqq n svyr va gur qverpgbel.

    --qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl
    nqq gurz.
    """
    gnxrf_netf = ['svyr*']
    gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha']

    qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr):
        vzcbeg omeyvo.nqq

        vs qel_eha:
            vs vf_dhvrg():
                # Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe
                npgvba = omeyvo.nqq.nqq_npgvba_ahyy
            ryfr:
                npgvba = omeyvo.nqq.nqq_npgvba_cevag
        ryvs vf_dhvrg():
            npgvba = omeyvo.nqq.nqq_npgvba_nqq
        ryfr:
            npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag

        omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba)


pynff pzq_zxqve(Pbzznaq):
'''.splitlines(True)
                , [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
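        # (The rot13 text above appears to be obfuscated bzrlib command code,
        # an 'add'-style command in two versions; the decoded meaning is
        # irrelevant here, it simply provides realistic multi-hunk input.)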

    def test_patience_unified_diff(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        unified_diff = bzrlib.patiencediff.unified_diff
        psm = bzrlib.patiencediff.PatienceSequenceMatcher
        self.assertEquals([ '--- \n',
                            '+++ \n',
                            '@@ -1,3 +1,2 @@\n',
                            ' hello there\n',
                            '-world\n',
                            ' how are you today?\n'
                          ]
                          , list(unified_diff(txt_a, txt_b,
                                 sequencematcher=psm)))
        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n']
                          , list(unified_diff(txt_a, txt_b)))
        # And the patience diff
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n',
                          ]
                          , list(unified_diff(txt_a, txt_b,
                                 sequencematcher=psm)))
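

# TestPatienceDiffLibFiles repeats the unified-diff checks above, but through
# unified_diff_files, which reads its two inputs from files on disk (hence
# the TestCaseInTempDir base class).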
class TestPatienceDiffLibFiles(TestCaseInTempDir):

    def test_patience_unified_diff_files(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        open('a1', 'wb').writelines(txt_a)
        open('b1', 'wb').writelines(txt_b)

        unified_diff_files = bzrlib.patiencediff.unified_diff_files
        psm = bzrlib.patiencediff.PatienceSequenceMatcher
        self.assertEquals(['--- a1 \n',
                           '+++ b1 \n',
                           '@@ -1,3 +1,2 @@\n',
                           ' hello there\n',
                           '-world\n',
                           ' how are you today?\n',
                          ]
                          , list(unified_diff_files('a1', 'b1',
                                 sequencematcher=psm)))

        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
        open('a2', 'wb').writelines(txt_a)
        open('b2', 'wb').writelines(txt_b)

        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n']
                          , list(unified_diff_files('a2', 'b2')))

        # And the patience diff
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n',
                          ]
                          , list(unified_diff_files('a2', 'b2',
                                 sequencematcher=psm)))