# Copyright (C) 2005, 2006 Canonical Development Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from cStringIO import StringIO

from bzrlib.diff import internal_diff
from bzrlib.errors import BinaryFile
import bzrlib.patiencediff
from bzrlib.tests import TestCase, TestCaseInTempDir


def udiff_lines(old, new, allow_binary=False):
    """Run internal_diff over two lists of lines and return the diff lines.

    :param old: the original file content, as a list of lines
    :param new: the new file content, as a list of lines
    :param allow_binary: passed through to internal_diff; when False,
        content containing NUL bytes raises BinaryFile
    :return: the unified diff as a list of lines
    """
    output = StringIO()
    internal_diff('old', old, 'new', new, output, allow_binary)
    # rewind so readlines() sees the diff just written
    output.seek(0, 0)
    return output.readlines()
def check_patch(lines):
    """Assert that lines form a structurally valid unified diff.

    Checks for a '---'/'+++' file header followed by at least one
    '@@ ... @@' hunk header.  Raises AssertionError with a diagnostic
    message (including the full patch text) on the first violation.

    :param lines: the patch as a list of lines
    """
    assert len(lines) > 1, \
        "Not enough lines for a file header for patch:\n%s" % "".join(lines)
    assert lines[0].startswith('---'), \
        'No orig line for patch:\n%s' % "".join(lines)
    assert lines[1].startswith('+++'), \
        'No mod line for patch:\n%s' % "".join(lines)
    assert len(lines) > 2, \
        "No hunks for patch:\n%s" % "".join(lines)
    assert lines[2].startswith('@@'), \
        "No hunk header for patch:\n%s" % "".join(lines)
    assert '@@' in lines[2][2:], \
        "Unterminated hunk header for patch:\n%s" % "".join(lines)
class TestDiff(TestCase):
    """Tests for the unified diff text produced by internal_diff."""

    def test_add_nl(self):
        """diff generates a valid diff for patches that add a newline"""
        lines = udiff_lines(['boo'], ['boo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n',
            "expected no-nl, got %r" % lines[4])

    def test_add_nl_2(self):
        """diff generates a valid diff for patches that change last line and
        add a newline
        """
        lines = udiff_lines(['boo'], ['goo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n',
            "expected no-nl, got %r" % lines[4])

    def test_remove_nl(self):
        """diff generates a valid diff for patches that change last line and
        remove a newline
        """
        lines = udiff_lines(['boo\n'], ['boo'])
        self.check_patch(lines)
        self.assertEquals(lines[5], '\\ No newline at end of file\n',
            "expected no-nl, got %r" % lines[5])

    def check_patch(self, lines):
        """Assert that lines look like a structurally valid unified diff."""
        self.assert_(len(lines) > 1,
            "Not enough lines for a file header for patch:\n%s" % "".join(lines))
        self.assert_(lines[0].startswith('---'),
            'No orig line for patch:\n%s' % "".join(lines))
        self.assert_(lines[1].startswith('+++'),
            'No mod line for patch:\n%s' % "".join(lines))
        self.assert_(len(lines) > 2,
            "No hunks for patch:\n%s" % "".join(lines))
        self.assert_(lines[2].startswith('@@'),
            "No hunk header for patch:\n%s" % "".join(lines))
        self.assert_('@@' in lines[2][2:],
            "Unterminated hunk header for patch:\n%s" % "".join(lines))

    def test_binary_lines(self):
        """Content with NUL bytes raises BinaryFile unless allow_binary."""
        self.assertRaises(BinaryFile, udiff_lines, [1023 * 'a' + '\x00'], [])
        self.assertRaises(BinaryFile, udiff_lines, [], [1023 * 'a' + '\x00'])
        # with allow_binary=True the same inputs must not raise
        udiff_lines([1023 * 'a' + '\x00'], [], allow_binary=True)
        udiff_lines([], [1023 * 'a' + '\x00'], allow_binary=True)
class TestPatienceDiffLib(TestCase):
    """Unit tests for the patience diff matching primitives."""

    def test_unique_lcs(self):
        # unique_lcs matches only elements that are unique in both
        # sequences, returning (a_index, b_index) pairs
        unique_lcs = bzrlib.patiencediff.unique_lcs
        self.assertEquals(unique_lcs('', ''), [])
        self.assertEquals(unique_lcs('a', 'a'), [(0,0)])
        self.assertEquals(unique_lcs('a', 'b'), [])
        self.assertEquals(unique_lcs('ab', 'ab'), [(0,0), (1,1)])
        self.assertEquals(unique_lcs('abcde', 'cdeab'), [(2,0), (3,1), (4,2)])
        self.assertEquals(unique_lcs('cdeab', 'abcde'), [(0,2), (1,3), (2,4)])
        self.assertEquals(unique_lcs('abXde', 'abYde'), [(0,0), (1,1),
                                                         (3,3), (4,4)])
        # only 'b' is unique in both sequences here
        self.assertEquals(unique_lcs('acbac', 'abc'), [(2,1)])
94
def test_recurse_matches(self):
95
def test_one(a, b, matches):
97
bzrlib.patiencediff.recurse_matches(a, b, 0, 0, len(a), len(b),
99
self.assertEquals(test_matches, matches)
101
test_one(['a', '', 'b', '', 'c'], ['a', 'a', 'b', 'c', 'c'],
102
[(0, 0), (2, 2), (4, 4)])
103
test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'],
104
[(0, 0), (2, 1), (4, 2)])
106
# recurse_matches doesn't match non-unique
107
# lines surrounded by bogus text.
108
# The update has been done in patiencediff.SequenceMatcher instead
110
# This is what it could be
111
#test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)])
113
# This is what it currently gives:
114
test_one('aBccDe', 'abccde', [(0,0), (5,5)])
116
def test_matching_blocks(self):
117
def chk_blocks(a, b, expected_blocks):
118
# difflib always adds a signature of the total
119
# length, with no matching entries at the end
120
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
121
blocks = s.get_matching_blocks()
122
self.assertEquals((len(a), len(b), 0), blocks[-1])
123
self.assertEquals(expected_blocks, blocks[:-1])
125
# Some basic matching tests
126
chk_blocks('', '', [])
127
chk_blocks([], [], [])
128
chk_blocks('abcd', 'abcd', [(0, 0, 4)])
129
chk_blocks('abcd', 'abce', [(0, 0, 3)])
130
chk_blocks('eabc', 'abce', [(1, 0, 3)])
131
chk_blocks('eabce', 'abce', [(1, 0, 4)])
132
chk_blocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)])
133
chk_blocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)])
134
chk_blocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)])
135
# This may check too much, but it checks to see that
136
# a copied block stays attached to the previous section,
138
# difflib would tend to grab the trailing longest match
139
# which would make the diff not look right
140
chk_blocks('abcdefghijklmnop', 'abcdefxydefghijklmnop',
141
[(0, 0, 6), (6, 11, 10)])
143
# make sure it supports passing in lists
147
'how are you today?\n'],
149
'how are you today?\n'],
150
[(0, 0, 1), (2, 1, 1)])
152
# non unique lines surrounded by non-matching lines
154
chk_blocks('aBccDe', 'abccde', [(0,0,1), (5,5,1)])
156
# But they only need to be locally unique
157
chk_blocks('aBcDec', 'abcdec', [(0,0,1), (2,2,1), (4,4,2)])
159
# non unique blocks won't be matched
160
chk_blocks('aBcdEcdFg', 'abcdecdfg', [(0,0,1), (8,8,1)])
162
# but locally unique ones will
163
chk_blocks('aBcdEeXcdFg', 'abcdecdfg', [(0,0,1), (2,2,2),
164
(5,4,1), (7,5,2), (10,8,1)])
166
chk_blocks('abbabbXd', 'cabbabxd', [(7,7,1)])
167
chk_blocks('abbabbbb', 'cabbabbc', [])
168
chk_blocks('bbbbbbbb', 'cbbbbbbc', [])
170
def test_opcodes(self):
171
def chk_ops(a, b, expected_codes):
172
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
173
self.assertEquals(expected_codes, s.get_opcodes())
177
chk_ops('abcd', 'abcd', [('equal', 0,4, 0,4)])
178
chk_ops('abcd', 'abce', [('equal', 0,3, 0,3),
179
('replace', 3,4, 3,4)
181
chk_ops('eabc', 'abce', [('delete', 0,1, 0,0),
185
chk_ops('eabce', 'abce', [('delete', 0,1, 0,0),
188
chk_ops('abcde', 'abXde', [('equal', 0,2, 0,2),
189
('replace', 2,3, 2,3),
192
chk_ops('abcde', 'abXYZde', [('equal', 0,2, 0,2),
193
('replace', 2,3, 2,5),
196
chk_ops('abde', 'abXYZde', [('equal', 0,2, 0,2),
197
('insert', 2,2, 2,5),
200
chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop',
201
[('equal', 0,6, 0,6),
202
('insert', 6,6, 6,11),
203
('equal', 6,16, 11,21)
208
, 'how are you today?\n'],
210
, 'how are you today?\n'],
211
[('equal', 0,1, 0,1),
212
('delete', 1,2, 1,1),
215
chk_ops('aBccDe', 'abccde',
216
[('equal', 0,1, 0,1),
217
('replace', 1,5, 1,5),
220
chk_ops('aBcDec', 'abcdec',
221
[('equal', 0,1, 0,1),
222
('replace', 1,2, 1,2),
224
('replace', 3,4, 3,4),
227
chk_ops('aBcdEcdFg', 'abcdecdfg',
228
[('equal', 0,1, 0,1),
229
('replace', 1,8, 1,8),
232
chk_ops('aBcdEeXcdFg', 'abcdecdfg',
233
[('equal', 0,1, 0,1),
234
('replace', 1,2, 1,2),
236
('delete', 4,5, 4,4),
238
('delete', 6,7, 5,5),
240
('replace', 9,10, 7,8),
241
('equal', 10,11, 8,9)
244
def test_multiple_ranges(self):
245
# There was an earlier bug where we used a bad set of ranges,
246
# this triggers that specific bug, to make sure it doesn't regress
247
def chk_blocks(a, b, expected_blocks):
248
# difflib always adds a signature of the total
249
# length, with no matching entries at the end
250
s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
251
blocks = s.get_matching_blocks()
253
self.assertEquals(x, (len(a), len(b), 0))
254
self.assertEquals(expected_blocks, blocks)
256
chk_blocks('abcdefghijklmnop'
257
, 'abcXghiYZQRSTUVWXYZijklmnop'
258
, [(0, 0, 3), (6, 4, 3), (9, 20, 7)])
260
chk_blocks('ABCd efghIjk L'
261
, 'AxyzBCn mo pqrstuvwI1 2 L'
262
, [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
264
# These are rot13 code snippets.
266
trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
268
gnxrf_netf = ['svyr*']
269
gnxrf_bcgvbaf = ['ab-erphefr']
271
qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr):
272
sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy
274
ercbegre = nqq_ercbegre_ahyy
276
ercbegre = nqq_ercbegre_cevag
277
fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre)
280
pynff pzq_zxqve(Pbzznaq):
281
'''.splitlines(True), '''\
282
trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
284
--qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl
287
gnxrf_netf = ['svyr*']
288
gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha']
290
qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr):
295
# Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe
296
npgvba = omeyvo.nqq.nqq_npgvba_ahyy
298
npgvba = omeyvo.nqq.nqq_npgvba_cevag
300
npgvba = omeyvo.nqq.nqq_npgvba_nqq
302
npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag
304
omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba)
307
pynff pzq_zxqve(Pbzznaq):
309
, [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])
311
def test_patience_unified_diff(self):
312
txt_a = ['hello there\n',
314
'how are you today?\n']
315
txt_b = ['hello there\n',
316
'how are you today?\n']
317
unified_diff = bzrlib.patiencediff.unified_diff
318
psm = bzrlib.patiencediff.PatienceSequenceMatcher
319
self.assertEquals([ '--- \n',
324
' how are you today?\n'
326
, list(unified_diff(txt_a, txt_b,
327
sequencematcher=psm)))
328
txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
329
txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
330
# This is the result with LongestCommonSubstring matching
331
self.assertEquals(['--- \n',
333
'@@ -1,6 +1,11 @@\n',
345
, list(unified_diff(txt_a, txt_b)))
346
# And the patience diff
347
self.assertEquals(['--- \n',
349
'@@ -4,6 +4,11 @@\n',
362
, list(unified_diff(txt_a, txt_b,
363
sequencematcher=psm)))
class TestPatienceDiffLibFiles(TestCaseInTempDir):
    """File-based variants of the patience diff tests (run in a temp dir)."""

    def test_patience_unified_diff_files(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        open('a1', 'wb').writelines(txt_a)
        open('b1', 'wb').writelines(txt_b)

        unified_diff_files = bzrlib.patiencediff.unified_diff_files
        psm = bzrlib.patiencediff.PatienceSequenceMatcher
        self.assertEquals(['--- a1 \n',
                           '+++ b1 \n',
                           '@@ -1,3 +1,2 @@\n',
                           ' hello there\n',
                           '-world\n',
                           ' how are you today?\n',
                          ]
                          , list(unified_diff_files('a1', 'b1',
                                 sequencematcher=psm)))

        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
        open('a2', 'wb').writelines(txt_a)
        open('b2', 'wb').writelines(txt_b)

        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n']
                          , list(unified_diff_files('a2', 'b2')))

        # And the patience diff
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n',
                          ]
                          , list(unified_diff_files('a2', 'b2',
                                 sequencematcher=psm)))