# Copyright (C) 2005, 2006 Canonical Development Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from cStringIO import StringIO

from bzrlib.diff import internal_diff
from bzrlib.errors import BinaryFile
import bzrlib.patiencediff
from bzrlib.tests import TestCase, TestCaseInTempDir


def udiff_lines(old, new, allow_binary=False):
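    """Run internal_diff on two lists of lines and return the diff as a list of lines."""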
    output = StringIO()
    internal_diff('old', old, 'new', new, output, allow_binary)
    output.seek(0, 0)
    return output.readlines()


class TestDiff(TestCase):

    def test_add_nl(self):
        """diff generates a valid diff for patches that add a newline"""
        lines = udiff_lines(['boo'], ['boo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n')
            ## "expected no-nl, got %r" % lines[4]

    def test_add_nl_2(self):
        """diff generates a valid diff for patches that change last line and
        add a newline
        """
        lines = udiff_lines(['boo'], ['goo\n'])
        self.check_patch(lines)
        self.assertEquals(lines[4], '\\ No newline at end of file\n')
            ## "expected no-nl, got %r" % lines[4]

    def test_remove_nl(self):
        """diff generates a valid diff for patches that change last line and
        remove a newline
        """
        lines = udiff_lines(['boo\n'], ['boo'])
        self.check_patch(lines)
        self.assertEquals(lines[5], '\\ No newline at end of file\n')
            ## "expected no-nl, got %r" % lines[5]

    def check_patch(self, lines):
        self.assert_(len(lines) > 1)
            ## "Not enough lines for a file header for patch:\n%s" % "".join(lines)
        self.assert_(lines[0].startswith('---'))
            ## 'No orig line for patch:\n%s' % "".join(lines)
        self.assert_(lines[1].startswith('+++'))
            ## 'No mod line for patch:\n%s' % "".join(lines)
        self.assert_(len(lines) > 2)
            ## "No hunks for patch:\n%s" % "".join(lines)
        self.assert_(lines[2].startswith('@@'))
            ## "No hunk header for patch:\n%s" % "".join(lines)
        self.assert_('@@' in lines[2][2:])
            ## "Unterminated hunk header for patch:\n%s" % "".join(lines)

    def test_binary_lines(self):
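        # A NUL byte marks the content as binary; the 1023 filler bytes place
        # it just inside the first kilobyte that the binary check appears to
        # inspect.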
        self.assertRaises(BinaryFile, udiff_lines, [1023 * 'a' + '\x00'], [])
        self.assertRaises(BinaryFile, udiff_lines, [], [1023 * 'a' + '\x00'])
        udiff_lines([1023 * 'a' + '\x00'], [], allow_binary=True)
        udiff_lines([], [1023 * 'a' + '\x00'], allow_binary=True)

    def test_internal_diff_default(self):
        # Default internal diff encoding is utf8
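        # (u'\xb5' is MICRO SIGN and u'\xe5' is LATIN SMALL LETTER A WITH RING;
        # their utf8 encodings are '\xc2\xb5' and '\xc3\xa5', which is what the
        # expected headers below contain)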
        output = StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output)
        lines = output.getvalue().splitlines(True)
        self.check_patch(lines)
        self.assertEquals(['--- old_\xc2\xb5\t\n',
                           '+++ new_\xc3\xa5\t\n',
                           '@@ -1,1 +1,1 @@\n',
                           '-old_text\n',
                           '+new_text\n',
                           '\n',
                          ]
                          , lines)
def test_internal_diff_utf8(self):
97
internal_diff(u'old_\xb5', ['old_text\n'],
98
u'new_\xe5', ['new_text\n'], output,
100
lines = output.getvalue().splitlines(True)
101
self.check_patch(lines)
102
self.assertEquals(['--- old_\xc2\xb5\t\n',
103
'+++ new_\xc3\xa5\t\n',
111

    def test_internal_diff_iso_8859_1(self):
        output = StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output,
                      path_encoding='iso-8859-1')
        lines = output.getvalue().splitlines(True)
        self.check_patch(lines)
        self.assertEquals(['--- old_\xb5\t\n',
                           '+++ new_\xe5\t\n',
                           '@@ -1,1 +1,1 @@\n',
                           '-old_text\n',
                           '+new_text\n',
                           '\n',
                          ]
                          , lines)

    def test_internal_diff_returns_bytes(self):
        import StringIO
        output = StringIO.StringIO()
        internal_diff(u'old_\xb5', ['old_text\n'],
                      u'new_\xe5', ['new_text\n'], output)
        self.failUnless(isinstance(output.getvalue(), str),
            'internal_diff should return bytestrings')


class TestPatienceDiffLib(TestCase):

    def test_unique_lcs(self):
        unique_lcs = bzrlib.patiencediff.unique_lcs
        self.assertEquals(unique_lcs('', ''), [])
        self.assertEquals(unique_lcs('a', 'a'), [(0,0)])
        self.assertEquals(unique_lcs('a', 'b'), [])
        self.assertEquals(unique_lcs('ab', 'ab'), [(0,0), (1,1)])
        self.assertEquals(unique_lcs('abcde', 'cdeab'), [(2,0), (3,1), (4,2)])
        self.assertEquals(unique_lcs('cdeab', 'abcde'), [(0,2), (1,3), (2,4)])
        self.assertEquals(unique_lcs('abXde', 'abYde'), [(0,0), (1,1),
                                                         (3,3), (4,4)])
        self.assertEquals(unique_lcs('acbac', 'abc'), [(2,1)])

    def test_recurse_matches(self):
        def test_one(a, b, matches):
            test_matches = []
            bzrlib.patiencediff.recurse_matches(a, b, 0, 0, len(a), len(b),
                test_matches, 10)
            self.assertEquals(test_matches, matches)

        test_one(['a', '', 'b', '', 'c'], ['a', 'a', 'b', 'c', 'c'],
                 [(0, 0), (2, 2), (4, 4)])
        test_one(['a', 'c', 'b', 'a', 'c'], ['a', 'b', 'c'],
                 [(0, 0), (2, 1), (4, 2)])

        # recurse_matches doesn't match non-unique
        # lines surrounded by bogus text.
        # The update has been done in patiencediff.SequenceMatcher instead
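        # ('c' is not unique on either side, and the characters around the
        # 'cc' run differ, so recurse_matches has nothing to anchor on between
        # the unique 'a' and 'e' matches.)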

        # This is what it could be:
        #test_one('aBccDe', 'abccde', [(0,0), (2,2), (3,3), (5,5)])

        # This is what it currently gives:
        test_one('aBccDe', 'abccde', [(0,0), (5,5)])

    def test_matching_blocks(self):
        def chk_blocks(a, b, expected_blocks):
            # difflib always adds a signature of the total
            # length, with no matching entries at the end
            s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
            blocks = s.get_matching_blocks()
            self.assertEquals((len(a), len(b), 0), blocks[-1])
            self.assertEquals(expected_blocks, blocks[:-1])

        # Some basic matching tests
        chk_blocks('', '', [])
        chk_blocks([], [], [])
        chk_blocks('abcd', 'abcd', [(0, 0, 4)])
        chk_blocks('abcd', 'abce', [(0, 0, 3)])
        chk_blocks('eabc', 'abce', [(1, 0, 3)])
        chk_blocks('eabce', 'abce', [(1, 0, 4)])
        chk_blocks('abcde', 'abXde', [(0, 0, 2), (3, 3, 2)])
        chk_blocks('abcde', 'abXYZde', [(0, 0, 2), (3, 5, 2)])
        chk_blocks('abde', 'abXYZde', [(0, 0, 2), (2, 5, 2)])

        # This may check too much, but it checks to see that
        # a copied block stays attached to the previous section,
        # not the later one.
        # difflib would tend to grab the trailing longest match,
        # which would make the diff not look right
        chk_blocks('abcdefghijklmnop', 'abcdefxydefghijklmnop',
                   [(0, 0, 6), (6, 11, 10)])
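        # (for the same input, difflib's longest-match-first strategy gives
        # (0, 0, 3), (3, 8, 13): the original 'def' run gets paired with the
        # copy that follows the insertion, which reads oddly in a diff)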

        # make sure it supports passing in lists
        chk_blocks(['hello there\n',
                    'world\n',
                    'how are you today?\n'],
                   ['hello there\n',
                    'how are you today?\n'],
                   [(0, 0, 1), (2, 1, 1)])

        # non unique lines surrounded by non-matching lines
        # won't be found
        chk_blocks('aBccDe', 'abccde', [(0,0,1), (5,5,1)])

        # But they only need to be locally unique
        chk_blocks('aBcDec', 'abcdec', [(0,0,1), (2,2,1), (4,4,2)])
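        # (neither 'c' is unique in the full strings, but once 'a' and 'e' are
        # matched the recursion sees each 'c' alone in its own sub-range, so
        # both are picked up)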

        # non unique blocks won't be matched
        chk_blocks('aBcdEcdFg', 'abcdecdfg', [(0,0,1), (8,8,1)])

        # but locally unique ones will
        chk_blocks('aBcdEeXcdFg', 'abcdecdfg', [(0,0,1), (2,2,2),
                                                (5,4,1), (7,5,2), (10,8,1)])

        chk_blocks('abbabbXd', 'cabbabxd', [(7,7,1)])
        chk_blocks('abbabbbb', 'cabbabbc', [])
        chk_blocks('bbbbbbbb', 'cbbbbbbc', [])

    def test_opcodes(self):
        def chk_ops(a, b, expected_codes):
            s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
            self.assertEquals(expected_codes, s.get_opcodes())

        chk_ops('', '', [])
        chk_ops([], [], [])
        chk_ops('abcd', 'abcd', [('equal',   0,4, 0,4)])
        chk_ops('abcd', 'abce', [('equal',   0,3, 0,3),
                                 ('replace', 3,4, 3,4)
                                ])
        chk_ops('eabc', 'abce', [('delete', 0,1, 0,0),
                                 ('equal',  1,4, 0,3),
                                 ('insert', 4,4, 3,4)
                                ])
        chk_ops('eabce', 'abce', [('delete', 0,1, 0,0),
                                  ('equal',  1,5, 0,4)
                                 ])
        chk_ops('abcde', 'abXde', [('equal',   0,2, 0,2),
                                   ('replace', 2,3, 2,3),
                                   ('equal',   3,5, 3,5)
                                  ])
        chk_ops('abcde', 'abXYZde', [('equal',   0,2, 0,2),
                                     ('replace', 2,3, 2,5),
                                     ('equal',   3,5, 5,7)
                                    ])
        chk_ops('abde', 'abXYZde', [('equal',  0,2, 0,2),
                                    ('insert', 2,2, 2,5),
                                    ('equal',  2,4, 5,7)
                                   ])
        chk_ops('abcdefghijklmnop', 'abcdefxydefghijklmnop',
                [('equal',  0,6,  0,6),
                 ('insert', 6,6,  6,11),
                 ('equal',  6,16, 11,21)
                ])
        chk_ops(['hello there\n',
                 'world\n',
                 'how are you today?\n'],
                ['hello there\n',
                 'how are you today?\n'],
                [('equal',  0,1, 0,1),
                 ('delete', 1,2, 1,1),
                 ('equal',  2,3, 1,2)
                ])
        chk_ops('aBccDe', 'abccde',
                [('equal',   0,1, 0,1),
                 ('replace', 1,5, 1,5),
                 ('equal',   5,6, 5,6)
                ])
        chk_ops('aBcDec', 'abcdec',
                [('equal',   0,1, 0,1),
                 ('replace', 1,2, 1,2),
                 ('equal',   2,3, 2,3),
                 ('replace', 3,4, 3,4),
                 ('equal',   4,6, 4,6)
                ])
        chk_ops('aBcdEcdFg', 'abcdecdfg',
                [('equal',   0,1, 0,1),
                 ('replace', 1,8, 1,8),
                 ('equal',   8,9, 8,9)
                ])
        chk_ops('aBcdEeXcdFg', 'abcdecdfg',
                [('equal',   0,1,  0,1),
                 ('replace', 1,2,  1,2),
                 ('equal',   2,4,  2,4),
                 ('delete',  4,5,  4,4),
                 ('equal',   5,6,  4,5),
                 ('delete',  6,7,  5,5),
                 ('equal',   7,9,  5,7),
                 ('replace', 9,10, 7,8),
                 ('equal',   10,11, 8,9)
                ])

    def test_multiple_ranges(self):
        # There was an earlier bug where we used a bad set of ranges;
        # this triggers that specific bug to make sure it doesn't regress
        def chk_blocks(a, b, expected_blocks):
            # difflib always adds a signature of the total
            # length, with no matching entries at the end
            s = bzrlib.patiencediff.PatienceSequenceMatcher(None, a, b)
            blocks = s.get_matching_blocks()
            x = blocks.pop()
            self.assertEquals(x, (len(a), len(b), 0))
            self.assertEquals(expected_blocks, blocks)

        chk_blocks('abcdefghijklmnop',
                   'abcXghiYZQRSTUVWXYZijklmnop',
                   [(0, 0, 3), (6, 4, 3), (9, 20, 7)])

        chk_blocks('ABCd efghIjk L',
                   'AxyzBCn mo pqrstuvwI1 2 L',
                   [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])

        # These are rot13 code snippets.
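        # (they decode to two revisions of a bzr 'add' command implementation,
        # so the matcher is exercised on realistic, indented Python source)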
        chk_blocks('''\
    trg nqqrq jura lbh nqq n svyr va gur qverpgbel.
    """
    gnxrf_netf = ['svyr*']
    gnxrf_bcgvbaf = ['ab-erphefr']

    qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr):
        sebz omeyvo.nqq vzcbeg fzneg_nqq, nqq_ercbegre_cevag, nqq_ercbegre_ahyy
        vs vf_dhvrg():
            ercbegre = nqq_ercbegre_ahyy
        ryfr:
            ercbegre = nqq_ercbegre_cevag
        fzneg_nqq(svyr_yvfg, abg ab_erphefr, ercbegre)


pynff pzq_zxqve(Pbzznaq):
'''.splitlines(True), '''\
    trg nqqrq jura lbh nqq n svyr va gur qverpgbel.

    --qel-eha jvyy fubj juvpu svyrf jbhyq or nqqrq, ohg abg npghnyyl
    nqq gurz.
    """
    gnxrf_netf = ['svyr*']
    gnxrf_bcgvbaf = ['ab-erphefr', 'qel-eha']

    qrs eha(frys, svyr_yvfg, ab_erphefr=Snyfr, qel_eha=Snyfr):
        vzcbeg omeyvo.nqq

        vs qel_eha:
            vs vf_dhvrg():
                # Guvf vf cbvagyrff, ohg V'q engure abg envfr na reebe
                npgvba = omeyvo.nqq.nqq_npgvba_ahyy
            ryfr:
                npgvba = omeyvo.nqq.nqq_npgvba_cevag
        ryvs vf_dhvrg():
            npgvba = omeyvo.nqq.nqq_npgvba_nqq
        ryfr:
            npgvba = omeyvo.nqq.nqq_npgvba_nqq_naq_cevag

        omeyvo.nqq.fzneg_nqq(svyr_yvfg, abg ab_erphefr, npgvba)


pynff pzq_zxqve(Pbzznaq):
'''.splitlines(True)
            , [(0,0,1), (1, 4, 2), (9, 19, 1), (12, 23, 3)])

    def test_patience_unified_diff(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        unified_diff = bzrlib.patiencediff.unified_diff
        psm = bzrlib.patiencediff.PatienceSequenceMatcher
        self.assertEquals([ '--- \n',
                            '+++ \n',
                            '@@ -1,3 +1,2 @@\n',
                            ' hello there\n',
                            '-world\n',
                            ' how are you today?\n'
                          ]
                          , list(unified_diff(txt_a, txt_b,
                                 sequencematcher=psm)))
        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n'
                          ]
                          , list(unified_diff(txt_a, txt_b)))
        # And the patience diff
        self.assertEquals(['--- \n',
                           '+++ \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n'
                          ]
                          , list(unified_diff(txt_a, txt_b,
                                 sequencematcher=psm)))


class TestPatienceDiffLibFiles(TestCaseInTempDir):

    def test_patience_unified_diff_files(self):
        txt_a = ['hello there\n',
                 'world\n',
                 'how are you today?\n']
        txt_b = ['hello there\n',
                 'how are you today?\n']
        open('a1', 'wb').writelines(txt_a)
        open('b1', 'wb').writelines(txt_b)

        unified_diff_files = bzrlib.patiencediff.unified_diff_files
        psm = bzrlib.patiencediff.PatienceSequenceMatcher
        self.assertEquals(['--- a1 \n',
                           '+++ b1 \n',
                           '@@ -1,3 +1,2 @@\n',
                           ' hello there\n',
                           '-world\n',
                           ' how are you today?\n',
                          ]
                          , list(unified_diff_files('a1', 'b1',
                                 sequencematcher=psm)))

        txt_a = map(lambda x: x+'\n', 'abcdefghijklmnop')
        txt_b = map(lambda x: x+'\n', 'abcdefxydefghijklmnop')
        open('a2', 'wb').writelines(txt_a)
        open('b2', 'wb').writelines(txt_b)

        # This is the result with LongestCommonSubstring matching
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -1,6 +1,11 @@\n',
                           ' a\n',
                           ' b\n',
                           ' c\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           '+x\n',
                           '+y\n',
                           ' d\n',
                           ' e\n',
                           ' f\n'
                          ]
                          , list(unified_diff_files('a2', 'b2')))

        # And the patience diff
        self.assertEquals(['--- a2 \n',
                           '+++ b2 \n',
                           '@@ -4,6 +4,11 @@\n',
                           ' d\n',
                           ' e\n',
                           ' f\n',
                           '+x\n',
                           '+y\n',
                           '+d\n',
                           '+e\n',
                           '+f\n',
                           ' g\n',
                           ' h\n',
                           ' i\n'
                          ]
                          , list(unified_diff_files('a2', 'b2',
                                 sequencematcher=psm)))