~bzr-pqm/bzr/bzr.dev

5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
16
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
17
"""Tests for the python and pyrex extensions of groupcompress"""
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
18
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
19
from bzrlib import (
20
    _groupcompress_py,
21
    tests,
22
    )
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
23
from bzrlib.tests.scenarios import (
24
    load_tests_apply_scenarios,
25
    )
26
27
28
def module_scenarios():
    """Return one test scenario per available groupcompress implementation.

    The pure-python module is always included.  The compiled module is only
    included when ``compiled_groupcompress_feature`` reports that it is
    available.

    :return: A list of ``(name, {'_gc_module': module})`` tuples.
    """
    implementations = [('python', _groupcompress_py)]
    if compiled_groupcompress_feature.available():
        implementations.append(('C', compiled_groupcompress_feature.module))
    return [(name, {'_gc_module': module})
            for name, module in implementations]
37
38
39
def two_way_scenarios():
    """Return scenarios pairing a delta generator with a delta applier.

    Scenario names are two letters: the first names the implementation that
    provides ``make_delta`` and the second the one that provides
    ``apply_delta`` ('P' for the python module, 'C' for the compiled one).
    Only the 'PP' scenario is produced when the compiled module is not
    available.

    :return: A list of
        ``(name, {'make_delta': callable, 'apply_delta': callable})`` tuples.
    """
    py_make = _groupcompress_py.make_delta
    py_apply = _groupcompress_py.apply_delta
    pairings = [('PP', py_make, py_apply)]
    if compiled_groupcompress_feature.available():
        compiled = compiled_groupcompress_feature.module
        c_make = compiled.make_delta
        c_apply = compiled.apply_delta
        pairings += [
            ('CC', c_make, c_apply),
            ('PC', py_make, c_apply),
            ('CP', c_make, py_apply),
            ]
    return [(name, {'make_delta': maker, 'apply_delta': applier})
            for name, maker, applier in pairings]
55
56
57
# Module-level test loading hook.  The test classes below declare a
# ``scenarios`` attribute for it to consume.
load_tests = load_tests_apply_scenarios


# Feature guarding every test that needs the compiled groupcompress
# extension module.
compiled_groupcompress_feature = tests.ModuleAvailableFeature(
    'bzrlib._groupcompress_pyx')
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
62
0.23.8 by John Arbash Meinel
Add another test text.
63
_text1 = """\
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
64
This is a bit
65
of source text
66
which is meant to be matched
67
against other text
68
"""
69
0.23.8 by John Arbash Meinel
Add another test text.
70
_text2 = """\
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
71
This is a bit
72
of source text
73
which is meant to differ from
74
against other text
75
"""
76
0.23.8 by John Arbash Meinel
Add another test text.
77
_text3 = """\
78
This is a bit
79
of source text
80
which is meant to be matched
81
against other text
82
except it also
83
has a lot more data
84
at the end of the file
85
"""
86
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
87
_first_text = """\
88
a bit of text, that
89
does not have much in
90
common with the next text
91
"""
92
93
_second_text = """\
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
94
some more bit of text, that
95
does not have much in
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
96
common with the previous text
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
97
and has some extra text
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
98
"""
99
100
101
_third_text = """\
102
a bit of text, that
103
has some in common with the previous text
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
104
and has some extra text
105
and not have much in
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
106
common with the next text
107
"""
108
3735.33.4 by John Arbash Meinel
The new layout is working.
109
_fourth_text = """\
110
123456789012345
111
same rabin hash
112
123456789012345
113
same rabin hash
114
123456789012345
115
same rabin hash
116
123456789012345
117
same rabin hash
118
"""
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
119
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
120
class TestMakeAndApplyDelta(tests.TestCase):
    """Exercise make_delta/apply_delta of one groupcompress implementation.

    The implementation under test is supplied through ``_gc_module``, one
    scenario per module returned by ``module_scenarios()``.
    """

    scenarios = module_scenarios()
    _gc_module = None # Set by load_tests

    def setUp(self):
        super(TestMakeAndApplyDelta, self).setUp()
        # Bind the functions under test once so the tests can call them
        # without caring which module they came from.
        module = self._gc_module
        self.make_delta = module.make_delta
        self.apply_delta = module.apply_delta
        self.apply_delta_to_source = module.apply_delta_to_source

    def test_make_delta_is_typesafe(self):
        # Two plain strings are accepted ...
        self.make_delta('a string', 'another string')

        # ... while anything else in either position raises TypeError.
        rejected_pairs = [
            ('a string', object()),
            ('a string', u'not a string'),
            (object(), 'a string'),
            (u'not a string', 'a string'),
            ]
        for source, target in rejected_pairs:
            self.assertRaises(TypeError, self.make_delta, source, target)

    def test_make_noop_delta(self):
        # A delta of a text against itself is the length header followed by
        # a single copy instruction.
        cases = [
            ('M\x90M', _text1),
            ('N\x90N', _text2),
            ('\x87\x01\x90\x87', _text3),
            ]
        for expected, text in cases:
            ident_delta = self.make_delta(text, text)
            self.assertEqual(expected, ident_delta)

    def assertDeltaIn(self, delta1, delta2, delta):
        """Fail unless ``delta`` equals one of ``delta1`` or ``delta2``."""
        # In general, the python delta matcher gives different results than the
        # pyrex delta matcher. Both should be valid deltas, though.
        if delta in (delta1, delta2):
            return
        message = ("Delta bytes:\n"
                   "       %r\n"
                   "not in %r\n"
                   "    or %r"
                   % (delta, delta1, delta2))
        self.fail(message)

    def test_make_delta(self):
        # Each target has two acceptable encodings, see assertDeltaIn.
        to_text2_a = 'N\x90/\x1fdiffer from\nagainst other text\n'
        to_text2_b = 'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13'
        to_text1_a = 'M\x90/\x1ebe matched\nagainst other text\n'
        to_text1_b = 'M\x90\x1d\x1dwhich is meant to be matched\n\x91;\x13'

        delta = self.make_delta(_text1, _text2)
        self.assertDeltaIn(to_text2_a, to_text2_b, delta)

        delta = self.make_delta(_text2, _text1)
        self.assertDeltaIn(to_text1_a, to_text1_b, delta)

        # _text1 is a prefix of _text3, so a single copy is enough.
        delta = self.make_delta(_text3, _text1)
        self.assertEqual('M\x90M', delta)

        delta = self.make_delta(_text3, _text2)
        self.assertDeltaIn(to_text2_a, to_text2_b, delta)

    def test_make_delta_with_large_copies(self):
        # We want to have a copy that is larger than 64kB, which forces us to
        # issue multiple copy instructions.
        big_text = _text3 * 1220
        expected = (
            '\xdc\x86\x0a'      # Encoding the length of the uncompressed text
            '\x80'              # Copy 64kB, starting at byte 0
            '\x84\x01'          # and another 64kB starting at 64kB
            '\xb4\x02\x5c\x83'  # And the bit of tail.
            )
        delta = self.make_delta(big_text, big_text)
        # Both implementations should be identical, hence no alternative.
        self.assertDeltaIn(expected, None, delta)

    def test_apply_delta_is_typesafe(self):
        noop_delta = 'M\x90M'
        # Plain strings for both arguments are fine.
        self.apply_delta(_text1, noop_delta)

        def expect_type_error(source, delta):
            self.assertRaises(TypeError, self.apply_delta, source, delta)

        expect_type_error(object(), 'M\x90M')
        expect_type_error(unicode(_text1), 'M\x90M')
        expect_type_error(_text1, u'M\x90M')
        expect_type_error(_text1, object())

    def test_apply_delta(self):
        # Apply hand-written deltas in both directions between the texts.
        forward = 'N\x90/\x1fdiffer from\nagainst other text\n'
        target = self.apply_delta(_text1, forward)
        self.assertEqual(_text2, target)

        backward = 'M\x90/\x1ebe matched\nagainst other text\n'
        target = self.apply_delta(_text2, backward)
        self.assertEqual(_text1, target)

    def test_apply_delta_to_source_is_safe(self):
        bad_calls = [
            (TypeError, object(), 0, 1),
            (TypeError, u'unicode str', 0, 1),
            # end > length
            (ValueError, 'foo', 1, 4),
            # start > length
            (ValueError, 'foo', 5, 3),
            # start > end
            (ValueError, 'foo', 3, 2),
            ]
        for expected_error, source, start, end in bad_calls:
            self.assertRaises(expected_error,
                self.apply_delta_to_source, source, start, end)

    def test_apply_delta_to_source(self):
        # The delta bytes are stored directly after the source text in one
        # string; the start and end offsets say where the delta lives.
        delta = 'N\x90/\x1fdiffer from\nagainst other text\n'
        source_and_delta = _text1 + delta
        delta_start = len(_text1)
        delta_end = len(source_and_delta)
        self.assertEqual(_text2, self.apply_delta_to_source(
            source_and_delta, delta_start, delta_end))
229
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
230
3735.40.12 by John Arbash Meinel
Add tests that the deltas generated by one implementation are compatible
231
class TestMakeAndApplyCompatible(tests.TestCase):
    """Check that deltas made by one implementation apply with another.

    ``make_delta`` and ``apply_delta`` are supplied per scenario, see
    ``two_way_scenarios()``.
    """

    scenarios = two_way_scenarios()

    make_delta = None # Set by load_tests
    apply_delta = None # Set by load_tests

    def assertMakeAndApply(self, source, target):
        """Assert that a delta from source to target rebuilds target."""
        delta = self.make_delta(source, target)
        rebuilt = self.apply_delta(source, delta)
        self.assertEqualDiff(target, rebuilt)

    def test_direct(self):
        # Round-trip every ordered pair of the three sample texts.
        round_trips = [
            (_text1, _text2),
            (_text2, _text1),
            (_text1, _text3),
            (_text3, _text1),
            (_text2, _text3),
            (_text3, _text2),
            ]
        for source, target in round_trips:
            self.assertMakeAndApply(source, target)
251
252
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
253
class TestDeltaIndex(tests.TestCase):
    """Tests for the DeltaIndex class of the compiled groupcompress module."""

    def setUp(self):
        super(TestDeltaIndex, self).setUp()
        # This test isn't multiplied, because we only have DeltaIndex for the
        # compiled form
        # We call this here, because _test_needs_features happens after setUp
        self.requireFeature(compiled_groupcompress_feature)
        self._gc_module = compiled_groupcompress_feature.module

    def _occupied_entries(self, entry_list):
        """Return (index, text_offset, hash_val) for each non-empty slot.

        Slots whose text offset and hash value are both zero are skipped.
        """
        occupied = []
        for slot, (text_offset, hash_val) in enumerate(entry_list):
            if text_offset == 0 and hash_val == 0:
                continue
            occupied.append((slot, text_offset, hash_val))
        return occupied

    def test_repr(self):
        index = self._gc_module.DeltaIndex('test text\n')
        self.assertEqual('DeltaIndex(1, 10)', repr(index))

    def test__dump_no_index(self):
        index = self._gc_module.DeltaIndex()
        self.assertEqual(None, index._dump_index())

    def test__dump_index_simple(self):
        index = self._gc_module.DeltaIndex()
        index.add_source(_text1, 0)
        # Adding the first source does not build the index yet.
        self.assertFalse(index._has_index())
        self.assertEqual(None, index._dump_index())
        # The delta itself is irrelevant, we only want the index built.
        _ = index.make_delta(_text1)
        self.assertTrue(index._has_index())
        hash_list, entry_list = index._dump_index()
        self.assertEqual(16, len(hash_list))
        self.assertEqual(68, len(entry_list))
        rabin_hash = self._gc_module._rabin_hash
        occupied = self._occupied_entries(entry_list)
        expected = [
            (8, 16, rabin_hash(_text1[1:17])),
            (25, 48, rabin_hash(_text1[33:49])),
            (34, 32, rabin_hash(_text1[17:33])),
            (47, 64, rabin_hash(_text1[49:65])),
            ]
        self.assertEqual(expected, occupied)
        # This ensures that the hash map points to the location we expect it to
        for slot, _offset, hash_val in occupied:
            self.assertEqual(slot, hash_list[hash_val & 0xf])

    def test__dump_index_two_sources(self):
        index = self._gc_module.DeltaIndex()
        index.add_source(_text1, 0)
        index.add_source(_text2, 2)
        # Offsets of the second source follow the first source plus the
        # two unadded bytes passed to add_source.
        start2 = len(_text1) + 2
        self.assertTrue(index._has_index())
        hash_list, entry_list = index._dump_index()
        self.assertEqual(16, len(hash_list))
        self.assertEqual(68, len(entry_list))
        rabin_hash = self._gc_module._rabin_hash
        occupied = self._occupied_entries(entry_list)
        expected = [
            (8, 16, rabin_hash(_text1[1:17])),
            (9, start2+16, rabin_hash(_text2[1:17])),
            (25, 48, rabin_hash(_text1[33:49])),
            (30, start2+64, rabin_hash(_text2[49:65])),
            (34, 32, rabin_hash(_text1[17:33])),
            (35, start2+32, rabin_hash(_text2[17:33])),
            (43, start2+48, rabin_hash(_text2[33:49])),
            (47, 64, rabin_hash(_text1[49:65])),
            ]
        self.assertEqual(expected, occupied)
        # Each entry should be in the appropriate hash bucket.
        for slot, _offset, hash_val in occupied:
            bucket = hash_val & 0xf
            self.assertTrue(
                hash_list[bucket] <= slot < hash_list[bucket+1])

    def test_first_add_source_doesnt_index_until_make_delta(self):
        index = self._gc_module.DeltaIndex()
        self.assertFalse(index._has_index())
        index.add_source(_text1, 0)
        self.assertFalse(index._has_index())
        # However, asking to make a delta will trigger the index to be
        # generated, and will generate a proper delta
        delta = index.make_delta(_text2)
        self.assertTrue(index._has_index())
        expected = 'N\x90/\x1fdiffer from\nagainst other text\n'
        self.assertEqual(expected, delta)

    def test_add_source_max_bytes_to_index(self):
        index = self._gc_module.DeltaIndex()
        index._max_bytes_to_index = 3*16
        index.add_source(_text1, 0) # (77 bytes -1) // 3 = 25 byte stride
        index.add_source(_text3, 3) # (135 bytes -1) // 3 = 44 byte stride
        start2 = len(_text1) + 3
        hash_list, entry_list = index._dump_index()
        self.assertEqual(16, len(hash_list))
        self.assertEqual(67, len(entry_list))
        # Slot positions are not checked here, only offsets and hashes.
        offsets_and_hashes = sorted(
            (text_offset, hash_val)
            for _slot, text_offset, hash_val
            in self._occupied_entries(entry_list))
        rabin_hash = self._gc_module._rabin_hash
        expected = [
            (25, rabin_hash(_text1[10:26])),
            (50, rabin_hash(_text1[35:51])),
            (75, rabin_hash(_text1[60:76])),
            (start2+44, rabin_hash(_text3[29:45])),
            (start2+88, rabin_hash(_text3[73:89])),
            (start2+132, rabin_hash(_text3[117:133])),
            ]
        self.assertEqual(expected, offsets_and_hashes)

    def test_second_add_source_triggers_make_index(self):
        index = self._gc_module.DeltaIndex()
        self.assertFalse(index._has_index())
        index.add_source(_text1, 0)
        self.assertFalse(index._has_index())
        index.add_source(_text2, 0)
        self.assertTrue(index._has_index())

    def test_make_delta(self):
        index = self._gc_module.DeltaIndex(_text1)
        delta = index.make_delta(_text2)
        expected = 'N\x90/\x1fdiffer from\nagainst other text\n'
        self.assertEqual(expected, delta)

    def test_delta_against_multiple_sources(self):
        index = self._gc_module.DeltaIndex()
        index.add_source(_first_text, 0)
        self.assertEqual(len(_first_text), index._source_offset)
        index.add_source(_second_text, 0)
        combined_length = len(_first_text) + len(_second_text)
        self.assertEqual(combined_length, index._source_offset)
        delta = index.make_delta(_third_text)
        combined_source = _first_text + _second_text
        result = self._gc_module.apply_delta(combined_source, delta)
        self.assertEqualDiff(_third_text, result)
        expected = ('\x85\x01\x90\x14\x0chas some in '
                    '\x91v6\x03and\x91d"\x91:\n')
        self.assertEqual(expected, delta)

    def test_delta_with_offsets(self):
        index = self._gc_module.DeltaIndex()
        index.add_source(_first_text, 5)
        self.assertEqual(len(_first_text) + 5, index._source_offset)
        index.add_source(_second_text, 10)
        self.assertEqual(len(_first_text) + len(_second_text) + 15,
                         index._source_offset)
        delta = index.make_delta(_third_text)
        self.assertIsNot(None, delta)
        # Filler bytes stand in for the 5 and 10 unadded bytes declared
        # when the sources were added.
        padded_source = '12345' + _first_text + '1234567890' + _second_text
        result = self._gc_module.apply_delta(padded_source, delta)
        self.assertIsNot(None, result)
        self.assertEqualDiff(_third_text, result)
        expected = ('\x85\x01\x91\x05\x14\x0chas some in '
                    '\x91\x856\x03and\x91s"\x91?\n')
        self.assertEqual(expected, delta)

    def test_delta_with_delta_bytes(self):
        apply_delta = self._gc_module.apply_delta
        index = self._gc_module.DeltaIndex()
        # 'source' mirrors everything that has been added to the index.
        source = _first_text
        index.add_source(_first_text, 0)
        self.assertEqual(len(_first_text), index._source_offset)
        delta = index.make_delta(_second_text)
        self.assertEqual('h\tsome more\x91\x019'
                         '&previous text\nand has some extra text\n', delta)
        index.add_delta_source(delta, 0)
        source += delta
        self.assertEqual(len(_first_text) + len(delta), index._source_offset)
        second_delta = index.make_delta(_third_text)
        result = apply_delta(source, second_delta)
        self.assertEqualDiff(_third_text, result)
        # We should be able to match against the
        # 'previous text\nand has some...'  that was part of the delta bytes
        # Note that we don't match the 'common with the', because it isn't long
        # enough to match in the original text, and those bytes are not present
        # in the delta for the second text.
        self.assertEqual('\x85\x01\x90\x14\x1chas some in common with the '
                         '\x91S&\x03and\x91\x18,', second_delta)
        # Add this delta, and create a new delta for the same text. We should
        # find the remaining text, and only insert the short 'and' text.
        index.add_delta_source(second_delta, 0)
        source += second_delta
        third_delta = index.make_delta(_third_text)
        result = apply_delta(source, third_delta)
        self.assertEqualDiff(_third_text, result)
        self.assertEqual('\x85\x01\x90\x14\x91\x7e\x1c'
                         '\x91S&\x03and\x91\x18,', third_delta)
        # Now create a delta, which we know won't be able to be 'fit' into the
        # existing index
        fourth_delta = index.make_delta(_fourth_text)
        self.assertEqual(_fourth_text, apply_delta(source, fourth_delta))
        self.assertEqual('\x80\x01'
                         '\x7f123456789012345\nsame rabin hash\n'
                         '123456789012345\nsame rabin hash\n'
                         '123456789012345\nsame rabin hash\n'
                         '123456789012345\nsame rabin hash'
                         '\x01\n', fourth_delta)
        index.add_delta_source(fourth_delta, 0)
        source += fourth_delta
        # With the next delta, everything should be found
        fifth_delta = index.make_delta(_fourth_text)
        self.assertEqual(_fourth_text, apply_delta(source, fifth_delta))
        self.assertEqual('\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
446
447
448
class TestCopyInstruction(tests.TestCase):
    """Tests for _groupcompress_py's copy instruction encoding and decoding."""

    def assertEncode(self, expected, offset, length):
        """Check that encoding a copy of (offset, length) gives expected.

        When the strings differ, they are compared again as lists of per-byte
        hex values so that the failure shows each byte explicitly.
        """
        encoded = _groupcompress_py.encode_copy_instruction(offset, length)
        if expected != encoded:
            expected_hex = [hex(ord(char)) for char in expected]
            encoded_hex = [hex(ord(char)) for char in encoded]
            self.assertEqual(expected_hex, encoded_hex)

    def assertDecode(self, exp_offset, exp_length, exp_newpos, bytes, pos):
        """Check the result of decoding the copy instruction at bytes[pos].

        The command byte is read from bytes[pos], and the decoder is handed
        the position just past it. The decoder's result must equal
        (exp_offset, exp_length, exp_newpos).
        """
        command = ord(bytes[pos])
        decoded = _groupcompress_py.decode_copy_instruction(
            bytes, command, pos + 1)
        self.assertEqual((exp_offset, exp_length, exp_newpos), decoded)

    def test_encode_no_length(self):
        """Encode copies of exactly 64KiB from a variety of offsets."""
        cases = [
            ('\x80', 0),
            ('\x81\x01', 1),
            ('\x81\x0a', 10),
            ('\x81\xff', 255),
            ('\x82\x01', 256),
            ('\x83\x01\x01', 257),
            ('\x8F\xff\xff\xff\xff', 0xFFFFFFFF),
            ('\x8E\xff\xff\xff', 0xFFFFFF00),
            ('\x8D\xff\xff\xff', 0xFFFF00FF),
            ('\x8B\xff\xff\xff', 0xFF00FFFF),
            ('\x87\xff\xff\xff', 0x00FFFFFF),
            ('\x8F\x04\x03\x02\x01', 0x01020304),
            ]
        for expected, offset in cases:
            self.assertEncode(expected, offset, 64*1024)

    def test_encode_no_offset(self):
        """Encode copies of various lengths that all start at offset 0."""
        cases = [
            ('\x90\x01', 1),
            ('\x90\x0a', 10),
            ('\x90\xff', 255),
            ('\xA0\x01', 256),
            ('\xB0\x01\x01', 257),
            ('\xB0\xff\xff', 0xFFFF),
            ]
        for expected, length in cases:
            self.assertEncode(expected, 0, length)
        # A copy of exactly 64KiB is special: its length is stored as 0.
        # That is ambiguous with a copy of 0 bytes, which does not matter
        # because a 0 byte copy would never actually be emitted.
        self.assertEncode('\x80', 0, 64*1024)

    def test_encode(self):
        """Encode copies where both the offset and the length vary."""
        cases = [
            ('\x91\x01\x01', 1, 1),
            ('\x91\x09\x0a', 9, 10),
            ('\x91\xfe\xff', 254, 255),
            ('\xA2\x02\x01', 512, 256),
            ('\xB3\x02\x01\x01\x01', 258, 257),
            ('\xB0\x01\x01', 0, 257),
            ]
        for expected, offset, length in cases:
            self.assertEncode(expected, offset, length)
        # A copy of exactly 64KiB is special: its length is stored as 0.
        # That is ambiguous with a copy of 0 bytes, which does not matter
        # because a 0 byte copy would never actually be emitted.
        self.assertEncode('\x81\x0a', 10, 64*1024)

    def test_decode_no_length(self):
        """Decode instructions that carry no length bytes."""
        # A length of 0 is interpreted as 64KiB, so the shortest possible
        # instruction is a copy of 64KiB from offset 0.
        cases = [
            (0, 1, '\x80'),
            (1, 2, '\x81\x01'),
            (10, 2, '\x81\x0a'),
            (255, 2, '\x81\xff'),
            (256, 2, '\x82\x01'),
            (257, 3, '\x83\x01\x01'),
            (0xFFFFFFFF, 5, '\x8F\xff\xff\xff\xff'),
            (0xFFFFFF00, 4, '\x8E\xff\xff\xff'),
            (0xFFFF00FF, 4, '\x8D\xff\xff\xff'),
            (0xFF00FFFF, 4, '\x8B\xff\xff\xff'),
            (0x00FFFFFF, 4, '\x87\xff\xff\xff'),
            (0x01020304, 5, '\x8F\x04\x03\x02\x01'),
            ]
        for offset, newpos, data in cases:
            self.assertDecode(offset, 65536, newpos, data, 0)

    def test_decode_no_offset(self):
        """Decode instructions that carry no offset bytes."""
        cases = [
            (1, 2, '\x90\x01'),
            (10, 2, '\x90\x0a'),
            (255, 2, '\x90\xff'),
            (256, 2, '\xA0\x01'),
            (257, 3, '\xB0\x01\x01'),
            (65535, 3, '\xB0\xff\xff'),
            ]
        for length, newpos, data in cases:
            self.assertDecode(0, length, newpos, data, 0)
        # A copy of exactly 64KiB is special: its length is stored as 0.
        # That is ambiguous with a copy of 0 bytes, which does not matter
        # because a 0 byte copy would never actually be emitted.
        self.assertDecode(0, 65536, 1, '\x80', 0)

    def test_decode(self):
        """Decode instructions that carry both offset and length bytes."""
        cases = [
            (1, 1, 3, '\x91\x01\x01'),
            (9, 10, 3, '\x91\x09\x0a'),
            (254, 255, 3, '\x91\xfe\xff'),
            (512, 256, 3, '\xA2\x02\x01'),
            (258, 257, 5, '\xB3\x02\x01\x01\x01'),
            (0, 257, 3, '\xB0\x01\x01'),
            ]
        for offset, length, newpos, data in cases:
            self.assertDecode(offset, length, newpos, data, 0)

    def test_decode_not_start(self):
        """Decode instructions that do not begin at the start of the string."""
        cases = [
            (1, 1, 6, 'abc\x91\x01\x01def', 3),
            (9, 10, 5, 'ab\x91\x09\x0ade', 2),
            (254, 255, 6, 'not\x91\xfe\xffcopy', 3),
            ]
        for offset, length, newpos, data, start in cases:
            self.assertDecode(offset, length, newpos, data, start)
540
541
542
class TestBase128Int(tests.TestCase):
    """Tests for encode_base128_int and decode_base128_int.

    The functions are looked up on self._gc_module, so each scenario runs
    these tests against the module it supplies.
    """

    scenarios = module_scenarios()

    _gc_module = None # Set by load_tests

    def assertEqualEncode(self, bytes, val):
        """Check that encoding the integer val produces the string bytes."""
        encoded = self._gc_module.encode_base128_int(val)
        self.assertEqual(bytes, encoded)

    def assertEqualDecode(self, val, num_decode, bytes):
        """Check that decoding bytes returns the pair (val, num_decode)."""
        decoded = self._gc_module.decode_base128_int(bytes)
        self.assertEqual((val, num_decode), decoded)

    def test_encode(self):
        """Encode values on both sides of the one-byte boundary, up to 32 bits."""
        cases = [
            ('\x01', 1),
            ('\x02', 2),
            ('\x7f', 127),
            ('\x80\x01', 128),
            ('\xff\x01', 255),
            ('\x80\x02', 256),
            ('\xff\xff\xff\xff\x0f', 0xFFFFFFFF),
            ]
        for expected, value in cases:
            self.assertEqualEncode(expected, value)

    def test_decode(self):
        """Decode strings that consist of exactly one encoded integer."""
        cases = [
            (1, 1, '\x01'),
            (2, 1, '\x02'),
            (127, 1, '\x7f'),
            (128, 2, '\x80\x01'),
            (255, 2, '\xff\x01'),
            (256, 2, '\x80\x02'),
            (0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f'),
            ]
        for value, consumed, data in cases:
            self.assertEqualDecode(value, consumed, data)

    def test_decode_with_trailing_bytes(self):
        """Decode strings that have extra content after the encoded integer."""
        cases = [
            (1, 1, '\x01abcdef'),
            (127, 1, '\x7f\x01'),
            (128, 2, '\x80\x01abcdef'),
            (255, 2, '\xff\x01\xff'),
            ]
        for value, consumed, data in cases:
            self.assertEqualDecode(value, consumed, data)
578
579