~bzr-pqm/bzr/bzr.dev

5557.1.7 by John Arbash Meinel
Merge in the bzr.dev 5582
1
# Copyright (C) 2008-2011 Canonical Ltd
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
2
#
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
3
# This program is free software; you can redistribute it and/or modify
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
7
#
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11
# GNU General Public License for more details.
3735.31.2 by John Arbash Meinel
Cleanup trailing whitespace, get test_source to pass by removing asserts.
12
#
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
3735.36.3 by John Arbash Meinel
Add the new address for FSF to the new files.
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
16
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
17
"""Tests for the python and pyrex extensions of groupcompress"""
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
18
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
19
from bzrlib import (
20
    _groupcompress_py,
21
    tests,
22
    )
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
23
from bzrlib.tests.scenarios import (
24
    load_tests_apply_scenarios,
25
    )
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
26
from bzrlib.tests import (
27
    features,
28
    )
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
29
30
31
def module_scenarios():
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
32
    scenarios = [
33
        ('python', {'_gc_module': _groupcompress_py}),
34
        ]
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
35
    if compiled_groupcompress_feature.available():
36
        gc_module = compiled_groupcompress_feature.module
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
37
        scenarios.append(('C',
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
38
            {'_gc_module': gc_module}))
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
39
    return scenarios
40
41
42
def two_way_scenarios():
43
    scenarios = [
44
        ('PP', {'make_delta': _groupcompress_py.make_delta,
45
                'apply_delta': _groupcompress_py.apply_delta})
46
        ]
47
    if compiled_groupcompress_feature.available():
48
        gc_module = compiled_groupcompress_feature.module
49
        scenarios.extend([
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
50
            ('CC', {'make_delta': gc_module.make_delta,
51
                    'apply_delta': gc_module.apply_delta}),
3735.40.12 by John Arbash Meinel
Add tests that the deltas generated by one implementation are compatible
52
            ('PC', {'make_delta': _groupcompress_py.make_delta,
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
53
                    'apply_delta': gc_module.apply_delta}),
54
            ('CP', {'make_delta': gc_module.make_delta,
3735.40.12 by John Arbash Meinel
Add tests that the deltas generated by one implementation are compatible
55
                    'apply_delta': _groupcompress_py.apply_delta}),
56
            ])
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
57
    return scenarios
58
59
60
load_tests = load_tests_apply_scenarios
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
61
62
5967.12.1 by Martin Pool
Move all test features into bzrlib.tests.features
63
compiled_groupcompress_feature = features.ModuleAvailableFeature(
64
    'bzrlib._groupcompress_pyx')
0.18.15 by John Arbash Meinel
Start writing tests directly for the compiled class
65
0.23.8 by John Arbash Meinel
Add another test text.
66
_text1 = """\
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
67
This is a bit
68
of source text
69
which is meant to be matched
70
against other text
71
"""
72
0.23.8 by John Arbash Meinel
Add another test text.
73
_text2 = """\
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
74
This is a bit
75
of source text
76
which is meant to differ from
77
against other text
78
"""
79
0.23.8 by John Arbash Meinel
Add another test text.
80
_text3 = """\
81
This is a bit
82
of source text
83
which is meant to be matched
84
against other text
85
except it also
86
has a lot more data
87
at the end of the file
88
"""
89
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
90
_first_text = """\
91
a bit of text, that
92
does not have much in
93
common with the next text
94
"""
95
96
_second_text = """\
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
97
some more bit of text, that
98
does not have much in
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
99
common with the previous text
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
100
and has some extra text
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
101
"""
102
103
104
_third_text = """\
105
a bit of text, that
106
has some in common with the previous text
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
107
and has some extra text
108
and not have much in
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
109
common with the next text
110
"""
111
3735.33.4 by John Arbash Meinel
The new layout is working.
112
_fourth_text = """\
113
123456789012345
114
same rabin hash
115
123456789012345
116
same rabin hash
117
123456789012345
118
same rabin hash
119
123456789012345
120
same rabin hash
121
"""
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
122
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
123
class TestMakeAndApplyDelta(tests.TestCase):
124
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
125
    scenarios = module_scenarios()
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
126
    _gc_module = None # Set by load_tests
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
127
128
    def setUp(self):
129
        super(TestMakeAndApplyDelta, self).setUp()
130
        self.make_delta = self._gc_module.make_delta
131
        self.apply_delta = self._gc_module.apply_delta
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
132
        self.apply_delta_to_source = self._gc_module.apply_delta_to_source
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
133
134
    def test_make_delta_is_typesafe(self):
135
        self.make_delta('a string', 'another string')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
136
137
        def _check_make_delta(string1, string2):
138
            self.assertRaises(TypeError, self.make_delta, string1, string2)
139
140
        _check_make_delta('a string', object())
141
        _check_make_delta('a string', u'not a string')
142
        _check_make_delta(object(), 'a string')
143
        _check_make_delta(u'not a string', 'a string')
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
144
145
    def test_make_noop_delta(self):
0.23.8 by John Arbash Meinel
Add another test text.
146
        ident_delta = self.make_delta(_text1, _text1)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
147
        self.assertEqual('M\x90M', ident_delta)
0.23.8 by John Arbash Meinel
Add another test text.
148
        ident_delta = self.make_delta(_text2, _text2)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
149
        self.assertEqual('N\x90N', ident_delta)
0.23.8 by John Arbash Meinel
Add another test text.
150
        ident_delta = self.make_delta(_text3, _text3)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
151
        self.assertEqual('\x87\x01\x90\x87', ident_delta)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
152
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
153
    def assertDeltaIn(self, delta1, delta2, delta):
154
        """Make sure that the delta bytes match one of the expectations."""
155
        # In general, the python delta matcher gives different results than the
156
        # pyrex delta matcher. Both should be valid deltas, though.
157
        if delta not in (delta1, delta2):
158
            self.fail("Delta bytes:\n"
159
                      "       %r\n"
160
                      "not in %r\n"
161
                      "    or %r"
162
                      % (delta, delta1, delta2))
163
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
164
    def test_make_delta(self):
0.23.8 by John Arbash Meinel
Add another test text.
165
        delta = self.make_delta(_text1, _text2)
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
166
        self.assertDeltaIn(
167
            'N\x90/\x1fdiffer from\nagainst other text\n',
168
            'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
169
            delta)
0.23.8 by John Arbash Meinel
Add another test text.
170
        delta = self.make_delta(_text2, _text1)
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
171
        self.assertDeltaIn(
172
            'M\x90/\x1ebe matched\nagainst other text\n',
173
            'M\x90\x1d\x1dwhich is meant to be matched\n\x91;\x13',
174
            delta)
0.23.8 by John Arbash Meinel
Add another test text.
175
        delta = self.make_delta(_text3, _text1)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
176
        self.assertEqual('M\x90M', delta)
0.23.8 by John Arbash Meinel
Add another test text.
177
        delta = self.make_delta(_text3, _text2)
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
178
        self.assertDeltaIn(
179
            'N\x90/\x1fdiffer from\nagainst other text\n',
180
            'N\x90\x1d\x1ewhich is meant to differ from\n\x91:\x13',
181
            delta)
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
182
4300.2.1 by John Arbash Meinel
Fix bug #364900, properly remove the 64kB that was just encoded in the copy.
183
    def test_make_delta_with_large_copies(self):
184
        # We want to have a copy that is larger than 64kB, which forces us to
185
        # issue multiple copy instructions.
186
        big_text = _text3 * 1220
187
        delta = self.make_delta(big_text, big_text)
188
        self.assertDeltaIn(
189
            '\xdc\x86\x0a'      # Encoding the length of the uncompressed text
190
            '\x80'              # Copy 64kB, starting at byte 0
191
            '\x84\x01'          # and another 64kB starting at 64kB
192
            '\xb4\x02\x5c\x83', # And the bit of tail.
193
            None,   # Both implementations should be identical
194
            delta)
195
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
196
    def test_apply_delta_is_typesafe(self):
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
197
        self.apply_delta(_text1, 'M\x90M')
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
198
        self.assertRaises(TypeError, self.apply_delta, object(), 'M\x90M')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
199
        self.assertRaises(TypeError, self.apply_delta,
3735.40.10 by John Arbash Meinel
Merge in the new delta format code.
200
                          unicode(_text1), 'M\x90M')
201
        self.assertRaises(TypeError, self.apply_delta, _text1, u'M\x90M')
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
202
        self.assertRaises(TypeError, self.apply_delta, _text1, object())
0.23.6 by John Arbash Meinel
Start stripping out the actual GroupCompressor
203
204
    def test_apply_delta(self):
0.23.8 by John Arbash Meinel
Add another test text.
205
        target = self.apply_delta(_text1,
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
206
                    'N\x90/\x1fdiffer from\nagainst other text\n')
0.23.8 by John Arbash Meinel
Add another test text.
207
        self.assertEqual(_text2, target)
208
        target = self.apply_delta(_text2,
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
209
                    'M\x90/\x1ebe matched\nagainst other text\n')
0.23.8 by John Arbash Meinel
Add another test text.
210
        self.assertEqual(_text1, target)
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
211
3735.40.19 by John Arbash Meinel
Implement apply_delta_to_source which doesn't have to malloc another string.
212
    def test_apply_delta_to_source_is_safe(self):
213
        self.assertRaises(TypeError,
214
            self.apply_delta_to_source, object(), 0, 1)
215
        self.assertRaises(TypeError,
216
            self.apply_delta_to_source, u'unicode str', 0, 1)
217
        # end > length
218
        self.assertRaises(ValueError,
219
            self.apply_delta_to_source, 'foo', 1, 4)
220
        # start > length
221
        self.assertRaises(ValueError,
222
            self.apply_delta_to_source, 'foo', 5, 3)
223
        # start > end
224
        self.assertRaises(ValueError,
225
            self.apply_delta_to_source, 'foo', 3, 2)
226
227
    def test_apply_delta_to_source(self):
228
        source_and_delta = (_text1
229
                            + 'N\x90/\x1fdiffer from\nagainst other text\n')
230
        self.assertEqual(_text2, self.apply_delta_to_source(source_and_delta,
231
                                    len(_text1), len(source_and_delta)))
232
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
233
3735.40.12 by John Arbash Meinel
Add tests that the deltas generated by one implementation are compatible
234
class TestMakeAndApplyCompatible(tests.TestCase):
235
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
236
    scenarios = two_way_scenarios()
237
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
238
    make_delta = None # Set by load_tests
239
    apply_delta = None # Set by load_tests
3735.40.12 by John Arbash Meinel
Add tests that the deltas generated by one implementation are compatible
240
241
    def assertMakeAndApply(self, source, target):
242
        """Assert that generating a delta and applying gives success."""
243
        delta = self.make_delta(source, target)
244
        bytes = self.apply_delta(source, delta)
245
        self.assertEqualDiff(target, bytes)
246
247
    def test_direct(self):
248
        self.assertMakeAndApply(_text1, _text2)
249
        self.assertMakeAndApply(_text2, _text1)
250
        self.assertMakeAndApply(_text1, _text3)
251
        self.assertMakeAndApply(_text3, _text1)
252
        self.assertMakeAndApply(_text2, _text3)
253
        self.assertMakeAndApply(_text3, _text2)
254
255
3735.40.5 by John Arbash Meinel
Start adding permutation tests for _groupcompress_py and _groupcompress_pyx
256
class TestDeltaIndex(tests.TestCase):
257
258
    def setUp(self):
259
        super(TestDeltaIndex, self).setUp()
260
        # This test isn't multiplied, because we only have DeltaIndex for the
261
        # compiled form
262
        # We call this here, because _test_needs_features happens after setUp
4913.2.20 by John Arbash Meinel
Change all of the compiled_foo to compiled_foo_feature
263
        self.requireFeature(compiled_groupcompress_feature)
264
        self._gc_module = compiled_groupcompress_feature.module
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
265
266
    def test_repr(self):
267
        di = self._gc_module.DeltaIndex('test text\n')
0.23.43 by John Arbash Meinel
Change the internals to allow delta indexes to be expanded with new source data.
268
        self.assertEqual('DeltaIndex(1, 10)', repr(di))
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
269
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
270
    def test__dump_no_index(self):
271
        di = self._gc_module.DeltaIndex()
272
        self.assertEqual(None, di._dump_index())
273
274
    def test__dump_index_simple(self):
275
        di = self._gc_module.DeltaIndex()
276
        di.add_source(_text1, 0)
277
        self.assertFalse(di._has_index())
278
        self.assertEqual(None, di._dump_index())
279
        _ = di.make_delta(_text1)
280
        self.assertTrue(di._has_index())
281
        hash_list, entry_list = di._dump_index()
282
        self.assertEqual(16, len(hash_list))
283
        self.assertEqual(68, len(entry_list))
284
        just_entries = [(idx, text_offset, hash_val)
285
                        for idx, (text_offset, hash_val)
286
                         in enumerate(entry_list)
287
                         if text_offset != 0 or hash_val != 0]
5755.2.8 by John Arbash Meinel
Do a lot of renaming.
288
        rabin_hash = self._gc_module._rabin_hash
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
289
        self.assertEqual([(8, 16, rabin_hash(_text1[1:17])),
290
                          (25, 48, rabin_hash(_text1[33:49])),
291
                          (34, 32, rabin_hash(_text1[17:33])),
292
                          (47, 64, rabin_hash(_text1[49:65])),
293
                         ], just_entries)
294
        # This ensures that the hash map points to the location we expect it to
295
        for entry_idx, text_offset, hash_val in just_entries:
296
            self.assertEqual(entry_idx, hash_list[hash_val & 0xf])
297
298
    def test__dump_index_two_sources(self):
299
        di = self._gc_module.DeltaIndex()
300
        di.add_source(_text1, 0)
301
        di.add_source(_text2, 2)
302
        start2 = len(_text1) + 2
303
        self.assertTrue(di._has_index())
304
        hash_list, entry_list = di._dump_index()
305
        self.assertEqual(16, len(hash_list))
306
        self.assertEqual(68, len(entry_list))
307
        just_entries = [(idx, text_offset, hash_val)
308
                        for idx, (text_offset, hash_val)
309
                         in enumerate(entry_list)
310
                         if text_offset != 0 or hash_val != 0]
5755.2.8 by John Arbash Meinel
Do a lot of renaming.
311
        rabin_hash = self._gc_module._rabin_hash
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
312
        self.assertEqual([(8, 16, rabin_hash(_text1[1:17])),
313
                          (9, start2+16, rabin_hash(_text2[1:17])),
314
                          (25, 48, rabin_hash(_text1[33:49])),
315
                          (30, start2+64, rabin_hash(_text2[49:65])),
316
                          (34, 32, rabin_hash(_text1[17:33])),
317
                          (35, start2+32, rabin_hash(_text2[17:33])),
318
                          (43, start2+48, rabin_hash(_text2[33:49])),
319
                          (47, 64, rabin_hash(_text1[49:65])),
320
                         ], just_entries)
321
        # Each entry should be in the appropriate hash bucket.
322
        for entry_idx, text_offset, hash_val in just_entries:
323
            hash_idx = hash_val & 0xf
324
            self.assertTrue(
325
                hash_list[hash_idx] <= entry_idx < hash_list[hash_idx+1])
326
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
327
    def test_first_add_source_doesnt_index_until_make_delta(self):
328
        di = self._gc_module.DeltaIndex()
329
        self.assertFalse(di._has_index())
330
        di.add_source(_text1, 0)
331
        self.assertFalse(di._has_index())
332
        # However, asking to make a delta will trigger the index to be
333
        # generated, and will generate a proper delta
334
        delta = di.make_delta(_text2)
335
        self.assertTrue(di._has_index())
336
        self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
337
5755.2.8 by John Arbash Meinel
Do a lot of renaming.
338
    def test_add_source_max_bytes_to_index(self):
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
339
        di = self._gc_module.DeltaIndex()
5755.2.8 by John Arbash Meinel
Do a lot of renaming.
340
        di._max_bytes_to_index = 3*16
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
341
        di.add_source(_text1, 0) # (77 bytes -1) // 3 = 25 byte stride
342
        di.add_source(_text3, 3) # (135 bytes -1) // 3 = 44 byte stride
343
        start2 = len(_text1) + 3
344
        hash_list, entry_list = di._dump_index()
345
        self.assertEqual(16, len(hash_list))
346
        self.assertEqual(67, len(entry_list))
347
        just_entries = sorted([(text_offset, hash_val)
348
                               for text_offset, hash_val in entry_list
349
                                if text_offset != 0 or hash_val != 0])
5755.2.8 by John Arbash Meinel
Do a lot of renaming.
350
        rabin_hash = self._gc_module._rabin_hash
5755.2.3 by John Arbash Meinel
Add a max_entries_per_source to DeltaIndex
351
        self.assertEqual([(25, rabin_hash(_text1[10:26])),
352
                          (50, rabin_hash(_text1[35:51])),
353
                          (75, rabin_hash(_text1[60:76])),
354
                          (start2+44, rabin_hash(_text3[29:45])),
355
                          (start2+88, rabin_hash(_text3[73:89])),
356
                          (start2+132, rabin_hash(_text3[117:133])),
357
                         ], just_entries)
358
4398.6.1 by John Arbash Meinel
Change groupcompress.DeltaIndex to be lazy about indexing the first source.
359
    def test_second_add_source_triggers_make_index(self):
360
        di = self._gc_module.DeltaIndex()
361
        self.assertFalse(di._has_index())
362
        di.add_source(_text1, 0)
363
        self.assertFalse(di._has_index())
364
        di.add_source(_text2, 0)
365
        self.assertTrue(di._has_index())
366
0.23.14 by John Arbash Meinel
Implement a DeltaIndex wrapper.
367
    def test_make_delta(self):
368
        di = self._gc_module.DeltaIndex(_text1)
369
        delta = di.make_delta(_text2)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
370
        self.assertEqual('N\x90/\x1fdiffer from\nagainst other text\n', delta)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
371
372
    def test_delta_against_multiple_sources(self):
373
        di = self._gc_module.DeltaIndex()
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
374
        di.add_source(_first_text, 0)
375
        self.assertEqual(len(_first_text), di._source_offset)
376
        di.add_source(_second_text, 0)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
377
        self.assertEqual(len(_first_text) + len(_second_text),
378
                         di._source_offset)
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
379
        delta = di.make_delta(_third_text)
380
        result = self._gc_module.apply_delta(_first_text + _second_text, delta)
381
        self.assertEqualDiff(_third_text, result)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
382
        self.assertEqual('\x85\x01\x90\x14\x0chas some in '
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
383
                         '\x91v6\x03and\x91d"\x91:\n', delta)
0.23.25 by John Arbash Meinel
We are now able to add multiple sources to the delta generator.
384
0.23.26 by John Arbash Meinel
We now start to make use of the ability to extend the delta index
385
    def test_delta_with_offsets(self):
386
        di = self._gc_module.DeltaIndex()
387
        di.add_source(_first_text, 5)
388
        self.assertEqual(len(_first_text) + 5, di._source_offset)
389
        di.add_source(_second_text, 10)
390
        self.assertEqual(len(_first_text) + len(_second_text) + 15,
391
                         di._source_offset)
392
        delta = di.make_delta(_third_text)
393
        self.assertIsNot(None, delta)
394
        result = self._gc_module.apply_delta(
395
            '12345' + _first_text + '1234567890' + _second_text, delta)
396
        self.assertIsNot(None, result)
397
        self.assertEqualDiff(_third_text, result)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
398
        self.assertEqual('\x85\x01\x91\x05\x14\x0chas some in '
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
399
                         '\x91\x856\x03and\x91s"\x91?\n', delta)
400
401
    def test_delta_with_delta_bytes(self):
402
        di = self._gc_module.DeltaIndex()
3735.33.4 by John Arbash Meinel
The new layout is working.
403
        source = _first_text
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
404
        di.add_source(_first_text, 0)
405
        self.assertEqual(len(_first_text), di._source_offset)
406
        delta = di.make_delta(_second_text)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
407
        self.assertEqual('h\tsome more\x91\x019'
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
408
                         '&previous text\nand has some extra text\n', delta)
409
        di.add_delta_source(delta, 0)
3735.33.4 by John Arbash Meinel
The new layout is working.
410
        source += delta
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
411
        self.assertEqual(len(_first_text) + len(delta), di._source_offset)
3735.33.4 by John Arbash Meinel
The new layout is working.
412
        second_delta = di.make_delta(_third_text)
413
        result = self._gc_module.apply_delta(source, second_delta)
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
414
        self.assertEqualDiff(_third_text, result)
4241.6.6 by Robert Collins, John Arbash Meinel, Ian Clathworthy, Vincent Ladeuil
Groupcompress from brisbane-core.
415
        # We should be able to match against the
416
        # 'previous text\nand has some...'  that was part of the delta bytes
0.23.45 by John Arbash Meinel
Add a function that updates the index for delta bytes.
417
        # Note that we don't match the 'common with the', because it isn't long
418
        # enough to match in the original text, and those bytes are not present
419
        # in the delta for the second text.
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
420
        self.assertEqual('\x85\x01\x90\x14\x1chas some in common with the '
421
                         '\x91S&\x03and\x91\x18,', second_delta)
3735.33.4 by John Arbash Meinel
The new layout is working.
422
        # Add this delta, and create a new delta for the same text. We should
423
        # find the remaining text, and only insert the short 'and' text.
424
        di.add_delta_source(second_delta, 0)
425
        source += second_delta
426
        third_delta = di.make_delta(_third_text)
427
        result = self._gc_module.apply_delta(source, third_delta)
428
        self.assertEqualDiff(_third_text, result)
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
429
        self.assertEqual('\x85\x01\x90\x14\x91\x7e\x1c'
430
                         '\x91S&\x03and\x91\x18,', third_delta)
3735.33.4 by John Arbash Meinel
The new layout is working.
431
        # Now create a delta, which we know won't be able to be 'fit' into the
432
        # existing index
433
        fourth_delta = di.make_delta(_fourth_text)
434
        self.assertEqual(_fourth_text,
435
                         self._gc_module.apply_delta(source, fourth_delta))
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
436
        self.assertEqual('\x80\x01'
3735.33.4 by John Arbash Meinel
The new layout is working.
437
                         '\x7f123456789012345\nsame rabin hash\n'
438
                         '123456789012345\nsame rabin hash\n'
439
                         '123456789012345\nsame rabin hash\n'
440
                         '123456789012345\nsame rabin hash'
441
                         '\x01\n', fourth_delta)
442
        di.add_delta_source(fourth_delta, 0)
443
        source += fourth_delta
444
        # With the next delta, everything should be found
445
        fifth_delta = di.make_delta(_fourth_text)
446
        self.assertEqual(_fourth_text,
447
                         self._gc_module.apply_delta(source, fifth_delta))
3735.38.1 by John Arbash Meinel
Change the delta byte stream to remove the 'source length' entry.
448
        self.assertEqual('\x80\x01\x91\xa7\x7f\x01\n', fifth_delta)
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
449
450
451
class TestCopyInstruction(tests.TestCase):
452
453
    def assertEncode(self, expected, offset, length):
454
        bytes = _groupcompress_py.encode_copy_instruction(offset, length)
455
        if expected != bytes:
456
            self.assertEqual([hex(ord(e)) for e in expected],
457
                             [hex(ord(b)) for b in bytes])
458
459
    def assertDecode(self, exp_offset, exp_length, exp_newpos, bytes, pos):
460
        cmd = ord(bytes[pos])
461
        pos += 1
462
        out = _groupcompress_py.decode_copy_instruction(bytes, cmd, pos)
463
        self.assertEqual((exp_offset, exp_length, exp_newpos), out)
464
465
    def test_encode_no_length(self):
4300.2.1 by John Arbash Meinel
Fix bug #364900, properly remove the 64kB that was just encoded in the copy.
466
        self.assertEncode('\x80', 0, 64*1024)
467
        self.assertEncode('\x81\x01', 1, 64*1024)
468
        self.assertEncode('\x81\x0a', 10, 64*1024)
469
        self.assertEncode('\x81\xff', 255, 64*1024)
470
        self.assertEncode('\x82\x01', 256, 64*1024)
471
        self.assertEncode('\x83\x01\x01', 257, 64*1024)
472
        self.assertEncode('\x8F\xff\xff\xff\xff', 0xFFFFFFFF, 64*1024)
473
        self.assertEncode('\x8E\xff\xff\xff', 0xFFFFFF00, 64*1024)
474
        self.assertEncode('\x8D\xff\xff\xff', 0xFFFF00FF, 64*1024)
475
        self.assertEncode('\x8B\xff\xff\xff', 0xFF00FFFF, 64*1024)
476
        self.assertEncode('\x87\xff\xff\xff', 0x00FFFFFF, 64*1024)
477
        self.assertEncode('\x8F\x04\x03\x02\x01', 0x01020304, 64*1024)
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
478
479
    def test_encode_no_offset(self):
480
        self.assertEncode('\x90\x01', 0, 1)
481
        self.assertEncode('\x90\x0a', 0, 10)
482
        self.assertEncode('\x90\xff', 0, 255)
483
        self.assertEncode('\xA0\x01', 0, 256)
484
        self.assertEncode('\xB0\x01\x01', 0, 257)
485
        self.assertEncode('\xB0\xff\xff', 0, 0xFFFF)
486
        # Special case, if copy == 64KiB, then we store exactly 0
487
        # Note that this puns with a copy of exactly 0 bytes, but we don't care
488
        # about that, as we would never actually copy 0 bytes
489
        self.assertEncode('\x80', 0, 64*1024)
490
491
    def test_encode(self):
492
        self.assertEncode('\x91\x01\x01', 1, 1)
493
        self.assertEncode('\x91\x09\x0a', 9, 10)
494
        self.assertEncode('\x91\xfe\xff', 254, 255)
495
        self.assertEncode('\xA2\x02\x01', 512, 256)
496
        self.assertEncode('\xB3\x02\x01\x01\x01', 258, 257)
497
        self.assertEncode('\xB0\x01\x01', 0, 257)
498
        # Special case, if copy == 64KiB, then we store exactly 0
499
        # Note that this puns with a copy of exactly 0 bytes, but we don't care
500
        # about that, as we would never actually copy 0 bytes
501
        self.assertEncode('\x81\x0a', 10, 64*1024)
502
503
    def test_decode_no_length(self):
504
        # If length is 0, it is interpreted as 64KiB
505
        # The shortest possible instruction is a copy of 64KiB from offset 0
506
        self.assertDecode(0, 65536, 1, '\x80', 0)
507
        self.assertDecode(1, 65536, 2, '\x81\x01', 0)
508
        self.assertDecode(10, 65536, 2, '\x81\x0a', 0)
509
        self.assertDecode(255, 65536, 2, '\x81\xff', 0)
510
        self.assertDecode(256, 65536, 2, '\x82\x01', 0)
511
        self.assertDecode(257, 65536, 3, '\x83\x01\x01', 0)
512
        self.assertDecode(0xFFFFFFFF, 65536, 5, '\x8F\xff\xff\xff\xff', 0)
513
        self.assertDecode(0xFFFFFF00, 65536, 4, '\x8E\xff\xff\xff', 0)
514
        self.assertDecode(0xFFFF00FF, 65536, 4, '\x8D\xff\xff\xff', 0)
515
        self.assertDecode(0xFF00FFFF, 65536, 4, '\x8B\xff\xff\xff', 0)
516
        self.assertDecode(0x00FFFFFF, 65536, 4, '\x87\xff\xff\xff', 0)
517
        self.assertDecode(0x01020304, 65536, 5, '\x8F\x04\x03\x02\x01', 0)
518
519
    def test_decode_no_offset(self):
520
        self.assertDecode(0, 1, 2, '\x90\x01', 0)
521
        self.assertDecode(0, 10, 2, '\x90\x0a', 0)
522
        self.assertDecode(0, 255, 2, '\x90\xff', 0)
523
        self.assertDecode(0, 256, 2, '\xA0\x01', 0)
524
        self.assertDecode(0, 257, 3, '\xB0\x01\x01', 0)
525
        self.assertDecode(0, 65535, 3, '\xB0\xff\xff', 0)
526
        # Special case, if copy == 64KiB, then we store exactly 0
527
        # Note that this puns with a copy of exactly 0 bytes, but we don't care
528
        # about that, as we would never actually copy 0 bytes
529
        self.assertDecode(0, 65536, 1, '\x80', 0)
530
531
    def test_decode(self):
532
        self.assertDecode(1, 1, 3, '\x91\x01\x01', 0)
533
        self.assertDecode(9, 10, 3, '\x91\x09\x0a', 0)
534
        self.assertDecode(254, 255, 3, '\x91\xfe\xff', 0)
535
        self.assertDecode(512, 256, 3, '\xA2\x02\x01', 0)
536
        self.assertDecode(258, 257, 5, '\xB3\x02\x01\x01\x01', 0)
537
        self.assertDecode(0, 257, 3, '\xB0\x01\x01', 0)
538
539
    def test_decode_not_start(self):
540
        self.assertDecode(1, 1, 6, 'abc\x91\x01\x01def', 3)
541
        self.assertDecode(9, 10, 5, 'ab\x91\x09\x0ade', 2)
542
        self.assertDecode(254, 255, 6, 'not\x91\xfe\xffcopy', 3)
543
544
545
class TestBase128Int(tests.TestCase):
546
5559.2.2 by Martin Pool
Change to using standard load_tests_apply_scenarios.
547
    scenarios = module_scenarios()
548
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
549
    _gc_module = None # Set by load_tests
550
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
551
    def assertEqualEncode(self, bytes, val):
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
552
        self.assertEqual(bytes, self._gc_module.encode_base128_int(val))
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
553
554
    def assertEqualDecode(self, val, num_decode, bytes):
555
        self.assertEqual((val, num_decode),
3735.40.16 by John Arbash Meinel
Implement (de|en)code_base128_int in pyrex.
556
                         self._gc_module.decode_base128_int(bytes))
3735.40.11 by John Arbash Meinel
Implement make_delta and apply_delta.
557
558
    def test_encode(self):
559
        self.assertEqualEncode('\x01', 1)
560
        self.assertEqualEncode('\x02', 2)
561
        self.assertEqualEncode('\x7f', 127)
562
        self.assertEqualEncode('\x80\x01', 128)
563
        self.assertEqualEncode('\xff\x01', 255)
564
        self.assertEqualEncode('\x80\x02', 256)
565
        self.assertEqualEncode('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)
566
567
    def test_decode(self):
568
        self.assertEqualDecode(1, 1, '\x01')
569
        self.assertEqualDecode(2, 1, '\x02')
570
        self.assertEqualDecode(127, 1, '\x7f')
571
        self.assertEqualDecode(128, 2, '\x80\x01')
572
        self.assertEqualDecode(255, 2, '\xff\x01')
573
        self.assertEqualDecode(256, 2, '\x80\x02')
574
        self.assertEqualDecode(0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')
575
576
    def test_decode_with_trailing_bytes(self):
577
        self.assertEqualDecode(1, 1, '\x01abcdef')
578
        self.assertEqualDecode(127, 1, '\x7f\x01')
579
        self.assertEqualDecode(128, 2, '\x80\x01abcdef')
580
        self.assertEqualDecode(255, 2, '\xff\x01\xff')
581
582