28
import bzrlib.plugins.groupcompress._groupcompress_c
28
import bzrlib.plugins.groupcompress._groupcompress_pyx
29
29
except ImportError:
34
34
def feature_name(self):
35
return 'bzrlib.plugins.groupcompress._groupcompress_c'
35
return 'bzrlib.plugins.groupcompress._groupcompress_pyx'
37
37
CompiledGroupCompress = _CompiledGroupCompress()
40
class TestCompiledEquivalenceTable(tests.TestCase):
41
"""Direct tests for the compiled Equivalence Table."""
43
_tests_need_features = [CompiledGroupCompress]
45
# These tests assume that hash(int) == int
46
# If that ever changes, we can simply change this code to use a custom
47
# class that has precomputed values returned from __hash__.
50
# TODO: We need a test that forces a rebuild of the hash array, and makes
51
# sure that the lines that should not be indexed *stay* not indexed
54
super(TestCompiledEquivalenceTable, self).setUp()
55
from bzrlib.plugins.groupcompress import _groupcompress_c
56
self._gc_module = _groupcompress_c
58
def test_minimum_hash_size(self):
59
eq = self._gc_module.EquivalenceTable([])
60
# We request at least 50% free space in the hash (to make collisions
62
self.assertEqual(1024, eq._py_compute_minimum_hash_size(512))
63
self.assertEqual(2048, eq._py_compute_minimum_hash_size(513))
64
self.assertEqual(2048, eq._py_compute_minimum_hash_size(1000))
65
self.assertEqual(2048, eq._py_compute_minimum_hash_size(1024))
67
def test_recommended_hash_size(self):
68
eq = self._gc_module.EquivalenceTable([])
69
# We always recommend a minimum of 8k
70
self.assertEqual(8192, eq._py_compute_recommended_hash_size(10))
71
self.assertEqual(8192, eq._py_compute_recommended_hash_size(1000))
72
self.assertEqual(8192, eq._py_compute_recommended_hash_size(2000))
73
self.assertEqual(16384, eq._py_compute_recommended_hash_size(4000))
75
# And we recommend at least 75% free slots
76
self.assertEqual(16384, eq._py_compute_recommended_hash_size(4096))
77
self.assertEqual(32768, eq._py_compute_recommended_hash_size(4097))
79
def test__raw_lines(self):
80
eq = self._gc_module.EquivalenceTable([1, 2, 3])
81
self.assertEqual([(1, 1, 1, -1), (2, 2, 2, -1), (3, 3, 3, -1)],
82
eq._inspect_left_lines())
84
def test_build_hash(self):
85
eq = self._gc_module.EquivalenceTable([1, 2, 3])
86
# (size, [(offset, head_offset_in_lines, count)])
87
self.assertEqual((8192, [(1, 0, 1), (2, 1, 1), (3, 2, 1)]),
88
eq._inspect_hash_table())
90
def test_build_hash_with_duplicates(self):
91
eq = self._gc_module.EquivalenceTable([1, 2, 4, 0, 1, 4, 2, 4])
101
], eq._inspect_left_lines())
102
# (hash_offset, head_offset_in_lines, count)
103
self.assertEqual((8192, [
108
]), eq._inspect_hash_table())
110
def test_build_hash_table_with_wraparound(self):
111
eq = self._gc_module.EquivalenceTable([1, 2+8192])
115
], eq._inspect_left_lines())
116
self.assertEqual((8192, [
119
]), eq._inspect_hash_table())
121
def test_build_hash_table_with_collisions(self):
122
# We build up backwards, so # 2+8192 will wrap around to 2, and take
123
# its spot because the 2 offset is taken, then the real '2' will get
124
# bumped to 3, which will bump 3 into 4. then when we have 5, it will
125
# be fine, but the 8192+5 will get bumped to 6
126
eq = self._gc_module.EquivalenceTable([1, 5+8192, 5, 3, 2, 2+8192])
134
], eq._inspect_left_lines())
135
self.assertEqual((8192, [
142
]), eq._inspect_hash_table())
144
def test_build_hash_table_with_wrapped_collisions(self):
145
eq = self._gc_module.EquivalenceTable([0, 8191+8192, 8191])
148
(16383, 16383, 0, -1),
149
(8191, 8191, 8191, -1),
150
], eq._inspect_left_lines())
151
self.assertEqual((8192, [
155
]), eq._inspect_hash_table())
157
def test_build_hash_table_with_strings(self):
158
eq = self._gc_module.EquivalenceTable(['a', 'b', 'c', 'b'])
160
('a', hash('a'), hash('a') & 8191, -1),
161
('b', hash('b'), hash('b') & 8191, 3),
162
('c', hash('c'), hash('c') & 8191, -1),
163
('b', hash('b'), hash('b') & 8191, -1),
164
], eq._inspect_left_lines())
165
self.assertEqual((8192, sorted([
166
(hash('a') & 8191, 0, 1),
167
(hash('b') & 8191, 1, 2),
168
(hash('c') & 8191, 2, 1),
169
])), eq._inspect_hash_table())
171
def test_with_unhashable(self):
172
self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
173
[['unhashable', 'list'], 'foo'])
174
self.assertRaises(TypeError, self._gc_module.EquivalenceTable,
175
('foo', ['unhashable', 'list']))
177
def test_extend_lines(self):
178
eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
179
eq.extend_lines([30, 20, 40], [True, True, True])
188
], eq._inspect_left_lines())
189
# (hash_offset, head_offset_in_lines, count)
190
self.assertEqual((8192, [
195
]), eq._inspect_hash_table())
197
def test_extend_lines_ignored(self):
198
eq = self._gc_module.EquivalenceTable([10, 20, 30, 20])
199
eq.extend_lines([30, 20, 40], [True, False, False])
208
], eq._inspect_left_lines())
209
# (hash_offset, head_offset_in_lines, count)
210
self.assertEqual((8192, [
214
]), eq._inspect_hash_table())
217
class TestGetLongestMatch(tests.TestCase):
220
super(TestGetLongestMatch, self).setUp()
221
self._longest_match = groupcompress._get_longest_match
222
self._eq_class = groupcompress.GroupCompressor._equivalence_table_class
224
def assertLongestMatch(self, block, end_pos, matching_locs,
225
eq, start_pos, max_len, known_locs):
226
"""Check that _get_longest_match gives correct results."""
227
the_block, the_end_pos, next_locs = self._longest_match(eq,
228
start_pos, max_len, known_locs)
229
self.assertEqual(block, the_block)
230
self.assertEqual(end_pos, the_end_pos)
231
if next_locs is not None:
232
self.assertEqual(matching_locs, next_locs)
234
def test_all_match(self):
235
eq = self._eq_class(['a', 'b', 'c'])
236
eq.set_right_lines(['a', 'b', 'c'])
237
self.assertLongestMatch((0, 0, 3), 3, None,
239
self.assertLongestMatch((0, 0, 3), 3, None,
241
self.assertLongestMatch((1, 1, 2), 3, None,
243
self.assertLongestMatch((1, 1, 2), 3, None,
246
def test_no_match(self):
247
eq = self._eq_class(['a', 'b', 'c'])
248
eq.set_right_lines(['d', 'e', 'f'])
249
self.assertLongestMatch(None, 1, None,
251
self.assertLongestMatch(None, 2, None,
253
self.assertLongestMatch(None, 3, None,
256
def test_next_match(self):
257
eq = self._eq_class(['a', 'b', 'c'])
258
eq.set_right_lines(['a', 'c'])
259
self.assertLongestMatch((0, 0, 1), 1, [2],
261
self.assertLongestMatch((2, 1, 1), 2, None,
264
def test_lots_of_matches(self):
265
eq = self._eq_class(['a']*1000)
266
eq.set_right_lines(['a']*1000)
267
self.assertLongestMatch((0, 0, 1000), 1000, None,
269
eq = self._eq_class(['a']*1000)
270
eq.set_right_lines(['a']*2000)
271
self.assertLongestMatch((0, 0, 1000), 1000, range(1000),
273
eq = self._eq_class(['a']*2000)
274
eq.set_right_lines(['a']*1000)
275
self.assertLongestMatch((0, 0, 1000), 1000, None,
279
class TestCompiledGetLongestMatch(TestGetLongestMatch):
281
_tests_need_features = [CompiledGroupCompress]
284
super(TestGetLongestMatch, self).setUp()
285
from bzrlib.plugins.groupcompress import _groupcompress_c
286
self._longest_match = _groupcompress_c._get_longest_match
287
self._eq_class = _groupcompress_c.EquivalenceTable
42
which is meant to be matched
49
which is meant to differ from
56
which is meant to be matched
60
at the end of the file
66
common with the next text
70
some more bit of text, that
72
common with the previous text
73
and has some extra text
79
has some in common with the previous text
80
and has some extra text
82
common with the next text
86
class Test_GroupCompress(tests.TestCase):
87
"""Direct tests for the compiled extension."""
90
super(Test_GroupCompress, self).setUp()
91
self.requireFeature(CompiledGroupCompress)
92
from bzrlib.plugins.groupcompress import _groupcompress_pyx
93
self._gc_module = _groupcompress_pyx
96
class TestMakeAndApplyDelta(Test_GroupCompress):
99
super(TestMakeAndApplyDelta, self).setUp()
100
self.make_delta = self._gc_module.make_delta
101
self.apply_delta = self._gc_module.apply_delta
103
def test_make_delta_is_typesafe(self):
104
self.make_delta('a string', 'another string')
105
self.assertRaises(TypeError,
106
self.make_delta, 'a string', object())
107
self.assertRaises(TypeError,
108
self.make_delta, 'a string', u'not a string')
109
self.assertRaises(TypeError,
110
self.make_delta, object(), 'a string')
111
self.assertRaises(TypeError,
112
self.make_delta, u'not a string', 'a string')
114
def test_make_noop_delta(self):
115
ident_delta = self.make_delta(_text1, _text1)
116
self.assertEqual('MM\x90M', ident_delta)
117
ident_delta = self.make_delta(_text2, _text2)
118
self.assertEqual('NN\x90N', ident_delta)
119
ident_delta = self.make_delta(_text3, _text3)
120
self.assertEqual('\x87\x01\x87\x01\x90\x87', ident_delta)
122
def test_make_delta(self):
123
delta = self.make_delta(_text1, _text2)
124
self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
125
delta = self.make_delta(_text2, _text1)
126
self.assertEqual('NM\x90/\x1ebe matched\nagainst other text\n', delta)
127
delta = self.make_delta(_text3, _text1)
128
self.assertEqual('\x87\x01M\x90M', delta)
129
delta = self.make_delta(_text3, _text2)
130
self.assertEqual('\x87\x01N\x90/\x1fdiffer from\nagainst other text\n',
133
def test_apply_delta_is_typesafe(self):
134
self.apply_delta(_text1, 'MM\x90M')
135
self.assertRaises(TypeError,
136
self.apply_delta, object(), 'MM\x90M')
137
self.assertRaises(TypeError,
138
self.apply_delta, unicode(_text1), 'MM\x90M')
139
self.assertRaises(TypeError,
140
self.apply_delta, _text1, u'MM\x90M')
141
self.assertRaises(TypeError,
142
self.apply_delta, _text1, object())
144
def test_apply_delta(self):
145
target = self.apply_delta(_text1,
146
'MN\x90/\x1fdiffer from\nagainst other text\n')
147
self.assertEqual(_text2, target)
148
target = self.apply_delta(_text2,
149
'NM\x90/\x1ebe matched\nagainst other text\n')
150
self.assertEqual(_text1, target)
153
class TestDeltaIndex(Test_GroupCompress):
156
di = self._gc_module.DeltaIndex('test text\n')
157
self.assertEqual('DeltaIndex(1, 10)', repr(di))
159
def test_make_delta(self):
160
di = self._gc_module.DeltaIndex(_text1)
161
delta = di.make_delta(_text2)
162
self.assertEqual('MN\x90/\x1fdiffer from\nagainst other text\n', delta)
164
def test_delta_against_multiple_sources(self):
165
di = self._gc_module.DeltaIndex()
166
di.add_source(_first_text, 0)
167
self.assertEqual(len(_first_text), di._source_offset)
168
di.add_source(_second_text, 0)
169
self.assertEqual(len(_first_text) + len(_second_text), di._source_offset)
170
delta = di.make_delta(_third_text)
171
result = self._gc_module.apply_delta(_first_text + _second_text, delta)
172
self.assertEqualDiff(_third_text, result)
173
self.assertEqual('\xac\x01\x85\x01\x90\x14\x0chas some in '
174
'\x91v6\x03and\x91d"\x91:\n', delta)
176
def test_delta_with_offsets(self):
177
di = self._gc_module.DeltaIndex()
178
di.add_source(_first_text, 5)
179
self.assertEqual(len(_first_text) + 5, di._source_offset)
180
di.add_source(_second_text, 10)
181
self.assertEqual(len(_first_text) + len(_second_text) + 15,
183
delta = di.make_delta(_third_text)
184
self.assertIsNot(None, delta)
185
result = self._gc_module.apply_delta(
186
'12345' + _first_text + '1234567890' + _second_text, delta)
187
self.assertIsNot(None, result)
188
self.assertEqualDiff(_third_text, result)
189
self.assertEqual('\xbb\x01\x85\x01\x91\x05\x14\x0chas some in '
190
'\x91\x856\x03and\x91s"\x91?\n', delta)
192
def test_delta_with_delta_bytes(self):
193
di = self._gc_module.DeltaIndex()
194
di.add_source(_first_text, 0)
195
self.assertEqual(len(_first_text), di._source_offset)
196
delta = di.make_delta(_second_text)
197
self.assertEqual('Dh\tsome more\x91\x019'
198
'&previous text\nand has some extra text\n', delta)
199
di.add_delta_source(delta, 0)
200
self.assertEqual(len(_first_text) + len(delta), di._source_offset)
201
third_delta = di.make_delta(_third_text)
202
result = self._gc_module.apply_delta(_first_text + delta, third_delta)
203
self.assertEqualDiff(_third_text, result)
204
# We should be able to match against the 'previous text\nand has some...'
205
# that was part of the delta bytes
206
# Note that we don't match the 'common with the', because it isn't long
207
# enough to match in the original text, and those bytes are not present
208
# in the delta for the second text.
209
self.assertEqual('z\x85\x01\x90\x14\x1chas some in common with the '
210
'\x91T&\x03and\x91\x18,', third_delta)