28
28
from bzrlib.osutils import sha_string
29
from bzrlib.tests import (
30
TestCaseWithTransport,
29
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature
32
def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress.

    Tests in TestAllGroupCompressors are run once per compressor
    implementation (pure-python always; compiled/pyrex only when the
    extension is available).
    """
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    # The corrupted source lost the list opener here; reconstructed so the
    # python scenario is always present.
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
35
45
class TestGroupCompressor(tests.TestCase):
    """Base class providing chunk-comparison helpers for compressor tests."""

    def _chunks_to_repr_lines(self, chunks):
        # Join the chunks into one byte string, split back into lines, and
        # repr() each line so non-ascii/control bytes show up readably in
        # assertion diffs.
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))
63
class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor.

    These are parameterized (via load_tests) across every available
    compressor implementation; ``compressor`` is injected per-scenario.
    """

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        # A fresh compressor has produced no output chunks yet.
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        # diff against NULL: first text goes in as a fulltext
        compressor = self.compressor()
        sha1, start_point, end_point, _, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        # 'f' marker + base-128 length (0x0f == 15 bytes) + the raw text
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        # expected_lines is a str, so summing per-char lengths is just len()
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        # NOTE(review): the '' argument line was lost in the corrupted diff;
        # reconstructed from the surrounding asserts — verify against upstream.
        sha1, start_point, end_point, kind, _ = compressor.compress(('empty',),
                                                                    '', None)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        # get the first out
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        # and the second
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))
177
class TestBase128Int(tests.TestCase):
    """Tests for the variable-width base-128 integer encode/decode helpers."""

    def assertEqualEncode(self, bytes, val):
        # encode_base128_int(val) must produce exactly these bytes
        self.assertEqual(bytes, groupcompress.encode_base128_int(val))

    def assertEqualDecode(self, val, num_decode, bytes):
        # decode returns (value, number_of_bytes_consumed)
        self.assertEqual((val, num_decode),
                         groupcompress.decode_base128_int(bytes))

    def test_encode(self):
        self.assertEqualEncode('\x01', 1)
        self.assertEqualEncode('\x02', 2)
        self.assertEqualEncode('\x7f', 127)
        # 128 is the first value needing a continuation byte
        self.assertEqualEncode('\x80\x01', 128)
        self.assertEqualEncode('\xff\x01', 255)
        self.assertEqualEncode('\x80\x02', 256)
        self.assertEqualEncode('\xff\xff\xff\xff\x0f', 0xFFFFFFFF)

    def test_decode(self):
        self.assertEqualDecode(1, 1, '\x01')
        self.assertEqualDecode(2, 1, '\x02')
        self.assertEqualDecode(127, 1, '\x7f')
        self.assertEqualDecode(128, 2, '\x80\x01')
        self.assertEqualDecode(255, 2, '\xff\x01')
        self.assertEqualDecode(256, 2, '\x80\x02')
        self.assertEqualDecode(0xFFFFFFFF, 5, '\xff\xff\xff\xff\x0f')

    def test_decode_with_trailing_bytes(self):
        # decode reports how many bytes it consumed and ignores the rest
        self.assertEqualDecode(1, 1, '\x01abcdef')
        self.assertEqualDecode(127, 1, '\x7f\x01')
        self.assertEqualDecode(128, 2, '\x80\x01abcdef')
        self.assertEqualDecode(255, 2, '\xff\x01\xff')
123
class TestPyrexGroupCompressor(TestGroupCompressor):
    """Delta/ratio tests pinned to the compiled (pyrex) compressor."""

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        # NOTE(review): some text lines of these three inputs were lost in the
        # corrupted diff; reconstructed from upstream bzrlib — verify.
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # NOTE(review): the two byte-constant lines below were missing
            # from the corrupted source; reconstructed — verify.
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # NOTE(review): the header/insert byte constants below were
            # missing from the corrupted source; reconstructed — verify.
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
203
class TestPythonGroupCompressor(TestGroupCompressor):
    """Delta/ratio tests pinned to the pure-python compressor."""

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        # NOTE(review): some text lines of these three inputs were lost in the
        # corrupted diff; reconstructed from upstream bzrlib — verify.
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # NOTE(review): the two byte-constant lines below were missing
            # from the corrupted source; reconstructed — verify.
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # NOTE(review): the header/insert byte constants below were
            # missing from the corrupted source; reconstructed — verify.
            'd\x0c',
            # source and target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
211
282
class TestGroupCompressBlock(tests.TestCase):
239
310
self.assertEqual('', block._z_content)
240
311
block._ensure_content() # Ensure content is safe to call 2x
242
def test_from_bytes_with_labels(self):
243
header = ('key:bing\n'
244
'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
250
'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
255
z_header = zlib.compress(header)
313
def test_from_bytes(self):
256
314
content = ('a tiny bit of content\n')
257
315
z_content = zlib.compress(content)
259
317
'gcb1z\n' # group compress block v1 plain
260
'%d\n' # Length of zlib bytes
261
'%d\n' # Length of all meta-info
262
318
'%d\n' # Length of compressed content
263
319
'%d\n' # Length of uncompressed content
264
'%s' # Compressed header
265
320
'%s' # Compressed content
266
) % (len(z_header), len(header),
267
len(z_content), len(content),
321
) % (len(z_content), len(content), z_content)
269
322
block = groupcompress.GroupCompressBlock.from_bytes(
271
block._parse_header()
272
self.assertIsInstance(block, groupcompress.GroupCompressBlock)
273
self.assertEqual([('bing',), ('foo', 'bar')], sorted(block._entries))
274
bing = block._entries[('bing',)]
275
self.assertEqual(('bing',), bing.key)
276
self.assertEqual('fulltext', bing.type)
277
self.assertEqual('abcd'*10, bing.sha1)
278
self.assertEqual(100, bing.start)
279
self.assertEqual(100, bing.length)
280
foobar = block._entries[('foo', 'bar')]
281
self.assertEqual(('foo', 'bar'), foobar.key)
282
self.assertEqual('fulltext', foobar.type)
283
self.assertEqual('abcd'*10, foobar.sha1)
284
self.assertEqual(0, foobar.start)
285
self.assertEqual(100, foobar.length)
286
324
self.assertEqual(z_content, block._z_content)
287
325
self.assertIs(None, block._content)
288
block._ensure_content()
289
self.assertEqual(z_content, block._z_content)
290
self.assertEqual(content, block._content)
292
def test_from_old_bytes(self):
293
# Backwards compatibility, with groups that didn't define content length
294
content = ('a tiny bit of content\n')
295
z_content = zlib.compress(content)
297
'gcb1z\n' # group compress block v1 plain
298
'0\n' # Length of zlib bytes
299
'0\n' # Length of all meta-info
300
'' # Compressed header
301
'%s' # Compressed content
303
block = groupcompress.GroupCompressBlock.from_bytes(
305
self.assertIsInstance(block, groupcompress.GroupCompressBlock)
306
block._ensure_content()
307
self.assertEqual(z_content, block._z_content)
308
self.assertEqual(content, block._content)
310
def test_add_entry(self):
311
gcb = groupcompress.GroupCompressBlock()
312
e = gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
313
self.assertIsInstance(e, groupcompress.GroupCompressBlockEntry)
314
self.assertEqual(('foo', 'bar'), e.key)
315
self.assertEqual('fulltext', e.type)
316
self.assertEqual('abcd'*10, e.sha1)
317
self.assertEqual(0, e.start)
318
self.assertEqual(100, e.length)
326
self.assertEqual(len(z_content), block._z_content_length)
327
self.assertEqual(len(content), block._content_length)
328
block._ensure_content()
329
self.assertEqual(z_content, block._z_content)
330
self.assertEqual(content, block._content)
320
332
def test_to_bytes(self):
321
no_labels = groupcompress._NO_LABELS
323
groupcompress._NO_LABELS = no_labels
324
self.addCleanup(reset)
325
groupcompress._NO_LABELS = False
333
content = ('this is some content\n'
334
'this content will be compressed\n')
326
335
gcb = groupcompress.GroupCompressBlock()
327
gcb.add_entry(('foo', 'bar'), 'fulltext', 'abcd'*10, 0, 100)
328
gcb.add_entry(('bing',), 'fulltext', 'abcd'*10, 100, 100)
329
gcb.set_content('this is some content\n'
330
'this content will be compressed\n')
336
gcb.set_content(content)
331
337
bytes = gcb.to_bytes()
338
self.assertEqual(gcb._z_content_length, len(gcb._z_content))
339
self.assertEqual(gcb._content_length, len(content))
332
340
expected_header =('gcb1z\n' # group compress block v1 zlib
333
'76\n' # Length of compressed bytes
334
'183\n' # Length of uncompressed meta-info
335
'50\n' # Length of compressed content
336
'53\n' # Length of uncompressed content
341
'%d\n' # Length of compressed content
342
'%d\n' # Length of uncompressed content
343
) % (gcb._z_content_length, gcb._content_length)
338
344
self.assertStartsWith(bytes, expected_header)
339
345
remaining_bytes = bytes[len(expected_header):]
340
346
raw_bytes = zlib.decompress(remaining_bytes)
341
self.assertEqualDiff('key:bing\n'
342
'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
348
'sha1:abcdabcdabcdabcdabcdabcdabcdabcdabcdabcd\n'
347
self.assertEqual(content, raw_bytes)
354
349
def test_partial_decomp(self):
355
350
content_chunks = []