# Copyright (C) 2008, 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import CompiledGroupCompressFeature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if CompiledGroupCompressFeature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
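# multiply_tests copies each test in to_adapt once per scenario, applying the
# scenario dict (here just 'compressor') as attributes on each copy, so every
# TestAllGroupCompressors test runs against both implementations.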


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
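        # The serialized record above is a one-byte kind marker ('f' for
        # fulltext) plus a variable-width length (a single byte here, 0x0f ==
        # 15 == len('strange\ncommon\n')), followed by the raw bytes.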

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)

        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))

        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [CompiledGroupCompressFeature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
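        # (The instruction stream follows the git-style binary delta
        # encoding: a command byte with the high bit set is a copy, and its
        # low bits say which offset/length bytes follow -- 0x91 ==
        # 0b10010001 means one offset byte and one length byte. A command
        # byte below 0x80 inserts that many literal bytes.)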

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0b',
            # source and target length
            '\x5f'
            # insert new
            '\x03new',
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)
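        # Note that copy offsets are measured against the block content built
        # so far, including each record's kind and length header: 0x3c points
        # just past the second record's 'f' header, at the start of its text.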


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0f',
            # source and target length
            '\x36',
            # copy the line common
            '\x91\x0a\x2c', # copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            'd\x0c',
            # source and target length
            '\x5f'
            # insert new
            '\x04new\n',
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        complete = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            complete)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                          ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)
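        # So the serialized block layout is simply: the 'gcb1z\n' format
        # marker, the compressed and uncompressed lengths as newline-terminated
        # decimal ASCII, then the zlib-compressed content.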

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)
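        # Partial reads like this work because _ensure_content feeds the
        # compressed bytes through an incremental zlib.decompressobj() rather
        # than a one-shot zlib.decompress() -- roughly:
        #   d = zlib.decompressobj()
        #   partial = d.decompress(z_content, max_length)  # capped output
        # keeping the decompressor around until the full length is reached.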

    def test_partial_decomp_no_known_length(self):
        content_chunks = []
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content = z_content
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = None # Don't tell the decompressed length
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be extracted
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now let's finish
        block._ensure_content()
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(tests.TestCaseWithTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.'):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])

        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))
        vf2.writer.end()

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_re_uses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)

        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))

        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)

        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)

        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))

        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        vf.writer.end()
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))

        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len of uncompressed header
                         '%d\n' # compressed block len
                         '%s'
                         '%s'
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(33, z_header_len)
        self.assertEqual(25, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)
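        # So the full wire form is: 'groupcompress-block\n', three decimal
        # lengths (compressed header, uncompressed header, block), the
        # zlib-compressed header naming each requested key and its byte range,
        # and finally the block bytes themselves, unchanged.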

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request all the keys, which ensures that we won't rebuild
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key3',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should still be able to get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))