# Copyright (C) 2008, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    groupcompress,
    errors,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature


def load_tests(standard_tests, module, loader):
    """Parameterize tests for all versions of groupcompress."""
    to_adapt, result = tests.split_suite_by_condition(
        standard_tests, tests.condition_isinstance(TestAllGroupCompressors))
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return tests.multiply_tests(to_adapt, scenarios, result)
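
# With the scenarios above, every test in TestAllGroupCompressors is run once
# per available compressor: always against the pure-Python implementation, and
# also against the compiled (Pyrex) one when that extension is available.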


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    compressor = None # Set by multiply_tests

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
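        # ('f' marks a fulltext record; '\x0f' is the content length, 15 ==
        # len('strange\ncommon\n'), encoded as a single base-128 byte.)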
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                  '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                  '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
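        # Encoding note (matching the annotations above): a copy instruction
        # starts with a command byte that has the high bit set; 0x91 ==
        # 0x80 | 0x01 | 0x10, meaning one offset byte (0x0a) and one length
        # byte (0x2c) follow.  An insert instruction is a command byte below
        # 0x80 giving the literal length, here 0x0a == 10 bytes of
        # 'different\n'.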
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'common very very long line\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
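        # The smallest valid serialisation should be just the 'gcb1z' header
        # followed by a zero compressed length and a zero uncompressed length.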
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 zlib
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s'   # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header =('gcb1z\n' # group compress block v1 zlib
                          '%d\n' # Length of compressed content
                          '%d\n' # Length of uncompressed content
                         ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
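        # (2048 * 32 bytes of repeated text, plus 9130 bytes of '%d\n'
        # prefixes, plus 2048 * 41 bytes of sha1-plus-newline lines == 158634.)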
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be decompressed
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
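        # The expected structure below reads as: ('f', length) for a fulltext
        # record, ('d', delta-length, content-length, [instructions]) for a
        # delta record, with ('c', offset, length) copy instructions and
        # ('i', length, text) insert instructions (the inserted text itself is
        # elided by the dump).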
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
             ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
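        # Each insert_record_stream call above should have built its own
        # group: one block shared by a-d and another shared by e-h.  The
        # assertions below compare block identity to show that those blocks
        # are reused when the records are streamed back out.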
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                                'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # raw compressed block
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(canned_get_blocks=[
            (read_memo1, groupcompress.GroupCompressBlock()),
            (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zlib-compressed (empty) header
                         '%s'  # the block itself
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
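        # The wire form is the 'groupcompress-block' kind line followed by
        # three decimal lengths (compressed header, uncompressed header, and
        # block), and then the zlib-compressed header and the block itself.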
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'   # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Requesting a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # well utilized
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",
                         repr(bd))