# Copyright (C) 2008-2011 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for group compression."""

import zlib

from bzrlib import (
    btree_index,
    errors,
    groupcompress,
    index as _mod_index,
    osutils,
    tests,
    trace,
    versionedfile,
    )
from bzrlib.osutils import sha_string
from bzrlib.tests.test__groupcompress import compiled_groupcompress_feature
from bzrlib.tests.scenarios import load_tests_apply_scenarios


def group_compress_implementation_scenarios():
    scenarios = [
        ('python', {'compressor': groupcompress.PythonGroupCompressor}),
        ]
    if compiled_groupcompress_feature.available():
        scenarios.append(('C',
            {'compressor': groupcompress.PyrexGroupCompressor}))
    return scenarios


load_tests = load_tests_apply_scenarios
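# The load_tests hook above multiplies each test class that defines a
# 'scenarios' attribute, so TestAllGroupCompressors runs once per available
# compressor implementation.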


class TestGroupCompressor(tests.TestCase):

    def _chunks_to_repr_lines(self, chunks):
        return '\n'.join(map(repr, ''.join(chunks).split('\n')))

    def assertEqualDiffEncoded(self, expected, actual):
        """Compare the actual content to the expected content.

        :param expected: A group of chunks that we expect to see
        :param actual: The measured 'chunks'

        We will transform the chunks back into lines, and then run 'repr()'
        over them to handle non-ascii characters.
        """
        self.assertEqualDiff(self._chunks_to_repr_lines(expected),
                             self._chunks_to_repr_lines(actual))


class TestAllGroupCompressors(TestGroupCompressor):
    """Tests for GroupCompressor"""

    scenarios = group_compress_implementation_scenarios()
    compressor = None # Set by scenario

    def test_empty_delta(self):
        compressor = self.compressor()
        self.assertEqual([], compressor.chunks)

    def test_one_nosha_delta(self):
        compressor = self.compressor()
        sha1, start_point, end_point, _ = compressor.compress(('label',),
            'strange\ncommon\n', None)
        self.assertEqual(sha_string('strange\ncommon\n'), sha1)
        expected_lines = 'f' '\x0f' 'strange\ncommon\n'
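        # 'f' marks a fulltext record and '\x0f' is its length (15 bytes, the
        # length of 'strange\ncommon\n'), followed by the raw text.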
        self.assertEqual(expected_lines, ''.join(compressor.chunks))
        self.assertEqual(0, start_point)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_empty_content(self):
        compressor = self.compressor()
        # Adding empty bytes should return the 'null' record
        sha1, start_point, end_point, kind = compressor.compress(('empty',),
                                                                 '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)
        self.assertEqual(0, compressor.endpoint)
        self.assertEqual([], compressor.chunks)
        # Even after adding some content
        compressor.compress(('content',), 'some\nbytes\n', None)
        self.assertTrue(compressor.endpoint > 0)
        sha1, start_point, end_point, kind = compressor.compress(('empty2',),
                                                                  '', None)
        self.assertEqual(0, start_point)
        self.assertEqual(0, end_point)
        self.assertEqual('fulltext', kind)
        self.assertEqual(groupcompress._null_sha1, sha1)

    def test_extract_from_compressor(self):
        # Knit fetching will try to reconstruct texts locally which results in
        # reading something that is in the compressor stream already.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, _, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(('strange\ncommon long line\n'
                          'that needs a 16 byte match\n', sha1_1),
                         compressor.extract(('label',)))
        self.assertEqual(('common long line\nthat needs a 16 byte match\n'
                          'different\n', sha1_2),
                         compressor.extract(('newlabel',)))

    def test_pop_last(self):
        compressor = self.compressor()
        _, _, _, _ = compressor.compress(('key1',),
            'some text\nfor the first entry\n', None)
        expected_lines = list(compressor.chunks)
        _, _, _, _ = compressor.compress(('key2',),
            'some text\nfor the second entry\n', None)
        compressor.pop_last()
        self.assertEqual(expected_lines, compressor.chunks)


class TestPyrexGroupCompressor(TestGroupCompressor):

    _test_needs_features = [compiled_groupcompress_feature]
    compressor = groupcompress.PyrexGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
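        # Three texts sharing most of their lines: the delta stream should be
        # roughly half the size of the raw input, i.e. a ratio near 1.9.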
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # source and target length
            # Copy of first parent 'common' range
            '\x91\x09\x31' # copy, offset 0x09, 0x31 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestPythonGroupCompressor(TestGroupCompressor):

    compressor = groupcompress.PythonGroupCompressor

    def test_stats(self):
        compressor = self.compressor()
        compressor.compress(('label',),
                            'strange\n'
                            'common very very long line\n'
                            'plus more text\n', None)
        compressor.compress(('newlabel',),
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        compressor.compress(('label3',),
                            'new\n'
                            'common very very long line\n'
                            'plus more text\n'
                            'different\n'
                            'moredifferent\n', None)
        self.assertAlmostEqual(1.9, compressor.ratio(), 1)

    def test_two_nosha_delta(self):
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon long line\nthat needs a 16 byte match\n', None)
        expected_lines = list(compressor.chunks)
        sha1_2, start_point, end_point, _ = compressor.compress(('newlabel',),
            'common long line\nthat needs a 16 byte match\ndifferent\n', None)
        self.assertEqual(sha_string('common long line\n'
                                    'that needs a 16 byte match\n'
                                    'different\n'), sha1_2)
        expected_lines.extend([
            # 'delta', delta length
            # copy the line common
            '\x91\x0a\x2c', #copy, offset 0x0a, len 0x2c
            # add the line different, and the trailing newline
            '\x0adifferent\n', # insert 10 bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)

    def test_three_nosha_delta(self):
        # The first interesting test: make a change that should use lines from
        # both parents.
        compressor = self.compressor()
        sha1_1, _, _, _ = compressor.compress(('label',),
            'strange\ncommon very very long line\nwith some extra text\n', None)
        sha1_2, _, _, _ = compressor.compress(('newlabel',),
            'different\nmoredifferent\nand then some more\n', None)
        expected_lines = list(compressor.chunks)
        sha1_3, start_point, end_point, _ = compressor.compress(('label3',),
            'new\ncommon very very long line\nwith some extra text\n'
            'different\nmoredifferent\nand then some more\n',
            None)
        self.assertEqual(
            sha_string('new\ncommon very very long line\nwith some extra text\n'
                       'different\nmoredifferent\nand then some more\n'),
            sha1_3)
        expected_lines.extend([
            # 'delta', delta length
            # Copy of first parent 'common' range
            '\x91\x0a\x30' # copy, offset 0x0a, 0x30 bytes
            # Copy of second parent 'different' range
            '\x91\x3c\x2b' # copy, offset 0x3c, 0x2b bytes
            ])
        self.assertEqualDiffEncoded(expected_lines, compressor.chunks)
        self.assertEqual(sum(map(len, expected_lines)), end_point)


class TestGroupCompressBlock(tests.TestCase):

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        # Go through from_bytes(to_bytes()) so that we start with a compressed
        # content object.
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def test_from_empty_bytes(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes, '')

    def test_from_minimal_bytes(self):
        block = groupcompress.GroupCompressBlock.from_bytes(
            'gcb1z\n0\n0\n')
        self.assertIsInstance(block, groupcompress.GroupCompressBlock)
        self.assertIs(None, block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content()
        self.assertEqual('', block._content)
        self.assertEqual('', block._z_content)
        block._ensure_content() # Ensure content is safe to call 2x

    def test_from_invalid(self):
        self.assertRaises(ValueError,
                          groupcompress.GroupCompressBlock.from_bytes,
                          'this is not a valid header')

    def test_from_bytes(self):
        content = ('a tiny bit of content\n')
        z_content = zlib.compress(content)
        z_bytes = (
            'gcb1z\n' # group compress block v1 plain
            '%d\n' # Length of compressed content
            '%d\n' # Length of uncompressed content
            '%s' # Compressed content
            ) % (len(z_content), len(content), z_content)
        block = groupcompress.GroupCompressBlock.from_bytes(
            z_bytes)
        self.assertEqual(z_content, block._z_content)
        self.assertIs(None, block._content)
        self.assertEqual(len(z_content), block._z_content_length)
        self.assertEqual(len(content), block._content_length)
        block._ensure_content()
        self.assertEqual(z_content, block._z_content)
        self.assertEqual(content, block._content)

    def test_to_chunks(self):
        content_chunks = ['this is some content\n',
                          'this content will be compressed\n']
        content_len = sum(map(len, content_chunks))
        content = ''.join(content_chunks)
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(content_chunks, content_len)
        total_len, block_chunks = gcb.to_chunks()
        block_bytes = ''.join(block_chunks)
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(total_len, len(block_bytes))
        self.assertEqual(gcb._content_length, content_len)
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        # The first chunk should be the header chunk. It is small, fixed size,
        # and there is no compelling reason to split it up
        self.assertEqual(expected_header, block_chunks[0])
        self.assertStartsWith(block_bytes, expected_header)
        remaining_bytes = block_bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

    def test_to_bytes(self):
        content = ('this is some content\n'
                   'this content will be compressed\n')
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_content(content)
        bytes = gcb.to_bytes()
        self.assertEqual(gcb._z_content_length, len(gcb._z_content))
        self.assertEqual(gcb._content_length, len(content))
        expected_header = ('gcb1z\n' # group compress block v1 zlib
                           '%d\n' # Length of compressed content
                           '%d\n' # Length of uncompressed content
                           ) % (gcb._z_content_length, gcb._content_length)
        self.assertStartsWith(bytes, expected_header)
        remaining_bytes = bytes[len(expected_header):]
        raw_bytes = zlib.decompress(remaining_bytes)
        self.assertEqual(content, raw_bytes)

        # we should get the same results if using the chunked version
        gcb = groupcompress.GroupCompressBlock()
        gcb.set_chunked_content(['this is some content\n'
                                 'this content will be compressed\n'],
                                len(content))
        old_bytes = bytes
        bytes = gcb.to_bytes()
        self.assertEqual(old_bytes, bytes)

    def test_partial_decomp(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        block._ensure_content(100)
        self.assertIsNot(None, block._content)
        # We have decompressed at least 100 bytes
        self.assertTrue(len(block._content) >= 100)
        # We have not decompressed the whole content
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # ensuring content that we already have shouldn't cause any more data
        # to be read
        cur_len = len(block._content)
        block._ensure_content(cur_len - 10)
        self.assertEqual(cur_len, len(block._content))
        # Now we want a bit more content
        cur_len += 10
        block._ensure_content(cur_len)
        self.assertTrue(len(block._content) >= cur_len)
        self.assertTrue(len(block._content) < 158634)
        self.assertEqualDiff(content[:len(block._content)], block._content)
        # And now lets finish
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And the decompressor is finalized
        self.assertIs(None, block._z_content_decompressor)

    def test__ensure_all_content(self):
        content_chunks = []
        # We need a sufficient amount of data so that zlib.decompress has
        # partial decompression to work with. Most auto-generated data
        # compresses a bit too well, we want a combination, so we combine a sha
        # hash with compressible data.
        for i in xrange(2048):
            next_content = '%d\nThis is a bit of duplicate text\n' % (i,)
            content_chunks.append(next_content)
            next_sha1 = osutils.sha_string(next_content)
            content_chunks.append(next_sha1 + '\n')
        content = ''.join(content_chunks)
        self.assertEqual(158634, len(content))
        z_content = zlib.compress(content)
        self.assertEqual(57182, len(z_content))
        block = groupcompress.GroupCompressBlock()
        block._z_content_chunks = (z_content,)
        block._z_content_length = len(z_content)
        block._compressor_name = 'zlib'
        block._content_length = 158634
        self.assertIs(None, block._content)
        # The first _ensure_content got all of the required data
        block._ensure_content(158634)
        self.assertEqualDiff(content, block._content)
        # And we should have released the _z_content_decompressor since it was
        # fully consumed
        self.assertIs(None, block._z_content_decompressor)

    def test__dump(self):
        dup_content = 'some duplicate content\nwhich is sufficiently long\n'
        key_to_text = {('1',): dup_content + '1 unique\n',
                       ('2',): dup_content + '2 extra special\n'}
        locs, block = self.make_block(key_to_text)
        self.assertEqual([('f', len(key_to_text[('1',)])),
                          ('d', 21, len(key_to_text[('2',)]),
                           [('c', 2, len(dup_content)),
                            ('i', len('2 extra special\n'), '')
                           ]),
                         ], block._dump())


class TestCaseWithGroupCompressVersionedFiles(
        tests.TestCaseWithMemoryTransport):

    def make_test_vf(self, create_graph, keylength=1, do_cleanup=True,
                     dir='.', inconsistency_fatal=True):
        t = self.get_transport(dir)
        t.ensure_base()
        vf = groupcompress.make_pack_factory(graph=create_graph,
            delta=False, keylength=keylength,
            inconsistency_fatal=inconsistency_fatal)(t)
        if do_cleanup:
            self.addCleanup(groupcompress.cleanup_pack_group, vf)
        return vf


class TestGroupCompressVersionedFiles(TestCaseWithGroupCompressVersionedFiles):

    def make_g_index(self, name, ref_lists=0, nodes=[]):
        builder = btree_index.BTreeBuilder(ref_lists)
        for node, references, value in nodes:
            builder.add_node(node, references, value)
        stream = builder.finish()
        trans = self.get_transport()
        size = trans.put_file(name, stream)
        return btree_index.BTreeGraphIndex(trans, name, size)

    def make_g_index_missing_parent(self):
        graph_index = self.make_g_index('missing_parent', 1,
            [(('parent', ), '2 78 2 10', ([],)),
             (('tip', ), '2 78 2 10',
              ([('parent', ), ('missing-parent', )],)),
              ])
        return graph_index

    def test_get_record_stream_as_requested(self):
        # Consider promoting 'as-requested' to general availability, and
        # make this a VF interface test
        vf = self.make_test_vf(False, dir='source')
        vf.add_lines(('a',), (), ['lines\n'])
        vf.add_lines(('b',), (), ['lines\n'])
        vf.add_lines(('c',), (), ['lines\n'])
        vf.add_lines(('d',), (), ['lines\n'])
        keys = [record.key for record in vf.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

        # It should work even after being repacked into another VF
        vf2 = self.make_test_vf(False, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)], 'as-requested', False))

        keys = [record.key for record in vf2.get_record_stream(
                    [('a',), ('b',), ('c',), ('d',)],
                    'as-requested', False)]
        self.assertEqual([('a',), ('b',), ('c',), ('d',)], keys)
        keys = [record.key for record in vf2.get_record_stream(
                    [('b',), ('a',), ('d',), ('c',)],
                    'as-requested', False)]
        self.assertEqual([('b',), ('a',), ('d',), ('c',)], keys)

    def test_insert_record_stream_reuses_blocks(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # A second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        block_bytes = {}
        stream = vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                      'unordered', False)
        num_records = 0
        for record in stream:
            if record.key in [('a',), ('e',)]:
                self.assertEqual('groupcompress-block', record.storage_kind)
            else:
                self.assertEqual('groupcompress-block-ref',
                                 record.storage_kind)
            block_bytes[record.key] = record._manager._block._z_content
            num_records += 1
        self.assertEqual(8, num_records)
        # All of 'abcd' should share one block, all of 'efgh' the other
        for r in 'abcd':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('a',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('e',)])
        for r in 'efgh':
            key = (r,)
            self.assertIs(block_bytes[key], block_bytes[('e',)])
            self.assertNotEqual(block_bytes[key], block_bytes[('a',)])
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        def small_size_stream():
            for record in vf.get_record_stream([(r,) for r in 'abcdefgh'],
                                               'groupcompress', False):
                record._manager._full_enough_block_size = \
                    record._manager._block._content_length
                yield record

        vf2.insert_record_stream(small_size_stream())
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        num_records = 0
        for record in stream:
            num_records += 1
            self.assertEqual(block_bytes[record.key],
                             record._manager._block._z_content)
        self.assertEqual(8, num_records)

    def test_insert_record_stream_packs_on_the_fly(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # A second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        # Now copy the blocks into another vf, and see that the
        # insert_record_stream rebuilt a new block on-the-fly because of
        # under-utilization
        vf2 = self.make_test_vf(True, dir='target')
        vf2.insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False))
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        num_records = 0
        # All of the records should be recombined into a single block
        block = None
        for record in stream:
            num_records += 1
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)
        self.assertEqual(8, num_records)

    def test__insert_record_stream_no_reuse_block(self):
        vf = self.make_test_vf(True, dir='source')
        def grouped_stream(revision_ids, first_parents=()):
            parents = first_parents
            for revision_id in revision_ids:
                key = (revision_id,)
                record = versionedfile.FulltextContentFactory(
                    key, parents, None,
                    'some content that is\n'
                    'identical except for\n'
                    'revision_id:%s\n' % (revision_id,))
                yield record
                parents = (key,)
        # One group, a-d
        vf.insert_record_stream(grouped_stream(['a', 'b', 'c', 'd']))
        # A second group, e-h
        vf.insert_record_stream(grouped_stream(['e', 'f', 'g', 'h'],
                                               first_parents=(('d',),)))
        self.assertEqual(8, len(list(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'],
            'unordered', False))))
        # Now copy the blocks into another vf, and ensure that the blocks are
        # preserved without creating new entries
        vf2 = self.make_test_vf(True, dir='target')
        # ordering in 'groupcompress' order, should actually swap the groups in
        # the target vf, but the groups themselves should not be disturbed.
        list(vf2._insert_record_stream(vf.get_record_stream(
            [(r,) for r in 'abcdefgh'], 'groupcompress', False),
            reuse_blocks=False))
        # After inserting with reuse_blocks=False, we should have everything in
        # a single new block.
        stream = vf2.get_record_stream([(r,) for r in 'abcdefgh'],
                                       'groupcompress', False)
        block = None
        for record in stream:
            if block is None:
                block = record._manager._block
            else:
                self.assertIs(block, record._manager._block)

    def test_add_missing_noncompression_parent_unvalidated_index(self):
        unvalidated = self.make_g_index_missing_parent()
        combined = _mod_index.CombinedGraphIndex([unvalidated])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            track_external_parent_refs=True)
        index.scan_unvalidated_index(unvalidated)
        self.assertEqual(
            frozenset([('missing-parent',)]), index.get_missing_parents())

    def test_track_external_parent_refs(self):
        g_index = self.make_g_index('empty', 1, [])
        mod_index = btree_index.BTreeBuilder(1, 1)
        combined = _mod_index.CombinedGraphIndex([g_index, mod_index])
        index = groupcompress._GCGraphIndex(combined,
            is_locked=lambda: True, parents=True,
            add_callback=mod_index.add_nodes,
            track_external_parent_refs=True)
        index.add_records([
            (('new-key',), '2 10 2 10', [(('parent-1',), ('parent-2',))])])
        self.assertEqual(
            frozenset([('parent-1',), ('parent-2',)]),
            index.get_missing_parents())

    def make_source_with_b(self, a_parent, path):
        source = self.make_test_vf(True, dir=path)
        source.add_lines(('a',), (), ['lines\n'])
        if a_parent:
            b_parents = (('a',),)
        else:
            b_parents = ()
        source.add_lines(('b',), b_parents, ['lines\n'])
        return source

    def do_inconsistent_inserts(self, inconsistency_fatal):
        target = self.make_test_vf(True, dir='target',
                                   inconsistency_fatal=inconsistency_fatal)
        for x in range(2):
            source = self.make_source_with_b(x==1, 'source%s' % x)
            target.insert_record_stream(source.get_record_stream(
                [('b',)], 'unordered', False))

    def test_inconsistent_redundant_inserts_warn(self):
        """Should not insert a record that is already present."""
        warnings = []
        def warning(template, args):
            warnings.append(template % args)
        _trace_warning = trace.warning
        trace.warning = warning
        try:
            self.do_inconsistent_inserts(inconsistency_fatal=False)
        finally:
            trace.warning = _trace_warning
        self.assertEqual(["inconsistent details in skipped record: ('b',)"
                          " ('42 32 0 8', ((),)) ('74 32 0 8', ((('a',),),))"],
                         warnings)

    def test_inconsistent_redundant_inserts_raises(self):
        e = self.assertRaises(errors.KnitCorrupt, self.do_inconsistent_inserts,
                              inconsistency_fatal=True)
        self.assertContainsRe(str(e), "Knit.* corrupt: inconsistent details"
                              " \('b',\) \('42 32 0 8', \(\(\),\)\) \('74 32"
                              " 0 8', \(\(\('a',\),\),\)\)")

    def test_clear_cache(self):
        vf = self.make_source_with_b(True, 'source')
        for record in vf.get_record_stream([('a',), ('b',)], 'unordered',
                                           True):
            pass
        self.assertTrue(len(vf._group_cache) > 0)
        vf.clear_cache()
        self.assertEqual(0, len(vf._group_cache))


class StubGCVF(object):
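    """A minimal stand-in for a GroupCompressVersionedFiles object."""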
    def __init__(self, canned_get_blocks=None):
        self._group_cache = {}
        self._canned_get_blocks = canned_get_blocks or []
    def _get_blocks(self, read_memos):
        return iter(self._canned_get_blocks)


class Test_BatchingBlockFetcher(TestCaseWithGroupCompressVersionedFiles):
    """Simple whitebox unit tests for _BatchingBlockFetcher."""

    def test_add_key_new_read_memo(self):
        """Adding a key with an uncached read_memo new to this batch adds that
        read_memo to the list of memos to fetch.
        """
        # locations are: index_memo, ignored, parents, ignored
        # where index_memo is: (idx, offset, len, factory_start, factory_end)
        # and (idx, offset, size) is known as the 'read_memo', identifying the
        # block.
        read_memo = ('fake index', 100, 50)
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_duplicate_read_memo(self):
        """read_memos that occur multiple times in a batch will only be fetched
        once.
        """
        read_memo = ('fake index', 100, 50)
        # Two keys, both sharing the same read memo (but different overall
        # index_memos).
        locations = {
            ('key1',): (read_memo + (0, 1), None, None, None),
            ('key2',): (read_memo + (1, 2), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), locations)
        total_size = batcher.add_key(('key1',))
        total_size = batcher.add_key(('key2',))
        self.assertEqual(50, total_size)
        self.assertEqual([('key1',), ('key2',)], batcher.keys)
        self.assertEqual([read_memo], batcher.memos_to_get)

    def test_add_key_cached_read_memo(self):
        """Adding a key with a cached read_memo will not cause that read_memo
        to be added to the list to fetch.
        """
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = 'fake block'
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        total_size = batcher.add_key(('key',))
        self.assertEqual(0, total_size)
        self.assertEqual([('key',)], batcher.keys)
        self.assertEqual([], batcher.memos_to_get)

    def test_yield_factories_empty(self):
        """An empty batch yields no factories."""
        batcher = groupcompress._BatchingBlockFetcher(StubGCVF(), {})
        self.assertEqual([], list(batcher.yield_factories()))

    def test_yield_factories_calls_get_blocks(self):
        """Uncached memos are retrieved via get_blocks."""
        read_memo1 = ('fake index', 100, 50)
        read_memo2 = ('fake index', 150, 40)
        gcvf = StubGCVF(
            canned_get_blocks=[
                (read_memo1, groupcompress.GroupCompressBlock()),
                (read_memo2, groupcompress.GroupCompressBlock())])
        locations = {
            ('key1',): (read_memo1 + (None, None), None, None, None),
            ('key2',): (read_memo2 + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key1',))
        batcher.add_key(('key2',))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(2, factories)
        keys = [f.key for f in factories]
        kinds = [f.storage_kind for f in factories]
        self.assertEqual([('key1',), ('key2',)], keys)
        self.assertEqual(['groupcompress-block', 'groupcompress-block'], kinds)

    def test_yield_factories_flushing(self):
        """yield_factories holds back on yielding results from the final block
        unless passed full_flush=True.
        """
        fake_block = groupcompress.GroupCompressBlock()
        read_memo = ('fake index', 100, 50)
        gcvf = StubGCVF()
        gcvf._group_cache[read_memo] = fake_block
        locations = {
            ('key',): (read_memo + (None, None), None, None, None)}
        batcher = groupcompress._BatchingBlockFetcher(gcvf, locations)
        batcher.add_key(('key',))
        self.assertEqual([], list(batcher.yield_factories()))
        factories = list(batcher.yield_factories(full_flush=True))
        self.assertLength(1, factories)
        self.assertEqual(('key',), factories[0].key)
        self.assertEqual('groupcompress-block', factories[0].storage_kind)


class TestLazyGroupCompress(tests.TestCaseWithTransport):

    _texts = {
        ('key1',): "this is a text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key2',): "another text\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key3',): "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
        ('key4',): "this will be extracted\n"
                   "but references most of its bytes from\n"
                   "yet another text which won't be extracted\n"
                   "with a reasonable amount of compressible bytes\n"
                   "which can be shared between various other texts\n",
    }

    def make_block(self, key_to_text):
        """Create a GroupCompressBlock, filling it with the given texts."""
        compressor = groupcompress.GroupCompressor()
        for key in sorted(key_to_text):
            compressor.compress(key, key_to_text[key], None)
        locs = dict((key, (start, end)) for key, (start, _, end, _)
                    in compressor.labels_deltas.iteritems())
        block = compressor.flush()
        raw_bytes = block.to_bytes()
        return locs, groupcompress.GroupCompressBlock.from_bytes(raw_bytes)

    def add_key_to_manager(self, key, locations, block, manager):
        start, end = locations[key]
        manager.add_factory(key, (), start, end)

    def make_block_and_full_manager(self, texts):
        locations, block = self.make_block(texts)
        manager = groupcompress._LazyGroupContentManager(block)
        for key in sorted(texts):
            self.add_key_to_manager(key, locations, block, manager)
        return block, manager

    def test_get_fulltexts(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key2',)], result_order)

        # If we build the manager in the opposite order, we should get them
        # back in the opposite order
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key2',), locations, block, manager)
        self.add_key_to_manager(('key1',), locations, block, manager)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key2',), ('key1',)], result_order)

    def test__wire_bytes_no_keys(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        wire_bytes = manager._wire_bytes()
        block_length = len(block.to_bytes())
        # We should have triggered a strip, since we aren't using any content
        stripped_block = manager._block.to_bytes()
        self.assertTrue(block_length > len(stripped_block))
        empty_z_header = zlib.compress('')
        self.assertEqual('groupcompress-block\n'
                         '8\n' # len(compress(''))
                         '0\n' # len('')
                         '%d\n'# compressed block len
                         '%s'  # zlib compressed header
                         '%s'  # block
                         % (len(stripped_block), empty_z_header,
                            stripped_block),
                         wire_bytes)

    def test__wire_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        block_bytes = block.to_bytes()
        wire_bytes = manager._wire_bytes()
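        # The wire form starts with the storage kind and three '\n'-terminated
        # lengths (zlib'd header, raw header, block), followed by the zlib'd
        # header and then the block bytes themselves.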
        (storage_kind, z_header_len, header_len,
         block_len, rest) = wire_bytes.split('\n', 4)
        z_header_len = int(z_header_len)
        header_len = int(header_len)
        block_len = int(block_len)
        self.assertEqual('groupcompress-block', storage_kind)
        self.assertEqual(34, z_header_len)
        self.assertEqual(26, header_len)
        self.assertEqual(len(block_bytes), block_len)
        z_header = rest[:z_header_len]
        header = zlib.decompress(z_header)
        self.assertEqual(header_len, len(header))
        entry1 = locations[('key1',)]
        entry4 = locations[('key4',)]
        self.assertEqualDiff('key1\n'
                             '\n'  # no parents
                             '%d\n' # start offset
                             '%d\n' # end offset
                             'key4\n'
                             '\n'
                             '%d\n'
                             '%d\n'
                             % (entry1[0], entry1[1],
                                entry4[0], entry4[1]),
                             header)
        z_block = rest[z_header_len:]
        self.assertEqual(block_bytes, z_block)

    def test_from_bytes(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key4',), locations, block, manager)
        wire_bytes = manager._wire_bytes()
        self.assertStartsWith(wire_bytes, 'groupcompress-block\n')
        manager = groupcompress._LazyGroupContentManager.from_bytes(wire_bytes)
        self.assertIsInstance(manager, groupcompress._LazyGroupContentManager)
        self.assertEqual(2, len(manager._factories))
        self.assertEqual(block._z_content, manager._block._z_content)
        result_order = []
        for record in manager.get_record_stream():
            result_order.append(record.key)
            text = self._texts[record.key]
            self.assertEqual(text, record.get_bytes_as('fulltext'))
        self.assertEqual([('key1',), ('key4',)], result_order)

    def test__check_rebuild_no_changes(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        manager._check_rebuild_block()
        self.assertIs(block, manager._block)

    def test__check_rebuild_only_one(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request just the first key, which should trigger a 'strip' action
        self.add_key_to_manager(('key1',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        # We should be able to still get the content out of this block, though
        # it should only have 1 entry
        for record in manager.get_record_stream():
            self.assertEqual(('key1',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test__check_rebuild_middle(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        # Request a small key in the middle should trigger a 'rebuild'
        self.add_key_to_manager(('key4',), locations, block, manager)
        manager._check_rebuild_block()
        self.assertIsNot(block, manager._block)
        self.assertTrue(block._content_length > manager._block._content_length)
        for record in manager.get_record_stream():
            self.assertEqual(('key4',), record.key)
            self.assertEqual(self._texts[record.key],
                             record.get_bytes_as('fulltext'))

    def test_check_is_well_utilized_all_keys(self):
        block, manager = self.make_block_and_full_manager(self._texts)
        self.assertFalse(manager.check_is_well_utilized())
        # Though we can fake it by changing the recommended minimum size
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        # Setting it just above causes it to fail
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        # Setting the mixed-block size doesn't do anything, because the content
        # is considered to not be 'mixed'
        manager._full_enough_mixed_block_size = block._content_length
        self.assertFalse(manager.check_is_well_utilized())

    def test_check_is_well_utilized_mixed_keys(self):
        texts = {}
        f1k1 = ('f1', 'k1')
        f1k2 = ('f1', 'k2')
        f2k1 = ('f2', 'k1')
        f2k2 = ('f2', 'k2')
        texts[f1k1] = self._texts[('key1',)]
        texts[f1k2] = self._texts[('key2',)]
        texts[f2k1] = self._texts[('key3',)]
        texts[f2k2] = self._texts[('key4',)]
        block, manager = self.make_block_and_full_manager(texts)
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())
        manager._full_enough_block_size = block._content_length + 1
        self.assertFalse(manager.check_is_well_utilized())
        manager._full_enough_mixed_block_size = block._content_length
        self.assertTrue(manager.check_is_well_utilized())

    def test_check_is_well_utilized_partial_use(self):
        locations, block = self.make_block(self._texts)
        manager = groupcompress._LazyGroupContentManager(block)
        manager._full_enough_block_size = block._content_length
        self.add_key_to_manager(('key1',), locations, block, manager)
        self.add_key_to_manager(('key2',), locations, block, manager)
        # Just using the content from key1 and 2 is not enough to be considered
        # 'well utilized'
        self.assertFalse(manager.check_is_well_utilized())
        # However if we add key3, then we have enough, as we only require 75%
        # usage
        self.add_key_to_manager(('key4',), locations, block, manager)
        self.assertTrue(manager.check_is_well_utilized())


class Test_GCBuildDetails(tests.TestCase):

    def test_acts_like_tuple(self):
        # _GCBuildDetails inlines some of the data that used to be spread out
        # across a bunch of tuples
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual(4, len(bd))
        self.assertEqual(('INDEX', 10, 20, 0, 5), bd[0])
        self.assertEqual(None, bd[1]) # Compression Parent is always None
        self.assertEqual((('parent1',), ('parent2',)), bd[2])
        self.assertEqual(('group', None), bd[3]) # Record details

    def test__repr__(self):
        bd = groupcompress._GCBuildDetails((('parent1',), ('parent2',)),
            ('INDEX', 10, 20, 0, 5))
        self.assertEqual("_GCBuildDetails(('INDEX', 10, 20, 0, 5),"
                         " (('parent1',), ('parent2',)))",