~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/tests/test_knit.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2009-08-27 02:27:19 UTC
  • mfrom: (4634.3.19 gc-batching)
  • Revision ID: pqm@pqm.ubuntu.com-20090827022719-bl2yoqhpj3fcfczu
(andrew) Fix #402657: 2a fetch over dumb transport reads one group at
        a time.

Show diffs side-by-side

added added

removed removed

Lines of Context:
12
12
#
13
13
# You should have received a copy of the GNU General Public License
14
14
# along with this program; if not, write to the Free Software
15
 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16
16
 
17
17
"""Tests for Knit data structure"""
18
18
 
19
19
from cStringIO import StringIO
20
20
import difflib
21
21
import gzip
22
 
import sha
23
22
import sys
24
23
 
25
24
from bzrlib import (
27
26
    generate_ids,
28
27
    knit,
29
28
    multiparent,
 
29
    osutils,
30
30
    pack,
31
31
    )
32
32
from bzrlib.errors import (
42
42
    KnitSequenceMatcher,
43
43
    KnitVersionedFiles,
44
44
    PlainKnitContent,
 
45
    _VFContentMapGenerator,
45
46
    _DirectPackAccess,
46
47
    _KndxIndex,
47
48
    _KnitGraphIndex,
48
49
    _KnitKeyAccess,
49
50
    make_file_factory,
50
51
    )
51
 
from bzrlib.osutils import split_lines
52
 
from bzrlib.symbol_versioning import one_four
 
52
from bzrlib.repofmt import pack_repo
53
53
from bzrlib.tests import (
54
54
    Feature,
55
55
    KnownFailure,
56
56
    TestCase,
57
57
    TestCaseWithMemoryTransport,
58
58
    TestCaseWithTransport,
 
59
    TestNotApplicable,
59
60
    )
60
61
from bzrlib.transport import get_transport
61
62
from bzrlib.transport.memory import MemoryTransport
63
64
from bzrlib.versionedfile import (
64
65
    AbsentContentFactory,
65
66
    ConstantMapper,
 
67
    network_bytes_to_kind_and_offset,
66
68
    RecordingVersionedFilesDecorator,
67
69
    )
68
70
 
71
73
 
72
74
    def _probe(self):
73
75
        try:
74
 
            import bzrlib._knit_load_data_c
 
76
            import bzrlib._knit_load_data_pyx
75
77
        except ImportError:
76
78
            return False
77
79
        return True
78
80
 
79
81
    def feature_name(self):
80
 
        return 'bzrlib._knit_load_data_c'
 
82
        return 'bzrlib._knit_load_data_pyx'
81
83
 
82
84
CompiledKnitFeature = _CompiledKnitFeature()
83
85
 
271
273
        return queue_call
272
274
 
273
275
 
 
276
class MockReadvFailingTransport(MockTransport):
 
277
    """Fail in the middle of a readv() result.
 
278
 
 
279
    This Transport will successfully yield the first two requested hunks, but
 
280
    raise NoSuchFile for the rest.
 
281
    """
 
282
 
 
283
    def readv(self, relpath, offsets):
 
284
        count = 0
 
285
        for result in MockTransport.readv(self, relpath, offsets):
 
286
            count += 1
 
287
            # we use 2 because the first offset is the pack header, the second
 
288
            # is the first actual content request
 
289
            if count > 2:
 
290
                raise errors.NoSuchFile(relpath)
 
291
            yield result
 
292
 
 
293
 
274
294
class KnitRecordAccessTestsMixin(object):
275
295
    """Tests for getting and putting knit records."""
276
296
 
279
299
        access = self.get_access()
280
300
        memos = access.add_raw_records([('key', 10)], '1234567890')
281
301
        self.assertEqual(['1234567890'], list(access.get_raw_records(memos)))
282
 
 
 
302
 
283
303
    def test_add_several_raw_records(self):
284
304
        """add_raw_records with many records and read some back."""
285
305
        access = self.get_access()
305
325
        mapper = ConstantMapper("foo")
306
326
        access = _KnitKeyAccess(self.get_transport(), mapper)
307
327
        return access
308
 
    
 
328
 
 
329
 
 
330
class _TestException(Exception):
 
331
    """Just an exception for local tests to use."""
 
332
 
309
333
 
310
334
class TestPackKnitAccess(TestCaseWithMemoryTransport, KnitRecordAccessTestsMixin):
311
335
    """Tests for the pack based access."""
323
347
        access.set_writer(writer, index, (transport, packname))
324
348
        return access, writer
325
349
 
 
350
    def make_pack_file(self):
 
351
        """Create a pack file with 2 records."""
 
352
        access, writer = self._get_access(packname='packname', index='foo')
 
353
        memos = []
 
354
        memos.extend(access.add_raw_records([('key1', 10)], '1234567890'))
 
355
        memos.extend(access.add_raw_records([('key2', 5)], '12345'))
 
356
        writer.end()
 
357
        return memos
 
358
 
 
359
    def make_vf_for_retrying(self):
 
360
        """Create 3 packs and a reload function.
 
361
 
 
362
        Originally, 2 pack files will have the data, but one will be missing.
 
363
        And then the third will be used in place of the first two if reload()
 
364
        is called.
 
365
 
 
366
        :return: (versioned_file, reload_counter)
 
367
            versioned_file  a KnitVersionedFiles using the packs for access
 
368
        """
 
369
        builder = self.make_branch_builder('.', format="1.9")
 
370
        builder.start_series()
 
371
        builder.build_snapshot('rev-1', None, [
 
372
            ('add', ('', 'root-id', 'directory', None)),
 
373
            ('add', ('file', 'file-id', 'file', 'content\nrev 1\n')),
 
374
            ])
 
375
        builder.build_snapshot('rev-2', ['rev-1'], [
 
376
            ('modify', ('file-id', 'content\nrev 2\n')),
 
377
            ])
 
378
        builder.build_snapshot('rev-3', ['rev-2'], [
 
379
            ('modify', ('file-id', 'content\nrev 3\n')),
 
380
            ])
 
381
        builder.finish_series()
 
382
        b = builder.get_branch()
 
383
        b.lock_write()
 
384
        self.addCleanup(b.unlock)
 
385
        # Pack these three revisions into another pack file, but don't remove
 
386
        # the originals
 
387
        repo = b.repository
 
388
        collection = repo._pack_collection
 
389
        collection.ensure_loaded()
 
390
        orig_packs = collection.packs
 
391
        packer = pack_repo.Packer(collection, orig_packs, '.testpack')
 
392
        new_pack = packer.pack()
 
393
        # forget about the new pack
 
394
        collection.reset()
 
395
        repo.refresh_data()
 
396
        vf = repo.revisions
 
397
        # Set up a reload() function that switches to using the new pack file
 
398
        new_index = new_pack.revision_index
 
399
        access_tuple = new_pack.access_tuple()
 
400
        reload_counter = [0, 0, 0]
 
401
        def reload():
 
402
            reload_counter[0] += 1
 
403
            if reload_counter[1] > 0:
 
404
                # We already reloaded, nothing more to do
 
405
                reload_counter[2] += 1
 
406
                return False
 
407
            reload_counter[1] += 1
 
408
            vf._index._graph_index._indices[:] = [new_index]
 
409
            vf._access._indices.clear()
 
410
            vf._access._indices[new_index] = access_tuple
 
411
            return True
 
412
        # Delete one of the pack files so the data will need to be reloaded. We
 
413
        # will delete the file with 'rev-2' in it
 
414
        trans, name = orig_packs[1].access_tuple()
 
415
        trans.delete(name)
 
416
        # We don't have the index trigger reloading because we want to test
 
417
        # that we reload when the .pack disappears
 
418
        vf._access._reload_func = reload
 
419
        return vf, reload_counter
 
420
 
 
421
    def make_reload_func(self, return_val=True):
 
422
        reload_called = [0]
 
423
        def reload():
 
424
            reload_called[0] += 1
 
425
            return return_val
 
426
        return reload_called, reload
 
427
 
 
428
    def make_retry_exception(self):
 
429
        # We raise a real exception so that sys.exc_info() is properly
 
430
        # populated
 
431
        try:
 
432
            raise _TestException('foobar')
 
433
        except _TestException, e:
 
434
            retry_exc = errors.RetryWithNewPacks(None, reload_occurred=False,
 
435
                                                 exc_info=sys.exc_info())
 
436
        return retry_exc
 
437
 
326
438
    def test_read_from_several_packs(self):
327
439
        access, writer = self._get_access()
328
440
        memos = []
364
476
        writer.end()
365
477
        self.assertEqual(['1234567890'], list(access.get_raw_records(memos)))
366
478
 
 
479
    def test_missing_index_raises_retry(self):
 
480
        memos = self.make_pack_file()
 
481
        transport = self.get_transport()
 
482
        reload_called, reload_func = self.make_reload_func()
 
483
        # Note that the index key has changed from 'foo' to 'bar'
 
484
        access = _DirectPackAccess({'bar':(transport, 'packname')},
 
485
                                   reload_func=reload_func)
 
486
        e = self.assertListRaises(errors.RetryWithNewPacks,
 
487
                                  access.get_raw_records, memos)
 
488
        # Because a key was passed in which does not match our index list, we
 
489
        # assume that the listing was already reloaded
 
490
        self.assertTrue(e.reload_occurred)
 
491
        self.assertIsInstance(e.exc_info, tuple)
 
492
        self.assertIs(e.exc_info[0], KeyError)
 
493
        self.assertIsInstance(e.exc_info[1], KeyError)
 
494
 
 
495
    def test_missing_index_raises_key_error_with_no_reload(self):
 
496
        memos = self.make_pack_file()
 
497
        transport = self.get_transport()
 
498
        # Note that the index key has changed from 'foo' to 'bar'
 
499
        access = _DirectPackAccess({'bar':(transport, 'packname')})
 
500
        e = self.assertListRaises(KeyError, access.get_raw_records, memos)
 
501
 
 
502
    def test_missing_file_raises_retry(self):
 
503
        memos = self.make_pack_file()
 
504
        transport = self.get_transport()
 
505
        reload_called, reload_func = self.make_reload_func()
 
506
        # Note that the 'filename' has been changed to 'different-packname'
 
507
        access = _DirectPackAccess({'foo':(transport, 'different-packname')},
 
508
                                   reload_func=reload_func)
 
509
        e = self.assertListRaises(errors.RetryWithNewPacks,
 
510
                                  access.get_raw_records, memos)
 
511
        # The file has gone missing, so we assume we need to reload
 
512
        self.assertFalse(e.reload_occurred)
 
513
        self.assertIsInstance(e.exc_info, tuple)
 
514
        self.assertIs(e.exc_info[0], errors.NoSuchFile)
 
515
        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
 
516
        self.assertEqual('different-packname', e.exc_info[1].path)
 
517
 
 
518
    def test_missing_file_raises_no_such_file_with_no_reload(self):
 
519
        memos = self.make_pack_file()
 
520
        transport = self.get_transport()
 
521
        # Note that the 'filename' has been changed to 'different-packname'
 
522
        access = _DirectPackAccess({'foo':(transport, 'different-packname')})
 
523
        e = self.assertListRaises(errors.NoSuchFile,
 
524
                                  access.get_raw_records, memos)
 
525
 
 
526
    def test_failing_readv_raises_retry(self):
 
527
        memos = self.make_pack_file()
 
528
        transport = self.get_transport()
 
529
        failing_transport = MockReadvFailingTransport(
 
530
                                [transport.get_bytes('packname')])
 
531
        reload_called, reload_func = self.make_reload_func()
 
532
        access = _DirectPackAccess({'foo':(failing_transport, 'packname')},
 
533
                                   reload_func=reload_func)
 
534
        # Asking for a single record will not trigger the Mock failure
 
535
        self.assertEqual(['1234567890'],
 
536
            list(access.get_raw_records(memos[:1])))
 
537
        self.assertEqual(['12345'],
 
538
            list(access.get_raw_records(memos[1:2])))
 
539
        # A multiple offset readv() will fail mid-way through
 
540
        e = self.assertListRaises(errors.RetryWithNewPacks,
 
541
                                  access.get_raw_records, memos)
 
542
        # The file has gone missing, so we assume we need to reload
 
543
        self.assertFalse(e.reload_occurred)
 
544
        self.assertIsInstance(e.exc_info, tuple)
 
545
        self.assertIs(e.exc_info[0], errors.NoSuchFile)
 
546
        self.assertIsInstance(e.exc_info[1], errors.NoSuchFile)
 
547
        self.assertEqual('packname', e.exc_info[1].path)
 
548
 
 
549
    def test_failing_readv_raises_no_such_file_with_no_reload(self):
 
550
        memos = self.make_pack_file()
 
551
        transport = self.get_transport()
 
552
        failing_transport = MockReadvFailingTransport(
 
553
                                [transport.get_bytes('packname')])
 
554
        reload_called, reload_func = self.make_reload_func()
 
555
        access = _DirectPackAccess({'foo':(failing_transport, 'packname')})
 
556
        # Asking for a single record will not trigger the Mock failure
 
557
        self.assertEqual(['1234567890'],
 
558
            list(access.get_raw_records(memos[:1])))
 
559
        self.assertEqual(['12345'],
 
560
            list(access.get_raw_records(memos[1:2])))
 
561
        # A multiple offset readv() will fail mid-way through
 
562
        e = self.assertListRaises(errors.NoSuchFile,
 
563
                                  access.get_raw_records, memos)
 
564
 
 
565
    def test_reload_or_raise_no_reload(self):
 
566
        access = _DirectPackAccess({}, reload_func=None)
 
567
        retry_exc = self.make_retry_exception()
 
568
        # Without a reload_func, we will just re-raise the original exception
 
569
        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
 
570
 
 
571
    def test_reload_or_raise_reload_changed(self):
 
572
        reload_called, reload_func = self.make_reload_func(return_val=True)
 
573
        access = _DirectPackAccess({}, reload_func=reload_func)
 
574
        retry_exc = self.make_retry_exception()
 
575
        access.reload_or_raise(retry_exc)
 
576
        self.assertEqual([1], reload_called)
 
577
        retry_exc.reload_occurred=True
 
578
        access.reload_or_raise(retry_exc)
 
579
        self.assertEqual([2], reload_called)
 
580
 
 
581
    def test_reload_or_raise_reload_no_change(self):
 
582
        reload_called, reload_func = self.make_reload_func(return_val=False)
 
583
        access = _DirectPackAccess({}, reload_func=reload_func)
 
584
        retry_exc = self.make_retry_exception()
 
585
        # If reload_occurred is False, then we consider it an error to have
 
586
        # reload_func() return False (no changes).
 
587
        self.assertRaises(_TestException, access.reload_or_raise, retry_exc)
 
588
        self.assertEqual([1], reload_called)
 
589
        retry_exc.reload_occurred=True
 
590
        # If reload_occurred is True, then we assume nothing changed because
 
591
        # it had changed earlier, but didn't change again
 
592
        access.reload_or_raise(retry_exc)
 
593
        self.assertEqual([2], reload_called)
 
594
 
 
595
    def test_annotate_retries(self):
 
596
        vf, reload_counter = self.make_vf_for_retrying()
 
597
        # It is a little bit bogus to annotate the Revision VF, but it works,
 
598
        # as we have ancestry stored there
 
599
        key = ('rev-3',)
 
600
        reload_lines = vf.annotate(key)
 
601
        self.assertEqual([1, 1, 0], reload_counter)
 
602
        plain_lines = vf.annotate(key)
 
603
        self.assertEqual([1, 1, 0], reload_counter) # No extra reloading
 
604
        if reload_lines != plain_lines:
 
605
            self.fail('Annotation was not identical with reloading.')
 
606
        # Now delete the packs-in-use, which should trigger another reload, but
 
607
        # this time we just raise an exception because we can't recover
 
608
        for trans, name in vf._access._indices.itervalues():
 
609
            trans.delete(name)
 
610
        self.assertRaises(errors.NoSuchFile, vf.annotate, key)
 
611
        self.assertEqual([2, 1, 1], reload_counter)
 
612
 
 
613
    def test__get_record_map_retries(self):
 
614
        vf, reload_counter = self.make_vf_for_retrying()
 
615
        keys = [('rev-1',), ('rev-2',), ('rev-3',)]
 
616
        records = vf._get_record_map(keys)
 
617
        self.assertEqual(keys, sorted(records.keys()))
 
618
        self.assertEqual([1, 1, 0], reload_counter)
 
619
        # Now delete the packs-in-use, which should trigger another reload, but
 
620
        # this time we just raise an exception because we can't recover
 
621
        for trans, name in vf._access._indices.itervalues():
 
622
            trans.delete(name)
 
623
        self.assertRaises(errors.NoSuchFile, vf._get_record_map, keys)
 
624
        self.assertEqual([2, 1, 1], reload_counter)
 
625
 
 
626
    def test_get_record_stream_retries(self):
 
627
        vf, reload_counter = self.make_vf_for_retrying()
 
628
        keys = [('rev-1',), ('rev-2',), ('rev-3',)]
 
629
        record_stream = vf.get_record_stream(keys, 'topological', False)
 
630
        record = record_stream.next()
 
631
        self.assertEqual(('rev-1',), record.key)
 
632
        self.assertEqual([0, 0, 0], reload_counter)
 
633
        record = record_stream.next()
 
634
        self.assertEqual(('rev-2',), record.key)
 
635
        self.assertEqual([1, 1, 0], reload_counter)
 
636
        record = record_stream.next()
 
637
        self.assertEqual(('rev-3',), record.key)
 
638
        self.assertEqual([1, 1, 0], reload_counter)
 
639
        # Now delete all pack files, and see that we raise the right error
 
640
        for trans, name in vf._access._indices.itervalues():
 
641
            trans.delete(name)
 
642
        self.assertListRaises(errors.NoSuchFile,
 
643
            vf.get_record_stream, keys, 'topological', False)
 
644
 
 
645
    def test_iter_lines_added_or_present_in_keys_retries(self):
 
646
        vf, reload_counter = self.make_vf_for_retrying()
 
647
        keys = [('rev-1',), ('rev-2',), ('rev-3',)]
 
648
        # Unfortunately, iter_lines_added_or_present_in_keys iterates the
 
649
        # result in random order (determined by the iteration order from a
 
650
        # set()), so we don't have any solid way to control whether data is
 
651
        # read before or after. However we tried to delete the middle node to
 
652
        # exercise the code well.
 
653
        # What we care about is that all lines are always yielded, but not
 
654
        # duplicated
 
655
        count = 0
 
656
        reload_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
 
657
        self.assertEqual([1, 1, 0], reload_counter)
 
658
        # Now do it again, to make sure the result is equivalent
 
659
        plain_lines = sorted(vf.iter_lines_added_or_present_in_keys(keys))
 
660
        self.assertEqual([1, 1, 0], reload_counter) # No extra reloading
 
661
        self.assertEqual(plain_lines, reload_lines)
 
662
        self.assertEqual(21, len(plain_lines))
 
663
        # Now delete all pack files, and see that we raise the right error
 
664
        for trans, name in vf._access._indices.itervalues():
 
665
            trans.delete(name)
 
666
        self.assertListRaises(errors.NoSuchFile,
 
667
            vf.iter_lines_added_or_present_in_keys, keys)
 
668
        self.assertEqual([2, 1, 1], reload_counter)
 
669
 
 
670
    def test_get_record_stream_yields_disk_sorted_order(self):
 
671
        # if we get 'unordered' pick a semi-optimal order for reading. The
 
672
        # order should be grouped by pack file, and then by position in file
 
673
        repo = self.make_repository('test', format='pack-0.92')
 
674
        repo.lock_write()
 
675
        self.addCleanup(repo.unlock)
 
676
        repo.start_write_group()
 
677
        vf = repo.texts
 
678
        vf.add_lines(('f-id', 'rev-5'), [('f-id', 'rev-4')], ['lines\n'])
 
679
        vf.add_lines(('f-id', 'rev-1'), [], ['lines\n'])
 
680
        vf.add_lines(('f-id', 'rev-2'), [('f-id', 'rev-1')], ['lines\n'])
 
681
        repo.commit_write_group()
 
682
        # We inserted them as rev-5, rev-1, rev-2, we should get them back in
 
683
        # the same order
 
684
        stream = vf.get_record_stream([('f-id', 'rev-1'), ('f-id', 'rev-5'),
 
685
                                       ('f-id', 'rev-2')], 'unordered', False)
 
686
        keys = [r.key for r in stream]
 
687
        self.assertEqual([('f-id', 'rev-5'), ('f-id', 'rev-1'),
 
688
                          ('f-id', 'rev-2')], keys)
 
689
        repo.start_write_group()
 
690
        vf.add_lines(('f-id', 'rev-4'), [('f-id', 'rev-3')], ['lines\n'])
 
691
        vf.add_lines(('f-id', 'rev-3'), [('f-id', 'rev-2')], ['lines\n'])
 
692
        vf.add_lines(('f-id', 'rev-6'), [('f-id', 'rev-5')], ['lines\n'])
 
693
        repo.commit_write_group()
 
694
        # Request in random order, to make sure the output order isn't based on
 
695
        # the request
 
696
        request_keys = set(('f-id', 'rev-%d' % i) for i in range(1, 7))
 
697
        stream = vf.get_record_stream(request_keys, 'unordered', False)
 
698
        keys = [r.key for r in stream]
 
699
        # We want to get the keys back in disk order, but it doesn't matter
 
700
        # which pack we read from first. So this can come back in 2 orders
 
701
        alt1 = [('f-id', 'rev-%d' % i) for i in [4, 3, 6, 5, 1, 2]]
 
702
        alt2 = [('f-id', 'rev-%d' % i) for i in [5, 1, 2, 4, 3, 6]]
 
703
        if keys != alt1 and keys != alt2:
 
704
            self.fail('Returned key order did not match either expected order.'
 
705
                      ' expected %s or %s, not %s'
 
706
                      % (alt1, alt2, keys))
 
707
 
367
708
 
368
709
class LowLevelKnitDataTests(TestCase):
369
710
 
374
715
        gz_file.close()
375
716
        return sio.getvalue()
376
717
 
 
718
    def make_multiple_records(self):
 
719
        """Create the content for multiple records."""
 
720
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
 
721
        total_txt = []
 
722
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
 
723
                                        'foo\n'
 
724
                                        'bar\n'
 
725
                                        'end rev-id-1\n'
 
726
                                        % (sha1sum,))
 
727
        record_1 = (0, len(gz_txt), sha1sum)
 
728
        total_txt.append(gz_txt)
 
729
        sha1sum = osutils.sha('baz\n').hexdigest()
 
730
        gz_txt = self.create_gz_content('version rev-id-2 1 %s\n'
 
731
                                        'baz\n'
 
732
                                        'end rev-id-2\n'
 
733
                                        % (sha1sum,))
 
734
        record_2 = (record_1[1], len(gz_txt), sha1sum)
 
735
        total_txt.append(gz_txt)
 
736
        return total_txt, record_1, record_2
 
737
 
377
738
    def test_valid_knit_data(self):
378
 
        sha1sum = sha.new('foo\nbar\n').hexdigest()
 
739
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
379
740
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
380
741
                                        'foo\n'
381
742
                                        'bar\n'
393
754
        raw_contents = list(knit._read_records_iter_raw(records))
394
755
        self.assertEqual([(('rev-id-1',), gz_txt, sha1sum)], raw_contents)
395
756
 
 
757
    def test_multiple_records_valid(self):
 
758
        total_txt, record_1, record_2 = self.make_multiple_records()
 
759
        transport = MockTransport([''.join(total_txt)])
 
760
        access = _KnitKeyAccess(transport, ConstantMapper('filename'))
 
761
        knit = KnitVersionedFiles(None, access)
 
762
        records = [(('rev-id-1',), (('rev-id-1',), record_1[0], record_1[1])),
 
763
                   (('rev-id-2',), (('rev-id-2',), record_2[0], record_2[1]))]
 
764
 
 
765
        contents = list(knit._read_records_iter(records))
 
766
        self.assertEqual([(('rev-id-1',), ['foo\n', 'bar\n'], record_1[2]),
 
767
                          (('rev-id-2',), ['baz\n'], record_2[2])],
 
768
                         contents)
 
769
 
 
770
        raw_contents = list(knit._read_records_iter_raw(records))
 
771
        self.assertEqual([(('rev-id-1',), total_txt[0], record_1[2]),
 
772
                          (('rev-id-2',), total_txt[1], record_2[2])],
 
773
                         raw_contents)
 
774
 
396
775
    def test_not_enough_lines(self):
397
 
        sha1sum = sha.new('foo\n').hexdigest()
 
776
        sha1sum = osutils.sha('foo\n').hexdigest()
398
777
        # record says 2 lines data says 1
399
778
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
400
779
                                        'foo\n'
412
791
        self.assertEqual([(('rev-id-1',),  gz_txt, sha1sum)], raw_contents)
413
792
 
414
793
    def test_too_many_lines(self):
415
 
        sha1sum = sha.new('foo\nbar\n').hexdigest()
 
794
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
416
795
        # record says 1 lines data says 2
417
796
        gz_txt = self.create_gz_content('version rev-id-1 1 %s\n'
418
797
                                        'foo\n'
431
810
        self.assertEqual([(('rev-id-1',), gz_txt, sha1sum)], raw_contents)
432
811
 
433
812
    def test_mismatched_version_id(self):
434
 
        sha1sum = sha.new('foo\nbar\n').hexdigest()
 
813
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
435
814
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
436
815
                                        'foo\n'
437
816
                                        'bar\n'
450
829
            knit._read_records_iter_raw(records))
451
830
 
452
831
    def test_uncompressed_data(self):
453
 
        sha1sum = sha.new('foo\nbar\n').hexdigest()
 
832
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
454
833
        txt = ('version rev-id-1 2 %s\n'
455
834
               'foo\n'
456
835
               'bar\n'
470
849
            knit._read_records_iter_raw(records))
471
850
 
472
851
    def test_corrupted_data(self):
473
 
        sha1sum = sha.new('foo\nbar\n').hexdigest()
 
852
        sha1sum = osutils.sha('foo\nbar\n').hexdigest()
474
853
        gz_txt = self.create_gz_content('version rev-id-1 2 %s\n'
475
854
                                        'foo\n'
476
855
                                        'bar\n'
720
1099
            call[1][1].getvalue())
721
1100
        self.assertEqual({'create_parent_dir': True}, call[2])
722
1101
 
 
1102
    def assertTotalBuildSize(self, size, keys, positions):
 
1103
        self.assertEqual(size,
 
1104
                         knit._get_total_build_size(None, keys, positions))
 
1105
 
 
1106
    def test__get_total_build_size(self):
 
1107
        positions = {
 
1108
            ('a',): (('fulltext', False), (('a',), 0, 100), None),
 
1109
            ('b',): (('line-delta', False), (('b',), 100, 21), ('a',)),
 
1110
            ('c',): (('line-delta', False), (('c',), 121, 35), ('b',)),
 
1111
            ('d',): (('line-delta', False), (('d',), 156, 12), ('b',)),
 
1112
            }
 
1113
        self.assertTotalBuildSize(100, [('a',)], positions)
 
1114
        self.assertTotalBuildSize(121, [('b',)], positions)
 
1115
        # c needs both a & b
 
1116
        self.assertTotalBuildSize(156, [('c',)], positions)
 
1117
        # we shouldn't count 'b' twice
 
1118
        self.assertTotalBuildSize(156, [('b',), ('c',)], positions)
 
1119
        self.assertTotalBuildSize(133, [('d',)], positions)
 
1120
        self.assertTotalBuildSize(168, [('c',), ('d',)], positions)
 
1121
 
723
1122
    def test_get_position(self):
724
1123
        transport = MockTransport([
725
1124
            _KndxIndex.HEADER,
866
1265
            else:
867
1266
                raise
868
1267
 
 
1268
    def test_scan_unvalidated_index_not_implemented(self):
 
1269
        transport = MockTransport()
 
1270
        index = self.get_knit_index(transport, 'filename', 'r')
 
1271
        self.assertRaises(
 
1272
            NotImplementedError, index.scan_unvalidated_index,
 
1273
            'dummy graph_index')
 
1274
        self.assertRaises(
 
1275
            NotImplementedError, index.get_missing_compression_parents)
 
1276
 
869
1277
    def test_short_line(self):
870
1278
        transport = MockTransport([
871
1279
            _KndxIndex.HEADER,
908
1316
        def reset():
909
1317
            knit._load_data = orig
910
1318
        self.addCleanup(reset)
911
 
        from bzrlib._knit_load_data_c import _load_data_c
 
1319
        from bzrlib._knit_load_data_pyx import _load_data_c
912
1320
        knit._load_data = _load_data_c
913
1321
        allow_writes = lambda: mode == 'w'
914
1322
        return _KndxIndex(transport, mapper, lambda:None, allow_writes, lambda:True)
915
1323
 
916
1324
 
 
1325
class Test_KnitAnnotator(TestCaseWithMemoryTransport):
 
1326
 
 
1327
    def make_annotator(self):
 
1328
        factory = knit.make_pack_factory(True, True, 1)
 
1329
        vf = factory(self.get_transport())
 
1330
        return knit._KnitAnnotator(vf)
 
1331
 
 
1332
    def test__expand_fulltext(self):
 
1333
        ann = self.make_annotator()
 
1334
        rev_key = ('rev-id',)
 
1335
        ann._num_compression_children[rev_key] = 1
 
1336
        res = ann._expand_record(rev_key, (('parent-id',),), None,
 
1337
                           ['line1\n', 'line2\n'], ('fulltext', True))
 
1338
        # The content object and text lines should be cached appropriately
 
1339
        self.assertEqual(['line1\n', 'line2'], res)
 
1340
        content_obj = ann._content_objects[rev_key]
 
1341
        self.assertEqual(['line1\n', 'line2\n'], content_obj._lines)
 
1342
        self.assertEqual(res, content_obj.text())
 
1343
        self.assertEqual(res, ann._text_cache[rev_key])
 
1344
 
 
1345
    def test__expand_delta_comp_parent_not_available(self):
 
1346
        # Parent isn't available yet, so we return nothing, but queue up this
 
1347
        # node for later processing
 
1348
        ann = self.make_annotator()
 
1349
        rev_key = ('rev-id',)
 
1350
        parent_key = ('parent-id',)
 
1351
        record = ['0,1,1\n', 'new-line\n']
 
1352
        details = ('line-delta', False)
 
1353
        res = ann._expand_record(rev_key, (parent_key,), parent_key,
 
1354
                                 record, details)
 
1355
        self.assertEqual(None, res)
 
1356
        self.assertTrue(parent_key in ann._pending_deltas)
 
1357
        pending = ann._pending_deltas[parent_key]
 
1358
        self.assertEqual(1, len(pending))
 
1359
        self.assertEqual((rev_key, (parent_key,), record, details), pending[0])
 
1360
 
 
1361
    def test__expand_record_tracks_num_children(self):
 
1362
        ann = self.make_annotator()
 
1363
        rev_key = ('rev-id',)
 
1364
        rev2_key = ('rev2-id',)
 
1365
        parent_key = ('parent-id',)
 
1366
        record = ['0,1,1\n', 'new-line\n']
 
1367
        details = ('line-delta', False)
 
1368
        ann._num_compression_children[parent_key] = 2
 
1369
        ann._expand_record(parent_key, (), None, ['line1\n', 'line2\n'],
 
1370
                           ('fulltext', False))
 
1371
        res = ann._expand_record(rev_key, (parent_key,), parent_key,
 
1372
                                 record, details)
 
1373
        self.assertEqual({parent_key: 1}, ann._num_compression_children)
 
1374
        # Expanding the second child should remove the content object, and the
 
1375
        # num_compression_children entry
 
1376
        res = ann._expand_record(rev2_key, (parent_key,), parent_key,
 
1377
                                 record, details)
 
1378
        self.assertFalse(parent_key in ann._content_objects)
 
1379
        self.assertEqual({}, ann._num_compression_children)
 
1380
        # We should not cache the content_objects for rev2 and rev, because
 
1381
        # they do not have compression children of their own.
 
1382
        self.assertEqual({}, ann._content_objects)
 
1383
 
 
1384
    def test__expand_delta_records_blocks(self):
 
1385
        ann = self.make_annotator()
 
1386
        rev_key = ('rev-id',)
 
1387
        parent_key = ('parent-id',)
 
1388
        record = ['0,1,1\n', 'new-line\n']
 
1389
        details = ('line-delta', True)
 
1390
        ann._num_compression_children[parent_key] = 2
 
1391
        ann._expand_record(parent_key, (), None,
 
1392
                           ['line1\n', 'line2\n', 'line3\n'],
 
1393
                           ('fulltext', False))
 
1394
        ann._expand_record(rev_key, (parent_key,), parent_key, record, details)
 
1395
        self.assertEqual({(rev_key, parent_key): [(1, 1, 1), (3, 3, 0)]},
 
1396
                         ann._matching_blocks)
 
1397
        rev2_key = ('rev2-id',)
 
1398
        record = ['0,1,1\n', 'new-line\n']
 
1399
        details = ('line-delta', False)
 
1400
        ann._expand_record(rev2_key, (parent_key,), parent_key, record, details)
 
1401
        self.assertEqual([(1, 1, 2), (3, 3, 0)],
 
1402
                         ann._matching_blocks[(rev2_key, parent_key)])
 
1403
 
 
1404
    def test__get_parent_ann_uses_matching_blocks(self):
 
1405
        ann = self.make_annotator()
 
1406
        rev_key = ('rev-id',)
 
1407
        parent_key = ('parent-id',)
 
1408
        parent_ann = [(parent_key,)]*3
 
1409
        block_key = (rev_key, parent_key)
 
1410
        ann._annotations_cache[parent_key] = parent_ann
 
1411
        ann._matching_blocks[block_key] = [(0, 1, 1), (3, 3, 0)]
 
1412
        # We should not try to access any parent_lines content, because we know
 
1413
        # we already have the matching blocks
 
1414
        par_ann, blocks = ann._get_parent_annotations_and_matches(rev_key,
 
1415
                                        ['1\n', '2\n', '3\n'], parent_key)
 
1416
        self.assertEqual(parent_ann, par_ann)
 
1417
        self.assertEqual([(0, 1, 1), (3, 3, 0)], blocks)
 
1418
        self.assertEqual({}, ann._matching_blocks)
 
1419
 
 
1420
    def test__process_pending(self):
 
1421
        ann = self.make_annotator()
 
1422
        rev_key = ('rev-id',)
 
1423
        p1_key = ('p1-id',)
 
1424
        p2_key = ('p2-id',)
 
1425
        record = ['0,1,1\n', 'new-line\n']
 
1426
        details = ('line-delta', False)
 
1427
        p1_record = ['line1\n', 'line2\n']
 
1428
        ann._num_compression_children[p1_key] = 1
 
1429
        res = ann._expand_record(rev_key, (p1_key,p2_key), p1_key,
 
1430
                                 record, details)
 
1431
        self.assertEqual(None, res)
 
1432
        # self.assertTrue(p1_key in ann._pending_deltas)
 
1433
        self.assertEqual({}, ann._pending_annotation)
 
1434
        # Now insert p1, and we should be able to expand the delta
 
1435
        res = ann._expand_record(p1_key, (), None, p1_record,
 
1436
                                 ('fulltext', False))
 
1437
        self.assertEqual(p1_record, res)
 
1438
        ann._annotations_cache[p1_key] = [(p1_key,)]*2
 
1439
        res = ann._process_pending(p1_key)
 
1440
        self.assertEqual([], res)
 
1441
        self.assertFalse(p1_key in ann._pending_deltas)
 
1442
        self.assertTrue(p2_key in ann._pending_annotation)
 
1443
        self.assertEqual({p2_key: [(rev_key, (p1_key, p2_key))]},
 
1444
                         ann._pending_annotation)
 
1445
        # Now fill in parent 2, and pending annotation should be satisfied
 
1446
        res = ann._expand_record(p2_key, (), None, [], ('fulltext', False))
 
1447
        ann._annotations_cache[p2_key] = []
 
1448
        res = ann._process_pending(p2_key)
 
1449
        self.assertEqual([rev_key], res)
 
1450
        self.assertEqual({}, ann._pending_annotation)
 
1451
        self.assertEqual({}, ann._pending_deltas)
 
1452
 
 
1453
    def test_record_delta_removes_basis(self):
 
1454
        ann = self.make_annotator()
 
1455
        ann._expand_record(('parent-id',), (), None,
 
1456
                           ['line1\n', 'line2\n'], ('fulltext', False))
 
1457
        ann._num_compression_children['parent-id'] = 2
 
1458
 
 
1459
    def test_annotate_special_text(self):
 
1460
        ann = self.make_annotator()
 
1461
        vf = ann._vf
 
1462
        rev1_key = ('rev-1',)
 
1463
        rev2_key = ('rev-2',)
 
1464
        rev3_key = ('rev-3',)
 
1465
        spec_key = ('special:',)
 
1466
        vf.add_lines(rev1_key, [], ['initial content\n'])
 
1467
        vf.add_lines(rev2_key, [rev1_key], ['initial content\n',
 
1468
                                            'common content\n',
 
1469
                                            'content in 2\n'])
 
1470
        vf.add_lines(rev3_key, [rev1_key], ['initial content\n',
 
1471
                                            'common content\n',
 
1472
                                            'content in 3\n'])
 
1473
        spec_text = ('initial content\n'
 
1474
                     'common content\n'
 
1475
                     'content in 2\n'
 
1476
                     'content in 3\n')
 
1477
        ann.add_special_text(spec_key, [rev2_key, rev3_key], spec_text)
 
1478
        anns, lines = ann.annotate(spec_key)
 
1479
        self.assertEqual([(rev1_key,),
 
1480
                          (rev2_key, rev3_key),
 
1481
                          (rev2_key,),
 
1482
                          (rev3_key,),
 
1483
                         ], anns)
 
1484
        self.assertEqualDiff(spec_text, ''.join(lines))
 
1485
 
 
1486
 
917
1487
class KnitTests(TestCaseWithTransport):
918
1488
    """Class containing knit test helper routines."""
919
1489
 
922
1492
        return make_file_factory(annotate, mapper)(self.get_transport())
923
1493
 
924
1494
 
 
1495
class TestBadShaError(KnitTests):
    """Tests for handling of sha errors."""

    def test_sha_exception_has_text(self):
        # The KnitCorrupt error carries the text that failed the sha check,
        # which is what makes recovery possible.
        source = self.make_test_knit()
        target = self.make_test_knit(name="target")
        if not source._max_delta_chain:
            raise TestNotApplicable(
                "cannot get delta-caused sha failures without deltas.")
        basis_key = ('basis',)
        broken_key = ('broken',)
        # The source holds a good basis and a child added on top of it.
        source.add_lines(basis_key, (), ['foo\n'])
        source.add_lines(broken_key, (basis_key,), ['foo\n', 'bar\n'])
        # The target gets a different basis text, then the child copied over
        # from the source without its delta closure.
        target.add_lines(basis_key, (), ['gam\n'])
        child_stream = source.get_record_stream(
            [broken_key], 'unordered', False)
        target.insert_record_stream(child_stream)
        # Fetching the record itself is done outside assertRaises; only the
        # extraction of its bytes is expected to raise.
        broken_record = target.get_record_stream(
            [broken_key], 'unordered', True).next()
        err = self.assertRaises(errors.KnitCorrupt,
                                broken_record.get_bytes_as, 'chunked')
        self.assertEqual(['gam\n', 'bar\n'], err.content)
        # The error must also format cleanly with live data.
        self.assertStartsWith(str(err), "Knit ")
 
1520
 
 
1521
 
925
1522
class TestKnitIndex(KnitTests):
926
1523
 
927
1524
    def test_add_versions_dictionary_compresses(self):
1127
1724
            [('parent',)])])
1128
1725
        # but neither should have added data:
1129
1726
        self.assertEqual([[], [], [], []], self.caught_entries)
1130
 
        
 
1727
 
1131
1728
    def test_add_version_different_dup(self):
1132
1729
        index = self.two_graph_index(deltas=True, catch_adds=True)
1133
1730
        # change options
1134
1731
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1135
 
            [(('tip',), 'no-eol,line-delta', (None, 0, 100), [('parent',)])])
1136
 
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1137
 
            [(('tip',), 'line-delta,no-eol', (None, 0, 100), [('parent',)])])
 
1732
            [(('tip',), 'line-delta', (None, 0, 100), [('parent',)])])
1138
1733
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1139
1734
            [(('tip',), 'fulltext', (None, 0, 100), [('parent',)])])
1140
1735
        # parents
1141
1736
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1142
1737
            [(('tip',), 'fulltext,no-eol', (None, 0, 100), [])])
1143
1738
        self.assertEqual([], self.caught_entries)
1144
 
        
 
1739
 
1145
1740
    def test_add_versions_nodeltas(self):
1146
1741
        index = self.two_graph_index(catch_adds=True)
1147
1742
        index.add_records([
1189
1784
            [('parent',)])])
1190
1785
        # but neither should have added data.
1191
1786
        self.assertEqual([[], [], [], []], self.caught_entries)
1192
 
        
 
1787
 
1193
1788
    def test_add_versions_different_dup(self):
1194
1789
        index = self.two_graph_index(deltas=True, catch_adds=True)
1195
1790
        # change options
1196
1791
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1197
 
            [(('tip',), 'no-eol,line-delta', (None, 0, 100), [('parent',)])])
1198
 
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1199
 
            [(('tip',), 'line-delta,no-eol', (None, 0, 100), [('parent',)])])
 
1792
            [(('tip',), 'line-delta', (None, 0, 100), [('parent',)])])
1200
1793
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1201
1794
            [(('tip',), 'fulltext', (None, 0, 100), [('parent',)])])
1202
1795
        # parents
1205
1798
        # change options in the second record
1206
1799
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1207
1800
            [(('tip',), 'fulltext,no-eol', (None, 0, 100), [('parent',)]),
1208
 
             (('tip',), 'no-eol,line-delta', (None, 0, 100), [('parent',)])])
 
1801
             (('tip',), 'line-delta', (None, 0, 100), [('parent',)])])
1209
1802
        self.assertEqual([], self.caught_entries)
1210
1803
 
 
1804
    def make_g_index_missing_compression_parent(self):
 
1805
        graph_index = self.make_g_index('missing_comp', 2,
 
1806
            [(('tip', ), ' 100 78',
 
1807
              ([('missing-parent', ), ('ghost', )], [('missing-parent', )]))])
 
1808
        return graph_index
 
1809
 
 
1810
    def make_g_index_missing_parent(self):
 
1811
        graph_index = self.make_g_index('missing_parent', 2,
 
1812
            [(('parent', ), ' 100 78', ([], [])),
 
1813
             (('tip', ), ' 100 78',
 
1814
              ([('parent', ), ('missing-parent', )], [('parent', )])),
 
1815
              ])
 
1816
        return graph_index
 
1817
 
 
1818
    def make_g_index_no_external_refs(self):
 
1819
        graph_index = self.make_g_index('no_external_refs', 2,
 
1820
            [(('rev', ), ' 100 78',
 
1821
              ([('parent', ), ('ghost', )], []))])
 
1822
        return graph_index
 
1823
 
 
1824
    def test_add_good_unvalidated_index(self):
 
1825
        unvalidated = self.make_g_index_no_external_refs()
 
1826
        combined = CombinedGraphIndex([unvalidated])
 
1827
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1828
        index.scan_unvalidated_index(unvalidated)
 
1829
        self.assertEqual(frozenset(), index.get_missing_compression_parents())
 
1830
 
 
1831
    def test_add_missing_compression_parent_unvalidated_index(self):
 
1832
        unvalidated = self.make_g_index_missing_compression_parent()
 
1833
        combined = CombinedGraphIndex([unvalidated])
 
1834
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1835
        index.scan_unvalidated_index(unvalidated)
 
1836
        # This also checks that its only the compression parent that is
 
1837
        # examined, otherwise 'ghost' would also be reported as a missing
 
1838
        # parent.
 
1839
        self.assertEqual(
 
1840
            frozenset([('missing-parent',)]),
 
1841
            index.get_missing_compression_parents())
 
1842
 
 
1843
    def test_add_missing_noncompression_parent_unvalidated_index(self):
 
1844
        unvalidated = self.make_g_index_missing_parent()
 
1845
        combined = CombinedGraphIndex([unvalidated])
 
1846
        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
 
1847
            track_external_parent_refs=True)
 
1848
        index.scan_unvalidated_index(unvalidated)
 
1849
        self.assertEqual(
 
1850
            frozenset([('missing-parent',)]), index.get_missing_parents())
 
1851
 
 
1852
    def test_track_external_parent_refs(self):
 
1853
        g_index = self.make_g_index('empty', 2, [])
 
1854
        combined = CombinedGraphIndex([g_index])
 
1855
        index = _KnitGraphIndex(combined, lambda: True, deltas=True,
 
1856
            add_callback=self.catch_add, track_external_parent_refs=True)
 
1857
        self.caught_entries = []
 
1858
        index.add_records([
 
1859
            (('new-key',), 'fulltext,no-eol', (None, 50, 60),
 
1860
             [('parent-1',), ('parent-2',)])])
 
1861
        self.assertEqual(
 
1862
            frozenset([('parent-1',), ('parent-2',)]),
 
1863
            index.get_missing_parents())
 
1864
 
 
1865
    def test_add_unvalidated_index_with_present_external_references(self):
 
1866
        index = self.two_graph_index(deltas=True)
 
1867
        # Ugly hack to get at one of the underlying GraphIndex objects that
 
1868
        # two_graph_index built.
 
1869
        unvalidated = index._graph_index._indices[1]
 
1870
        # 'parent' is an external ref of _indices[1] (unvalidated), but is
 
1871
        # present in _indices[0].
 
1872
        index.scan_unvalidated_index(unvalidated)
 
1873
        self.assertEqual(frozenset(), index.get_missing_compression_parents())
 
1874
 
 
1875
    def make_new_missing_parent_g_index(self, name):
 
1876
        missing_parent = name + '-missing-parent'
 
1877
        graph_index = self.make_g_index(name, 2,
 
1878
            [((name + 'tip', ), ' 100 78',
 
1879
              ([(missing_parent, ), ('ghost', )], [(missing_parent, )]))])
 
1880
        return graph_index
 
1881
 
 
1882
    def test_add_mulitiple_unvalidated_indices_with_missing_parents(self):
 
1883
        g_index_1 = self.make_new_missing_parent_g_index('one')
 
1884
        g_index_2 = self.make_new_missing_parent_g_index('two')
 
1885
        combined = CombinedGraphIndex([g_index_1, g_index_2])
 
1886
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1887
        index.scan_unvalidated_index(g_index_1)
 
1888
        index.scan_unvalidated_index(g_index_2)
 
1889
        self.assertEqual(
 
1890
            frozenset([('one-missing-parent',), ('two-missing-parent',)]),
 
1891
            index.get_missing_compression_parents())
 
1892
 
 
1893
    def test_add_mulitiple_unvalidated_indices_with_mutual_dependencies(self):
 
1894
        graph_index_a = self.make_g_index('one', 2,
 
1895
            [(('parent-one', ), ' 100 78', ([('non-compression-parent',)], [])),
 
1896
             (('child-of-two', ), ' 100 78',
 
1897
              ([('parent-two',)], [('parent-two',)]))])
 
1898
        graph_index_b = self.make_g_index('two', 2,
 
1899
            [(('parent-two', ), ' 100 78', ([('non-compression-parent',)], [])),
 
1900
             (('child-of-one', ), ' 100 78',
 
1901
              ([('parent-one',)], [('parent-one',)]))])
 
1902
        combined = CombinedGraphIndex([graph_index_a, graph_index_b])
 
1903
        index = _KnitGraphIndex(combined, lambda: True, deltas=True)
 
1904
        index.scan_unvalidated_index(graph_index_a)
 
1905
        index.scan_unvalidated_index(graph_index_b)
 
1906
        self.assertEqual(
 
1907
            frozenset([]), index.get_missing_compression_parents())
 
1908
 
1211
1909
 
1212
1910
class TestNoParentsGraphIndexKnit(KnitTests):
1213
1911
    """Tests for knits using _KnitGraphIndex with no parents."""
1221
1919
        size = trans.put_file(name, stream)
1222
1920
        return GraphIndex(trans, name, size)
1223
1921
 
 
1922
    def test_add_good_unvalidated_index(self):
 
1923
        unvalidated = self.make_g_index('unvalidated')
 
1924
        combined = CombinedGraphIndex([unvalidated])
 
1925
        index = _KnitGraphIndex(combined, lambda: True, parents=False)
 
1926
        index.scan_unvalidated_index(unvalidated)
 
1927
        self.assertEqual(frozenset(),
 
1928
            index.get_missing_compression_parents())
 
1929
 
1224
1930
    def test_parents_deltas_incompatible(self):
1225
1931
        index = CombinedGraphIndex([])
1226
1932
        self.assertRaises(errors.KnitError, _KnitGraphIndex, lambda:True,
1307
2013
        index.add_records([(('tip',), 'fulltext,no-eol', (None, 0, 1000), [])])
1308
2014
        # but neither should have added data.
1309
2015
        self.assertEqual([[], [], [], []], self.caught_entries)
1310
 
        
 
2016
 
1311
2017
    def test_add_version_different_dup(self):
1312
2018
        index = self.two_graph_index(catch_adds=True)
1313
2019
        # change options
1321
2027
        self.assertRaises(errors.KnitCorrupt, index.add_records,
1322
2028
            [(('tip',), 'fulltext,no-eol', (None, 0, 100), [('parent',)])])
1323
2029
        self.assertEqual([], self.caught_entries)
1324
 
        
 
2030
 
1325
2031
    def test_add_versions(self):
1326
2032
        index = self.two_graph_index(catch_adds=True)
1327
2033
        index.add_records([
1359
2065
        index.add_records([(('tip',), 'fulltext,no-eol', (None, 0, 1000), [])])
1360
2066
        # but neither should have added data.
1361
2067
        self.assertEqual([[], [], [], []], self.caught_entries)
1362
 
        
 
2068
 
1363
2069
    def test_add_versions_different_dup(self):
1364
2070
        index = self.two_graph_index(catch_adds=True)
1365
2071
        # change options
1379
2085
        self.assertEqual([], self.caught_entries)
1380
2086
 
1381
2087
 
 
2088
class TestKnitVersionedFiles(KnitTests):
    """Tests for the key-grouping helpers of KnitVersionedFiles."""

    def assertGroupKeysForIo(self, exp_groups, keys, non_local_keys,
                             positions, _min_buffer_size=None):
        """Assert that _group_keys_for_io on a fresh knit gives exp_groups.

        :param exp_groups: Expected return value of _group_keys_for_io.
        :param keys: Passed through as the keys to group.
        :param non_local_keys: Passed through unchanged.
        :param positions: Passed through unchanged.
        :param _min_buffer_size: Passed through as the keyword of the same
            name; None selects knit._STREAM_MIN_BUFFER_SIZE.
        """
        kvf = self.make_test_knit()
        if _min_buffer_size is None:
            _min_buffer_size = knit._STREAM_MIN_BUFFER_SIZE
        groups = kvf._group_keys_for_io(keys, non_local_keys, positions,
                                        _min_buffer_size=_min_buffer_size)
        self.assertEqual(exp_groups, groups)

    def assertSplitByPrefix(self, expected_map, expected_prefix_order,
                            keys):
        """Assert the result of KnitVersionedFiles._split_by_prefix(keys).

        :param expected_map: Expected first element of the result (a dict
            from prefix to list of keys).
        :param expected_prefix_order: Expected second element of the result
            (a list of prefixes).
        :param keys: The keys handed to _split_by_prefix.
        """
        split, prefix_order = KnitVersionedFiles._split_by_prefix(keys)
        self.assertEqual(expected_map, split)
        self.assertEqual(expected_prefix_order, prefix_order)

    def test__group_keys_for_io(self):
        ft_detail = ('fulltext', False)
        ld_detail = ('line-delta', False)
        f_a = ('f', 'a')
        f_b = ('f', 'b')
        f_c = ('f', 'c')
        g_a = ('g', 'a')
        g_b = ('g', 'b')
        g_c = ('g', 'c')
        # Each entry looks like (details, (key, start, length), basis key)
        # -- NOTE(review): field meanings inferred from the values; confirm
        # against _group_keys_for_io.
        positions = {
            f_a: (ft_detail, (f_a, 0, 100), None),
            f_b: (ld_detail, (f_b, 100, 21), f_a),
            f_c: (ld_detail, (f_c, 180, 15), f_b),
            g_a: (ft_detail, (g_a, 121, 35), None),
            g_b: (ld_detail, (g_b, 156, 12), g_a),
            g_c: (ld_detail, (g_c, 195, 13), g_a),
            }
        # Every expected group is a (keys, non-local keys in group) pair.
        self.assertGroupKeysForIo([([f_a], set())],
                                  [f_a], [], positions)
        self.assertGroupKeysForIo([([f_a], set([f_a]))],
                                  [f_a], [f_a], positions)
        self.assertGroupKeysForIo([([f_a, f_b], set([]))],
                                  [f_a, f_b], [], positions)
        self.assertGroupKeysForIo([([f_a, f_b], set([f_b]))],
                                  [f_a, f_b], [f_b], positions)
        # Interleaved prefixes come back gathered per prefix.
        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
                                  [f_a, g_a, f_b, g_b], [], positions)
        self.assertGroupKeysForIo([([f_a, f_b, g_a, g_b], set())],
                                  [f_a, g_a, f_b, g_b], [], positions,
                                  _min_buffer_size=150)
        # A smaller minimum buffer size splits the prefixes into two groups.
        self.assertGroupKeysForIo([([f_a, f_b], set()), ([g_a, g_b], set())],
                                  [f_a, g_a, f_b, g_b], [], positions,
                                  _min_buffer_size=100)
        # The order in which the prefixes are requested affects grouping.
        self.assertGroupKeysForIo([([f_c], set()), ([g_b], set())],
                                  [f_c, g_b], [], positions,
                                  _min_buffer_size=125)
        self.assertGroupKeysForIo([([g_b, f_c], set())],
                                  [g_b, f_c], [], positions,
                                  _min_buffer_size=125)

    def test__split_by_prefix(self):
        # Interleaved prefixes: keys are gathered per prefix, keeping the
        # order in which each prefix and each key was first seen.
        self.assertSplitByPrefix({'f': [('f', 'a'), ('f', 'b')],
                                  'g': [('g', 'b'), ('g', 'a')],
                                 }, ['f', 'g'],
                                 [('f', 'a'), ('g', 'b'),
                                  ('g', 'a'), ('f', 'b')])

        # Keys that are already grouped by prefix.  (This case used to be
        # asserted twice, verbatim; the duplicate has been removed.)
        self.assertSplitByPrefix({'f': [('f', 'a'), ('f', 'b')],
                                  'g': [('g', 'b'), ('g', 'a')],
                                 }, ['f', 'g'],
                                 [('f', 'a'), ('f', 'b'),
                                  ('g', 'b'), ('g', 'a')])

        # Length-one keys are collected under the '' prefix.
        self.assertSplitByPrefix({'f': [('f', 'a'), ('f', 'b')],
                                  'g': [('g', 'b'), ('g', 'a')],
                                  '': [('a',), ('b',)]
                                 }, ['f', 'g', ''],
                                 [('f', 'a'), ('g', 'b'),
                                  ('a',), ('b',),
                                  ('g', 'a'), ('f', 'b')])
 
2171
 
 
2172
 
1382
2173
class TestStacking(KnitTests):
1383
2174
 
1384
2175
    def get_basis_and_test_knit(self):
1410
2201
        basis.calls = []
1411
2202
        test.add_lines(key_cross_border, (key_basis,), ['foo\n'])
1412
2203
        self.assertEqual('fulltext', test._index.get_method(key_cross_border))
1413
 
        self.assertEqual([("get_parent_map", set([key_basis]))], basis.calls)
 
2204
        # we don't even need to look at the basis to see that this should be
 
2205
        # stored as a fulltext
 
2206
        self.assertEqual([], basis.calls)
1414
2207
        # Subsequent adds do delta.
1415
2208
        basis.calls = []
1416
2209
        test.add_lines(key_delta, (key_cross_border,), ['foo\n'])
1437
2230
        # self.assertEqual([("annotate", key_basis)], basis.calls)
1438
2231
        self.assertEqual([('get_parent_map', set([key_basis])),
1439
2232
            ('get_parent_map', set([key_basis])),
1440
 
            ('get_parent_map', set([key_basis])),
1441
 
            ('get_record_stream', [key_basis], 'unordered', True)],
 
2233
            ('get_record_stream', [key_basis], 'topological', True)],
1442
2234
            basis.calls)
1443
2235
 
1444
2236
    def test_check(self):
1445
2237
        # At the moment checking a stacked knit does implicitly check the
1446
 
        # fallback files.  
 
2238
        # fallback files.
1447
2239
        basis, test = self.get_basis_and_test_knit()
1448
2240
        test.check()
1449
2241
 
1541
2333
                True).next()
1542
2334
            self.assertEqual(record.key, result[0])
1543
2335
            self.assertEqual(record.sha1, result[1])
1544
 
            self.assertEqual(record.storage_kind, result[2])
 
2336
            # We used to check that the storage kind matched, but actually it
 
2337
            # depends on whether it was sourced from the basis, or in a single
 
2338
            # group, because asking for full texts returns proxy objects to a
 
2339
            # _ContentMapGenerator object; so checking the kind is unneeded.
1545
2340
            self.assertEqual(record.get_bytes_as('fulltext'), result[3])
1546
2341
        # It's not strictly minimal, but it seems reasonable for now for it to
1547
2342
        # ask which fallbacks have which parents.
1548
2343
        self.assertEqual([
1549
2344
            ("get_parent_map", set([key_basis, key_basis_2, key_missing])),
1550
 
            # unordered is asked for by the underlying worker as it still
1551
 
            # buffers everything while answering - which is a problem!
1552
 
            ("get_record_stream", [key_basis_2, key_basis], 'unordered', True)],
 
2345
            # topological is requested from the fallback, because that is what
 
2346
            # was requested at the top level.
 
2347
            ("get_record_stream", [key_basis_2, key_basis], 'topological', True)],
1553
2348
            calls)
1554
2349
 
1555
2350
    def test_get_record_stream_unordered_deltas(self):
1641
2436
        key_basis = ('bar',)
1642
2437
        key_missing = ('missing',)
1643
2438
        test.add_lines(key, (), ['foo\n'])
1644
 
        key_sha1sum = sha.new('foo\n').hexdigest()
 
2439
        key_sha1sum = osutils.sha('foo\n').hexdigest()
1645
2440
        sha1s = test.get_sha1s([key])
1646
2441
        self.assertEqual({key: key_sha1sum}, sha1s)
1647
2442
        self.assertEqual([], basis.calls)
1649
2444
        # directly (rather than via text reconstruction) so that remote servers
1650
2445
        # etc don't have to answer with full content.
1651
2446
        basis.add_lines(key_basis, (), ['foo\n', 'bar\n'])
1652
 
        basis_sha1sum = sha.new('foo\nbar\n').hexdigest()
 
2447
        basis_sha1sum = osutils.sha('foo\nbar\n').hexdigest()
1653
2448
        basis.calls = []
1654
2449
        sha1s = test.get_sha1s([key, key_missing, key_basis])
1655
2450
        self.assertEqual({key: key_sha1sum,
1671
2466
        source.add_lines(key_delta, (key_basis,), ['bar\n'])
1672
2467
        stream = source.get_record_stream([key_delta], 'unordered', False)
1673
2468
        test.insert_record_stream(stream)
1674
 
        self.assertEqual([("get_parent_map", set([key_basis]))],
 
2469
        # XXX: this does somewhat too many calls in making sure of whether it
 
2470
        # has to recreate the full text.
 
2471
        self.assertEqual([("get_parent_map", set([key_basis])),
 
2472
             ('get_parent_map', set([key_basis])),
 
2473
             ('get_record_stream', [key_basis], 'unordered', True)],
1675
2474
            basis.calls)
1676
2475
        self.assertEqual({key_delta:(key_basis,)},
1677
2476
            test.get_parent_map([key_delta]))
1680
2479
 
1681
2480
    def test_iter_lines_added_or_present_in_keys(self):
1682
2481
        # Lines from the basis are returned, and lines for a given key are only
1683
 
        # returned once. 
 
2482
        # returned once.
1684
2483
        key1 = ('foo1',)
1685
2484
        key2 = ('foo2',)
1686
2485
        # all sources are asked for keys:
1738
2537
        test.add_mpdiffs([(key_delta, (key_basis,),
1739
2538
            source.get_sha1s([key_delta])[key_delta], diffs[0])])
1740
2539
        self.assertEqual([("get_parent_map", set([key_basis])),
1741
 
            ('get_record_stream', [key_basis], 'unordered', True),
1742
 
            ('get_parent_map', set([key_basis]))],
 
2540
            ('get_record_stream', [key_basis], 'unordered', True),],
1743
2541
            basis.calls)
1744
2542
        self.assertEqual({key_delta:(key_basis,)},
1745
2543
            test.get_parent_map([key_delta]))
1764
2562
                multiparent.NewText(['foo\n']),
1765
2563
                multiparent.ParentText(1, 0, 2, 1)])],
1766
2564
            diffs)
1767
 
        self.assertEqual(4, len(basis.calls))
 
2565
        self.assertEqual(3, len(basis.calls))
1768
2566
        self.assertEqual([
1769
2567
            ("get_parent_map", set([key_left, key_right])),
1770
2568
            ("get_parent_map", set([key_left, key_right])),
1771
 
            ("get_parent_map", set([key_left, key_right])),
1772
2569
            ],
1773
 
            basis.calls[:3])
1774
 
        last_call = basis.calls[3]
 
2570
            basis.calls[:-1])
 
2571
        last_call = basis.calls[-1]
1775
2572
        self.assertEqual('get_record_stream', last_call[0])
1776
2573
        self.assertEqual(set([key_left, key_right]), set(last_call[1]))
1777
 
        self.assertEqual('unordered', last_call[2])
 
2574
        self.assertEqual('topological', last_call[2])
1778
2575
        self.assertEqual(True, last_call[3])
 
2576
 
 
2577
 
 
2578
class TestNetworkBehaviour(KnitTests):
    """Tests for getting data out of/into knits over the network."""

    def test_include_delta_closure_generates_a_knit_delta_closure(self):
        vf = self.make_test_knit(name='test')
        # Three texts, intended to be stored as fulltext, delta, delta.
        vf.add_lines(('base',), (), ['base\n', 'content\n'])
        vf.add_lines(('d1',), (('base',),), ['d1\n'])
        vf.add_lines(('d2',), (('d1',),), ['d2\n'])
        # Storage heuristics could have decided otherwise, so verify what
        # was actually written.
        stored_kinds = []
        all_keys = [('base',), ('d1',), ('d2',)]
        for record in vf.get_record_stream(all_keys, 'topological', False):
            stored_kinds.append(record.storage_kind)
        self.assertEqual(['knit-ft-gz', 'knit-delta-gz', 'knit-delta-gz'],
                         stored_kinds)
        # Stream only the deltas with include_delta_closure=True and
        # serialise every record in its own storage kind; the wire form
        # should then carry a delta closure.
        stream = vf.get_record_stream([('d1',), ('d2',)], 'topological', True)
        wire_bytes = []
        for record in stream:
            wire_bytes.append(record.get_bytes_as(record.storage_kind))
        # The first record carries a memo from the _ContentMapGenerator; the
        # second serialises to nothing, because it is an API proxy rather
        # than something meant for wire serialisation.
        self.assertEqual('', wire_bytes[1])
        first_bytes = wire_bytes[0]
        kind, _ = network_bytes_to_kind_and_offset(first_bytes)
        self.assertEqual('knit-delta-closure', kind)
 
2603
 
 
2604
 
 
2605
class TestContentMapGenerator(KnitTests):
 
2606
    """Tests for ContentMapGenerator"""
 
2607
 
 
2608
    def test_get_record_stream_gives_records(self):
 
2609
        vf = self.make_test_knit(name='test')
 
2610
        # put in three texts, giving ft, delta, delta
 
2611
        vf.add_lines(('base',), (), ['base\n', 'content\n'])
 
2612
        vf.add_lines(('d1',), (('base',),), ['d1\n'])
 
2613
        vf.add_lines(('d2',), (('d1',),), ['d2\n'])
 
2614
        keys = [('d1',), ('d2',)]
 
2615
        generator = _VFContentMapGenerator(vf, keys,
 
2616
            global_map=vf.get_parent_map(keys))
 
2617
        for record in generator.get_record_stream():
 
2618
            if record.key == ('d1',):
 
2619
                self.assertEqual('d1\n', record.get_bytes_as('fulltext'))
 
2620
            else:
 
2621
                self.assertEqual('d2\n', record.get_bytes_as('fulltext'))
 
2622
 
 
2623
    def test_get_record_stream_kinds_are_raw(self):
 
2624
        vf = self.make_test_knit(name='test')
 
2625
        # put in three texts, giving ft, delta, delta
 
2626
        vf.add_lines(('base',), (), ['base\n', 'content\n'])
 
2627
        vf.add_lines(('d1',), (('base',),), ['d1\n'])
 
2628
        vf.add_lines(('d2',), (('d1',),), ['d2\n'])
 
2629
        keys = [('base',), ('d1',), ('d2',)]
 
2630
        generator = _VFContentMapGenerator(vf, keys,
 
2631
            global_map=vf.get_parent_map(keys))
 
2632
        kinds = {('base',): 'knit-delta-closure',
 
2633
            ('d1',): 'knit-delta-closure-ref',
 
2634
            ('d2',): 'knit-delta-closure-ref',
 
2635
            }
 
2636
        for record in generator.get_record_stream():
 
2637
            self.assertEqual(kinds[record.key], record.storage_kind)