~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: John Arbash Meinel
  • Date: 2009-03-11 07:42:37 UTC
  • mto: (3735.2.143 brisbane-core)
  • mto: This revision was merged to the branch mainline in revision 4280.
  • Revision ID: john@arbash-meinel.com-20090311074237-6lzsbd3r0tmbofjz
Continue trying to reduce the friction for the groupcompress repo formats.

Show diffs side-by-side

added

removed

Lines of Context:
20
20
 
21
21
from bzrlib import (
22
22
    chk_map,
 
23
    chk_serializer,
23
24
    debug,
24
25
    errors,
25
26
    inventory,
31
32
    ui,
32
33
    )
33
34
from bzrlib.index import GraphIndex, GraphIndexBuilder
34
 
from bzrlib.repository import InterPackRepo
35
35
from bzrlib.groupcompress import (
36
36
    _GCGraphIndex,
37
37
    GroupCompressVersionedFiles,
43
43
    RepositoryPackCollection,
44
44
    RepositoryFormatKnitPack6,
45
45
    Packer,
46
 
    )
47
 
try:
48
 
    from bzrlib.repofmt.pack_repo import (
49
46
    CHKInventoryRepository,
50
 
    RepositoryFormatPackDevelopment5,
51
47
    RepositoryFormatPackDevelopment5Hash16,
52
 
##    RepositoryFormatPackDevelopment5Hash16b,
53
 
##    RepositoryFormatPackDevelopment5Hash63,
54
 
##    RepositoryFormatPackDevelopment5Hash127a,
55
 
##    RepositoryFormatPackDevelopment5Hash127b,
56
48
    RepositoryFormatPackDevelopment5Hash255,
57
49
    )
58
 
    from bzrlib import 
59
 
    chk_support = True
60
 
except ImportError:
61
 
    chk_support = False
62
50
 
63
51
 
64
52
def open_pack(self):
89
77
        # The relative locations of the packs are constrained, but all are
90
78
        # passed in because the caller has them, so as to avoid object churn.
91
79
        index_builder_class = pack_collection._index_builder_class
92
 
        if chk_support:
93
 
            # from brisbane-core
94
 
            if pack_collection.chk_index is not None:
95
 
                chk_index = index_builder_class(reference_lists=0)
96
 
            else:
97
 
                chk_index = None
98
 
            Pack.__init__(self,
99
 
                # Revisions: parents list, no text compression.
100
 
                index_builder_class(reference_lists=1),
101
 
                # Inventory: We want to map compression only, but currently the
102
 
                # knit code hasn't been updated enough to understand that, so we
103
 
                # have a regular 2-list index giving parents and compression
104
 
                # source.
105
 
                index_builder_class(reference_lists=1),
106
 
                # Texts: compression and per file graph, for all fileids - so two
107
 
                # reference lists and two elements in the key tuple.
108
 
                index_builder_class(reference_lists=1, key_elements=2),
109
 
                # Signatures: Just blobs to store, no compression, no parents
110
 
                # listing.
111
 
                index_builder_class(reference_lists=0),
112
 
                # CHK based storage - just blobs, no compression or parents.
113
 
                chk_index=chk_index
114
 
                )
 
80
        # from brisbane-core
 
81
        if pack_collection.chk_index is not None:
 
82
            chk_index = index_builder_class(reference_lists=0)
115
83
        else:
116
 
            # from bzr.dev
117
 
            Pack.__init__(self,
118
 
                # Revisions: parents list, no text compression.
119
 
                index_builder_class(reference_lists=1),
120
 
                # Inventory: compressed, with graph for compatibility with other
121
 
                # existing bzrlib code.
122
 
                index_builder_class(reference_lists=1),
123
 
                # Texts: per file graph:
124
 
                index_builder_class(reference_lists=1, key_elements=2),
125
 
                # Signatures: Just blobs to store, no compression, no parents
126
 
                # listing.
127
 
                index_builder_class(reference_lists=0),
128
 
                )
 
84
            chk_index = None
 
85
        Pack.__init__(self,
 
86
            # Revisions: parents list, no text compression.
 
87
            index_builder_class(reference_lists=1),
 
88
            # Inventory: We want to map compression only, but currently the
 
89
            # knit code hasn't been updated enough to understand that, so we
 
90
            # have a regular 2-list index giving parents and compression
 
91
            # source.
 
92
            index_builder_class(reference_lists=1),
 
93
            # Texts: compression and per file graph, for all fileids - so two
 
94
            # reference lists and two elements in the key tuple.
 
95
            index_builder_class(reference_lists=1, key_elements=2),
 
96
            # Signatures: Just blobs to store, no compression, no parents
 
97
            # listing.
 
98
            index_builder_class(reference_lists=0),
 
99
            # CHK based storage - just blobs, no compression or parents.
 
100
            chk_index=chk_index
 
101
            )
129
102
        self._pack_collection = pack_collection
130
103
        # When we make readonly indices, we need this.
131
104
        self.index_class = pack_collection._index_class
208
181
            self._new_pack)
209
182
        self.signature_index.add_writable_index(self._new_pack.signature_index,
210
183
            self._new_pack)
211
 
        if chk_support and self.chk_index is not None:
 
184
        if self.chk_index is not None:
212
185
            self.chk_index.add_writable_index(self._new_pack.chk_index,
213
186
                self._new_pack)
214
187
            self.repo.chk_bytes._index._add_callback = self.chk_index.add_callback
426
399
class GCPackRepository(KnitPackRepository):
427
400
    """GC customisation of KnitPackRepository."""
428
401
 
429
 
    # Note: I think the CHK support can be dropped from this class as it's
430
 
    # implemented via the GCCHKPackRepository class defined next. IGC 20090301
431
 
 
432
402
    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
433
403
        _serializer):
434
404
        """Overridden to change pack collection class."""
436
406
            _commit_builder_class, _serializer)
437
407
        # and now replace everything it did :)
438
408
        index_transport = self._transport.clone('indices')
439
 
        if chk_support:
440
 
            self._pack_collection = GCRepositoryPackCollection(self,
441
 
                self._transport, index_transport,
442
 
                self._transport.clone('upload'),
443
 
                self._transport.clone('packs'),
444
 
                _format.index_builder_class,
445
 
                _format.index_class,
446
 
                use_chk_index=self._format.supports_chks,
447
 
                )
448
 
        else:
449
 
            self._pack_collection = GCRepositoryPackCollection(self,
450
 
                self._transport, index_transport,
451
 
                self._transport.clone('upload'),
452
 
                self._transport.clone('packs'),
453
 
                _format.index_builder_class,
454
 
                _format.index_class)
 
409
        self._pack_collection = GCRepositoryPackCollection(self,
 
410
            self._transport, index_transport,
 
411
            self._transport.clone('upload'),
 
412
            self._transport.clone('packs'),
 
413
            _format.index_builder_class,
 
414
            _format.index_class,
 
415
            use_chk_index=self._format.supports_chks,
 
416
            )
455
417
        self.inventories = GroupCompressVersionedFiles(
456
418
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
457
419
                add_callback=self._pack_collection.inventory_index.add_callback,
474
436
                add_callback=self._pack_collection.text_index.add_callback,
475
437
                parents=True, is_locked=self.is_locked),
476
438
            access=self._pack_collection.text_index.data_access)
477
 
        if chk_support and _format.supports_chks:
 
439
        if _format.supports_chks:
478
440
            # No graph, no compression:- references from chks are between
479
441
            # different objects not temporal versions of the same; and without
480
442
            # some sort of temporal structure knit compression will just fail.
498
460
        self._reconcile_backsup_inventory = False
499
461
 
500
462
 
501
 
if chk_support:
502
 
    class GCCHKPackRepository(CHKInventoryRepository):
503
 
        """GC customisation of CHKInventoryRepository."""
 
463
class GCCHKPackRepository(CHKInventoryRepository):
 
464
    """GC customisation of CHKInventoryRepository."""
504
465
 
505
 
        def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
506
 
            _serializer):
507
 
            """Overridden to change pack collection class."""
508
 
            KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
509
 
                _commit_builder_class, _serializer)
510
 
            # and now replace everything it did :)
511
 
            index_transport = self._transport.clone('indices')
512
 
            self._pack_collection = GCRepositoryPackCollection(self,
513
 
                self._transport, index_transport,
514
 
                self._transport.clone('upload'),
515
 
                self._transport.clone('packs'),
516
 
                _format.index_builder_class,
517
 
                _format.index_class,
518
 
                use_chk_index=self._format.supports_chks,
519
 
                )
520
 
            self.inventories = GroupCompressVersionedFiles(
521
 
                _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
522
 
                    add_callback=self._pack_collection.inventory_index.add_callback,
523
 
                    parents=True, is_locked=self.is_locked),
524
 
                access=self._pack_collection.inventory_index.data_access)
525
 
            self.revisions = GroupCompressVersionedFiles(
526
 
                _GCGraphIndex(self._pack_collection.revision_index.combined_index,
527
 
                    add_callback=self._pack_collection.revision_index.add_callback,
528
 
                    parents=True, is_locked=self.is_locked),
529
 
                access=self._pack_collection.revision_index.data_access,
530
 
                delta=False)
531
 
            self.signatures = GroupCompressVersionedFiles(
532
 
                _GCGraphIndex(self._pack_collection.signature_index.combined_index,
533
 
                    add_callback=self._pack_collection.signature_index.add_callback,
534
 
                    parents=False, is_locked=self.is_locked),
535
 
                access=self._pack_collection.signature_index.data_access,
536
 
                delta=False)
537
 
            self.texts = GroupCompressVersionedFiles(
538
 
                _GCGraphIndex(self._pack_collection.text_index.combined_index,
539
 
                    add_callback=self._pack_collection.text_index.add_callback,
540
 
                    parents=True, is_locked=self.is_locked),
541
 
                access=self._pack_collection.text_index.data_access)
542
 
            # No parents, individual CHK pages don't have specific ancestry
543
 
            self.chk_bytes = GroupCompressVersionedFiles(
544
 
                _GCGraphIndex(self._pack_collection.chk_index.combined_index,
545
 
                    add_callback=self._pack_collection.chk_index.add_callback,
546
 
                    parents=False, is_locked=self.is_locked),
547
 
                access=self._pack_collection.chk_index.data_access)
548
 
            # True when the repository object is 'write locked' (as opposed to the
549
 
            # physical lock only taken out around changes to the pack-names list.)
550
 
            # Another way to represent this would be a decorator around the control
551
 
            # files object that presents logical locks as physical ones - if this
552
 
            # gets ugly consider that alternative design. RBC 20071011
553
 
            self._write_lock_count = 0
554
 
            self._transaction = None
555
 
            # for tests
556
 
            self._reconcile_does_inventory_gc = True
557
 
            self._reconcile_fixes_text_parents = True
558
 
            self._reconcile_backsup_inventory = False
 
466
    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
 
467
        _serializer):
 
468
        """Overridden to change pack collection class."""
 
469
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
 
470
            _commit_builder_class, _serializer)
 
471
        # and now replace everything it did :)
 
472
        index_transport = self._transport.clone('indices')
 
473
        self._pack_collection = GCRepositoryPackCollection(self,
 
474
            self._transport, index_transport,
 
475
            self._transport.clone('upload'),
 
476
            self._transport.clone('packs'),
 
477
            _format.index_builder_class,
 
478
            _format.index_class,
 
479
            use_chk_index=self._format.supports_chks,
 
480
            )
 
481
        self.inventories = GroupCompressVersionedFiles(
 
482
            _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
 
483
                add_callback=self._pack_collection.inventory_index.add_callback,
 
484
                parents=True, is_locked=self.is_locked),
 
485
            access=self._pack_collection.inventory_index.data_access)
 
486
        self.revisions = GroupCompressVersionedFiles(
 
487
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
 
488
                add_callback=self._pack_collection.revision_index.add_callback,
 
489
                parents=True, is_locked=self.is_locked),
 
490
            access=self._pack_collection.revision_index.data_access,
 
491
            delta=False)
 
492
        self.signatures = GroupCompressVersionedFiles(
 
493
            _GCGraphIndex(self._pack_collection.signature_index.combined_index,
 
494
                add_callback=self._pack_collection.signature_index.add_callback,
 
495
                parents=False, is_locked=self.is_locked),
 
496
            access=self._pack_collection.signature_index.data_access,
 
497
            delta=False)
 
498
        self.texts = GroupCompressVersionedFiles(
 
499
            _GCGraphIndex(self._pack_collection.text_index.combined_index,
 
500
                add_callback=self._pack_collection.text_index.add_callback,
 
501
                parents=True, is_locked=self.is_locked),
 
502
            access=self._pack_collection.text_index.data_access)
 
503
        # No parents, individual CHK pages don't have specific ancestry
 
504
        self.chk_bytes = GroupCompressVersionedFiles(
 
505
            _GCGraphIndex(self._pack_collection.chk_index.combined_index,
 
506
                add_callback=self._pack_collection.chk_index.add_callback,
 
507
                parents=False, is_locked=self.is_locked),
 
508
            access=self._pack_collection.chk_index.data_access)
 
509
        # True when the repository object is 'write locked' (as opposed to the
 
510
        # physical lock only taken out around changes to the pack-names list.)
 
511
        # Another way to represent this would be a decorator around the control
 
512
        # files object that presents logical locks as physical ones - if this
 
513
        # gets ugly consider that alternative design. RBC 20071011
 
514
        self._write_lock_count = 0
 
515
        self._transaction = None
 
516
        # for tests
 
517
        self._reconcile_does_inventory_gc = True
 
518
        self._reconcile_fixes_text_parents = True
 
519
        self._reconcile_backsup_inventory = False
559
520
 
560
521
 
561
522
class RepositoryFormatPackGCPlain(RepositoryFormatKnitPack6):
570
531
    # multiple in-a-row (and sharing strings). Topological is better
571
532
    # for remote, because we access less data.
572
533
    _fetch_order = 'unordered'
573
 
    _fetch_gc_optimal = True
574
534
    _fetch_uses_deltas = False
575
535
 
576
536
    def get_format_string(self):
584
544
            ", interoperates with pack-0.92\n")
585
545
 
586
546
 
587
 
if chk_support:
588
 
    from bzrlib import chk_serializer
589
 
    class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
590
 
        """A hashed CHK+group compress pack repository."""
591
 
 
592
 
        repository_class = GCCHKPackRepository
593
 
        rich_root_data = True
594
 
        # Note: We cannot unpack a delta that references a text we haven't
595
 
        # seen yet. There are 2 options, work in fulltexts, or require
596
 
        # topological sorting. Using fulltexts is more optimal for local
597
 
        # operations, because the source can be smart about extracting
598
 
        # multiple in-a-row (and sharing strings). Topological is better
599
 
        # for remote, because we access less data.
600
 
        _fetch_order = 'unordered'
601
 
        _fetch_gc_optimal = True
602
 
        _fetch_uses_deltas = False
603
 
 
604
 
        def get_format_string(self):
605
 
            """See RepositoryFormat.get_format_string()."""
606
 
            return ('Bazaar development format - hash16chk+gc rich-root'
607
 
                    ' (needs bzr.dev from 1.13)\n')
608
 
 
609
 
        def get_format_description(self):
610
 
            """See RepositoryFormat.get_format_description()."""
611
 
            return ("Development repository format - hash16chk+groupcompress")
612
 
 
613
 
 
614
 
    class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
615
 
        """A hashed CHK+group compress pack repository."""
616
 
 
617
 
        repository_class = GCCHKPackRepository
618
 
        # Setting this to True causes us to use InterModel1And2, so for now set
619
 
        # it to False which uses InterDifferingSerializer. When IM1&2 is
620
 
        # removed (as it is in bzr.dev) we can set this back to True.
621
 
        rich_root_data = True
622
 
 
623
 
        def get_format_string(self):
624
 
            """See RepositoryFormat.get_format_string()."""
625
 
            return ('Bazaar development format - hash255chk+gc rich-root'
626
 
                    ' (needs bzr.dev from 1.13)\n')
627
 
 
628
 
        def get_format_description(self):
629
 
            """See RepositoryFormat.get_format_description()."""
630
 
            return ("Development repository format - hash255chk+groupcompress")
631
 
 
632
 
 
633
 
    chk_serializer_255_bigpage = chk_serializer.CHKSerializer(65536, 'hash-255-way')
634
 
    class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
635
 
        """A hashed CHK+group compress pack repository."""
636
 
 
637
 
        repository_class = GCCHKPackRepository
638
 
        # For right now, setting this to True gives us InterModel1And2 rather
639
 
        # than InterDifferingSerializer
640
 
        rich_root_data = True
641
 
        _serializer = chk_serializer_255_bigpage
642
 
        # Note: We cannot unpack a delta that references a text we haven't
643
 
        # seen yet. There are 2 options, work in fulltexts, or require
644
 
        # topological sorting. Using fulltexts is more optimal for local
645
 
        # operations, because the source can be smart about extracting
646
 
        # multiple in-a-row (and sharing strings). Topological is better
647
 
        # for remote, because we access less data.
648
 
        _fetch_order = 'unordered'
649
 
        _fetch_gc_optimal = True
650
 
        _fetch_uses_deltas = False
651
 
 
652
 
        def get_format_string(self):
653
 
            """See RepositoryFormat.get_format_string()."""
654
 
            return ('Bazaar development format - hash255chk+gc rich-root bigpage'
655
 
                    ' (needs bzr.dev from 1.13)\n')
656
 
 
657
 
        def get_format_description(self):
658
 
            """See RepositoryFormat.get_format_description()."""
659
 
            return ("Development repository format - hash255chk+groupcompress + bigpage")
660
 
 
661
 
 
662
 
def pack_incompatible(source, target, orig_method=InterPackRepo.is_compatible):
663
 
    """Be incompatible with the regular fetch code."""
664
 
    formats = (RepositoryFormatPackGCPlain,)
665
 
    if chk_support:
666
 
        formats = formats + (RepositoryFormatPackGCCHK16,
667
 
                             RepositoryFormatPackGCCHK255,
668
 
                             RepositoryFormatPackGCCHK255Big)
669
 
    if isinstance(source._format, formats) or isinstance(target._format, formats):
670
 
        return False
671
 
    else:
672
 
        return orig_method(source, target)
673
 
 
674
 
 
675
 
InterPackRepo.is_compatible = staticmethod(pack_incompatible)
 
547
class RepositoryFormatPackGCCHK16(RepositoryFormatPackDevelopment5Hash16):
 
548
    """A hashed CHK+group compress pack repository."""
 
549
 
 
550
    repository_class = GCCHKPackRepository
 
551
    rich_root_data = True
 
552
    supports_external_lookups = True
 
553
    supports_tree_reference = True
 
554
    supports_chks = True
 
555
    # Note: We cannot unpack a delta that references a text we haven't
 
556
    # seen yet. There are 2 options, work in fulltexts, or require
 
557
    # topological sorting. Using fulltexts is more optimal for local
 
558
    # operations, because the source can be smart about extracting
 
559
    # multiple in-a-row (and sharing strings). Topological is better
 
560
    # for remote, because we access less data.
 
561
    _fetch_order = 'unordered'
 
562
    _fetch_uses_deltas = False
 
563
 
 
564
    def get_format_string(self):
 
565
        """See RepositoryFormat.get_format_string()."""
 
566
        return ('Bazaar development format - hash16chk+gc rich-root'
 
567
                ' (needs bzr.dev from 1.13)\n')
 
568
 
 
569
    def get_format_description(self):
 
570
        """See RepositoryFormat.get_format_description()."""
 
571
        return ("Development repository format - hash16chk+groupcompress")
 
572
 
 
573
 
 
574
class RepositoryFormatPackGCCHK255(RepositoryFormatPackDevelopment5Hash255):
 
575
    """A hashed CHK+group compress pack repository."""
 
576
 
 
577
    repository_class = GCCHKPackRepository
 
578
    supports_chks = True
 
579
    # Setting this to True causes us to use InterModel1And2, so for now set
 
580
    # it to False which uses InterDifferingSerializer. When IM1&2 is
 
581
    # removed (as it is in bzr.dev) we can set this back to True.
 
582
    rich_root_data = True
 
583
 
 
584
    def get_format_string(self):
 
585
        """See RepositoryFormat.get_format_string()."""
 
586
        return ('Bazaar development format - hash255chk+gc rich-root'
 
587
                ' (needs bzr.dev from 1.13)\n')
 
588
 
 
589
    def get_format_description(self):
 
590
        """See RepositoryFormat.get_format_description()."""
 
591
        return ("Development repository format - hash255chk+groupcompress")
 
592
 
 
593
 
 
594
class RepositoryFormatPackGCCHK255Big(RepositoryFormatPackGCCHK255):
 
595
    """A hashed CHK+group compress pack repository."""
 
596
 
 
597
    repository_class = GCCHKPackRepository
 
598
    supports_chks = True
 
599
    # For right now, setting this to True gives us InterModel1And2 rather
 
600
    # than InterDifferingSerializer
 
601
    rich_root_data = True
 
602
    _serializer = chk_serializer.chk_serializer_255_bigpage
 
603
    # Note: We cannot unpack a delta that references a text we haven't
 
604
    # seen yet. There are 2 options, work in fulltexts, or require
 
605
    # topological sorting. Using fulltexts is more optimal for local
 
606
    # operations, because the source can be smart about extracting
 
607
    # multiple in-a-row (and sharing strings). Topological is better
 
608
    # for remote, because we access less data.
 
609
    _fetch_order = 'unordered'
 
610
    _fetch_uses_deltas = False
 
611
 
 
612
    def get_format_string(self):
 
613
        """See RepositoryFormat.get_format_string()."""
 
614
        return ('Bazaar development format - hash255chk+gc rich-root bigpage'
 
615
                ' (needs bzr.dev from 1.13)\n')
 
616
 
 
617
    def get_format_description(self):
 
618
        """See RepositoryFormat.get_format_description()."""
 
619
        return ("Development repository format - hash255chk+groupcompress + bigpage")