~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: Vincent Ladeuil
  • Date: 2011-07-06 09:22:00 UTC
  • mfrom: (6008 +trunk)
  • mto: (6012.1.1 trunk)
  • mto: This revision was merged to the branch mainline in revision 6013.
  • Revision ID: v.ladeuil+lp@free.fr-20110706092200-7iai2mwzc0sqdsvf
Merging in trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
26
26
    errors,
27
27
    index as _mod_index,
28
28
    inventory,
29
 
    knit,
30
29
    osutils,
31
30
    pack,
32
31
    revision as _mod_revision,
44
43
    GroupCompressVersionedFiles,
45
44
    )
46
45
from bzrlib.repofmt.pack_repo import (
 
46
    _DirectPackAccess,
47
47
    Pack,
48
48
    NewPack,
49
 
    KnitPackRepository,
50
 
    KnitPackStreamSource,
 
49
    PackRepository,
51
50
    PackRootCommitBuilder,
52
51
    RepositoryPackCollection,
53
52
    RepositoryFormatPack,
54
53
    ResumedPack,
55
54
    Packer,
56
55
    )
 
56
from bzrlib.vf_repository import (
 
57
    StreamSource,
 
58
    )
57
59
from bzrlib.static_tuple import StaticTuple
58
60
 
59
61
 
352
354
        """Build a VersionedFiles instance on top of this group of packs."""
353
355
        index_name = index_name + '_index'
354
356
        index_to_pack = {}
355
 
        access = knit._DirectPackAccess(index_to_pack,
356
 
                                        reload_func=self._reload_func)
 
357
        access = _DirectPackAccess(index_to_pack,
 
358
                                   reload_func=self._reload_func)
357
359
        if for_write:
358
360
            # Use new_pack
359
361
            if self.new_pack is None:
603
605
    def __init__(self, *args, **kwargs):
604
606
        super(GCCHKCanonicalizingPacker, self).__init__(*args, **kwargs)
605
607
        self._data_changed = False
606
 
    
 
608
 
607
609
    def _exhaust_stream(self, source_vf, keys, message, vf_to_stream, pb_offset):
608
610
        """Create and exhaust a stream, but don't insert it.
609
 
        
 
611
 
610
612
        This is useful to get the side-effects of generating a stream.
611
613
        """
612
614
        self.pb.update('scanning %s' % (message,), pb_offset)
701
703
 
702
704
    pack_factory = GCPack
703
705
    resumed_pack_factory = ResumedGCPack
 
706
    normal_packer_class = GCCHKPacker
 
707
    optimising_packer_class = GCCHKPacker
704
708
 
705
709
    def _check_new_inventories(self):
706
710
        """Detect missing inventories or chk root entries for the new revisions
788
792
                % (sorted(missing_text_keys),))
789
793
        return problems
790
794
 
791
 
    def _execute_pack_operations(self, pack_operations,
792
 
                                 _packer_class=GCCHKPacker,
793
 
                                 reload_func=None):
794
 
        """Execute a series of pack operations.
795
 
 
796
 
        :param pack_operations: A list of [revision_count, packs_to_combine].
797
 
        :param _packer_class: The class of packer to use (default: Packer).
798
 
        :return: None.
799
 
        """
800
 
        # XXX: Copied across from RepositoryPackCollection simply because we
801
 
        #      want to override the _packer_class ... :(
802
 
        for revision_count, packs in pack_operations:
803
 
            # we may have no-ops from the setup logic
804
 
            if len(packs) == 0:
805
 
                continue
806
 
            packer = GCCHKPacker(self, packs, '.autopack',
807
 
                                 reload_func=reload_func)
808
 
            try:
809
 
                result = packer.pack()
810
 
            except errors.RetryWithNewPacks:
811
 
                # An exception is propagating out of this context, make sure
812
 
                # this packer has cleaned up. Packer() doesn't set its new_pack
813
 
                # state into the RepositoryPackCollection object, so we only
814
 
                # have access to it directly here.
815
 
                if packer.new_pack is not None:
816
 
                    packer.new_pack.abort()
817
 
                raise
818
 
            if result is None:
819
 
                return
820
 
            for pack in packs:
821
 
                self._remove_pack_from_memory(pack)
822
 
        # record the newly available packs and stop advertising the old
823
 
        # packs
824
 
        to_be_obsoleted = []
825
 
        for _, packs in pack_operations:
826
 
            to_be_obsoleted.extend(packs)
827
 
        result = self._save_pack_names(clear_obsolete_packs=True,
828
 
                                       obsolete_packs=to_be_obsoleted)
829
 
        return result
830
 
 
831
 
 
832
 
class CHKInventoryRepository(KnitPackRepository):
833
 
    """subclass of KnitPackRepository that uses CHK based inventories."""
 
795
 
 
796
class CHKInventoryRepository(PackRepository):
 
797
    """subclass of PackRepository that uses CHK based inventories."""
834
798
 
835
799
    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
836
800
        _serializer):
837
801
        """Overridden to change pack collection class."""
838
 
        KnitPackRepository.__init__(self, _format, a_bzrdir, control_files,
839
 
            _commit_builder_class, _serializer)
840
 
        # and now replace everything it did :)
 
802
        super(CHKInventoryRepository, self).__init__(_format, a_bzrdir,
 
803
            control_files, _commit_builder_class, _serializer)
841
804
        index_transport = self._transport.clone('indices')
842
805
        self._pack_collection = GCRepositoryPackCollection(self,
843
806
            self._transport, index_transport,
1146
1109
            return GroupCHKStreamSource(self, to_format)
1147
1110
        return super(CHKInventoryRepository, self)._get_source(to_format)
1148
1111
 
1149
 
 
1150
 
class GroupCHKStreamSource(KnitPackStreamSource):
 
1112
    def _find_inconsistent_revision_parents(self, revisions_iterator=None):
 
1113
        """Find revisions with different parent lists in the revision object
 
1114
        and in the index graph.
 
1115
 
 
1116
        :param revisions_iterator: None, or an iterator of (revid,
 
1117
            Revision-or-None). This iterator controls the revisions checked.
 
1118
        :returns: an iterator yielding tuples of (revision-id, parents-in-index,
 
1119
            parents-in-revision).
 
1120
        """
 
1121
        if not self.is_locked():
 
1122
            raise AssertionError()
 
1123
        vf = self.revisions
 
1124
        if revisions_iterator is None:
 
1125
            revisions_iterator = self._iter_revisions(None)
 
1126
        for revid, revision in revisions_iterator:
 
1127
            if revision is None:
 
1128
                pass
 
1129
            parent_map = vf.get_parent_map([(revid,)])
 
1130
            parents_according_to_index = tuple(parent[-1] for parent in
 
1131
                parent_map[(revid,)])
 
1132
            parents_according_to_revision = tuple(revision.parent_ids)
 
1133
            if parents_according_to_index != parents_according_to_revision:
 
1134
                yield (revid, parents_according_to_index,
 
1135
                    parents_according_to_revision)
 
1136
 
 
1137
    def _check_for_inconsistent_revision_parents(self):
 
1138
        inconsistencies = list(self._find_inconsistent_revision_parents())
 
1139
        if inconsistencies:
 
1140
            raise errors.BzrCheckError(
 
1141
                "Revision index has inconsistent parents.")
 
1142
 
 
1143
 
 
1144
class GroupCHKStreamSource(StreamSource):
1151
1145
    """Used when both the source and target repo are GroupCHK repos."""
1152
1146
 
1153
1147
    def __init__(self, from_repository, to_format):
1240
1234
            self._chk_p_id_roots = None
1241
1235
        yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
1242
1236
 
 
1237
    def _get_text_stream(self):
 
1238
        # Note: We know we don't have to handle adding root keys, because both
 
1239
        # the source and target are the identical network name.
 
1240
        text_stream = self.from_repository.texts.get_record_stream(
 
1241
                        self._text_keys, self._text_fetch_order, False)
 
1242
        return ('texts', text_stream)
 
1243
 
1243
1244
    def get_stream(self, search):
1244
1245
        def wrap_and_count(pb, rc, stream):
1245
1246
            """Yield records from stream while showing progress."""
1260
1261
            yield (stream_info[0],
1261
1262
                wrap_and_count(pb, rc, stream_info[1]))
1262
1263
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
1263
 
        self.from_repository.revisions.clear_cache()
1264
 
        self.from_repository.signatures.clear_cache()
1265
 
        s = self._get_inventory_stream(self._revision_keys)
1266
 
        yield (s[0], wrap_and_count(pb, rc, s[1]))
1267
 
        self.from_repository.inventories.clear_cache()
1268
1264
        # TODO: The keys to exclude might be part of the search recipe
1269
1265
        # For now, exclude all parents that are at the edge of ancestry, for
1270
1266
        # which we have inventories
1271
1267
        from_repo = self.from_repository
1272
1268
        parent_keys = from_repo._find_parent_keys_of_revisions(
1273
1269
                        self._revision_keys)
 
1270
        self.from_repository.revisions.clear_cache()
 
1271
        self.from_repository.signatures.clear_cache()
 
1272
        # Clear the repo's get_parent_map cache too.
 
1273
        self.from_repository._unstacked_provider.disable_cache()
 
1274
        self.from_repository._unstacked_provider.enable_cache()
 
1275
        s = self._get_inventory_stream(self._revision_keys)
 
1276
        yield (s[0], wrap_and_count(pb, rc, s[1]))
 
1277
        self.from_repository.inventories.clear_cache()
1274
1278
        for stream_info in self._get_filtered_chk_streams(parent_keys):
1275
1279
            yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
1276
1280
        self.from_repository.chk_bytes.clear_cache()