~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

Merge up through 2.2.0.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
# Copyright (C) 2008, 2009 Canonical Ltd
 
1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
2
2
#
3
3
# This program is free software; you can redistribute it and/or modify
4
4
# it under the terms of the GNU General Public License as published by
29
29
    knit,
30
30
    osutils,
31
31
    pack,
32
 
    remote,
33
32
    revision as _mod_revision,
34
33
    trace,
35
34
    ui,
53
52
    ResumedPack,
54
53
    Packer,
55
54
    )
 
55
from bzrlib.static_tuple import StaticTuple
56
56
 
57
57
 
58
58
class GCPack(NewPack):
262
262
        remaining_keys = set(keys)
263
263
        counter = [0]
264
264
        if self._gather_text_refs:
265
 
            bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
266
265
            self._text_refs = set()
267
266
        def _get_referenced_stream(root_keys, parse_leaf_nodes=False):
268
267
            cur_keys = root_keys
289
288
                    # Store is None, because we know we have a LeafNode, and we
290
289
                    # just want its entries
291
290
                    for file_id, bytes in node.iteritems(None):
292
 
                        name_utf8, file_id, revision_id = bytes_to_info(bytes)
293
 
                        self._text_refs.add((file_id, revision_id))
 
291
                        self._text_refs.add(chk_map._bytes_to_text_key(bytes))
294
292
                def next_stream():
295
293
                    stream = source_vf.get_record_stream(cur_keys,
296
294
                                                         'as-requested', True)
352
350
        """Build a VersionedFiles instance on top of this group of packs."""
353
351
        index_name = index_name + '_index'
354
352
        index_to_pack = {}
355
 
        access = knit._DirectPackAccess(index_to_pack)
 
353
        access = knit._DirectPackAccess(index_to_pack,
 
354
                                        reload_func=self._reload_func)
356
355
        if for_write:
357
356
            # Use new_pack
358
357
            if self.new_pack is None:
584
583
    pack_factory = GCPack
585
584
    resumed_pack_factory = ResumedGCPack
586
585
 
 
586
    def _check_new_inventories(self):
 
587
        """Detect missing inventories or chk root entries for the new revisions
 
588
        in this write group.
 
589
 
 
590
        :returns: list of strs, summarising any problems found.  If the list is
 
591
            empty no problems were found.
 
592
        """
 
593
        # Ensure that all revisions added in this write group have:
 
594
        #   - corresponding inventories,
 
595
        #   - chk root entries for those inventories,
 
596
        #   - and any present parent inventories have their chk root
 
597
        #     entries too.
 
598
        # And all this should be independent of any fallback repository.
 
599
        problems = []
 
600
        key_deps = self.repo.revisions._index._key_dependencies
 
601
        new_revisions_keys = key_deps.get_new_keys()
 
602
        no_fallback_inv_index = self.repo.inventories._index
 
603
        no_fallback_chk_bytes_index = self.repo.chk_bytes._index
 
604
        no_fallback_texts_index = self.repo.texts._index
 
605
        inv_parent_map = no_fallback_inv_index.get_parent_map(
 
606
            new_revisions_keys)
 
607
        # Are any inventories for corresponding to the new revisions missing?
 
608
        corresponding_invs = set(inv_parent_map)
 
609
        missing_corresponding = set(new_revisions_keys)
 
610
        missing_corresponding.difference_update(corresponding_invs)
 
611
        if missing_corresponding:
 
612
            problems.append("inventories missing for revisions %s" %
 
613
                (sorted(missing_corresponding),))
 
614
            return problems
 
615
        # Are any chk root entries missing for any inventories?  This includes
 
616
        # any present parent inventories, which may be used when calculating
 
617
        # deltas for streaming.
 
618
        all_inv_keys = set(corresponding_invs)
 
619
        for parent_inv_keys in inv_parent_map.itervalues():
 
620
            all_inv_keys.update(parent_inv_keys)
 
621
        # Filter out ghost parents.
 
622
        all_inv_keys.intersection_update(
 
623
            no_fallback_inv_index.get_parent_map(all_inv_keys))
 
624
        parent_invs_only_keys = all_inv_keys.symmetric_difference(
 
625
            corresponding_invs)
 
626
        all_missing = set()
 
627
        inv_ids = [key[-1] for key in all_inv_keys]
 
628
        parent_invs_only_ids = [key[-1] for key in parent_invs_only_keys]
 
629
        root_key_info = _build_interesting_key_sets(
 
630
            self.repo, inv_ids, parent_invs_only_ids)
 
631
        expected_chk_roots = root_key_info.all_keys()
 
632
        present_chk_roots = no_fallback_chk_bytes_index.get_parent_map(
 
633
            expected_chk_roots)
 
634
        missing_chk_roots = expected_chk_roots.difference(present_chk_roots)
 
635
        if missing_chk_roots:
 
636
            problems.append("missing referenced chk root keys: %s"
 
637
                % (sorted(missing_chk_roots),))
 
638
            # Don't bother checking any further.
 
639
            return problems
 
640
        # Find all interesting chk_bytes records, and make sure they are
 
641
        # present, as well as the text keys they reference.
 
642
        chk_bytes_no_fallbacks = self.repo.chk_bytes.without_fallbacks()
 
643
        chk_bytes_no_fallbacks._search_key_func = \
 
644
            self.repo.chk_bytes._search_key_func
 
645
        chk_diff = chk_map.iter_interesting_nodes(
 
646
            chk_bytes_no_fallbacks, root_key_info.interesting_root_keys,
 
647
            root_key_info.uninteresting_root_keys)
 
648
        text_keys = set()
 
649
        try:
 
650
            for record in _filter_text_keys(chk_diff, text_keys,
 
651
                                            chk_map._bytes_to_text_key):
 
652
                pass
 
653
        except errors.NoSuchRevision, e:
 
654
            # XXX: It would be nice if we could give a more precise error here.
 
655
            problems.append("missing chk node(s) for id_to_entry maps")
 
656
        chk_diff = chk_map.iter_interesting_nodes(
 
657
            chk_bytes_no_fallbacks, root_key_info.interesting_pid_root_keys,
 
658
            root_key_info.uninteresting_pid_root_keys)
 
659
        try:
 
660
            for interesting_rec, interesting_map in chk_diff:
 
661
                pass
 
662
        except errors.NoSuchRevision, e:
 
663
            problems.append(
 
664
                "missing chk node(s) for parent_id_basename_to_file_id maps")
 
665
        present_text_keys = no_fallback_texts_index.get_parent_map(text_keys)
 
666
        missing_text_keys = text_keys.difference(present_text_keys)
 
667
        if missing_text_keys:
 
668
            problems.append("missing text keys: %r"
 
669
                % (sorted(missing_text_keys),))
 
670
        return problems
 
671
 
587
672
    def _execute_pack_operations(self, pack_operations,
588
673
                                 _packer_class=GCCHKPacker,
589
674
                                 reload_func=None):
617
702
                self._remove_pack_from_memory(pack)
618
703
        # record the newly available packs and stop advertising the old
619
704
        # packs
620
 
        self._save_pack_names(clear_obsolete_packs=True)
621
 
        # Move the old packs out of the way now they are no longer referenced.
622
 
        for revision_count, packs in pack_operations:
623
 
            self._obsolete_packs(packs)
 
705
        to_be_obsoleted = []
 
706
        for _, packs in pack_operations:
 
707
            to_be_obsoleted.extend(packs)
 
708
        result = self._save_pack_names(clear_obsolete_packs=True,
 
709
                                       obsolete_packs=to_be_obsoleted)
 
710
        return result
624
711
 
625
712
 
626
713
class CHKInventoryRepository(KnitPackRepository):
651
738
            _GCGraphIndex(self._pack_collection.revision_index.combined_index,
652
739
                add_callback=self._pack_collection.revision_index.add_callback,
653
740
                parents=True, is_locked=self.is_locked,
654
 
                track_external_parent_refs=True),
 
741
                track_external_parent_refs=True, track_new_keys=True),
655
742
            access=self._pack_collection.revision_index.data_access,
656
743
            delta=False)
657
744
        self.signatures = GroupCompressVersionedFiles(
727
814
                                 ' no new_path %r' % (file_id,))
728
815
            if new_path == '':
729
816
                new_inv.root_id = file_id
730
 
                parent_id_basename_key = ('', '')
 
817
                parent_id_basename_key = StaticTuple('', '').intern()
731
818
            else:
732
819
                utf8_entry_name = entry.name.encode('utf-8')
733
 
                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
 
820
                parent_id_basename_key = StaticTuple(entry.parent_id,
 
821
                                                     utf8_entry_name).intern()
734
822
            new_value = entry_to_bytes(entry)
735
823
            # Populate Caches?
736
824
            # new_inv._path_to_fileid_cache[new_path] = file_id
737
 
            id_to_entry_dict[(file_id,)] = new_value
 
825
            key = StaticTuple(file_id).intern()
 
826
            id_to_entry_dict[key] = new_value
738
827
            parent_id_basename_dict[parent_id_basename_key] = file_id
739
828
 
740
829
        new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
773
862
        if basis_inv is None:
774
863
            if basis_revision_id == _mod_revision.NULL_REVISION:
775
864
                new_inv = self._create_inv_from_null(delta, new_revision_id)
 
865
                if new_inv.root_id is None:
 
866
                    raise errors.RootMissing()
776
867
                inv_lines = new_inv.to_lines()
777
868
                return self._inventory_add_lines(new_revision_id, parents,
778
869
                    inv_lines, check_content=False), new_inv
790
881
            if basis_tree is not None:
791
882
                basis_tree.unlock()
792
883
 
793
 
    def deserialise_inventory(self, revision_id, bytes):
 
884
    def _deserialise_inventory(self, revision_id, bytes):
794
885
        return inventory.CHKInventory.deserialise(self.chk_bytes, bytes,
795
886
            (revision_id,))
796
887
 
812
903
    def _iter_inventory_xmls(self, revision_ids, ordering):
813
904
        # Without a native 'xml' inventory, this method doesn't make sense.
814
905
        # However older working trees, and older bundles want it - so we supply
815
 
        # it allowing get_inventory_xml to work. Bundles currently use the
 
906
        # it allowing _get_inventory_xml to work. Bundles currently use the
816
907
        # serializer directly; this also isn't ideal, but there isn't an xml
817
908
        # iteration interface offered at all for repositories. We could make
818
909
        # _iter_inventory_xmls be part of the contract, even if kept private.
850
941
                                        parent_keys)
851
942
            present_parent_inv_ids = set(
852
943
                [k[-1] for k in present_parent_inv_keys])
853
 
            uninteresting_root_keys = set()
854
 
            interesting_root_keys = set()
855
944
            inventories_to_read = set(revision_ids)
856
945
            inventories_to_read.update(present_parent_inv_ids)
857
 
            for inv in self.iter_inventories(inventories_to_read):
858
 
                entry_chk_root_key = inv.id_to_entry.key()
859
 
                if inv.revision_id in present_parent_inv_ids:
860
 
                    uninteresting_root_keys.add(entry_chk_root_key)
861
 
                else:
862
 
                    interesting_root_keys.add(entry_chk_root_key)
863
 
 
 
946
            root_key_info = _build_interesting_key_sets(
 
947
                self, inventories_to_read, present_parent_inv_ids)
 
948
            interesting_root_keys = root_key_info.interesting_root_keys
 
949
            uninteresting_root_keys = root_key_info.uninteresting_root_keys
864
950
            chk_bytes = self.chk_bytes
865
951
            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
866
952
                        interesting_root_keys, uninteresting_root_keys,
867
953
                        pb=pb):
868
954
                for name, bytes in items:
869
955
                    (name_utf8, file_id, revision_id) = bytes_to_info(bytes)
 
956
                    # TODO: consider interning file_id, revision_id here, or
 
957
                    #       pushing that intern() into bytes_to_info()
 
958
                    # TODO: rich_root should always be True here, for all
 
959
                    #       repositories that support chk_bytes
870
960
                    if not rich_root and name_utf8 == '':
871
961
                        continue
872
962
                    try:
932
1022
        super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
933
1023
        self._revision_keys = None
934
1024
        self._text_keys = None
935
 
        # self._text_fetch_order = 'unordered'
 
1025
        self._text_fetch_order = 'groupcompress'
936
1026
        self._chk_id_roots = None
937
1027
        self._chk_p_id_roots = None
938
1028
 
949
1039
            p_id_roots_set = set()
950
1040
            source_vf = self.from_repository.inventories
951
1041
            stream = source_vf.get_record_stream(inventory_keys,
952
 
                                                 'unordered', True)
 
1042
                                                 'groupcompress', True)
953
1043
            for record in stream:
954
1044
                if record.storage_kind == 'absent':
955
1045
                    if allow_absent:
997
1087
                uninteresting_root_keys.add(inv.id_to_entry.key())
998
1088
                uninteresting_pid_root_keys.add(
999
1089
                    inv.parent_id_basename_to_file_id.key())
1000
 
        bytes_to_info = inventory.CHKInventory._bytes_to_utf8name_key
1001
1090
        chk_bytes = self.from_repository.chk_bytes
1002
1091
        def _filter_id_to_entry():
1003
 
            for record, items in chk_map.iter_interesting_nodes(chk_bytes,
1004
 
                        self._chk_id_roots, uninteresting_root_keys):
1005
 
                for name, bytes in items:
1006
 
                    # Note: we don't care about name_utf8, because we are always
1007
 
                    # rich-root = True
1008
 
                    _, file_id, revision_id = bytes_to_info(bytes)
1009
 
                    self._text_keys.add((file_id, revision_id))
 
1092
            interesting_nodes = chk_map.iter_interesting_nodes(chk_bytes,
 
1093
                        self._chk_id_roots, uninteresting_root_keys)
 
1094
            for record in _filter_text_keys(interesting_nodes, self._text_keys,
 
1095
                    chk_map._bytes_to_text_key):
1010
1096
                if record is not None:
1011
1097
                    yield record
1012
1098
            # Consumed
1022
1108
        yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
1023
1109
 
1024
1110
    def get_stream(self, search):
 
1111
        def wrap_and_count(pb, rc, stream):
 
1112
            """Yield records from stream while showing progress."""
 
1113
            count = 0
 
1114
            for record in stream:
 
1115
                if count == rc.STEP:
 
1116
                    rc.increment(count)
 
1117
                    pb.update('Estimate', rc.current, rc.max)
 
1118
                    count = 0
 
1119
                count += 1
 
1120
                yield record
 
1121
 
1025
1122
        revision_ids = search.get_keys()
 
1123
        pb = ui.ui_factory.nested_progress_bar()
 
1124
        rc = self._record_counter
 
1125
        self._record_counter.setup(len(revision_ids))
1026
1126
        for stream_info in self._fetch_revision_texts(revision_ids):
1027
 
            yield stream_info
 
1127
            yield (stream_info[0],
 
1128
                wrap_and_count(pb, rc, stream_info[1]))
1028
1129
        self._revision_keys = [(rev_id,) for rev_id in revision_ids]
1029
 
        yield self._get_inventory_stream(self._revision_keys)
 
1130
        self.from_repository.revisions.clear_cache()
 
1131
        self.from_repository.signatures.clear_cache()
 
1132
        s = self._get_inventory_stream(self._revision_keys)
 
1133
        yield (s[0], wrap_and_count(pb, rc, s[1]))
 
1134
        self.from_repository.inventories.clear_cache()
1030
1135
        # TODO: The keys to exclude might be part of the search recipe
1031
1136
        # For now, exclude all parents that are at the edge of ancestry, for
1032
1137
        # which we have inventories
1034
1139
        parent_keys = from_repo._find_parent_keys_of_revisions(
1035
1140
                        self._revision_keys)
1036
1141
        for stream_info in self._get_filtered_chk_streams(parent_keys):
1037
 
            yield stream_info
1038
 
        yield self._get_text_stream()
 
1142
            yield (stream_info[0], wrap_and_count(pb, rc, stream_info[1]))
 
1143
        self.from_repository.chk_bytes.clear_cache()
 
1144
        s = self._get_text_stream()
 
1145
        yield (s[0], wrap_and_count(pb, rc, s[1]))
 
1146
        self.from_repository.texts.clear_cache()
 
1147
        pb.update('Done', rc.max, rc.max)
 
1148
        pb.finished()
1039
1149
 
1040
1150
    def get_stream_for_missing_keys(self, missing_keys):
1041
1151
        # missing keys can only occur when we are byte copying and not
1050
1160
            missing_inventory_keys.add(key[1:])
1051
1161
        if self._chk_id_roots or self._chk_p_id_roots:
1052
1162
            raise AssertionError('Cannot call get_stream_for_missing_keys'
1053
 
                ' untill all of get_stream() has been consumed.')
 
1163
                ' until all of get_stream() has been consumed.')
1054
1164
        # Yield the inventory stream, so we can find the chk stream
1055
1165
        # Some of the missing_keys will be missing because they are ghosts.
1056
1166
        # As such, we can ignore them. The Sink is required to verify there are
1063
1173
            yield stream_info
1064
1174
 
1065
1175
 
 
1176
class _InterestingKeyInfo(object):
    """Four sets of chk root keys, split two ways.

    Keys are either "interesting" or "uninteresting", and belong either to
    the ``id_to_entry`` map (``*_root_keys``) or to the
    ``parent_id_basename_to_file_id`` map (``*_pid_root_keys``).  All four
    sets start out empty and are filled in by the caller.
    """

    def __init__(self):
        self.interesting_root_keys = set()
        self.interesting_pid_root_keys = set()
        self.uninteresting_root_keys = set()
        self.uninteresting_pid_root_keys = set()

    def all_interesting(self):
        """Return a new set with both kinds of interesting root key."""
        return self.interesting_root_keys.union(self.interesting_pid_root_keys)

    def all_uninteresting(self):
        """Return a new set with both kinds of uninteresting root key."""
        return self.uninteresting_root_keys.union(
            self.uninteresting_pid_root_keys)

    def all_keys(self):
        """Return a new set holding every key from all four sets."""
        return self.all_interesting().union(self.all_uninteresting())
 
1192
 
 
1193
 
 
1194
def _build_interesting_key_sets(repo, inventory_ids, parent_only_inv_ids):
    """Sort the chk root keys of some inventories into an _InterestingKeyInfo.

    :param repo: object providing ``iter_inventories(ids, ordering)``.
    :param inventory_ids: ids of every inventory to read.
    :param parent_only_inv_ids: iterable of the ids whose inventories count
        as "uninteresting"; all other inventories read are "interesting".
    :returns: an _InterestingKeyInfo holding, for each inventory read, the
        key of its ``id_to_entry`` map and of its
        ``parent_id_basename_to_file_id`` map.
    """
    result = _InterestingKeyInfo()
    # Callers may hand in a list; build a set once so that the membership
    # test inside the loop is not a linear scan per inventory.
    parent_only = frozenset(parent_only_inv_ids)
    for inv in repo.iter_inventories(inventory_ids, 'unordered'):
        root_key = inv.id_to_entry.key()
        pid_root_key = inv.parent_id_basename_to_file_id.key()
        if inv.revision_id in parent_only:
            result.uninteresting_root_keys.add(root_key)
            result.uninteresting_pid_root_keys.add(pid_root_key)
        else:
            result.interesting_root_keys.add(root_key)
            result.interesting_pid_root_keys.add(pid_root_key)
    return result
 
1206
 
 
1207
 
 
1208
def _filter_text_keys(interesting_nodes_iterable, text_keys, bytes_to_text_key):
    """Iterate the result of iter_interesting_nodes, yielding the records
    and adding to text_keys.

    This is a generator: ``text_keys`` is only updated as the caller
    consumes it, one ``(record, items)`` pair at a time.

    :param interesting_nodes_iterable: iterable of ``(record, items)``
        pairs, where ``items`` is an iterable of ``(name, bytes)`` pairs.
    :param text_keys: set that is updated in place with the converted keys.
    :param bytes_to_text_key: callable turning each ``bytes`` value into a
        text key.
    """
    text_keys_update = text_keys.update
    for record, items in interesting_nodes_iterable:
        # Convert the whole batch first so text_keys is left untouched if a
        # conversion raises; the item name is not needed.
        text_keys_update([bytes_to_text_key(b) for _name, b in items])
        yield record
 
1216
 
 
1217
 
 
1218
 
 
1219
 
1066
1220
class RepositoryFormatCHK1(RepositoryFormatPack):
1067
1221
    """A hashed CHK+group compress pack repository."""
1068
1222
 
1145
1299
 
1146
1300
    def get_format_string(self):
1147
1301
        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')
 
1302
 
 
1303
    def get_format_description(self):
 
1304
        """See RepositoryFormat.get_format_description()."""
 
1305
        return ("Repository format 2a - rich roots, group compression"
 
1306
            " and chk inventories")