~bzr-pqm/bzr/bzr.dev


Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: John Arbash Meinel
  • Date: 2009-07-29 21:35:05 UTC
  • mfrom: (4576 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4577.
  • Revision ID: john@arbash-meinel.com-20090729213505-tkqsvy1zfpocu75w
  • Message: Merge bzr.dev 4576 in prep for NEWS

@@ -30,7 +30,6 @@
     osutils,
     pack,
     remote,
-    repository,
     revision as _mod_revision,
     trace,
     ui,
@@ -39,7 +38,6 @@
     BTreeGraphIndex,
     BTreeBuilder,
     )
-from bzrlib.index import GraphIndex, GraphIndexBuilder
 from bzrlib.groupcompress import (
     _GCGraphIndex,
     GroupCompressVersionedFiles,
@@ -48,6 +46,7 @@
     Pack,
     NewPack,
     KnitPackRepository,
+    KnitPackStreamSource,
     PackRootCommitBuilder,
     RepositoryPackCollection,
     RepositoryFormatPack,
@@ -217,6 +216,7 @@
             p_id_roots_set = set()
             stream = source_vf.get_record_stream(keys, 'groupcompress', True)
             for idx, record in enumerate(stream):
+                # Inventories should always be with revisions; assume success.
                 bytes = record.get_bytes_as('fulltext')
                 chk_inv = inventory.CHKInventory.deserialise(None, bytes,
                                                              record.key)
@@ -293,6 +293,11 @@
                     stream = source_vf.get_record_stream(cur_keys,
                                                          'as-requested', True)
                     for record in stream:
+                        if record.storage_kind == 'absent':
+                            # An absent CHK record: we assume that the missing
+                            # record is in a different pack - e.g. a page not
+                            # altered by the commit we're packing.
+                            continue
                         bytes = record.get_bytes_as('fulltext')
                         # We don't care about search_key_func for this code,
                         # because we only care about external references.
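
Note: the new guard above skips records whose storage_kind is 'absent' rather than erroring out, on the assumption that the missing CHK page lives in another pack. A rough standalone sketch of that filtering pattern (the Record class below is invented for illustration, it is not bzrlib's API):

    # Illustrative only: skip 'absent' records in a record stream.
    class Record(object):
        def __init__(self, key, storage_kind, text=None):
            self.key = key
            self.storage_kind = storage_kind
            self._text = text

        def get_bytes_as(self, kind):
            # Only 'fulltext' is modelled in this sketch.
            return self._text

    def usable_fulltexts(stream):
        for record in stream:
            if record.storage_kind == 'absent':
                # Assume the missing record is held by a different pack.
                continue
            yield record.key, record.get_bytes_as('fulltext')

    stream = [Record(('a',), 'fulltext', 'data-a'), Record(('b',), 'absent')]
    assert list(usable_fulltexts(stream)) == [(('a',), 'data-a')]
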
@@ -438,7 +443,7 @@
         #      is grabbing too many keys...
         text_keys = source_vf.keys()
         self._copy_stream(source_vf, target_vf, text_keys,
-                          'text', self._get_progress_stream, 4)
+                          'texts', self._get_progress_stream, 4)
 
     def _copy_signature_texts(self):
         source_vf, target_vf = self._build_vfs('signature', False, False)
@@ -557,11 +562,6 @@
     pack_factory = GCPack
     resumed_pack_factory = ResumedGCPack
 
-    def _already_packed(self):
-        """Is the collection already packed?"""
-        # Always repack GC repositories for now
-        return False
-
     def _execute_pack_operations(self, pack_operations,
                                  _packer_class=GCCHKPacker,
                                  reload_func=None):
@@ -620,7 +620,8 @@
         self.inventories = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.inventory_index.combined_index,
                 add_callback=self._pack_collection.inventory_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.inventory_index.data_access)
         self.revisions = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.revision_index.combined_index,
@@ -632,20 +633,26 @@
         self.signatures = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.signature_index.combined_index,
                 add_callback=self._pack_collection.signature_index.add_callback,
-                parents=False, is_locked=self.is_locked),
+                parents=False, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.signature_index.data_access,
             delta=False)
         self.texts = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.text_index.combined_index,
                 add_callback=self._pack_collection.text_index.add_callback,
-                parents=True, is_locked=self.is_locked),
+                parents=True, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.text_index.data_access)
         # No parents, individual CHK pages don't have specific ancestry
         self.chk_bytes = GroupCompressVersionedFiles(
             _GCGraphIndex(self._pack_collection.chk_index.combined_index,
                 add_callback=self._pack_collection.chk_index.add_callback,
-                parents=False, is_locked=self.is_locked),
+                parents=False, is_locked=self.is_locked,
+                inconsistency_fatal=False),
             access=self._pack_collection.chk_index.data_access)
+        search_key_name = self._format._serializer.search_key_name
+        search_key_func = chk_map.search_key_registry.get(search_key_name)
+        self.chk_bytes._search_key_func = search_key_func
         # True when the repository object is 'write locked' (as opposed to the
         # physical lock only taken out around changes to the pack-names list.)
         # Another way to represent this would be a decorator around the control
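
Note: every _GCGraphIndex now gets inconsistency_fatal=False. As I understand the bzrlib index layer, this flag chooses between raising and merely warning when a key is re-added with details that disagree with what the index already holds, which matters for streams that may legitimately revisit keys. A hedged sketch of that warn-versus-raise pattern (TinyIndex is invented for illustration, not bzrlib's implementation):

    import warnings

    class TinyIndex(object):
        """Toy index sketching the inconsistency_fatal behaviour."""

        def __init__(self, inconsistency_fatal=True):
            self._nodes = {}
            self._inconsistency_fatal = inconsistency_fatal

        def add(self, key, value):
            existing = self._nodes.get(key)
            if existing is not None and existing != value:
                if self._inconsistency_fatal:
                    raise ValueError('inconsistent details for %r' % (key,))
                # Non-fatal mode: keep the first value and just complain.
                warnings.warn('inconsistent details for %r' % (key,))
                return
            self._nodes[key] = value

    idx = TinyIndex(inconsistency_fatal=False)
    idx.add(('rev-1',), 'details-a')
    idx.add(('rev-1',), 'details-b')  # warns instead of raising
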
@@ -674,6 +681,42 @@
         return self._inventory_add_lines(revision_id, parents,
             inv_lines, check_content=False)
 
+    def _create_inv_from_null(self, delta, revision_id):
+        """This will mutate new_inv directly.
+
+        This is a simplified form of create_by_apply_delta which knows that all
+        the old values must be None, so everything is a create.
+        """
+        serializer = self._format._serializer
+        new_inv = inventory.CHKInventory(serializer.search_key_name)
+        new_inv.revision_id = revision_id
+        entry_to_bytes = new_inv._entry_to_bytes
+        id_to_entry_dict = {}
+        parent_id_basename_dict = {}
+        for old_path, new_path, file_id, entry in delta:
+            if old_path is not None:
+                raise ValueError('Invalid delta, somebody tried to delete %r'
+                                 ' from the NULL_REVISION'
+                                 % ((old_path, file_id),))
+            if new_path is None:
+                raise ValueError('Invalid delta, delta from NULL_REVISION has'
+                                 ' no new_path %r' % (file_id,))
+            if new_path == '':
+                new_inv.root_id = file_id
+                parent_id_basename_key = ('', '')
+            else:
+                utf8_entry_name = entry.name.encode('utf-8')
+                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
+            new_value = entry_to_bytes(entry)
+            # Populate Caches?
+            # new_inv._path_to_fileid_cache[new_path] = file_id
+            id_to_entry_dict[(file_id,)] = new_value
+            parent_id_basename_dict[parent_id_basename_key] = file_id
+
+        new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
+            parent_id_basename_dict, maximum_size=serializer.maximum_size)
+        return new_inv
+
     def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
                                parents, basis_inv=None, propagate_caches=False):
         """Add a new inventory expressed as a delta against another revision.
@@ -699,24 +742,29 @@
             repository format specific) of the serialized inventory, and the
             resulting inventory.
         """
-        if basis_revision_id == _mod_revision.NULL_REVISION:
-            return KnitPackRepository.add_inventory_by_delta(self,
-                basis_revision_id, delta, new_revision_id, parents)
         if not self.is_in_write_group():
             raise AssertionError("%r not in write group" % (self,))
         _mod_revision.check_not_reserved_id(new_revision_id)
-        basis_tree = self.revision_tree(basis_revision_id)
-        basis_tree.lock_read()
-        try:
-            if basis_inv is None:
+        basis_tree = None
+        if basis_inv is None:
+            if basis_revision_id == _mod_revision.NULL_REVISION:
+                new_inv = self._create_inv_from_null(delta, new_revision_id)
+                inv_lines = new_inv.to_lines()
+                return self._inventory_add_lines(new_revision_id, parents,
+                    inv_lines, check_content=False), new_inv
+            else:
+                basis_tree = self.revision_tree(basis_revision_id)
+                basis_tree.lock_read()
                 basis_inv = basis_tree.inventory
+        try:
             result = basis_inv.create_by_apply_delta(delta, new_revision_id,
                 propagate_caches=propagate_caches)
             inv_lines = result.to_lines()
             return self._inventory_add_lines(new_revision_id, parents,
                 inv_lines, check_content=False), result
         finally:
-            basis_tree.unlock()
+            if basis_tree is not None:
+                basis_tree.unlock()
 
     def _iter_inventories(self, revision_ids):
         """Iterate over many inventory objects."""
@@ -736,21 +784,10 @@
         # make it raise to trap naughty direct users.
         raise NotImplementedError(self._iter_inventory_xmls)
 
-    def _find_parent_ids_of_revisions(self, revision_ids):
-        # TODO: we probably want to make this a helper that other code can get
-        #       at
-        parent_map = self.get_parent_map(revision_ids)
-        parents = set()
-        map(parents.update, parent_map.itervalues())
-        parents.difference_update(revision_ids)
-        parents.discard(_mod_revision.NULL_REVISION)
-        return parents
-
-    def _find_present_inventory_ids(self, revision_ids):
-        keys = [(r,) for r in revision_ids]
-        parent_map = self.inventories.get_parent_map(keys)
-        present_inventory_ids = set(k[-1] for k in parent_map)
-        return present_inventory_ids
+    def _find_present_inventory_keys(self, revision_keys):
+        parent_map = self.inventories.get_parent_map(revision_keys)
+        present_inventory_keys = set(k for k in parent_map)
+        return present_inventory_keys
 
     def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
         """Find the file ids and versions affected by revisions.
@@ -767,12 +804,20 @@
         file_id_revisions = {}
         pb = ui.ui_factory.nested_progress_bar()
         try:
-            parent_ids = self._find_parent_ids_of_revisions(revision_ids)
-            present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
+            revision_keys = [(r,) for r in revision_ids]
+            parent_keys = self._find_parent_keys_of_revisions(revision_keys)
+            # TODO: instead of using _find_present_inventory_keys, change the
+            #       code paths to allow missing inventories to be tolerated.
+            #       However, we only want to tolerate missing parent
+            #       inventories, not missing inventories for revision_ids
+            present_parent_inv_keys = self._find_present_inventory_keys(
+                                        parent_keys)
+            present_parent_inv_ids = set(
+                [k[-1] for k in present_parent_inv_keys])
             uninteresting_root_keys = set()
             interesting_root_keys = set()
-            inventories_to_read = set(present_parent_inv_ids)
-            inventories_to_read.update(revision_ids)
+            inventories_to_read = set(revision_ids)
+            inventories_to_read.update(present_parent_inv_ids)
             for inv in self.iter_inventories(inventories_to_read):
                 entry_chk_root_key = inv.id_to_entry.key()
                 if inv.revision_id in present_parent_inv_ids:
@@ -846,7 +891,7 @@
         return super(CHKInventoryRepository, self)._get_source(to_format)
 
 
-class GroupCHKStreamSource(repository.StreamSource):
+class GroupCHKStreamSource(KnitPackStreamSource):
     """Used when both the source and target repo are GroupCHK repos."""
 
     def __init__(self, from_repository, to_format):
@@ -854,6 +899,7 @@
         super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
         self._revision_keys = None
         self._text_keys = None
+        self._text_fetch_order = 'groupcompress'
         self._chk_id_roots = None
         self._chk_p_id_roots = None
 
@@ -898,16 +944,10 @@
             p_id_roots_set.clear()
         return ('inventories', _filtered_inv_stream())
 
-    def _find_present_inventories(self, revision_ids):
-        revision_keys = [(r,) for r in revision_ids]
-        inventories = self.from_repository.inventories
-        present_inventories = inventories.get_parent_map(revision_keys)
-        return [p[-1] for p in present_inventories]
-
-    def _get_filtered_chk_streams(self, excluded_revision_ids):
+    def _get_filtered_chk_streams(self, excluded_revision_keys):
         self._text_keys = set()
-        excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
-        if not excluded_revision_ids:
+        excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
+        if not excluded_revision_keys:
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
         else:
@@ -915,9 +955,9 @@
             # actually present
             # TODO: Update Repository.iter_inventories() to add
             #       ignore_missing=True
-            present_ids = self.from_repository._find_present_inventory_ids(
-                            excluded_revision_ids)
-            present_ids = self._find_present_inventories(excluded_revision_ids)
+            present_keys = self.from_repository._find_present_inventory_keys(
+                            excluded_revision_keys)
+            present_ids = [k[-1] for k in present_keys]
             uninteresting_root_keys = set()
             uninteresting_pid_root_keys = set()
             for inv in self.from_repository.iter_inventories(present_ids):
@@ -948,14 +988,6 @@
             self._chk_p_id_roots = None
         yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
 
-    def _get_text_stream(self):
-        # Note: We know we don't have to handle adding root keys, because both
-        # the source and target are GCCHK, and those always support rich-roots
-        # We may want to request as 'unordered', in case the source has done a
-        # 'split' packing
-        return ('texts', self.from_repository.texts.get_record_stream(
-                            self._text_keys, 'groupcompress', False))
-
     def get_stream(self, search):
         revision_ids = search.get_keys()
         for stream_info in self._fetch_revision_texts(revision_ids):
@@ -966,8 +998,9 @@
         # For now, exclude all parents that are at the edge of ancestry, for
         # which we have inventories
         from_repo = self.from_repository
-        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
-        for stream_info in self._get_filtered_chk_streams(parent_ids):
+        parent_keys = from_repo._find_parent_keys_of_revisions(
+                        self._revision_keys)
+        for stream_info in self._get_filtered_chk_streams(parent_keys):
             yield stream_info
         yield self._get_text_stream()
 
@@ -991,8 +1024,8 @@
         # no unavailable texts when the ghost inventories are not filled in.
         yield self._get_inventory_stream(missing_inventory_keys,
                                          allow_absent=True)
-        # We use the empty set for excluded_revision_ids, to make it clear that
-        # we want to transmit all referenced chk pages.
+        # We use the empty set for excluded_revision_keys, to make it clear
+        # that we want to transmit all referenced chk pages.
         for stream_info in self._get_filtered_chk_streams(set()):
             yield stream_info
 
@@ -1021,6 +1054,7 @@
     _fetch_order = 'unordered'
     _fetch_uses_deltas = False # essentially ignored by the groupcompress code.
     fast_deltas = True
+    pack_compresses = True
 
     def _get_matching_bzrdir(self):
        return bzrdir.format_registry.make_bzrdir('development6-rich-root')
@@ -1044,7 +1078,8 @@
         if not target_format.rich_root_data:
             raise errors.BadConversionTarget(
                 'Does not support rich root data.', target_format)
-        if not getattr(target_format, 'supports_tree_reference', False):
+        if (self.supports_tree_reference and
+            not getattr(target_format, 'supports_tree_reference', False)):
             raise errors.BadConversionTarget(
                 'Does not support nested trees', target_format)
 
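Note: the tightened check above only rejects a conversion target when the source format itself supports tree references; getattr with a False default keeps the probe safe on formats that predate the attribute. A small sketch (both format classes invented for illustration):

    class LegacyTargetFormat(object):
        pass  # no supports_tree_reference attribute at all

    class SourceFormat(object):
        supports_tree_reference = False

    source, target = SourceFormat(), LegacyTargetFormat()
    incompatible = (source.supports_tree_reference and
                    not getattr(target, 'supports_tree_reference', False))
    assert not incompatible  # source never uses tree references, so it's fine
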
@@ -1066,6 +1101,24 @@
     def get_format_string(self):
         """See RepositoryFormat.get_format_string()."""
         return ('Bazaar development format - chk repository with bencode '
-                'revision serialization (needs bzr.dev from 1.15)\n')
-
-
+                'revision serialization (needs bzr.dev from 1.16)\n')
+
+
+class RepositoryFormat2a(RepositoryFormatCHK2):
+    """A CHK repository that uses the bencode revision serializer.
+
+    This is the same as RepositoryFormatCHK2 but with a public name.
+    """
+
+    _serializer = chk_serializer.chk_bencode_serializer
+
+    def _get_matching_bzrdir(self):
+        return bzrdir.format_registry.make_bzrdir('2a')
+
+    def _ignore_setting_bzrdir(self, format):
+        pass
+
+    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
+
+    def get_format_string(self):
+        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')