~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/repofmt/groupcompress_repo.py

  • Committer: Martin Pool
  • Date: 2009-06-19 09:06:56 UTC
  • mfrom: (4463 +trunk)
  • mto: This revision was merged to the branch mainline in revision 4464.
  • Revision ID: mbp@sourcefrog.net-20090619090656-d5weqeecyscv8kqp
merge news

Show diffs side-by-side

added

removed

Lines of Context:
48
48
    Pack,
49
49
    NewPack,
50
50
    KnitPackRepository,
 
51
    KnitPackStreamSource,
51
52
    PackRootCommitBuilder,
52
53
    RepositoryPackCollection,
53
54
    RepositoryFormatPack,
674
675
        return self._inventory_add_lines(revision_id, parents,
675
676
            inv_lines, check_content=False)
676
677
 
 
678
    def _create_inv_from_null(self, delta, revision_id):
 
679
        """This will mutate new_inv directly.
 
680
 
 
681
        This is a simplified form of create_by_apply_delta which knows that all
 
682
        the old values must be None, so everything is a create.
 
683
        """
 
684
        serializer = self._format._serializer
 
685
        new_inv = inventory.CHKInventory(serializer.search_key_name)
 
686
        new_inv.revision_id = revision_id
 
687
        entry_to_bytes = new_inv._entry_to_bytes
 
688
        id_to_entry_dict = {}
 
689
        parent_id_basename_dict = {}
 
690
        for old_path, new_path, file_id, entry in delta:
 
691
            if old_path is not None:
 
692
                raise ValueError('Invalid delta, somebody tried to delete %r'
 
693
                                 ' from the NULL_REVISION'
 
694
                                 % ((old_path, file_id),))
 
695
            if new_path is None:
 
696
                raise ValueError('Invalid delta, delta from NULL_REVISION has'
 
697
                                 ' no new_path %r' % (file_id,))
 
698
            if new_path == '':
 
699
                new_inv.root_id = file_id
 
700
                parent_id_basename_key = ('', '')
 
701
            else:
 
702
                utf8_entry_name = entry.name.encode('utf-8')
 
703
                parent_id_basename_key = (entry.parent_id, utf8_entry_name)
 
704
            new_value = entry_to_bytes(entry)
 
705
            # Populate Caches?
 
706
            # new_inv._path_to_fileid_cache[new_path] = file_id
 
707
            id_to_entry_dict[(file_id,)] = new_value
 
708
            parent_id_basename_dict[parent_id_basename_key] = file_id
 
709
 
 
710
        new_inv._populate_from_dicts(self.chk_bytes, id_to_entry_dict,
 
711
            parent_id_basename_dict, maximum_size=serializer.maximum_size)
 
712
        return new_inv
 
713
 
677
714
    def add_inventory_by_delta(self, basis_revision_id, delta, new_revision_id,
678
715
                               parents, basis_inv=None, propagate_caches=False):
679
716
        """Add a new inventory expressed as a delta against another revision.
699
736
            repository format specific) of the serialized inventory, and the
700
737
            resulting inventory.
701
738
        """
702
 
        if basis_revision_id == _mod_revision.NULL_REVISION:
703
 
            return KnitPackRepository.add_inventory_by_delta(self,
704
 
                basis_revision_id, delta, new_revision_id, parents)
705
739
        if not self.is_in_write_group():
706
740
            raise AssertionError("%r not in write group" % (self,))
707
741
        _mod_revision.check_not_reserved_id(new_revision_id)
708
 
        basis_tree = self.revision_tree(basis_revision_id)
709
 
        basis_tree.lock_read()
710
 
        try:
711
 
            if basis_inv is None:
 
742
        basis_tree = None
 
743
        if basis_inv is None:
 
744
            if basis_revision_id == _mod_revision.NULL_REVISION:
 
745
                new_inv = self._create_inv_from_null(delta, new_revision_id)
 
746
                inv_lines = new_inv.to_lines()
 
747
                return self._inventory_add_lines(new_revision_id, parents,
 
748
                    inv_lines, check_content=False), new_inv
 
749
            else:
 
750
                basis_tree = self.revision_tree(basis_revision_id)
 
751
                basis_tree.lock_read()
712
752
                basis_inv = basis_tree.inventory
 
753
        try:
713
754
            result = basis_inv.create_by_apply_delta(delta, new_revision_id,
714
755
                propagate_caches=propagate_caches)
715
756
            inv_lines = result.to_lines()
716
757
            return self._inventory_add_lines(new_revision_id, parents,
717
758
                inv_lines, check_content=False), result
718
759
        finally:
719
 
            basis_tree.unlock()
 
760
            if basis_tree is not None:
 
761
                basis_tree.unlock()
720
762
 
721
763
    def _iter_inventories(self, revision_ids):
722
764
        """Iterate over many inventory objects."""
736
778
        # make it raise to trap naughty direct users.
737
779
        raise NotImplementedError(self._iter_inventory_xmls)
738
780
 
739
 
    def _find_parent_ids_of_revisions(self, revision_ids):
740
 
        # TODO: we probably want to make this a helper that other code can get
741
 
        #       at
742
 
        parent_map = self.get_parent_map(revision_ids)
743
 
        parents = set()
744
 
        map(parents.update, parent_map.itervalues())
745
 
        parents.difference_update(revision_ids)
746
 
        parents.discard(_mod_revision.NULL_REVISION)
747
 
        return parents
748
 
 
749
 
    def _find_present_inventory_ids(self, revision_ids):
750
 
        keys = [(r,) for r in revision_ids]
751
 
        parent_map = self.inventories.get_parent_map(keys)
752
 
        present_inventory_ids = set(k[-1] for k in parent_map)
753
 
        return present_inventory_ids
 
781
    def _find_present_inventory_keys(self, revision_keys):
 
782
        parent_map = self.inventories.get_parent_map(revision_keys)
 
783
        present_inventory_keys = set(k for k in parent_map)
 
784
        return present_inventory_keys
754
785
 
755
786
    def fileids_altered_by_revision_ids(self, revision_ids, _inv_weave=None):
756
787
        """Find the file ids and versions affected by revisions.
767
798
        file_id_revisions = {}
768
799
        pb = ui.ui_factory.nested_progress_bar()
769
800
        try:
770
 
            parent_ids = self._find_parent_ids_of_revisions(revision_ids)
771
 
            present_parent_inv_ids = self._find_present_inventory_ids(parent_ids)
 
801
            revision_keys = [(r,) for r in revision_ids]
 
802
            parent_keys = self._find_parent_keys_of_revisions(revision_keys)
 
803
            # TODO: instead of using _find_present_inventory_keys, change the
 
804
            #       code paths to allow missing inventories to be tolerated.
 
805
            #       However, we only want to tolerate missing parent
 
806
            #       inventories, not missing inventories for revision_ids
 
807
            present_parent_inv_keys = self._find_present_inventory_keys(
 
808
                                        parent_keys)
 
809
            present_parent_inv_ids = set(
 
810
                [k[-1] for k in present_parent_inv_keys])
772
811
            uninteresting_root_keys = set()
773
812
            interesting_root_keys = set()
774
 
            inventories_to_read = set(present_parent_inv_ids)
775
 
            inventories_to_read.update(revision_ids)
 
813
            inventories_to_read = set(revision_ids)
 
814
            inventories_to_read.update(present_parent_inv_ids)
776
815
            for inv in self.iter_inventories(inventories_to_read):
777
816
                entry_chk_root_key = inv.id_to_entry.key()
778
817
                if inv.revision_id in present_parent_inv_ids:
846
885
        return super(CHKInventoryRepository, self)._get_source(to_format)
847
886
 
848
887
 
849
 
class GroupCHKStreamSource(repository.StreamSource):
 
888
class GroupCHKStreamSource(KnitPackStreamSource):
850
889
    """Used when both the source and target repo are GroupCHK repos."""
851
890
 
852
891
    def __init__(self, from_repository, to_format):
854
893
        super(GroupCHKStreamSource, self).__init__(from_repository, to_format)
855
894
        self._revision_keys = None
856
895
        self._text_keys = None
 
896
        self._text_fetch_order = 'groupcompress'
857
897
        self._chk_id_roots = None
858
898
        self._chk_p_id_roots = None
859
899
 
898
938
            p_id_roots_set.clear()
899
939
        return ('inventories', _filtered_inv_stream())
900
940
 
901
 
    def _find_present_inventories(self, revision_ids):
902
 
        revision_keys = [(r,) for r in revision_ids]
903
 
        inventories = self.from_repository.inventories
904
 
        present_inventories = inventories.get_parent_map(revision_keys)
905
 
        return [p[-1] for p in present_inventories]
906
 
 
907
 
    def _get_filtered_chk_streams(self, excluded_revision_ids):
 
941
    def _get_filtered_chk_streams(self, excluded_revision_keys):
908
942
        self._text_keys = set()
909
 
        excluded_revision_ids.discard(_mod_revision.NULL_REVISION)
910
 
        if not excluded_revision_ids:
 
943
        excluded_revision_keys.discard(_mod_revision.NULL_REVISION)
 
944
        if not excluded_revision_keys:
911
945
            uninteresting_root_keys = set()
912
946
            uninteresting_pid_root_keys = set()
913
947
        else:
915
949
            # actually present
916
950
            # TODO: Update Repository.iter_inventories() to add
917
951
            #       ignore_missing=True
918
 
            present_ids = self.from_repository._find_present_inventory_ids(
919
 
                            excluded_revision_ids)
920
 
            present_ids = self._find_present_inventories(excluded_revision_ids)
 
952
            present_keys = self.from_repository._find_present_inventory_keys(
 
953
                            excluded_revision_keys)
 
954
            present_ids = [k[-1] for k in present_keys]
921
955
            uninteresting_root_keys = set()
922
956
            uninteresting_pid_root_keys = set()
923
957
            for inv in self.from_repository.iter_inventories(present_ids):
948
982
            self._chk_p_id_roots = None
949
983
        yield 'chk_bytes', _get_parent_id_basename_to_file_id_pages()
950
984
 
951
 
    def _get_text_stream(self):
952
 
        # Note: We know we don't have to handle adding root keys, because both
953
 
        # the source and target are GCCHK, and those always support rich-roots
954
 
        # We may want to request as 'unordered', in case the source has done a
955
 
        # 'split' packing
956
 
        return ('texts', self.from_repository.texts.get_record_stream(
957
 
                            self._text_keys, 'groupcompress', False))
958
 
 
959
985
    def get_stream(self, search):
960
986
        revision_ids = search.get_keys()
961
987
        for stream_info in self._fetch_revision_texts(revision_ids):
966
992
        # For now, exclude all parents that are at the edge of ancestry, for
967
993
        # which we have inventories
968
994
        from_repo = self.from_repository
969
 
        parent_ids = from_repo._find_parent_ids_of_revisions(revision_ids)
970
 
        for stream_info in self._get_filtered_chk_streams(parent_ids):
 
995
        parent_keys = from_repo._find_parent_keys_of_revisions(
 
996
                        self._revision_keys)
 
997
        for stream_info in self._get_filtered_chk_streams(parent_keys):
971
998
            yield stream_info
972
999
        yield self._get_text_stream()
973
1000
 
991
1018
        # no unavailable texts when the ghost inventories are not filled in.
992
1019
        yield self._get_inventory_stream(missing_inventory_keys,
993
1020
                                         allow_absent=True)
994
 
        # We use the empty set for excluded_revision_ids, to make it clear that
995
 
        # we want to transmit all referenced chk pages.
 
1021
        # We use the empty set for excluded_revision_keys, to make it clear
 
1022
        # that we want to transmit all referenced chk pages.
996
1023
        for stream_info in self._get_filtered_chk_streams(set()):
997
1024
            yield stream_info
998
1025
 
1066
1093
    def get_format_string(self):
1067
1094
        """See RepositoryFormat.get_format_string()."""
1068
1095
        return ('Bazaar development format - chk repository with bencode '
1069
 
                'revision serialization (needs bzr.dev from 1.15)\n')
1070
 
 
1071
 
 
 
1096
                'revision serialization (needs bzr.dev from 1.16)\n')
 
1097
 
 
1098
 
 
1099
class RepositoryFormat2a(RepositoryFormatCHK2):
 
1100
    """A CHK repository that uses the bencode revision serializer.
 
1101
    
 
1102
    This is the same as RepositoryFormatCHK2 but with a public name.
 
1103
    """
 
1104
 
 
1105
    _serializer = chk_serializer.chk_bencode_serializer
 
1106
 
 
1107
    def _get_matching_bzrdir(self):
 
1108
        return bzrdir.format_registry.make_bzrdir('2a')
 
1109
 
 
1110
    def _ignore_setting_bzrdir(self, format):
 
1111
        pass
 
1112
 
 
1113
    _matchingbzrdir = property(_get_matching_bzrdir, _ignore_setting_bzrdir)
 
1114
 
 
1115
    def get_format_string(self):
 
1116
        return ('Bazaar repository format 2a (needs bzr 1.16 or later)\n')