~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/repofmt/pack_repo.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2011-04-16 01:09:56 UTC
  • mfrom: (5784.1.4 760435-less-fail)
  • Revision ID: pqm@pqm.ubuntu.com-20110416010956-5wrpm136qq2hz5f3
(mbp) rename and deprecate failUnlessExists and failIfExists (Martin Pool)

@@ -37 +37 @@
     CombinedGraphIndex,
     GraphIndexPrefixAdapter,
     )
+from bzrlib.knit import (
+    KnitPlainFactory,
+    KnitVersionedFiles,
+    _DirectPackAccess,
+    )
 """)
 from bzrlib import (
     btree_index,
@@ -45 +50 @@
     lockdir,
     )
 
-from bzrlib.decorators import (
-    needs_read_lock,
-    needs_write_lock,
-    only_raises,
-    )
+from bzrlib.decorators import needs_write_lock, only_raises
 from bzrlib.lock import LogicalLockResult
+from bzrlib.repofmt.knitrepo import KnitRepository
 from bzrlib.repository import (
     CommitBuilder,
-    MetaDirRepository,
     MetaDirRepositoryFormat,
     RepositoryFormat,
     RepositoryWriteLockResult,
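
The first hunk above moves the bzrlib.knit imports inside the module's lazy_import block (the quoted block closed by the """) context line), so bzrlib.knit is only imported the first time one of those names is used. A minimal sketch of that pattern, assuming nothing beyond bzrlib's own lazy_import helper:

from bzrlib.lazy_import import lazy_import

# Names imported inside the quoted block become lazy module-level proxies;
# the real import of bzrlib.knit is deferred until one of them is touched.
lazy_import(globals(), """
from bzrlib.knit import (
    KnitPlainFactory,
    KnitVersionedFiles,
    )
""")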
@@ -75 +76 @@
 
     def __init__(self, repository, parents, config, timestamp=None,
                  timezone=None, committer=None, revprops=None,
-                 revision_id=None, lossy=False):
+                 revision_id=None):
         CommitBuilder.__init__(self, repository, parents, config,
             timestamp=timestamp, timezone=timezone, committer=committer,
-            revprops=revprops, revision_id=revision_id, lossy=lossy)
+            revprops=revprops, revision_id=revision_id)
         self._file_graph = graph.Graph(
             repository._pack_collection.text_index.combined_index)
 
@@ -96 +97 @@
 
     def __init__(self, repository, parents, config, timestamp=None,
                  timezone=None, committer=None, revprops=None,
-                 revision_id=None, lossy=False):
+                 revision_id=None):
         CommitBuilder.__init__(self, repository, parents, config,
             timestamp=timestamp, timezone=timezone, committer=committer,
-            revprops=revprops, revision_id=revision_id, lossy=lossy)
+            revprops=revprops, revision_id=revision_id)
         self._file_graph = graph.Graph(
             repository._pack_collection.text_index.combined_index)
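
The two commit-builder hunks apply the same signature change: the left-hand side accepts a lossy flag and forwards it to CommitBuilder, while the right-hand side predates that parameter. The forwarding shape in isolation, as a sketch (class and attribute names follow the diff; the body is illustrative only):

class PackCommitBuilder(CommitBuilder):
    def __init__(self, repository, parents, config, timestamp=None,
                 timezone=None, committer=None, revprops=None,
                 revision_id=None, lossy=False):
        # every argument, including lossy, is passed straight through; the
        # subclass only adds the per-file graph consulted during commit
        CommitBuilder.__init__(self, repository, parents, config,
            timestamp=timestamp, timezone=timezone, committer=committer,
            revprops=revprops, revision_id=revision_id, lossy=lossy)
        self._file_graph = graph.Graph(
            repository._pack_collection.text_index.combined_index)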
 
@@ -662 +663 @@
         # _copy_inventory_texts
         self._text_filter = None
 
+    def _pack_map_and_index_list(self, index_attribute):
+        """Convert a list of packs to an index pack map and index list.
+
+        :param index_attribute: The attribute that the desired index is found
+            on.
+        :return: A tuple (map, list) where map contains the dict from
+            index:pack_tuple, and list contains the indices in the preferred
+            access order.
+        """
+        indices = []
+        pack_map = {}
+        for pack_obj in self.packs:
+            index = getattr(pack_obj, index_attribute)
+            indices.append(index)
+            pack_map[index] = pack_obj
+        return pack_map, indices
+
+    def _index_contents(self, indices, key_filter=None):
+        """Get an iterable of the index contents from a pack_map.
+
+        :param indices: The list of indices to query
+        :param key_filter: An optional filter to limit the keys returned.
+        """
+        all_index = CombinedGraphIndex(indices)
+        if key_filter is None:
+            return all_index.iter_all_entries()
+        else:
+            return all_index.iter_entries(key_filter)
+
     def pack(self, pb=None):
         """Create a new pack by reading data from other packs.
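
_pack_map_and_index_list and _index_contents work as a pair: a single CombinedGraphIndex query spans every source pack, and the map routes each hit back to the pack whose file actually holds the bytes. A hypothetical usage sketch (the packer variable and the 4-tuple hit shape for graph indices are assumptions based on the later hunks):

pack_map, indices = packer._pack_map_and_index_list('revision_index')
for index, key, value, references in packer._index_contents(indices):
    source_pack = pack_map[index]  # the pack whose revision_index matched
    # 'value' encodes where the record lives inside source_pack; see
    # _do_copy_nodes below for how it is decoded into (offset, length)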
 
@@ -715 +745 @@
         new_pack.signature_index.set_optimize(combine_backing_indices=False)
         return new_pack
 
+    def _update_pack_order(self, entries, index_to_pack_map):
+        """Determine how we want our packs to be ordered.
+
+        This changes the sort order of the self.packs list so that packs unused
+        by 'entries' will be at the end of the list, so that future requests
+        can avoid probing them.  Used packs will be at the front of the
+        self.packs list, in the order of their first use in 'entries'.
+
+        :param entries: A list of (index, ...) tuples
+        :param index_to_pack_map: A mapping from index objects to pack objects.
+        """
+        packs = []
+        seen_indexes = set()
+        for entry in entries:
+            index = entry[0]
+            if index not in seen_indexes:
+                packs.append(index_to_pack_map[index])
+                seen_indexes.add(index)
+        if len(packs) == len(self.packs):
+            if 'pack' in debug.debug_flags:
+                mutter('Not changing pack list, all packs used.')
+            return
+        seen_packs = set(packs)
+        for pack in self.packs:
+            if pack not in seen_packs:
+                packs.append(pack)
+                seen_packs.add(pack)
+        if 'pack' in debug.debug_flags:
+            old_names = [p.access_tuple()[1] for p in self.packs]
+            new_names = [p.access_tuple()[1] for p in packs]
+            mutter('Reordering packs\nfrom: %s\n  to: %s',
+                   old_names, new_names)
+        self.packs = packs
+
@@ -718 +782 @@
     def _copy_revision_texts(self):
         """Copy revision data to the new pack."""
-        raise NotImplementedError(self._copy_revision_texts)
+        # select revisions
+        if self.revision_ids:
+            revision_keys = [(revision_id,) for revision_id in self.revision_ids]
+        else:
+            revision_keys = None
+        # select revision keys
+        revision_index_map, revision_indices = self._pack_map_and_index_list(
+            'revision_index')
+        revision_nodes = self._index_contents(revision_indices, revision_keys)
+        revision_nodes = list(revision_nodes)
+        self._update_pack_order(revision_nodes, revision_index_map)
+        # copy revision keys and adjust values
+        self.pb.update("Copying revision texts", 1)
+        total_items, readv_group_iter = self._revision_node_readv(revision_nodes)
+        list(self._copy_nodes_graph(revision_index_map, self.new_pack._writer,
+            self.new_pack.revision_index, readv_group_iter, total_items))
+        if 'pack' in debug.debug_flags:
+            mutter('%s: create_pack: revisions copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self._pack_collection._upload_transport.base,
+                self.new_pack.random_name,
+                self.new_pack.revision_index.key_count(),
+                time.time() - self.new_pack.start_time)
+        self._revision_keys = revision_keys
 
     def _copy_inventory_texts(self):
         """Copy the inventory texts to the new pack.
@@ -726 +812 @@
 
         Sets self._text_filter appropriately.
         """
-        raise NotImplementedError(self._copy_inventory_texts)
+        # select inventory keys
+        inv_keys = self._revision_keys # currently the same keyspace, and note that
+        # querying for keys here could introduce a bug where an inventory item
+        # is missed, so do not change it to query separately without cross
+        # checking like the text key check below.
+        inventory_index_map, inventory_indices = self._pack_map_and_index_list(
+            'inventory_index')
+        inv_nodes = self._index_contents(inventory_indices, inv_keys)
+        # copy inventory keys and adjust values
+        # XXX: Should be a helper function to allow different inv representation
+        # at this point.
+        self.pb.update("Copying inventory texts", 2)
+        total_items, readv_group_iter = self._least_readv_node_readv(inv_nodes)
+        # Only grab the output lines if we will be processing them
+        output_lines = bool(self.revision_ids)
+        inv_lines = self._copy_nodes_graph(inventory_index_map,
+            self.new_pack._writer, self.new_pack.inventory_index,
+            readv_group_iter, total_items, output_lines=output_lines)
+        if self.revision_ids:
+            self._process_inventory_lines(inv_lines)
+        else:
+            # eat the iterator to cause it to execute.
+            list(inv_lines)
+            self._text_filter = None
+        if 'pack' in debug.debug_flags:
+            mutter('%s: create_pack: inventories copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self._pack_collection._upload_transport.base,
+                self.new_pack.random_name,
+                self.new_pack.inventory_index.key_count(),
+                time.time() - self.new_pack.start_time)
 
     def _copy_text_texts(self):
-        raise NotImplementedError(self._copy_text_texts)
+        # select text keys
+        text_index_map, text_nodes = self._get_text_nodes()
+        if self._text_filter is not None:
+            # We could return the keys copied as part of the return value from
+            # _copy_nodes_graph but this doesn't work all that well with the
+            # need to get line output too, so we check separately, and as we're
+            # going to buffer everything anyway, we check beforehand, which
+            # saves reading knit data over the wire when we know there are
+            # missing records.
+            text_nodes = set(text_nodes)
+            present_text_keys = set(_node[1] for _node in text_nodes)
+            missing_text_keys = set(self._text_filter) - present_text_keys
+            if missing_text_keys:
+                # TODO: raise a specific error that can handle many missing
+                # keys.
+                mutter("missing keys during fetch: %r", missing_text_keys)
+                a_missing_key = missing_text_keys.pop()
+                raise errors.RevisionNotPresent(a_missing_key[1],
+                    a_missing_key[0])
+        # copy text keys and adjust values
+        self.pb.update("Copying content texts", 3)
+        total_items, readv_group_iter = self._least_readv_node_readv(text_nodes)
+        list(self._copy_nodes_graph(text_index_map, self.new_pack._writer,
+            self.new_pack.text_index, readv_group_iter, total_items))
+        self._log_copied_texts()
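The guard in _copy_text_texts compares the text keys demanded by the inventory filter against the keys actually present in the source indices before any bytes are copied, so a fetch from a truncated source fails fast instead of writing a broken pack. The set arithmetic in isolation, with hypothetical keys and a generic exception standing in for errors.RevisionNotPresent:

wanted = set([('file-a', 'rev-1'), ('file-b', 'rev-2')])  # from _text_filter
present = set([('file-a', 'rev-1')])                      # keys in the indices
missing = wanted - present
if missing:
    fileid, revid = missing.pop()
    raise KeyError('text %s of file %s is not present' % (revid, fileid))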
 
@@ -734 +873 @@
     def _create_pack_from_packs(self):
-        raise NotImplementedError(self._create_pack_from_packs)
+        self.pb.update("Opening pack", 0, 5)
+        self.new_pack = self.open_pack()
+        new_pack = self.new_pack
+        # buffer data - we won't be reading-back during the pack creation and
+        # this makes a significant difference on sftp pushes.
+        new_pack.set_write_cache_size(1024*1024)
+        if 'pack' in debug.debug_flags:
+            plain_pack_list = ['%s%s' % (a_pack.pack_transport.base, a_pack.name)
+                for a_pack in self.packs]
+            if self.revision_ids is not None:
+                rev_count = len(self.revision_ids)
+            else:
+                rev_count = 'all'
+            mutter('%s: create_pack: creating pack from source packs: '
+                '%s%s %s revisions wanted %s t=0',
+                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
+                plain_pack_list, rev_count)
+        self._copy_revision_texts()
+        self._copy_inventory_texts()
+        self._copy_text_texts()
+        # select signature keys
+        signature_filter = self._revision_keys # same keyspace
+        signature_index_map, signature_indices = self._pack_map_and_index_list(
+            'signature_index')
+        signature_nodes = self._index_contents(signature_indices,
+            signature_filter)
+        # copy signature keys and adjust values
+        self.pb.update("Copying signature texts", 4)
+        self._copy_nodes(signature_nodes, signature_index_map, new_pack._writer,
+            new_pack.signature_index)
+        if 'pack' in debug.debug_flags:
+            mutter('%s: create_pack: revision signatures copied: %s%s %d items t+%6.3fs',
+                time.ctime(), self._pack_collection._upload_transport.base, new_pack.random_name,
+                new_pack.signature_index.key_count(),
+                time.time() - new_pack.start_time)
+        # copy chk contents
+        # NB XXX: how to check CHK references are present? perhaps by yielding
+        # the items? How should that interact with stacked repos?
+        if new_pack.chk_index is not None:
+            self._copy_chks()
+            if 'pack' in debug.debug_flags:
+                mutter('%s: create_pack: chk content copied: %s%s %d items t+%6.3fs',
+                    time.ctime(), self._pack_collection._upload_transport.base,
+                    new_pack.random_name,
+                    new_pack.chk_index.key_count(),
+                    time.time() - new_pack.start_time)
+        new_pack._check_references()
+        if not self._use_pack(new_pack):
+            new_pack.abort()
+            return None
+        self.pb.update("Finishing pack", 5)
+        new_pack.finish()
+        self._pack_collection.allocate(new_pack)
+        return new_pack
+
+    def _copy_chks(self, refs=None):
+        # XXX: Todo, recursive follow-pointers facility when fetching some
+        # revisions only.
+        chk_index_map, chk_indices = self._pack_map_and_index_list(
+            'chk_index')
+        chk_nodes = self._index_contents(chk_indices, refs)
+        new_refs = set()
+        # TODO: This isn't strictly tasteful as we are accessing some private
+        #       variables (_serializer). Perhaps a better way would be to have
+        #       Repository._deserialise_chk_node()
+        search_key_func = chk_map.search_key_registry.get(
+            self._pack_collection.repo._serializer.search_key_name)
+        def accumlate_refs(lines):
+            # XXX: move to a generic location
+            # Yay mismatch:
+            bytes = ''.join(lines)
+            node = chk_map._deserialise(bytes, ("unknown",), search_key_func)
+            new_refs.update(node.refs())
+        self._copy_nodes(chk_nodes, chk_index_map, self.new_pack._writer,
+            self.new_pack.chk_index, output_lines=accumlate_refs)
+        return new_refs
 
951
    def _copy_nodes(self, nodes, index_map, writer, write_index,
 
952
        output_lines=None):
 
953
        """Copy knit nodes between packs with no graph references.
 
954
 
 
955
        :param output_lines: Output full texts of copied items.
 
956
        """
 
957
        pb = ui.ui_factory.nested_progress_bar()
 
958
        try:
 
959
            return self._do_copy_nodes(nodes, index_map, writer,
 
960
                write_index, pb, output_lines=output_lines)
 
961
        finally:
 
962
            pb.finished()
 
963
 
 
964
    def _do_copy_nodes(self, nodes, index_map, writer, write_index, pb,
 
965
        output_lines=None):
 
966
        # for record verification
 
967
        knit = KnitVersionedFiles(None, None)
 
968
        # plan a readv on each source pack:
 
969
        # group by pack
 
970
        nodes = sorted(nodes)
 
971
        # how to map this into knit.py - or knit.py into this?
 
972
        # we don't want the typical knit logic, we want grouping by pack
 
973
        # at this point - perhaps a helper library for the following code
 
974
        # duplication points?
 
975
        request_groups = {}
 
976
        for index, key, value in nodes:
 
977
            if index not in request_groups:
 
978
                request_groups[index] = []
 
979
            request_groups[index].append((key, value))
 
980
        record_index = 0
 
981
        pb.update("Copied record", record_index, len(nodes))
 
982
        for index, items in request_groups.iteritems():
 
983
            pack_readv_requests = []
 
984
            for key, value in items:
 
985
                # ---- KnitGraphIndex.get_position
 
986
                bits = value[1:].split(' ')
 
987
                offset, length = int(bits[0]), int(bits[1])
 
988
                pack_readv_requests.append((offset, length, (key, value[0])))
 
989
            # linear scan up the pack
 
990
            pack_readv_requests.sort()
 
991
            # copy the data
 
992
            pack_obj = index_map[index]
 
993
            transport, path = pack_obj.access_tuple()
 
994
            try:
 
995
                reader = pack.make_readv_reader(transport, path,
 
996
                    [offset[0:2] for offset in pack_readv_requests])
 
997
            except errors.NoSuchFile:
 
998
                if self._reload_func is not None:
 
999
                    self._reload_func()
 
1000
                raise
 
1001
            for (names, read_func), (_1, _2, (key, eol_flag)) in \
 
1002
                izip(reader.iter_records(), pack_readv_requests):
 
1003
                raw_data = read_func(None)
 
1004
                # check the header only
 
1005
                if output_lines is not None:
 
1006
                    output_lines(knit._parse_record(key[-1], raw_data)[0])
 
1007
                else:
 
1008
                    df, _ = knit._parse_record_header(key, raw_data)
 
1009
                    df.close()
 
1010
                pos, size = writer.add_bytes_record(raw_data, names)
 
1011
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size))
 
1012
                pb.update("Copied record", record_index)
 
1013
                record_index += 1
 
1014
 
 
1015
    def _copy_nodes_graph(self, index_map, writer, write_index,
 
1016
        readv_group_iter, total_items, output_lines=False):
 
1017
        """Copy knit nodes between packs.
 
1018
 
 
1019
        :param output_lines: Return lines present in the copied data as
 
1020
            an iterator of line,version_id.
 
1021
        """
 
1022
        pb = ui.ui_factory.nested_progress_bar()
 
1023
        try:
 
1024
            for result in self._do_copy_nodes_graph(index_map, writer,
 
1025
                write_index, output_lines, pb, readv_group_iter, total_items):
 
1026
                yield result
 
1027
        except Exception:
 
1028
            # Python 2.4 does not permit try:finally: in a generator.
 
1029
            pb.finished()
 
1030
            raise
 
1031
        else:
 
1032
            pb.finished()
 
1033
 
 
1034
    def _do_copy_nodes_graph(self, index_map, writer, write_index,
 
1035
        output_lines, pb, readv_group_iter, total_items):
 
1036
        # for record verification
 
1037
        knit = KnitVersionedFiles(None, None)
 
1038
        # for line extraction when requested (inventories only)
 
1039
        if output_lines:
 
1040
            factory = KnitPlainFactory()
 
1041
        record_index = 0
 
1042
        pb.update("Copied record", record_index, total_items)
 
1043
        for index, readv_vector, node_vector in readv_group_iter:
 
1044
            # copy the data
 
1045
            pack_obj = index_map[index]
 
1046
            transport, path = pack_obj.access_tuple()
 
1047
            try:
 
1048
                reader = pack.make_readv_reader(transport, path, readv_vector)
 
1049
            except errors.NoSuchFile:
 
1050
                if self._reload_func is not None:
 
1051
                    self._reload_func()
 
1052
                raise
 
1053
            for (names, read_func), (key, eol_flag, references) in \
 
1054
                izip(reader.iter_records(), node_vector):
 
1055
                raw_data = read_func(None)
 
1056
                if output_lines:
 
1057
                    # read the entire thing
 
1058
                    content, _ = knit._parse_record(key[-1], raw_data)
 
1059
                    if len(references[-1]) == 0:
 
1060
                        line_iterator = factory.get_fulltext_content(content)
 
1061
                    else:
 
1062
                        line_iterator = factory.get_linedelta_content(content)
 
1063
                    for line in line_iterator:
 
1064
                        yield line, key
 
1065
                else:
 
1066
                    # check the header only
 
1067
                    df, _ = knit._parse_record_header(key, raw_data)
 
1068
                    df.close()
 
1069
                pos, size = writer.add_bytes_record(raw_data, names)
 
1070
                write_index.add_node(key, eol_flag + "%d %d" % (pos, size), references)
 
1071
                pb.update("Copied record", record_index)
 
1072
                record_index += 1
 
1073
 
 
1074
    def _get_text_nodes(self):
 
1075
        text_index_map, text_indices = self._pack_map_and_index_list(
 
1076
            'text_index')
 
1077
        return text_index_map, self._index_contents(text_indices,
 
1078
            self._text_filter)
 
1079
 
 
1080
    def _least_readv_node_readv(self, nodes):
 
1081
        """Generate request groups for nodes using the least readv's.
 
1082
 
 
1083
        :param nodes: An iterable of graph index nodes.
 
1084
        :return: Total node count and an iterator of the data needed to perform
 
1085
            readvs to obtain the data for nodes. Each item yielded by the
 
1086
            iterator is a tuple with:
 
1087
            index, readv_vector, node_vector. readv_vector is a list ready to
 
1088
            hand to the transport readv method, and node_vector is a list of
 
1089
            (key, eol_flag, references) for the node retrieved by the
 
1090
            matching readv_vector.
 
1091
        """
 
1092
        # group by pack so we do one readv per pack
 
1093
        nodes = sorted(nodes)
 
1094
        total = len(nodes)
 
1095
        request_groups = {}
 
1096
        for index, key, value, references in nodes:
 
1097
            if index not in request_groups:
 
1098
                request_groups[index] = []
 
1099
            request_groups[index].append((key, value, references))
 
1100
        result = []
 
1101
        for index, items in request_groups.iteritems():
 
1102
            pack_readv_requests = []
 
1103
            for key, value, references in items:
 
1104
                # ---- KnitGraphIndex.get_position
 
1105
                bits = value[1:].split(' ')
 
1106
                offset, length = int(bits[0]), int(bits[1])
 
1107
                pack_readv_requests.append(
 
1108
                    ((offset, length), (key, value[0], references)))
 
1109
            # linear scan up the pack to maximum range combining.
 
1110
            pack_readv_requests.sort()
 
1111
            # split out the readv and the node data.
 
1112
            pack_readv = [readv for readv, node in pack_readv_requests]
 
1113
            node_vector = [node for readv, node in pack_readv_requests]
 
1114
            result.append((index, pack_readv, node_vector))
 
1115
        return total, result
736
1116
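_least_readv_node_readv minimises round trips by issuing one readv per source pack: nodes are bucketed by the index (and therefore the pack) they came from, and each bucket's offsets are sorted so the transport layer can coalesce adjacent ranges. The grouping step in plain Python (names and the 3-tuple input shape are mine):

def group_requests_by_pack(nodes):
    # nodes: iterable of (pack_id, offset, length)
    groups = {}
    for pack_id, offset, length in nodes:
        groups.setdefault(pack_id, []).append((offset, length))
    for ranges in groups.values():
        ranges.sort()   # ascending offsets let readv combine nearby ranges
    return groups

# group_requests_by_pack([('p1', 40, 10), ('p1', 0, 10), ('p2', 5, 3)])
# -> {'p1': [(0, 10), (40, 10)], 'p2': [(5, 3)]}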
 
@@ -737 +1117 @@
     def _log_copied_texts(self):
         if 'pack' in debug.debug_flags:
@@ -742 +1122 @@
                 self.new_pack.text_index.key_count(),
                 time.time() - self.new_pack.start_time)
 
+    def _process_inventory_lines(self, inv_lines):
+        """Use up the inv_lines generator and setup a text key filter."""
+        repo = self._pack_collection.repo
+        fileid_revisions = repo._find_file_ids_from_xml_inventory_lines(
+            inv_lines, self.revision_keys)
+        text_filter = []
+        for fileid, file_revids in fileid_revisions.iteritems():
+            text_filter.extend([(fileid, file_revid) for file_revid in file_revids])
+        self._text_filter = text_filter
+
+    def _revision_node_readv(self, revision_nodes):
+        """Return the total revisions and the readv's to issue.
+
+        :param revision_nodes: The revision index contents for the packs being
+            incorporated into the new pack.
+        :return: As per _least_readv_node_readv.
+        """
+        return self._least_readv_node_readv(revision_nodes)
+
     def _use_pack(self, new_pack):
         """Return True if new_pack should be used.
 
@@ -751 +1150 @@
         return new_pack.data_inserted()
 
 
+class OptimisingPacker(Packer):
+    """A packer which spends more time to create better disk layouts."""
+
+    def _revision_node_readv(self, revision_nodes):
+        """Return the total revisions and the readv's to issue.
+
+        This sort places revisions in topological order with the ancestors
+        after the children.
+
+        :param revision_nodes: The revision index contents for the packs being
+            incorporated into the new pack.
+        :return: As per _least_readv_node_readv.
+        """
+        # build an ancestors dict
+        ancestors = {}
+        by_key = {}
+        for index, key, value, references in revision_nodes:
+            ancestors[key] = references[0]
+            by_key[key] = (index, value, references)
+        order = tsort.topo_sort(ancestors)
+        total = len(order)
+        # Single IO is pathological, but it will work as a starting point.
+        requests = []
+        for key in reversed(order):
+            index, value, references = by_key[key]
+            # ---- KnitGraphIndex.get_position
+            bits = value[1:].split(' ')
+            offset, length = int(bits[0]), int(bits[1])
+            requests.append(
+                (index, [(offset, length)], [(key, value[0], references)]))
+        # TODO: combine requests in the same index that are in ascending order.
+        return total, requests
+
+    def open_pack(self):
+        """Open a pack for the pack we are creating."""
+        new_pack = super(OptimisingPacker, self).open_pack()
+        # Turn on the optimization flags for all the index builders.
+        new_pack.revision_index.set_optimize(for_size=True)
+        new_pack.inventory_index.set_optimize(for_size=True)
+        new_pack.text_index.set_optimize(for_size=True)
+        new_pack.signature_index.set_optimize(for_size=True)
+        return new_pack
+
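OptimisingPacker rewrites the readv plan so revisions are emitted children-first: reversed(order) over a parents-before-children topological sort, one request per record, which the comment concedes is pathological IO but a workable starting layout. A toy stand-in for bzrlib's tsort.topo_sort over a {child: (parent, ...)} map, to make the ordering concrete:

def topo_sort(ancestors):
    # Kahn-style passes: emit a key once all its non-ghost parents are out
    order, done = [], set()
    pending = sorted(ancestors)
    while pending:
        remaining = []
        for key in pending:
            if all(p in done or p not in ancestors for p in ancestors[key]):
                order.append(key)
                done.add(key)
            else:
                remaining.append(key)
        if len(remaining) == len(pending):
            raise ValueError('cycle in ancestry')
        pending = remaining
    return order

ancestry = {'rev-1': (), 'rev-2': ('rev-1',), 'rev-3': ('rev-2',)}
# topo_sort(ancestry) -> ['rev-1', 'rev-2', 'rev-3']; the packer then walks
# reversed(order) so children land before their ancestors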
 
@@ -754 +1197 @@
 class RepositoryPackCollection(object):
     """Management of packs within a repository.
 
     :ivar _names: map of {pack_name: (index_size,)}
     """
 
-    pack_factory = None
-    resumed_pack_factory = None
-    normal_packer_class = None
-    optimising_packer_class = None
+    pack_factory = NewPack
+    resumed_pack_factory = ResumedPack
 
     def __init__(self, repo, transport, index_transport, upload_transport,
                  pack_transport, index_builder_class, index_class,
@@ -907 +1348 @@
             'containing %d revisions. Packing %d files into %d affecting %d'
             ' revisions', self, total_packs, total_revisions, num_old_packs,
             num_new_packs, num_revs_affected)
-        result = self._execute_pack_operations(pack_operations, packer_class=self.normal_packer_class,
+        result = self._execute_pack_operations(pack_operations,
                                       reload_func=self._restart_autopack)
         mutter('Auto-packing repository %s completed', self)
         return result
 
-    def _execute_pack_operations(self, pack_operations, packer_class,
-            reload_func=None):
+    def _execute_pack_operations(self, pack_operations, _packer_class=Packer,
+                                 reload_func=None):
         """Execute a series of pack operations.
 
         :param pack_operations: A list of [revision_count, packs_to_combine].
-        :param packer_class: The class of packer to use
+        :param _packer_class: The class of packer to use (default: Packer).
         :return: The new pack names.
         """
         for revision_count, packs in pack_operations:
             # we may have no-ops from the setup logic
             if len(packs) == 0:
                 continue
-            packer = packer_class(self, packs, '.autopack',
+            packer = _packer_class(self, packs, '.autopack',
                                    reload_func=reload_func)
             try:
-                result = packer.pack()
+                packer.pack()
             except errors.RetryWithNewPacks:
                 # An exception is propagating out of this context, make sure
                 # this packer has cleaned up. Packer() doesn't set its new_pack
@@ -936 +1377 @@
                 if packer.new_pack is not None:
                     packer.new_pack.abort()
                 raise
-            if result is None:
-                return
             for pack in packs:
                 self._remove_pack_from_memory(pack)
         # record the newly available packs and stop advertising the old
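
The two sides of this hunk differ in how the packer implementation is injected: one makes the caller pass packer_class explicitly (filled from attributes such as normal_packer_class on the collection), the other defaults _packer_class to Packer. The injection shape in miniature (a sketch with invented names, not bzrlib's API):

class PackCollection(object):
    normal_packer_class = None       # concrete subclasses fill these in
    optimising_packer_class = None

    def _execute(self, operations, packer_class, reload_func=None):
        for revision_count, packs in operations:
            if not packs:
                continue             # no-op group from the planning stage
            packer = packer_class(self, packs, '.autopack',
                                  reload_func=reload_func)
            packer.pack()

    def autopack(self, operations):
        # routine housekeeping uses the ordinary packer
        self._execute(operations, self.normal_packer_class)

    def pack_for_size(self, operations):
        # an explicit repack spends more time for a better disk layout
        self._execute(operations, self.optimising_packer_class)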
@@ -999 +1438 @@
                 # or this pack was included in the hint.
                 pack_operations[-1][0] += pack.get_revision_count()
                 pack_operations[-1][1].append(pack)
-        self._execute_pack_operations(pack_operations,
-            packer_class=self.optimising_packer_class,
+        self._execute_pack_operations(pack_operations, OptimisingPacker,
             reload_func=self._restart_pack_operations)
 
     def plan_autopack_combinations(self, existing_packs, pack_distribution):
@@ -1622 +2060 @@
             self._resume_pack(token)
 
 
-class PackRepository(MetaDirRepository):
+class PackRepository(KnitRepository):
     """Repository with knit objects stored inside pack containers.
 
     The layering for a KnitPackRepository is:
@@ -1647 +2085 @@
 
     """
 
-    # These attributes are inherited from the Repository base class. Setting
-    # them to None ensures that if the constructor is changed to not initialize
-    # them, or a subclass fails to call the constructor, that an error will
-    # occur rather than the system working but generating incorrect data.
-    _commit_builder_class = None
-    _serializer = None
-
-    def __init__(self, _format, a_bzrdir, control_files, _commit_builder_class,
-        _serializer):
-        MetaDirRepository.__init__(self, _format, a_bzrdir, control_files)
-        self._commit_builder_class = _commit_builder_class
-        self._serializer = _serializer
-        self._reconcile_fixes_text_parents = True
-
-    @needs_read_lock
-    def _all_revision_ids(self):
-        """See Repository.all_revision_ids()."""
-        return [key[0] for key in self.revisions.keys()]
-
     def _abort_write_group(self):
         self.revisions._index._key_dependencies.clear()
         self._pack_collection._abort_write_group()