~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

  • Committer: Martin Pool
  • Date: 2010-04-01 04:41:18 UTC
  • mto: This revision was merged to the branch mainline in revision 5128.
  • Revision ID: mbp@sourcefrog.net-20100401044118-shyctqc02ob08ngz
ignore .testrepository

Show diffs side-by-side

added added

removed removed

Lines of Context:
33
33
    osutils,
34
34
    static_tuple,
35
35
    trace,
36
 
    transport,
37
36
    )
38
37
from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
 
38
from bzrlib.transport import get_transport
39
39
 
40
40
 
41
41
_BTSIGNATURE = "B+Tree Graph Index 2\n"
193
193
            new_backing_file, size = self._spill_mem_keys_without_combining()
194
194
        # Note: The transport here isn't strictly needed, because we will use
195
195
        #       direct access to the new_backing._file object
196
 
        new_backing = BTreeGraphIndex(transport.get_transport('.'),
197
 
                                      '<temp>', size)
 
196
        new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)
198
197
        # GC will clean up the file
199
198
        new_backing._file = new_backing_file
200
199
        if self._combine_backing_indices:
602
601
        """In memory index's have no known corruption at the moment."""
603
602
 
604
603
 
605
 
class _LeafNode(dict):
 
604
class _LeafNode(object):
606
605
    """A leaf node for a serialised B+Tree index."""
607
606
 
608
 
    __slots__ = ('min_key', 'max_key', '_keys')
 
607
    __slots__ = ('keys', 'min_key', 'max_key')
609
608
 
610
609
    def __init__(self, bytes, key_length, ref_list_length):
611
610
        """Parse bytes to create a leaf node object."""
617
616
            self.max_key = key_list[-1][0]
618
617
        else:
619
618
            self.min_key = self.max_key = None
620
 
        super(_LeafNode, self).__init__(key_list)
621
 
        self._keys = dict(self)
622
 
 
623
 
    def all_items(self):
624
 
        """Return a sorted list of (key, (value, refs)) items"""
625
 
        items = self.items()
626
 
        items.sort()
627
 
        return items
628
 
 
629
 
    def all_keys(self):
630
 
        """Return a sorted list of all keys."""
631
 
        keys = self.keys()
632
 
        keys.sort()
633
 
        return keys
 
619
        self.keys = dict(key_list)
634
620
 
635
621
 
636
622
class _InternalNode(object):
685
671
        self._recommended_pages = self._compute_recommended_pages()
686
672
        self._root_node = None
687
673
        self._base_offset = offset
688
 
        self._leaf_factory = _LeafNode
689
674
        # Default max size is 100,000 leave values
690
675
        self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
691
676
        if unlimited_cache:
964
949
        """Cache directly from key => value, skipping the btree."""
965
950
        if self._leaf_value_cache is not None:
966
951
            for node in nodes.itervalues():
967
 
                for key, value in node.all_items():
 
952
                for key, value in node.keys.iteritems():
968
953
                    if key in self._leaf_value_cache:
969
954
                        # Don't add the rest of the keys, we've seen this node
970
955
                        # before.
994
979
        if self._row_offsets[-1] == 1:
995
980
            # There is only the root node, and we read that via key_count()
996
981
            if self.node_ref_lists:
997
 
                for key, (value, refs) in self._root_node.all_items():
 
982
                for key, (value, refs) in sorted(self._root_node.keys.items()):
998
983
                    yield (self, key, value, refs)
999
984
            else:
1000
 
                for key, (value, refs) in self._root_node.all_items():
 
985
                for key, (value, refs) in sorted(self._root_node.keys.items()):
1001
986
                    yield (self, key, value)
1002
987
            return
1003
988
        start_of_leaves = self._row_offsets[-2]
1013
998
        # for spilling index builds to disk.
1014
999
        if self.node_ref_lists:
1015
1000
            for _, node in nodes:
1016
 
                for key, (value, refs) in node.all_items():
 
1001
                for key, (value, refs) in sorted(node.keys.items()):
1017
1002
                    yield (self, key, value, refs)
1018
1003
        else:
1019
1004
            for _, node in nodes:
1020
 
                for key, (value, refs) in node.all_items():
 
1005
                for key, (value, refs) in sorted(node.keys.items()):
1021
1006
                    yield (self, key, value)
1022
1007
 
1023
1008
    @staticmethod
1184
1169
                continue
1185
1170
            node = nodes[node_index]
1186
1171
            for next_sub_key in sub_keys:
1187
 
                if next_sub_key in node:
1188
 
                    value, refs = node[next_sub_key]
 
1172
                if next_sub_key in node.keys:
 
1173
                    value, refs = node.keys[next_sub_key]
1189
1174
                    if self.node_ref_lists:
1190
1175
                        yield (self, next_sub_key, value, refs)
1191
1176
                    else:
1259
1244
            # sub_keys is all of the keys we are looking for that should exist
1260
1245
            # on this page, if they aren't here, then they won't be found
1261
1246
            node = nodes[node_index]
 
1247
            node_keys = node.keys
1262
1248
            parents_to_check = set()
1263
1249
            for next_sub_key in sub_keys:
1264
 
                if next_sub_key not in node:
 
1250
                if next_sub_key not in node_keys:
1265
1251
                    # This one is just not present in the index at all
1266
1252
                    missing_keys.add(next_sub_key)
1267
1253
                else:
1268
 
                    value, refs = node[next_sub_key]
 
1254
                    value, refs = node_keys[next_sub_key]
1269
1255
                    parent_keys = refs[ref_list_num]
1270
1256
                    parent_map[next_sub_key] = parent_keys
1271
1257
                    parents_to_check.update(parent_keys)
1278
1264
            while parents_to_check:
1279
1265
                next_parents_to_check = set()
1280
1266
                for key in parents_to_check:
1281
 
                    if key in node:
1282
 
                        value, refs = node[key]
 
1267
                    if key in node_keys:
 
1268
                        value, refs = node_keys[key]
1283
1269
                        parent_keys = refs[ref_list_num]
1284
1270
                        parent_map[key] = parent_keys
1285
1271
                        next_parents_to_check.update(parent_keys)
1559
1545
                    continue
1560
1546
            bytes = zlib.decompress(data)
1561
1547
            if bytes.startswith(_LEAF_FLAG):
1562
 
                node = self._leaf_factory(bytes, self._key_length,
1563
 
                                          self.node_ref_lists)
 
1548
                node = _LeafNode(bytes, self._key_length, self.node_ref_lists)
1564
1549
            elif bytes.startswith(_INTERNAL_FLAG):
1565
1550
                node = _InternalNode(bytes)
1566
1551
            else:
1585
1570
            pass
1586
1571
 
1587
1572
 
1588
 
_gcchk_factory = _LeafNode
1589
 
 
1590
1573
try:
1591
1574
    from bzrlib import _btree_serializer_pyx as _btree_serializer
1592
 
    _gcchk_factory = _btree_serializer._parse_into_chk
1593
1575
except ImportError, e:
1594
1576
    osutils.failed_to_load_extension(e)
1595
1577
    from bzrlib import _btree_serializer_py as _btree_serializer