~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

  • Committer: Vincent Ladeuil
  • Date: 2010-10-08 10:50:51 UTC
  • mfrom: (5050.17.31 2.2.2-dev)
  • mto: This revision was merged to the branch mainline in revision 5474.
  • Revision ID: v.ladeuil+lp@free.fr-20101008105051-xd4knkrohzclffic
Merge 2.2 into trunk including fixes for bug #651706 and bug #646133

Show diffs side-by-side

added added

removed removed

Lines of Context:
33
33
    osutils,
34
34
    static_tuple,
35
35
    trace,
 
36
    transport,
36
37
    )
37
38
from bzrlib.index import _OPTION_NODE_REFS, _OPTION_KEY_ELEMENTS, _OPTION_LEN
38
 
from bzrlib.transport import get_transport
39
39
 
40
40
 
41
41
_BTSIGNATURE = "B+Tree Graph Index 2\n"
193
193
            new_backing_file, size = self._spill_mem_keys_without_combining()
194
194
        # Note: The transport here isn't strictly needed, because we will use
195
195
        #       direct access to the new_backing._file object
196
 
        new_backing = BTreeGraphIndex(get_transport('.'), '<temp>', size)
 
196
        new_backing = BTreeGraphIndex(transport.get_transport('.'),
 
197
                                      '<temp>', size)
197
198
        # GC will clean up the file
198
199
        new_backing._file = new_backing_file
199
200
        if self._combine_backing_indices:
601
602
        """In memory index's have no known corruption at the moment."""
602
603
 
603
604
 
604
 
class _LeafNode(object):
 
605
class _LeafNode(dict):
605
606
    """A leaf node for a serialised B+Tree index."""
606
607
 
607
 
    __slots__ = ('keys', 'min_key', 'max_key')
 
608
    __slots__ = ('min_key', 'max_key', '_keys')
608
609
 
609
610
    def __init__(self, bytes, key_length, ref_list_length):
610
611
        """Parse bytes to create a leaf node object."""
616
617
            self.max_key = key_list[-1][0]
617
618
        else:
618
619
            self.min_key = self.max_key = None
619
 
        self.keys = dict(key_list)
 
620
        super(_LeafNode, self).__init__(key_list)
 
621
        self._keys = dict(self)
 
622
 
 
623
    def all_items(self):
 
624
        """Return a sorted list of (key, (value, refs)) items"""
 
625
        items = self.items()
 
626
        items.sort()
 
627
        return items
 
628
 
 
629
    def all_keys(self):
 
630
        """Return a sorted list of all keys."""
 
631
        keys = self.keys()
 
632
        keys.sort()
 
633
        return keys
620
634
 
621
635
 
622
636
class _InternalNode(object):
671
685
        self._recommended_pages = self._compute_recommended_pages()
672
686
        self._root_node = None
673
687
        self._base_offset = offset
 
688
        self._leaf_factory = _LeafNode
674
689
        # Default max size is 100,000 leave values
675
690
        self._leaf_value_cache = None # lru_cache.LRUCache(100*1000)
676
691
        if unlimited_cache:
949
964
        """Cache directly from key => value, skipping the btree."""
950
965
        if self._leaf_value_cache is not None:
951
966
            for node in nodes.itervalues():
952
 
                for key, value in node.keys.iteritems():
 
967
                for key, value in node.all_items():
953
968
                    if key in self._leaf_value_cache:
954
969
                        # Don't add the rest of the keys, we've seen this node
955
970
                        # before.
979
994
        if self._row_offsets[-1] == 1:
980
995
            # There is only the root node, and we read that via key_count()
981
996
            if self.node_ref_lists:
982
 
                for key, (value, refs) in sorted(self._root_node.keys.items()):
 
997
                for key, (value, refs) in self._root_node.all_items():
983
998
                    yield (self, key, value, refs)
984
999
            else:
985
 
                for key, (value, refs) in sorted(self._root_node.keys.items()):
 
1000
                for key, (value, refs) in self._root_node.all_items():
986
1001
                    yield (self, key, value)
987
1002
            return
988
1003
        start_of_leaves = self._row_offsets[-2]
998
1013
        # for spilling index builds to disk.
999
1014
        if self.node_ref_lists:
1000
1015
            for _, node in nodes:
1001
 
                for key, (value, refs) in sorted(node.keys.items()):
 
1016
                for key, (value, refs) in node.all_items():
1002
1017
                    yield (self, key, value, refs)
1003
1018
        else:
1004
1019
            for _, node in nodes:
1005
 
                for key, (value, refs) in sorted(node.keys.items()):
 
1020
                for key, (value, refs) in node.all_items():
1006
1021
                    yield (self, key, value)
1007
1022
 
1008
1023
    @staticmethod
1169
1184
                continue
1170
1185
            node = nodes[node_index]
1171
1186
            for next_sub_key in sub_keys:
1172
 
                if next_sub_key in node.keys:
1173
 
                    value, refs = node.keys[next_sub_key]
 
1187
                if next_sub_key in node:
 
1188
                    value, refs = node[next_sub_key]
1174
1189
                    if self.node_ref_lists:
1175
1190
                        yield (self, next_sub_key, value, refs)
1176
1191
                    else:
1244
1259
            # sub_keys is all of the keys we are looking for that should exist
1245
1260
            # on this page, if they aren't here, then they won't be found
1246
1261
            node = nodes[node_index]
1247
 
            node_keys = node.keys
1248
1262
            parents_to_check = set()
1249
1263
            for next_sub_key in sub_keys:
1250
 
                if next_sub_key not in node_keys:
 
1264
                if next_sub_key not in node:
1251
1265
                    # This one is just not present in the index at all
1252
1266
                    missing_keys.add(next_sub_key)
1253
1267
                else:
1254
 
                    value, refs = node_keys[next_sub_key]
 
1268
                    value, refs = node[next_sub_key]
1255
1269
                    parent_keys = refs[ref_list_num]
1256
1270
                    parent_map[next_sub_key] = parent_keys
1257
1271
                    parents_to_check.update(parent_keys)
1264
1278
            while parents_to_check:
1265
1279
                next_parents_to_check = set()
1266
1280
                for key in parents_to_check:
1267
 
                    if key in node_keys:
1268
 
                        value, refs = node_keys[key]
 
1281
                    if key in node:
 
1282
                        value, refs = node[key]
1269
1283
                        parent_keys = refs[ref_list_num]
1270
1284
                        parent_map[key] = parent_keys
1271
1285
                        next_parents_to_check.update(parent_keys)
1545
1559
                    continue
1546
1560
            bytes = zlib.decompress(data)
1547
1561
            if bytes.startswith(_LEAF_FLAG):
1548
 
                node = _LeafNode(bytes, self._key_length, self.node_ref_lists)
 
1562
                node = self._leaf_factory(bytes, self._key_length,
 
1563
                                          self.node_ref_lists)
1549
1564
            elif bytes.startswith(_INTERNAL_FLAG):
1550
1565
                node = _InternalNode(bytes)
1551
1566
            else:
1570
1585
            pass
1571
1586
 
1572
1587
 
 
1588
_gcchk_factory = _LeafNode
 
1589
 
1573
1590
try:
1574
1591
    from bzrlib import _btree_serializer_pyx as _btree_serializer
 
1592
    _gcchk_factory = _btree_serializer._parse_into_chk
1575
1593
except ImportError, e:
1576
1594
    osutils.failed_to_load_extension(e)
1577
1595
    from bzrlib import _btree_serializer_py as _btree_serializer