~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/btree_index.py

  • Committer: John Arbash Meinel
  • Date: 2008-08-25 03:41:39 UTC
  • mto: This revision was merged to the branch mainline in revision 3688.
  • Revision ID: john@arbash-meinel.com-20080825034139-68nxqiqrmqi1l5f0
Change the IndexBuilders to not generate the nodes_by_key unless needed.

Show diffs side-by-side

added added

removed removed

Lines of Context:
142
142
            key_elements=key_elements)
143
143
        self._spill_at = spill_at
144
144
        self._backing_indices = []
 
145
        # Indicate it hasn't been built yet
 
146
        self._nodes_by_key = None
145
147
 
146
148
    def add_node(self, key, value, references=()):
147
149
        """Add a node to the index.
157
159
        :param value: The value to associate with the key. It may be any
158
160
            bytes as long as it does not contain \0 or \n.
159
161
        """
160
 
        index.GraphIndexBuilder.add_node(self, key, value, references=references)
 
162
        self._check_key(key)
 
163
        if index._newline_null_re.search(value) is not None:
 
164
            raise errors.BadIndexValue(value)
 
165
        if len(references) != self.reference_lists:
 
166
            raise errors.BadIndexValue(references)
 
167
        node_refs = []
 
168
        for reference_list in references:
 
169
            for reference in reference_list:
 
170
                self._check_key(reference)
 
171
            node_refs.append(tuple(reference_list))
 
172
        if key in self._nodes and self._nodes[key][0] == '':
 
173
            raise errors.BadIndexDuplicateKey(key, self)
 
174
        self._nodes[key] = (tuple(node_refs), value)
 
175
        self._keys.add(key)
 
176
        if self._key_length > 1 and self._nodes_by_key is not None:
 
177
            key_dict = self._nodes_by_key
 
178
            if self.reference_lists:
 
179
                key_value = key, value, tuple(node_refs)
 
180
            else:
 
181
                key_value = key, value
 
182
            # possibly should do this on-demand, but it seems likely it is 
 
183
            # always wanted
 
184
            # For a key of (foo, bar, baz) create
 
185
            # _nodes_by_key[foo][bar][baz] = key_value
 
186
            for subkey in key[:-1]:
 
187
                key_dict = key_dict.setdefault(subkey, {})
 
188
            key_dict[key[-1]] = key_value
161
189
        if len(self._keys) < self._spill_at:
162
190
            return
163
191
        iterators_to_combine = [iter(sorted(self._iter_mem_nodes()))]
182
210
            self._backing_indices[pos] = None
183
211
        self._keys = set()
184
212
        self._nodes = {}
185
 
        self._nodes_by_key = {}
 
213
        self._nodes_by_key = None
186
214
 
187
215
    def add_nodes(self, nodes):
188
216
        """Add nodes to the index.
199
227
    def _iter_mem_nodes(self):
200
228
        """Iterate over the nodes held in memory."""
201
229
        if self.reference_lists:
202
 
            for key, (absent, references, value) in self._nodes.iteritems():
203
 
                if not absent:
204
 
                    yield self, key, value, references
 
230
            return ((self, key, value, references)
 
231
                    for key, (references, value) in self._nodes.iteritems())
205
232
        else:
206
 
            for key, (absent, references, value) in self._nodes.iteritems():
207
 
                if not absent:
208
 
                    yield self, key, value
 
233
            return ((self, key, value)
 
234
                    for key, (references, value) in self._nodes.iteritems())
209
235
 
210
236
    def _iter_smallest(self, iterators_to_combine):
211
237
        if len(iterators_to_combine) == 1:
411
437
        if self.reference_lists:
412
438
            for key in keys.intersection(self._keys):
413
439
                node = self._nodes[key]
414
 
                if not node[0]:
415
 
                    yield self, key, node[2], node[1]
 
440
                yield self, key, node[1], node[0]
416
441
        else:
417
442
            for key in keys.intersection(self._keys):
418
443
                node = self._nodes[key]
419
 
                if not node[0]:
420
 
                    yield self, key, node[2]
 
444
                yield self, key, node[1]
421
445
        keys.difference_update(self._keys)
422
446
        for backing in self._backing_indices:
423
447
            if backing is None:
454
478
            if backing is None:
455
479
                continue
456
480
            for node in backing.iter_entries_prefix(keys):
 
481
                # TODO: We should probably remove yielded keys from the keys
 
482
                #       list
457
483
                yield (self,) + node[1:]
458
484
        if self._key_length == 1:
459
485
            for key in keys:
466
492
                    node = self._nodes[key]
467
493
                except KeyError:
468
494
                    continue
469
 
                if node[0]:
470
 
                    continue
471
495
                if self.reference_lists:
472
 
                    yield self, key, node[2], node[1]
 
496
                    yield self, key, node[1], node[0]
473
497
                else:
474
 
                    yield self, key, node[2]
 
498
                    yield self, key, node[1]
475
499
            return
476
500
        for key in keys:
477
501
            # sanity check
480
504
            if len(key) != self._key_length:
481
505
                raise errors.BadIndexKey(key)
482
506
            # find what it refers to:
483
 
            key_dict = self._nodes_by_key
 
507
            key_dict = self._get_nodes_by_key()
484
508
            elements = list(key)
485
509
            # find the subdict to return
486
510
            try:
506
530
            else:
507
531
                yield (self, ) + key_dict
508
532
 
 
533
    def _get_nodes_by_key(self):
 
534
        if self._nodes_by_key is None:
 
535
            nodes_by_key = {}
 
536
            if self.reference_lists:
 
537
                for key, (references, value) in self._nodes.iteritems():
 
538
                    key_dict = nodes_by_key
 
539
                    for subkey in key[:-1]:
 
540
                        key_dict = key_dict.setdefault(subkey, {})
 
541
                    key_dict[key[-1]] = key, value, references
 
542
            else:
 
543
                for key, (references, value) in self._nodes.iteritems():
 
544
                    key_dict = nodes_by_key
 
545
                    for subkey in key[:-1]:
 
546
                        key_dict = key_dict.setdefault(subkey, {})
 
547
                    key_dict[key[-1]] = key, value
 
548
            self._nodes_by_key = nodes_by_key
 
549
        return self._nodes_by_key
 
550
 
509
551
    def key_count(self):
510
552
        """Return an estimate of the number of keys in this index.
511
553