            found[node_pos] = node

    def _compute_recommended_pages(self):
        """Given the transport's recommended_page_size, determine num pages."""
        recommended_read = self._transport.recommended_page_size()
        recommended_pages = int(math.ceil(recommended_read /
                                          float(_PAGE_SIZE)))
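        # For example, assuming bzrlib's defaults: an HTTP transport
        # recommends 64kB reads and _PAGE_SIZE is 4096 bytes, so this
        # works out to int(math.ceil(65536 / 4096.0)) == 16 pages.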
        return recommended_pages

    def _compute_num_pages(self):
        """Determine the number of pages in this index.

        If we have read the header we use the page count stored there;
        otherwise it is computed from the byte size of the index. The two
        should be identical.
        """
        if self._size is None:
            raise AssertionError('_compute_num_pages should not be called'
                                 ' when self._size is None')
        if self._root_node is not None:
            # This is the number of pages as defined by the header
            return self._row_offsets[-1]
        # This is the number of pages as defined by the size of the index.
        # They should be identical.
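        # Worked example (illustrative sizes): a 276480-byte index with
        # 4096-byte pages gives int(math.ceil(276480 / 4096.0)) == 68 pages,
        # the last of which may be short.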
        num_pages = int(math.ceil(self._size / float(_PAGE_SIZE)))
        return num_pages

    def _get_cached_nodes(self):
        """Determine what nodes we already have cached."""
        # XXX: Update the LRUCache interface to have a .keys() attribute
        cached_nodes = set(self._internal_node_cache._cache.keys())
        cached_nodes.update(self._leaf_node_cache._cache.keys())
        if self._root_node is not None:
            # The root node (page 0) is cached separately from the other
            # nodes, but it still counts as a page we do not need to read.
            cached_nodes.add(0)
        return cached_nodes

    def _expand_nodes(self, node_indexes):
        """Find extra nodes to download.

        The idea is that we always want to make big-enough requests (like
        the transport's recommended read size), but we don't want to waste
        round trips downloading nodes that would just be thrown away, so we
        use the nodes that are already cached to figure
        out what other pages we might want to read.

        :param node_indexes: The nodes that have been requested to read.
        :return: A new list of nodes to download
        """
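        # Illustrative walk-through (hypothetical numbers): asked to read
        # node [2] when self._recommended_pages is 5, the index spans 100
        # pages, and only the root (page 0) is cached, the neighbor
        # expansion below grows the request to [1, 2, 3, 4, 5].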
        if 'index' in debug.debug_flags:
            trace.mutter('expanding: %s\tnodes: %s', self._name, node_indexes)

        if len(node_indexes) >= self._recommended_pages:
            # Don't add more, we are already requesting more than enough
            if 'index' in debug.debug_flags:
                trace.mutter('  not expanding large request (%s >= %s)',
                             len(node_indexes), self._recommended_pages)
            return node_indexes
        if self._size is None:
            # Don't try anything, because we don't know where the file ends
            if 'index' in debug.debug_flags:
                trace.mutter('  not expanding without knowing index size')
            return node_indexes

        # TODO: Should this be cached instead?
        num_pages = self._compute_num_pages()
        cached_nodes = self._get_cached_nodes()

        # if len(cached_nodes) < 2:
        #     # We haven't read enough to justify expansion
        #     return node_indexes

        # If reading recommended_pages would read the rest of the index, just
        # do so.
        if num_pages - len(cached_nodes) <= self._recommended_pages:
            # Read whatever is left.
            # NOTE: this breaks a little bit with the distinction between
            # internal nodes and leaf nodes (as they are cached separately).
            # It is still worthwhile to read it all, but we need to figure out
            # what cache we should really put things in when we are done.
            # However, we may not have read the index header yet to know where
            # the internal nodes break from where the leaf nodes begin; we are
            # sure to know *after* we've done the read. This also does the
            # wrong thing if there are bloom pages. Further, this gives the
            # wrong results because we have 2 caches to account for.
            if cached_nodes:
                expanded = [x for x in xrange(num_pages)
                            if x not in cached_nodes]
            else:
                expanded = range(num_pages)
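            # For example (illustrative numbers): num_pages == 10 with 3
            # pages already cached leaves 10 - 3 == 7 <= a recommendation
            # of 16, so we simply request all 7 uncached pages at once.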
            if 'index' in debug.debug_flags:
                trace.mutter('  reading all unread pages: %s', expanded)
            return expanded

        needed_nodes = self._recommended_pages - len(node_indexes)
        final_nodes = set(node_indexes)
        if self._root_node is None:
            # ATM on the first read of the root node of a large index, we
            # don't bother pre-reading any other pages. This is because the
            # likelihood of actually reading interesting pages is very low.
            # See doc/developers/btree_index_prefetch.txt for a discussion,
            # and a possible implementation when we are guessing that the
            # second layer index is small
            final_nodes = node_indexes
        else:
            # Expand requests to neighbors until we have at least
            # recommended_pages to request. We only want to expand requests
            # within a given layer.
            new_tips = set(final_nodes)
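            # Each pass of the loop below widens the request by one page on
            # either side of every current tip, skipping pages that are
            # already cached or already selected, until we have
            # self._recommended_pages pages or run out of new neighbors.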
            while len(final_nodes) < self._recommended_pages and new_tips:
                next_tips = set()
                for pos in sorted(new_tips):
                    previous = pos - 1
                    if (previous not in cached_nodes
                        and previous not in final_nodes):
                        next_tips.add(previous)
                    after = pos + 1
                    if after < num_pages:
                        if (after not in cached_nodes
                            and after not in final_nodes):
                            next_tips.add(after)
                    # This would keep us from going bigger than
                    # recommended_pages by only expanding the first nodes.
                    # However, if we are making a 'wide' request, it is
                    # reasonable to expand all points equally.
                    # if len(final_nodes) > recommended_pages:
                    #     break
                final_nodes.update(next_tips)
                new_tips = next_tips

        final_nodes = sorted(final_nodes)
        if 'index' in debug.debug_flags:
            trace.mutter('expanded: %s', final_nodes)
        return final_nodes

    def _get_nodes(self, cache, node_indexes):