            trace.mutter('Requesting %s > %s nodes, not all will be cached',
                         len(nodes), cache._max_cache)
        found = {}
        start_of_leaves = None
        for node_pos, node in self._read_nodes(sorted(nodes)):
            if node_pos == 0: # Special case
                self._root_node = node
            else:
                if start_of_leaves is None:
                    start_of_leaves = self._row_offsets[-2]
                if node_pos < start_of_leaves:
                    self._internal_node_cache.add(node_pos, node)
                else:
                    self._leaf_node_cache.add(node_pos, node)
            cache.add(node_pos, node)
            found[node_pos] = node
        return found

    def _expand_nodes(self, node_indexes):
        """Find extra nodes to download.

        The idea is that we always want to make big-enough requests (like 64kB
        for http), so that we don't waste round trips. So, given the entries
        we already have cached and the new nodes being downloaded, figure out
        what other pages we might want to read.

        :param node_indexes: The nodes that have been requested to read.
        # :param recommended_size: The size of download we want; this assumes
        #       that readv() is accomplished in a single round trip (which is
        #       true for http and bzr+ssh, and sftp uses async to get close)
        # :param max_page: The last possible page to read (possibly max_size
        #       would be a better bound)
        :return: A new list of nodes to download
        """
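        # Illustrative sizing only (assumed figures, not taken from callers):
        # with the 4kB _PAGE_SIZE used by this module and a transport that
        # recommends 64kB reads (the http case mentioned above),
        # recommended_read is 16kB and recommended_pages == 4, so a request
        # for page 10 of a large, uncached index would grow to roughly
        # [8, 9, 10, 11, 12] below.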
        trace.note('request: %s\tnodes: %s', self._name[-14:], node_indexes)
        # return node_indexes

        # TODO: This could be cached
        recommended_read = self._transport.recommended_page_size() / 4
        recommended_pages = int(math.ceil(recommended_read /
                                          float(_PAGE_SIZE)))
        if len(node_indexes) >= recommended_pages:
            # Don't add more, we are already requesting more than enough
            return node_indexes
        if self._size is None:
            # Don't try anything, because we don't know where the file ends
            return node_indexes
        # The idea here is to get nodes "next to" the ones we are already
        # reading
        max_page = int(math.ceil(self._size / float(_PAGE_SIZE)))
        # XXX: Update the LRUCache interface to have a .keys() attribute
        cached_nodes = set(self._internal_node_cache._cache.keys())
        cached_nodes.update(self._leaf_node_cache._cache.keys())
        if self._root_node is not None:
            cached_nodes.add(0)

        # If reading recommended_pages would read the rest of the index, just
        # read it all
        if max_page - len(cached_nodes) <= recommended_pages:
            # Just read the whole thing
            # NOTE: this breaks a little bit with the distinction between index
            # nodes and leaf nodes (as they are cached separately). It is
            # still worthwhile to read it all, but we need to figure out what
            # cache we should really put things in when we are done.
            # However, we may not have read the index header yet, so we don't
            # know where the internal nodes end and the leaf nodes begin. We
            # are sure to know *after* we've done the read.
            # This also does the wrong thing if there are bloom pages.
            # Further, this gives the wrong results because we have 2 caches
            # to keep track of.
            if cached_nodes:
                return [x for x in xrange(max_page) if x not in cached_nodes]
            return range(max_page)

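        # Worked example for the read-everything branch above (illustrative
        # numbers): a 60kB index gives max_page == 15; with 12 pages already
        # cached and recommended_pages == 4, 15 - 12 <= 4, so we return the
        # remaining uncached pages instead of expanding around the request.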
        needed_nodes = recommended_pages - len(node_indexes)
        final_nodes = set(node_indexes)
        if node_indexes == [0]:
            # Special case when we are reading the root node, just read the
            # first N pages along with the root node
            final_nodes = node_indexes
            # for index in xrange(1, max_page):
            #     if index not in final_nodes and index not in cached_nodes:
            #         final_nodes.add(index)
            #         if len(final_nodes) >= recommended_pages:
            #             break
        else:
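            # Grow outward one page at a time around each requested page,
            # skipping pages we already have. E.g. (illustrative) a request
            # for [7] with only page 6 cached expands to [7, 8, 9, 10]: the
            # cached neighbour blocks growth to the left.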
            while len(final_nodes) < recommended_pages:
                start_count = len(final_nodes)
                for pos in sorted(final_nodes):
                    if pos > 0:
                        previous = pos - 1
                        if previous not in cached_nodes:
                            final_nodes.add(previous)
                    if pos < max_page - 1:
                        after = pos + 1
                        if after not in cached_nodes:
                            final_nodes.add(after)
                if len(final_nodes) == start_count:
                    break # all neighbours are cached or out of range
                # if len(final_nodes) > recommended_pages:
                #     break

        final_nodes = sorted(final_nodes)
        trace.note('\t\t\t\t%s', final_nodes)
        return final_nodes

    def _get_nodes(self, cache, node_indexes):