~bzr-pqm/bzr/bzr.dev

Committer: Canonical.com Patch Queue Manager
Date: 2011-08-25 07:51:37 UTC
mfrom: (6015.23.17 2.4-too-much-walking-388269)
Revision ID: pqm@pqm.ubuntu.com-20110825075137-9h77a9wtrnh14hkc

(jameinel) Bug #388269, when walking to find revisions to transmit,
only send a local part of the search graph,
not the whole graph walked for every step. (John A Meinel)

files modified:
bzrlib/graph.py

bzrlib/remote.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_remote.py

bzrlib/vf_repository.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

def get_parent_map(self, keys):

"""See StackedParentsProvider.get_parent_map"""

ancestry = self.ancestry

return dict((k, ancestry[k]) for k in keys if k in ancestry)

return dict([(k, ancestry[k]) for k in keys if k in ancestry])

class StackedParentsProvider(object):

1419

parents_of_found = set()

1420

# revisions may contain nodes that point to other nodes in revisions:

1421

# we want to filter them out.

1422

self.seen.update(revisions)

1422

seen = self.seen

1423

seen.update(revisions)

1423

1424

parent_map = self._parents_provider.get_parent_map(revisions)

1424

1425

found_revisions.update(parent_map)

1425

1426

for rev_id, parents in parent_map.iteritems():

1426

1427

if parents is None:

1427

1428

continue

1428

new_found_parents = [p for p in parents if p not in self.seen]

1429

new_found_parents = [p for p in parents if p not in seen]

1429

1430

if new_found_parents:

1430

1431

# Calling set.update() with an empty generator is actually

1431

1432

# rather expensive.

1891

1892

limit=self.limit)

1892

1893

1894

1895

def invert_parent_map(parent_map):

1896

"""Given a map from child => parents, create a map of parent=>children"""

1897

child_map = {}

1898

for child, parents in parent_map.iteritems():

1899

for p in parents:

1900

# Any given parent is likely to have only a small handful

1901

# of children, many will have only one. So we avoid mem overhead of

1902

# a list, in exchange for extra copying of tuples

1903

if p not in child_map:

1904

child_map[p] = (child,)

1905

else:

1906

child_map[p] = child_map[p] + (child,)

1907

return child_map

1908

1909

1910

def _find_possible_heads(parent_map, tip_keys, depth):

1911

"""Walk backwards (towards children) through the parent_map.

1912

1913

This finds 'heads' that will hopefully succinctly describe our search

1914

graph.

1915

"""

1916

child_map = invert_parent_map(parent_map)

1917

heads = set()

1918

current_roots = tip_keys

1919

walked = set(current_roots)

1920

while current_roots and depth > 0:

1921

depth -= 1

1922

children = set()

1923

children_update = children.update

1924

for p in current_roots:

1925

# Is it better to pre- or post- filter the children?

1926

try:

1927

children_update(child_map[p])

1928

except KeyError:

1929

heads.add(p)

1930

# If we've seen a key before, we don't want to walk it again. Note that

1931

# 'children' stays relatively small while 'walked' grows large. So

1932

# don't use 'difference_update' here which has to walk all of 'walked'.

1933

# '.difference' is smart enough to walk only children and compare it to

1934

# walked.

1935

children = children.difference(walked)

1936

walked.update(children)

1937

current_roots = children

1938

if current_roots:

1939

# We walked to the end of depth, so these are the new tips.

1940

heads.update(current_roots)

1941

return heads

1942

1943

1944

def _run_search(parent_map, heads, exclude_keys):

1945

"""Given a parent map, run a _BreadthFirstSearcher on it.

1946

1947

Start at heads, walk until you hit exclude_keys. As a further improvement,

1948

watch for any heads that you encounter while walking, which means they were

1949

not heads of the search.

1950

1951

This is mostly used to generate a succinct recipe for how to walk through

1952

most of parent_map.

1953

1954

:return: (_BreadthFirstSearcher, set(heads_encountered_by_walking))

1955

"""

1956

g = Graph(DictParentsProvider(parent_map))

1957

s = g._make_breadth_first_searcher(heads)

1958

found_heads = set()

1959

while True:

1960

try:

1961

next_revs = s.next()

1962

except StopIteration:

1963

break

1964

for parents in s._current_parents.itervalues():

1965

f_heads = heads.intersection(parents)

1966

if f_heads:

1967

found_heads.update(f_heads)

1968

stop_keys = exclude_keys.intersection(next_revs)

1969

if stop_keys:

1970

s.stop_searching_any(stop_keys)

1971

for parents in s._current_parents.itervalues():

1972

f_heads = heads.intersection(parents)

1973

if f_heads:

1974

found_heads.update(f_heads)

1975

return s, found_heads

1976

1977

1978

def limited_search_result_from_parent_map(parent_map, missing_keys, tip_keys,

1979

depth):

1980

"""Transform a parent_map that is searching 'tip_keys' into an

1981

approximate SearchResult.

1982

1983

We should be able to generate a SearchResult from a given set of starting

1984

keys, that covers a subset of parent_map that has the last step pointing at

1985

tip_keys. This is to handle the case that really-long-searches shouldn't be

1986

started from scratch on each get_parent_map request, but we *do* want to

1987

filter out some of the keys that we've already seen, so we don't get

1988

information that we already know about on every request.

1989

1990

The server will validate the search (that starting at start_keys and

1991

stopping at stop_keys yields the exact key_count), so we have to be careful

1992

to give an exact recipe.

1993

1994

Basic algorithm is:

1995

1) Invert parent_map to get child_map (todo: have it cached and pass it

1996

in)

1997

2) Starting at tip_keys, walk towards children for 'depth' steps.

1998

3) At that point, we have the 'start' keys.

1999

4) Start walking parent_map from 'start' keys, counting how many keys

2000

are seen, and generating stop_keys for anything that would walk

2001

outside of the parent_map.

2002

2003

:param parent_map: A map from {child_id: (parent_ids,)}

2004

:param missing_keys: parent_ids that we know are unavailable

2005

:param tip_keys: the revision_ids that we are searching

2006

:param depth: How far back to walk.

2007

"""

2008

if not parent_map:

2009

# No search to send, because we haven't done any searching yet.

2010

return [], [], 0

2011

heads = _find_possible_heads(parent_map, tip_keys, depth)

2012

s, found_heads = _run_search(parent_map, heads, set(tip_keys))

2013

_, start_keys, exclude_keys, key_count = s.get_result().get_recipe()

2014

if found_heads:

2015

# Anything in found_heads are redundant start_keys, we hit them while

2016

# walking, so we can exclude them from the start list.

2017

start_keys = set(start_keys).difference(found_heads)

2018

return start_keys, exclude_keys, key_count

2019

2020

2021

def search_result_from_parent_map(parent_map, missing_keys):

2022

"""Transform a parent_map into SearchResult information."""

2023

if not parent_map:

2024

# parent_map is empty or None, simple search result

2025

return [], [], 0

2026

# start_set is all the keys in the cache

2027

start_set = set(parent_map)

2028

# result set is all the references to keys in the cache

2029

result_parents = set()

2030

for parents in parent_map.itervalues():

2031

result_parents.update(parents)

2032

stop_keys = result_parents.difference(start_set)

2033

# We don't need to send ghosts back to the server as a position to

2034

# stop either.

2035

stop_keys.difference_update(missing_keys)

2036

key_count = len(parent_map)

2037

if (revision.NULL_REVISION in result_parents

2038

and revision.NULL_REVISION in missing_keys):

2039

# If we pruned NULL_REVISION from the stop_keys because it's also

2040

# in our cache of "missing" keys we need to increment our key count

2041

# by 1, because the reconsitituted SearchResult on the server will

2042

# still consider NULL_REVISION to be an included key.

2043

key_count += 1

2044

included_keys = start_set.intersection(result_parents)

2045

start_set.difference_update(included_keys)

2046

return start_set, stop_keys, key_count

2047

2048

1894

2049

def collapse_linear_regions(parent_map):

1895

2050

"""Collapse regions of the graph that are 'linear'.

1896

2051

Older »