~bzr-pqm/bzr/bzr.dev

Committer: John Arbash Meinel
Date: 2009-05-19 06:22:18 UTC
mto: (4371.4.5 vila-better-heads)
mto: This revision was merged to the branch mainline in revision 4449.
Revision ID: john@arbash-meinel.com-20090519062218-u44qbf6koyz2w4c6

The initial results for _find_gdfo on bzr's ancestry:
with 25k revisions, it takes 40.8M steps, a peak todo of 10.6k and 9.7M
nodes skipped. Overall, taking 3m55s, which is pretty unusable.
Only 25% of the evaluations are 'wasted' (where we see a node from a longer
path before we evaluate the node where we thought we should).
My guess is that we get lots of 'small waves', where we number a bunch of
revisions based on path A, then find out it is reachable from path B, and
renumber all of those revisions.
Perhaps walking from the largest rather than the smallest would be advantageous,
though I have the feeling it just means we get 'long walks', where we
walk from the new longest path to the tips of everything underneath it.

files modified:
bzrlib/graph.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

import heapq

import sys

import time

from bzrlib import (

1226

1227

parent_node = _KnownGraphNode(parent_key, None)

1227

1228

nodes[parent_key] = parent_node

1228

1229

parent_node.children.append(key)

1229

self._find_linear_dominators()

1230

# self._find_linear_dominators()

1230

1231

self._find_gdfo()

1231

1232

1233

def _find_linear_dominators(self):

1287

1288

def _find_gdfo(self):

1288

1289

# TODO: Consider moving the tails search into the first-pass over the

1289

1290

# data, inside _find_linear_dominators

1290

tails = [node for node in self._nodes.itervalues()

1291

def find_tails():

1292

return [node for node in self._nodes.itervalues()

1291

1293

if not node.parent_keys]

1294

tails = find_tails()

1292

1295

todo = []

1293

1296

for node in tails:

1294

1297

node.gdfo = 1

1295

1298

heapq.heappush(todo, (1, node))

1299

tstart = time.time()

1300

tnext = tstart + 0.2

1301

processed = 0

1302

skipped = 0

1303

max_gdfo = len(self._nodes)

1296

1304

while todo:

1297

1305

gdfo, next = heapq.heappop(todo)

1306

processed += 1

1298

1307

if gdfo != next.gdfo:

1299

1308

# This node was reached from a longer path, we assume it was

1300

1309

# enqued correctly with the longer gdfo, so don't continue

1301

1310

# processing now

1302

1311

assert gdfo < next.gdfo

1312

skipped += 1

1303

1313

continue

1304

1314

next_gdfo = gdfo + 1

1315

assert next_gdfo < max_gdfo

1305

1316

for child_key in next.children:

1306

1317

child_node = self._nodes[child_key]

1307

1318

if child_node.gdfo is None or child_node.gdfo < next_gdfo:

1308

1319

child_node.gdfo = next_gdfo

1309

1320

heapq.heappush(todo, (next_gdfo, child_node))

1310

1321

tnow = time.time()

1322

if tnow > tnext:

1323

sys.stderr.write('todo: %8d %8d %8d %8d\r'

1324

% (len(todo), processed, skipped, next_gdfo))

1325

tnext = tnow + 0.2

1311

1326

1312

1327

def heads(self, keys):

1313

1328

"""Return the heads from amongst keys.

Older »