~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/graph.py

Committer: Canonical.com Patch Queue Manager
Date: 2008-05-05 23:15:58 UTC
mfrom: (3377.3.45 submitted)
Revision ID: pqm@pqm.ubuntu.com-20080505231558-7w3zaehbvtcjk7jv

(jam) Make Graph.find_differences() correct,
and create a Graph.find_unique_ancestors function.

files modified:
NEWS

bzrlib/debug.py

bzrlib/graph.py

bzrlib/status.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_status.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

from bzrlib import (

debug,

errors,

revision,

symbol_versioning,

trace,

tsort,

)

from bzrlib.deprecated_graph import (node_distances, select_farthest)

199

201

200

202

def find_difference(self, left_revision, right_revision):

201

203

"""Determine the graph difference between two revisions"""

202

border, common, (left, right) = self._find_border_ancestors(

204

border, common, searchers = self._find_border_ancestors(

203

205

[left_revision, right_revision])

204

return (left.difference(right).difference(common),

205

right.difference(left).difference(common))

206

self._search_for_extra_common(common, searchers)

207

left = searchers[0].seen

208

right = searchers[1].seen

209

return (left.difference(right), right.difference(left))

210

211

def find_unique_ancestors(self, unique_revision, common_revisions):

212

"""Find the unique ancestors for a revision versus others.

213

214

This returns the ancestry of unique_revision, excluding all revisions

215

in the ancestry of common_revisions. If unique_revision is in the

216

ancestry, then the empty set will be returned.

217

218

:param unique_revision: The revision_id whose ancestry we are

219

interested in.

220

XXX: Would this API be better if we allowed multiple revisions on

221

to be searched here?

222

:param common_revisions: Revision_ids of ancestries to exclude.

223

:return: A set of revisions in the ancestry of unique_revision

224

"""

225

if unique_revision in common_revisions:

226

return set()

227

228

# Algorithm description

229

# 1) Walk backwards from the unique node and all common nodes.

230

# 2) When a node is seen by both sides, stop searching it in the unique

231

# walker, include it in the common walker.

232

# 3) Stop searching when there are no nodes left for the unique walker.

233

# At this point, you have a maximal set of unique nodes. Some of

234

# them may actually be common, and you haven't reached them yet.

235

# 4) Start new searchers for the unique nodes, seeded with the

236

# information you have so far.

237

# 5) Continue searching, stopping the common searches when the search

238

# tip is an ancestor of all unique nodes.

239

# 6) Search is done when all common searchers have completed.

240

241

if 'graph' in debug.debug_flags:

242

_mutter = trace.mutter

243

else:

244

def _mutter(*args, **kwargs):

245

pass

246

247

unique_searcher = self._make_breadth_first_searcher([unique_revision])

248

# we know that unique_revision isn't in common_revisions

249

unique_searcher.next()

250

common_searcher = self._make_breadth_first_searcher(common_revisions)

251

252

while unique_searcher._next_query:

253

next_unique_nodes = set(unique_searcher.step())

254

next_common_nodes = set(common_searcher.step())

255

256

# Check if either searcher encounters new nodes seen by the other

257

# side.

258

unique_are_common_nodes = next_unique_nodes.intersection(

259

common_searcher.seen)

260

unique_are_common_nodes.update(

261

next_common_nodes.intersection(unique_searcher.seen))

262

if unique_are_common_nodes:

263

ancestors = unique_searcher.find_seen_ancestors(

264

unique_are_common_nodes)

265

ancestors.update(common_searcher.find_seen_ancestors(ancestors))

266

unique_searcher.stop_searching_any(ancestors)

267

common_searcher.start_searching(ancestors)

268

269

unique_nodes = unique_searcher.seen.difference(common_searcher.seen)

270

if not unique_nodes:

271

return unique_nodes

272

unique_tips = self._remove_simple_descendants(unique_nodes,

273

self.get_parent_map(unique_nodes))

274

275

if len(unique_tips) == 1:

276

unique_searchers = []

277

ancestor_all_unique = unique_searcher.find_seen_ancestors(unique_tips)

278

else:

279

unique_searchers = []

280

for tip in unique_tips:

281

revs_to_search = unique_searcher.find_seen_ancestors([tip])

282

searcher = self._make_breadth_first_searcher(revs_to_search)

283

# We don't care about the starting nodes.

284

searcher._label = tip

285

searcher.step()

286

unique_searchers.append(searcher)

287

288

ancestor_all_unique = None

289

for searcher in unique_searchers:

290

if ancestor_all_unique is None:

291

ancestor_all_unique = set(searcher.seen)

292

else:

293

ancestor_all_unique = ancestor_all_unique.intersection(

294

searcher.seen)

295

# Collapse all the common nodes into a single searcher

296

all_unique_searcher = self._make_breadth_first_searcher(ancestor_all_unique)

297

if ancestor_all_unique:

298

all_unique_searcher.step()

299

300

# Stop any search tips that are already known as ancestors of the

301

# unique nodes

302

common_searcher.stop_searching_any(

303

common_searcher.find_seen_ancestors(ancestor_all_unique))

304

305

total_stopped = 0

306

for searcher in unique_searchers:

307

total_stopped += len(searcher.stop_searching_any(

308

searcher.find_seen_ancestors(ancestor_all_unique)))

309

_mutter('For %s unique nodes, created %s + 1 unique searchers'

310

' (%s stopped search tips, %s common ancestors)',

311

len(unique_nodes), len(unique_searchers), total_stopped,

312

len(ancestor_all_unique))

313

del ancestor_all_unique

314

315

# While we still have common nodes to search

316

while common_searcher._next_query:

317

newly_seen_common = set(common_searcher.step())

318

newly_seen_unique = set()

319

for searcher in unique_searchers:

320

newly_seen_unique.update(searcher.step())

321

# These nodes are common ancestors of all unique nodes

322

unique_are_common_nodes = newly_seen_unique.copy()

323

for searcher in unique_searchers:

324

unique_are_common_nodes = unique_are_common_nodes.intersection(

325

searcher.seen)

326

unique_are_common_nodes = unique_are_common_nodes.intersection(

327

all_unique_searcher.seen)

328

unique_are_common_nodes.update(all_unique_searcher.step())

329

if newly_seen_common:

330

# If a 'common' node is an ancestor of all unique searchers, we

331

# can stop searching it.

332

common_searcher.stop_searching_any(

333

all_unique_searcher.seen.intersection(newly_seen_common))

334

if unique_are_common_nodes:

335

# We have new common-to-all-unique-searchers nodes

336

for searcher in unique_searchers:

337

unique_are_common_nodes.update(

338

searcher.find_seen_ancestors(unique_are_common_nodes))

339

unique_are_common_nodes.update(

340

all_unique_searcher.find_seen_ancestors(unique_are_common_nodes))

341

# Since these are common, we can grab another set of ancestors

342

# that we have seen

343

unique_are_common_nodes.update(

344

common_searcher.find_seen_ancestors(unique_are_common_nodes))

345

346

# The all_unique searcher can start searching the common nodes

347

# but everyone else can stop.

348

all_unique_searcher.start_searching(unique_are_common_nodes)

349

common_searcher.stop_searching_any(unique_are_common_nodes)

350

351

# Filter out searchers that don't actually search different

352

# nodes. We already have the ancestry intersection for them

353

next_unique_searchers = []

354

unique_search_sets = set()

355

for searcher in unique_searchers:

356

stopped = searcher.stop_searching_any(unique_are_common_nodes)

357

will_search_set = frozenset(searcher._next_query)

358

if not will_search_set:

359

_mutter('Unique searcher %s was stopped.'

360

' (%s iterations) %d nodes stopped',

361

searcher._label,

362

searcher._iterations,

363

len(stopped))

364

elif will_search_set not in unique_search_sets:

365

# This searcher is searching a unique set of nodes, let it

366

unique_search_sets.add(will_search_set)

367

next_unique_searchers.append(searcher)

368

else:

369

_mutter('Unique searcher %s stopped for repeated'

370

' search of %s nodes',

371

searcher._label, len(will_search_set))

372

if len(unique_searchers) != len(next_unique_searchers):

373

_mutter('Collapsed %s unique searchers => %s'

374

' at %s iterations',

375

len(unique_searchers), len(next_unique_searchers),

376

all_unique_searcher._iterations)

377

unique_searchers = next_unique_searchers

378

return unique_nodes.difference(common_searcher.seen)

206

379

207

380

@symbol_versioning.deprecated_method(symbol_versioning.one_one)

208

381

def get_parents(self, revisions):

220

393

Deprecated in bzr 1.2 - please see get_parent_map.

221

394

"""

222

395

parents = self.get_parent_map(revisions)

223

return [parent.get(r, None) for r in revisions]

396

return [parents.get(r, None) for r in revisions]

224

397

225

398

def get_parent_map(self, revisions):

226

399

"""Get a map of key:parent_list for revisions.

253

426

"""

254

427

if None in revisions:

255

428

raise errors.InvalidRevisionId(None, self)

256

common_searcher = self._make_breadth_first_searcher([])

257

429

common_ancestors = set()

258

430

searchers = [self._make_breadth_first_searcher([r])

259

431

for r in revisions]

260

432

active_searchers = searchers[:]

261

433

border_ancestors = set()

262

def update_common(searcher, revisions):

263

w_seen_ancestors = searcher.find_seen_ancestors(

264

revision)

265

stopped = searcher.stop_searching_any(w_seen_ancestors)

266

common_ancestors.update(w_seen_ancestors)

267

common_searcher.start_searching(stopped)

268

434

269

435

while True:

270

if len(active_searchers) == 0:

271

return border_ancestors, common_ancestors, [s.seen for s in

272

searchers]

273

try:

274

new_common = common_searcher.next()

275

common_ancestors.update(new_common)

276

except StopIteration:

277

pass

278

else:

279

for searcher in active_searchers:

280

for revision in new_common.intersection(searcher.seen):

281

update_common(searcher, revision)

282

283

436

newly_seen = set()

284

new_active_searchers = []

285

for searcher in active_searchers:

286

try:

287

newly_seen.update(searcher.next())

288

except StopIteration:

289

pass

290

else:

291

new_active_searchers.append(searcher)

292

active_searchers = new_active_searchers

437

for searcher in searchers:

438

new_ancestors = searcher.step()

439

if new_ancestors:

440

newly_seen.update(new_ancestors)

441

new_common = set()

293

442

for revision in newly_seen:

294

443

if revision in common_ancestors:

295

for searcher in searchers:

296

update_common(searcher, revision)

444

# Not a border ancestor because it was seen as common

445

# already

446

new_common.add(revision)

297

447

continue

298

448

for searcher in searchers:

299

449

if revision not in searcher.seen:

300

450

break

301

451

else:

452

# This is a border because it is a first common that we see

453

# after walking for a while.

302

454

border_ancestors.add(revision)

303

for searcher in searchers:

304

update_common(searcher, revision)

455

new_common.add(revision)

456

if new_common:

457

for searcher in searchers:

458

new_common.update(searcher.find_seen_ancestors(new_common))

459

for searcher in searchers:

460

searcher.start_searching(new_common)

461

common_ancestors.update(new_common)

462

463

# Figure out what the searchers will be searching next, and if

464

# there is only 1 set being searched, then we are done searching,

465

# since all searchers would have to be searching the same data,

466

# thus it *must* be in common.

467

unique_search_sets = set()

468

for searcher in searchers:

469

will_search_set = frozenset(searcher._next_query)

470

if will_search_set not in unique_search_sets:

471

# This searcher is searching a unique set of nodes, let it

472

unique_search_sets.add(will_search_set)

473

474

if len(unique_search_sets) == 1:

475

nodes = unique_search_sets.pop()

476

uncommon_nodes = nodes.difference(common_ancestors)

477

assert not uncommon_nodes, ("Somehow we ended up converging"

478

" without actually marking them as"

479

" in common."

480

"\nStart_nodes: %s"

481

"\nuncommon_nodes: %s"

482

% (revisions, uncommon_nodes))

483

break

484

return border_ancestors, common_ancestors, searchers

305

485

306

486

def heads(self, keys):

307

487

"""Return the heads from amongst keys.

387

567

new_common.add(ancestor)

388

568

for searcher in searchers.itervalues():

389

569

seen_ancestors =\

390

searcher.find_seen_ancestors(ancestor)

570

searcher.find_seen_ancestors([ancestor])

391

571

searcher.stop_searching_any(seen_ancestors)

392

572

common_walker.start_searching(new_common)

393

573

return candidate_heads

469

649

return set([candidate_descendant]) == self.heads(

470

650

[candidate_ancestor, candidate_descendant])

471

651

652

def _search_for_extra_common(self, common, searchers):

653

"""Make sure that unique nodes are genuinely unique.

654

655

After _find_border_ancestors, all nodes marked "common" are indeed

656

common. Some of the nodes considered unique are not, due to history

657

shortcuts stopping the searches early.

658

659

We know that we have searched enough when all common search tips are

660

descended from all unique (uncommon) nodes because we know that a node

661

cannot be an ancestor of its own ancestor.

662

663

:param common: A set of common nodes

664

:param searchers: The searchers returned from _find_border_ancestors

665

:return: None

666

"""

667

# Basic algorithm...

668

# A) The passed in searchers should all be on the same tips, thus

669

# they should be considered the "common" searchers.

670

# B) We find the difference between the searchers, these are the

671

# "unique" nodes for each side.

672

# C) We do a quick culling so that we only start searching from the

673

# more interesting unique nodes. (A unique ancestor is more

674

# interesting than any of its children.)

675

# D) We start searching for ancestors common to all unique nodes.

676

# E) We have the common searchers stop searching any ancestors of

677

# nodes found by (D)

678

# F) When there are no more common search tips, we stop

679

680

# TODO: We need a way to remove unique_searchers when they overlap with

681

# other unique searchers.

682

assert len(searchers) == 2, (

683

"Algorithm not yet implemented for > 2 searchers")

684

common_searchers = searchers

685

left_searcher = searchers[0]

686

right_searcher = searchers[1]

687

unique = left_searcher.seen.symmetric_difference(right_searcher.seen)

688

if not unique: # No unique nodes, nothing to do

689

return

690

total_unique = len(unique)

691

unique = self._remove_simple_descendants(unique,

692

self.get_parent_map(unique))

693

simple_unique = len(unique)

694

695

unique_searchers = []

696

for revision_id in unique:

697

if revision_id in left_searcher.seen:

698

parent_searcher = left_searcher

699

else:

700

parent_searcher = right_searcher

701

revs_to_search = parent_searcher.find_seen_ancestors([revision_id])

702

if not revs_to_search: # XXX: This shouldn't be possible

703

revs_to_search = [revision_id]

704

searcher = self._make_breadth_first_searcher(revs_to_search)

705

# We don't care about the starting nodes.

706

searcher.step()

707

unique_searchers.append(searcher)

708

709

# possible todo: aggregate the common searchers into a single common

710

# searcher, just make sure that we include the nodes into the .seen

711

# properties of the original searchers

712

713

ancestor_all_unique = None

714

for searcher in unique_searchers:

715

if ancestor_all_unique is None:

716

ancestor_all_unique = set(searcher.seen)

717

else:

718

ancestor_all_unique = ancestor_all_unique.intersection(

719

searcher.seen)

720

721

trace.mutter('Started %s unique searchers for %s unique revisions',

722

simple_unique, total_unique)

723

724

while True: # If we have no more nodes we have nothing to do

725

newly_seen_common = set()

726

for searcher in common_searchers:

727

newly_seen_common.update(searcher.step())

728

newly_seen_unique = set()

729

for searcher in unique_searchers:

730

newly_seen_unique.update(searcher.step())

731

new_common_unique = set()

732

for revision in newly_seen_unique:

733

for searcher in unique_searchers:

734

if revision not in searcher.seen:

735

break

736

else:

737

# This is a border because it is a first common that we see

738

# after walking for a while.

739

new_common_unique.add(revision)

740

if newly_seen_common:

741

# These are nodes descended from one of the 'common' searchers.

742

# Make sure all searchers are on the same page

743

for searcher in common_searchers:

744

newly_seen_common.update(

745

searcher.find_seen_ancestors(newly_seen_common))

746

# We start searching the whole ancestry. It is a bit wasteful,

747

# though. We really just want to mark all of these nodes as

748

# 'seen' and then start just the tips. However, it requires a

749

# get_parent_map() call to figure out the tips anyway, and all

750

# redundant requests should be fairly fast.

751

for searcher in common_searchers:

752

searcher.start_searching(newly_seen_common)

753

754

# If a 'common' node is an ancestor of all unique searchers, we

755

# can stop searching it.

756

stop_searching_common = ancestor_all_unique.intersection(

757

newly_seen_common)

758

if stop_searching_common:

759

for searcher in common_searchers:

760

searcher.stop_searching_any(stop_searching_common)

761

if new_common_unique:

762

# We found some ancestors that are common

763

for searcher in unique_searchers:

764

new_common_unique.update(

765

searcher.find_seen_ancestors(new_common_unique))

766

# Since these are common, we can grab another set of ancestors

767

# that we have seen

768

for searcher in common_searchers:

769

new_common_unique.update(

770

searcher.find_seen_ancestors(new_common_unique))

771

772

# We can tell all of the unique searchers to start at these

773

# nodes, and tell all of the common searchers to *stop*

774

# searching these nodes

775

for searcher in unique_searchers:

776

searcher.start_searching(new_common_unique)

777

for searcher in common_searchers:

778

searcher.stop_searching_any(new_common_unique)

779

ancestor_all_unique.update(new_common_unique)

780

781

# Filter out searchers that don't actually search different

782

# nodes. We already have the ancestry intersection for them

783

next_unique_searchers = []

784

unique_search_sets = set()

785

for searcher in unique_searchers:

786

will_search_set = frozenset(searcher._next_query)

787

if will_search_set not in unique_search_sets:

788

# This searcher is searching a unique set of nodes, let it

789

unique_search_sets.add(will_search_set)

790

next_unique_searchers.append(searcher)

791

unique_searchers = next_unique_searchers

792

for searcher in common_searchers:

793

if searcher._next_query:

794

break

795

else:

796

# All common searcher have stopped searching

797

return

798

799

def _remove_simple_descendants(self, revisions, parent_map):

800

"""remove revisions which are children of other ones in the set

801

802

This doesn't do any graph searching, it just checks the immediate

803

parent_map to find if there are any children which can be removed.

804

805

:param revisions: A set of revision_ids

806

:return: A set of revision_ids with the children removed

807

"""

808

simple_ancestors = revisions.copy()

809

# TODO: jam 20071214 we *could* restrict it to searching only the

810

# parent_map of revisions already present in 'revisions', but

811

# considering the general use case, I think this is actually

812

# better.

813

814

# This is the same as the following loop. I don't know that it is any

815

# faster.

816

## simple_ancestors.difference_update(r for r, p_ids in parent_map.iteritems()

817

## if p_ids is not None and revisions.intersection(p_ids))

818

## return simple_ancestors

819

820

# Yet Another Way, invert the parent map (which can be cached)

821

## descendants = {}

822

## for revision_id, parent_ids in parent_map.iteritems():

823

## for p_id in parent_ids:

824

## descendants.setdefault(p_id, []).append(revision_id)

825

## for revision in revisions.intersection(descendants):

826

## simple_ancestors.difference_update(descendants[revision])

827

## return simple_ancestors

828

for revision, parent_ids in parent_map.iteritems():

829

if parent_ids is None:

830

continue

831

for parent_id in parent_ids:

832

if parent_id in revisions:

833

# This node has a parent present in the set, so we can

834

# remove it

835

simple_ancestors.discard(revision)

836

break

837

return simple_ancestors

838

472

839

473

840

class HeadsCache(object):

474

841

"""A cache of results for graph heads calls."""

582

949

return SearchResult(self._started_keys, excludes, len(included_keys),

583

950

included_keys)

584

951

952

def step(self):

953

try:

954

return self.next()

955

except StopIteration:

956

return ()

957

585

958

def next(self):

586

959

"""Return the next ancestors of this revision.

587

960

652

1025

:return: A tuple: (set(found_revisions), set(ghost_revisions),

653

1026

set(parents_of_found_revisions), dict(found_revisions:parents)).

654

1027

"""

655

found_parents = set()

1028

found_revisions = set()

656

1029

parents_of_found = set()

657

1030

# revisions may contain nodes that point to other nodes in revisions:

658

1031

# we want to filter them out.

659

1032

self.seen.update(revisions)

660

1033

parent_map = self._parents_provider.get_parent_map(revisions)

1034

found_revisions.update(parent_map)

661

1035

for rev_id, parents in parent_map.iteritems():

662

found_parents.add(rev_id)

663

parents_of_found.update(p for p in parents if p not in self.seen)

664

ghost_parents = revisions - found_parents

665

return found_parents, ghost_parents, parents_of_found, parent_map

1036

new_found_parents = [p for p in parents if p not in self.seen]

1037

if new_found_parents:

1038

# Calling set.update() with an empty generator is actually

1039

# rather expensive.

1040

parents_of_found.update(new_found_parents)

1041

ghost_revisions = revisions - found_revisions

1042

return found_revisions, ghost_revisions, parents_of_found, parent_map

666

1043

667

1044

def __iter__(self):

668

1045

return self

669

1046

670

def find_seen_ancestors(self, revision):

671

"""Find ancestors of this revision that have already been seen."""

672

searcher = _BreadthFirstSearcher([revision], self._parents_provider)

673

seen_ancestors = set()

674

for ancestors in searcher:

675

for ancestor in ancestors:

676

if ancestor not in self.seen:

677

searcher.stop_searching_any([ancestor])

678

else:

679

seen_ancestors.add(ancestor)

1047

def find_seen_ancestors(self, revisions):

1048

"""Find ancestors of these revisions that have already been seen."""

1049

all_seen = self.seen

1050

pending = set(revisions).intersection(all_seen)

1051

seen_ancestors = set(pending)

1052

1053

if self._returning == 'next':

1054

# self.seen contains what nodes have been returned, not what nodes

1055

# have been queried. We don't want to probe for nodes that haven't

1056

# been searched yet.

1057

not_searched_yet = self._next_query

1058

else:

1059

not_searched_yet = ()

1060

pending.difference_update(not_searched_yet)

1061

get_parent_map = self._parents_provider.get_parent_map

1062

while pending:

1063

parent_map = get_parent_map(pending)

1064

all_parents = []

1065

# We don't care if it is a ghost, since it can't be seen if it is

1066

# a ghost

1067

for parent_ids in parent_map.itervalues():

1068

all_parents.extend(parent_ids)

1069

next_pending = all_seen.intersection(all_parents).difference(seen_ancestors)

1070

seen_ancestors.update(next_pending)

1071

next_pending.difference_update(not_searched_yet)

1072

pending = next_pending

1073

680

1074

return seen_ancestors

681

1075

682

1076

def stop_searching_any(self, revisions):

731

1125

revisions = frozenset(revisions)

732

1126

self._started_keys.update(revisions)

733

1127

new_revisions = revisions.difference(self.seen)

734

revs, ghosts, query, parents = self._do_query(revisions)

735

self._stopped_keys.update(ghosts)

736

1128

if self._returning == 'next':

737

1129

self._next_query.update(new_revisions)

1130

self.seen.update(new_revisions)

738

1131

else:

739

1132

# perform a query on revisions

1133

revs, ghosts, query, parents = self._do_query(revisions)

1134

self._stopped_keys.update(ghosts)

740

1135

self._current_present.update(revs)

741

1136

self._current_ghosts.update(ghosts)

742

1137

self._next_query.update(query)

Older »