~bzr-pqm/bzr/bzr.dev

Committer: Canonical.com Patch Queue Manager
Date: 2008-01-18 01:16:25 UTC
mfrom: (3184.1.13 integration)
Revision ID: pqm@pqm.ubuntu.com-20080118011625-465mgy0mhdz1jiky

(robertc) Reduce traffic when requesting revision streams from a
smart server (3 MB to 172 bytes for a full bzr.dev pull)
(Robert Collins)

files modified:
NEWS

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/smart/client.py

bzrlib/smart/repository.py

bzrlib/smart/request.py

bzrlib/symbol_versioning.py

bzrlib/tests/interrepository_implementations/test_interrepository.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_repository.py

bzrlib/tests/test_smart.py

Show diffs side-by-side

added added

removed removed

bzrlib/graph.py

509

self._iterations = 0

510

self._next_query = set(revisions)

511

self.seen = set()

512

self._started_keys = set(self._next_query)

513

self._stopped_keys = set()

512

514

self._parents_provider = parents_provider

513

515

self._returning = 'next_with_ghosts'

516

self._current_present = set()

517

self._current_ghosts = set()

518

self._current_parents = {}

514

519

515

520

def __repr__(self):

516

521

if self._iterations:

521

526

return ('_BreadthFirstSearcher(iterations=%d, %s,'

522

527

' seen=%r)' % (self._iterations, search, list(self.seen)))

523

528

529

def get_result(self):

530

"""Get a SearchResult for the current state of this searcher.

531

532

:return: A SearchResult for this search so far. The SearchResult is

533

static - the search can be advanced and the search result will not

534

be invalidated or altered.

535

"""

536

if self._returning == 'next':

537

# We have to know the current nodes children to be able to list the

538

# exclude keys for them. However, while we could have a second

539

# look-ahead result buffer and shuffle things around, this method

540

# is typically only called once per search - when memoising the

541

# results of the search.

542

found, ghosts, next, parents = self._do_query(self._next_query)

543

# pretend we didn't query: perhaps we should tweak _do_query to be

544

# entirely stateless?

545

self.seen.difference_update(next)

546

next_query = next.union(ghosts)

547

else:

548

next_query = self._next_query

549

excludes = self._stopped_keys.union(next_query)

550

included_keys = self.seen.difference(excludes)

551

return SearchResult(self._started_keys, excludes, len(included_keys),

552

included_keys)

553

524

554

def next(self):

525

555

"""Return the next ancestors of this revision.

526

556

538

568

# switch to returning the query, not the results.

539

569

self._returning = 'next'

540

570

self._iterations += 1

541

self.seen.update(self._next_query)

542

571

else:

543

572

self._advance()

544

573

if len(self._next_query) == 0:

545

574

raise StopIteration()

575

# We have seen what we're querying at this point as we are returning

576

# the query, not the results.

577

self.seen.update(self._next_query)

546

578

return self._next_query

547

579

548

580

def next_with_ghosts(self):

576

608

self._current_ghosts = ghosts

577

609

self._next_query = next

578

610

self._current_parents = parents

611

# ghosts are implicit stop points, otherwise the search cannot be

612

# repeated when ghosts are filled.

613

self._stopped_keys.update(ghosts)

579

614

580

615

def _do_query(self, revisions):

581

616

"""Query for revisions.

582

617

618

Adds revisions to the seen set.

619

583

620

:param revisions: Revisions to query.

584

621

:return: A tuple: (set(found_revisions), set(ghost_revisions),

585

622

set(parents_of_found_revisions), dict(found_revisions:parents)).

586

623

"""

587

624

found_parents = set()

588

625

parents_of_found = set()

626

# revisions may contain nodes that point to other nodes in revisions:

627

# we want to filter them out.

628

self.seen.update(revisions)

589

629

parent_map = self._parents_provider.get_parent_map(revisions)

590

630

for rev_id, parents in parent_map.iteritems():

591

631

found_parents.add(rev_id)

592

632

parents_of_found.update(p for p in parents if p not in self.seen)

593

633

ghost_parents = revisions - found_parents

594

self.seen.update(parents_of_found)

595

634

return found_parents, ghost_parents, parents_of_found, parent_map

596

635

597

636

def __iter__(self):

621

660

stopped = self._next_query.intersection(revisions)

622

661

self._next_query = self._next_query.difference(revisions)

623

662

else:

624

stopped = set()

625

stopped.update(self._current_present.intersection(revisions))

626

stopped.update(self._current_ghosts.intersection(revisions))

663

stopped_present = self._current_present.intersection(revisions)

664

stopped = stopped_present.union(

665

self._current_ghosts.intersection(revisions))

627

666

self._current_present.difference_update(stopped)

628

667

self._current_ghosts.difference_update(stopped)

629

668

# stopping 'x' should stop returning parents of 'x', but

630

669

# not if 'y' always references those same parents

631

670

stop_rev_references = {}

632

for rev in stopped:

671

for rev in stopped_present:

633

672

for parent_id in self._current_parents[rev]:

634

673

if parent_id not in stop_rev_references:

635

674

stop_rev_references[parent_id] = 0

647

686

if refs == 0:

648

687

stop_parents.add(rev_id)

649

688

self._next_query.difference_update(stop_parents)

689

self._stopped_keys.update(stopped)

650

690

return stopped

651

691

652

692

def start_searching(self, revisions):

658

698

ghost/not ghost status of revisions. (A tuple (present, ghosted)).

659

699

"""

660

700

revisions = frozenset(revisions)

701

self._started_keys.update(revisions)

702

new_revisions = revisions.difference(self.seen)

703

revs, ghosts, query, parents = self._do_query(revisions)

704

self._stopped_keys.update(ghosts)

661

705

if self._returning == 'next':

662

self._next_query.update(revisions.difference(self.seen))

663

self.seen.update(revisions)

706

self._next_query.update(new_revisions)

664

707

else:

665

708

# perform a query on revisions

666

revs, ghosts, query, parents = self._do_query(revisions)

667

709

self._current_present.update(revs)

668

710

self._current_ghosts.update(ghosts)

669

711

self._next_query.update(query)

670

712

self._current_parents.update(parents)

671

713

return revs, ghosts

714

715

716

class SearchResult(object):

717

"""The result of a breadth first search.

718

719

A SearchResult provides the ability to reconstruct the search or access a

720

set of the keys the search found.

721

"""

722

723

def __init__(self, start_keys, exclude_keys, key_count, keys):

724

"""Create a SearchResult.

725

726

:param start_keys: The keys the search started at.

727

:param exclude_keys: The keys the search excludes.

728

:param key_count: The total number of keys (from start to but not

729

including exclude).

730

:param keys: The keys the search found. Note that in future we may get

731

a SearchResult from a smart server, in which case the keys list is

732

not necessarily immediately available.

733

"""

734

self._recipe = (start_keys, exclude_keys, key_count)

735

self._keys = frozenset(keys)

736

737

def get_recipe(self):

738

"""Return a recipe that can be used to replay this search.

739

740

The recipe allows reconstruction of the same results at a later date

741

without knowing all the found keys. The essential elements are a list

742

of keys to start and and to stop at. In order to give reproducible

743

results when ghosts are encountered by a search they are automatically

744

added to the exclude list (or else ghost filling may alter the

745

results).

746

747

:return: A tuple (start_keys_set, exclude_keys_set, revision_count). To

748

recreate the results of this search, create a breadth first

749

searcher on the same graph starting at start_keys. Then call next()

750

(or next_with_ghosts()) repeatedly, and on every result, call

751

stop_searching_any on any keys from the exclude_keys set. The

752

revision_count value acts as a trivial cross-check - the found

753

revisions of the new search should have as many elements as

754

revision_count. If it does not, then additional revisions have been

755

ghosted since the search was executed the first time and the second

756

time.

757

"""

758

return self._recipe

759

760

def get_keys(self):

761

"""Return the keys found in this search.

762

763

:return: A set of keys.

764

"""

765

return self._keys

766

Older »