~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Martin Pool
Date: 2009-07-24 03:15:56 UTC
mfrom: (4565 +trunk)
mto: This revision was merged to the branch mainline in revision 4566.
Revision ID: mbp@sourcefrog.net-20090724031556-5zyef6f1ixtn6r3z

merge news

files added:
bzrlib/_annotator_py.py

bzrlib/_annotator_pyx.pyx

bzrlib/tests/per_workingtree/test_annotate_iter.py

bzrlib/tests/test__annotator.py

doc/en/upgrade-guide

doc/en/upgrade-guide/data_migration.txt

doc/en/upgrade-guide/index.txt

doc/en/upgrade-guide/overview.txt

doc/en/upgrade-guide/tips_and_tricks.txt

doc/es/quick-reference/quick-start-summary.pdf

doc/es/quick-reference/quick-start-summary.png

tools/win32/bootstrap.py

tools/win32/buildout-templates

tools/win32/buildout-templates/bin

tools/win32/buildout-templates/bin/build-installer.bat.in

tools/win32/buildout.cfg

files renamed:
bzrlib/tests/branch_implementations/ => bzrlib/tests/per_branch/

bzrlib/tests/bzrdir_implementations/ => bzrlib/tests/per_bzrdir/

bzrlib/tests/interrepository_implementations/ => bzrlib/tests/per_interrepository/

bzrlib/tests/intertree_implementations/ => bzrlib/tests/per_intertree/

bzrlib/tests/inventory_implementations/ => bzrlib/tests/per_inventory/

bzrlib/tests/test_transport_implementations.py => bzrlib/tests/per_transport.py

bzrlib/tests/tree_implementations/ => bzrlib/tests/per_tree/

bzrlib/tests/workingtree_implementations/ => bzrlib/tests/per_workingtree/

doc/es/guia-desarrollador/ => doc/es/developer-guide/

doc/es/referencia-rapida/ => doc/es/quick-reference/

doc/es/referencia-rapida/referencia-rapida.svg => doc/es/quick-reference/quick-start-summary.svg

doc/es/notas-version/ => doc/es/release-notes/

doc/es/guia-usuario/ => doc/es/user-guide/

doc/es/referencia/ => doc/es/user-reference/

files modified:
.bzrignore

Makefile

NEWS

bzrlib/__init__.py

bzrlib/_known_graph_py.py

bzrlib/_known_graph_pyx.pyx

bzrlib/annotate.py

bzrlib/branchbuilder.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bzrdir.py

bzrlib/chk_map.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/export/dir_exporter.py

bzrlib/foreign.py

bzrlib/groupcompress.py

bzrlib/help_topics/en/debug-flags.txt

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/lru_cache.py

bzrlib/merge.py

bzrlib/mutabletree.py

bzrlib/osutils.py

bzrlib/progress.py

bzrlib/push.py

bzrlib/reconfigure.py

bzrlib/repofmt/groupcompress_repo.py

bzrlib/revisiontree.py

bzrlib/send.py

bzrlib/shelf.py

bzrlib/shelf_ui.py

bzrlib/smart/medium.py

bzrlib/smart/protocol.py

bzrlib/smart/request.py

bzrlib/smart/server.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_switch.py

bzrlib/tests/http_server.py

bzrlib/tests/per_branch/__init__.py

bzrlib/tests/per_branch/test_branch.py

bzrlib/tests/per_branch/test_break_lock.py

bzrlib/tests/per_branch/test_check.py

bzrlib/tests/per_branch/test_commit.py

bzrlib/tests/per_branch/test_create_checkout.py

bzrlib/tests/per_branch/test_create_clone.py

bzrlib/tests/per_branch/test_dotted_revno_to_revision_id.py

bzrlib/tests/per_branch/test_get_revision_id_to_revno_map.py

bzrlib/tests/per_branch/test_http.py

bzrlib/tests/per_branch/test_iter_merge_sorted_revisions.py

bzrlib/tests/per_branch/test_locking.py

bzrlib/tests/per_branch/test_pull.py

bzrlib/tests/per_branch/test_push.py

bzrlib/tests/per_branch/test_reconcile.py

bzrlib/tests/per_branch/test_revision_history.py

bzrlib/tests/per_branch/test_revision_id_to_dotted_revno.py

bzrlib/tests/per_branch/test_revision_id_to_revno.py

bzrlib/tests/per_branch/test_sprout.py

bzrlib/tests/per_branch/test_stacking.py

bzrlib/tests/per_branch/test_tags.py

bzrlib/tests/per_branch/test_uncommit.py

bzrlib/tests/per_branch/test_update.py

bzrlib/tests/per_bzrdir/__init__.py

bzrlib/tests/per_bzrdir/test_bzrdir.py

bzrlib/tests/per_bzrdir/test_push.py

bzrlib/tests/per_interbranch/test_pull.py

bzrlib/tests/per_interbranch/test_push.py

bzrlib/tests/per_interrepository/__init__.py

bzrlib/tests/per_interrepository/test_fetch.py

bzrlib/tests/per_interrepository/test_interrepository.py

bzrlib/tests/per_intertree/__init__.py

bzrlib/tests/per_intertree/test_compare.py

bzrlib/tests/per_inventory/__init__.py

bzrlib/tests/per_inventory/basics.py

bzrlib/tests/per_repository/__init__.py

bzrlib/tests/per_repository/test_break_lock.py

bzrlib/tests/per_repository/test_fetch.py

bzrlib/tests/per_repository_reference/test_break_lock.py

bzrlib/tests/per_tree/__init__.py

bzrlib/tests/per_tree/test_annotate_iter.py

bzrlib/tests/per_tree/test_get_file_mtime.py

bzrlib/tests/per_tree/test_get_file_with_stat.py

bzrlib/tests/per_tree/test_get_root_id.py

bzrlib/tests/per_tree/test_get_symlink_target.py

bzrlib/tests/per_tree/test_inv.py

bzrlib/tests/per_tree/test_iter_search_rules.py

bzrlib/tests/per_tree/test_list_files.py

bzrlib/tests/per_tree/test_path_content_summary.py

bzrlib/tests/per_tree/test_revision_tree.py

bzrlib/tests/per_tree/test_test_trees.py

bzrlib/tests/per_tree/test_tree.py

bzrlib/tests/per_tree/test_walkdirs.py

bzrlib/tests/per_workingtree/__init__.py

bzrlib/tests/per_workingtree/test_add.py

bzrlib/tests/per_workingtree/test_add_reference.py

bzrlib/tests/per_workingtree/test_basis_inventory.py

bzrlib/tests/per_workingtree/test_basis_tree.py

bzrlib/tests/per_workingtree/test_break_lock.py

bzrlib/tests/per_workingtree/test_changes_from.py

bzrlib/tests/per_workingtree/test_commit.py

bzrlib/tests/per_workingtree/test_content_filters.py

bzrlib/tests/per_workingtree/test_eol_conversion.py

bzrlib/tests/per_workingtree/test_executable.py

bzrlib/tests/per_workingtree/test_flush.py

bzrlib/tests/per_workingtree/test_get_file_mtime.py

bzrlib/tests/per_workingtree/test_get_parent_ids.py

bzrlib/tests/per_workingtree/test_inv.py

bzrlib/tests/per_workingtree/test_is_control_filename.py

bzrlib/tests/per_workingtree/test_is_ignored.py

bzrlib/tests/per_workingtree/test_locking.py

bzrlib/tests/per_workingtree/test_merge_from_branch.py

bzrlib/tests/per_workingtree/test_mkdir.py

bzrlib/tests/per_workingtree/test_move.py

bzrlib/tests/per_workingtree/test_nested_specifics.py

bzrlib/tests/per_workingtree/test_parents.py

bzrlib/tests/per_workingtree/test_paths2ids.py

bzrlib/tests/per_workingtree/test_pull.py

bzrlib/tests/per_workingtree/test_put_file.py

bzrlib/tests/per_workingtree/test_read_working_inventory.py

bzrlib/tests/per_workingtree/test_readonly.py

bzrlib/tests/per_workingtree/test_remove.py

bzrlib/tests/per_workingtree/test_rename_one.py

bzrlib/tests/per_workingtree/test_revision_tree.py

bzrlib/tests/per_workingtree/test_set_root_id.py

bzrlib/tests/per_workingtree/test_smart_add.py

bzrlib/tests/per_workingtree/test_uncommit.py

bzrlib/tests/per_workingtree/test_unversion.py

bzrlib/tests/per_workingtree/test_views.py

bzrlib/tests/per_workingtree/test_walkdirs.py

bzrlib/tests/per_workingtree/test_workingtree.py

bzrlib/tests/test__known_graph.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_chk_map.py

bzrlib/tests/test_config.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_export.py

bzrlib/tests/test_fetch.py

bzrlib/tests/test_ftp_transport.py

bzrlib/tests/test_inv.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_lockable_files.py

bzrlib/tests/test_lockdir.py

bzrlib/tests/test_lru_cache.py

bzrlib/tests/test_osutils.py

bzrlib/tests/test_pack_repository.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_remote.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_shelf.py

bzrlib/tests/test_shelf_ui.py

bzrlib/tests/test_smart.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_ui.py

bzrlib/tests/test_versionedfile.py

bzrlib/transform.py

bzrlib/transport/ftp/__init__.py

bzrlib/transport/memory.py

bzrlib/transport/ssh.py

bzrlib/tree.py

bzrlib/ui/__init__.py

bzrlib/ui/text.py

bzrlib/urlutils.py

bzrlib/versionedfile.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml8.py

doc/developers/bug-handling.txt

doc/developers/inventory.txt

doc/es/mini-tutorial/index.txt

doc/es/quick-reference/Makefile

doc/index.es.txt

doc/index.txt

setup.py

tools/generate_docs.py

tools/win32/build_release.py

tools/win32/ostools.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

664

665

see parse_fulltext which this inverts.

666

"""

667

# TODO: jam 20070209 We only do the caching thing to make sure that

668

# the origin is a valid utf-8 line, eventually we could remove it

669

667

return ['%s %s' % (o, t) for o, t in content._lines]

670

668

671

669

def lower_line_delta(self, delta):

758

756

759

757

def annotate(self, knit, key):

760

758

annotator = _KnitAnnotator(knit)

761

return annotator.annotate(key)

759

return annotator.annotate_flat(key)

762

760

763

761

764

762

1044

1042

"""See VersionedFiles.annotate."""

1045

1043

return self._factory.annotate(self, key)

1046

1044

1045

def get_annotator(self):

1046

return _KnitAnnotator(self)

1047

1048

def check(self, progress_bar=None):

1048

1049

"""See VersionedFiles.check()."""

1049

1050

# This doesn't actually test extraction of everything, but that will

3336

3337

recommended.

3337

3338

"""

3338

3339

annotator = _KnitAnnotator(knit)

3339

return iter(annotator.annotate(revision_id))

3340

3341

3342

class _KnitAnnotator(object):

3340

return iter(annotator.annotate_flat(revision_id))

3341

3342

3343

class _KnitAnnotator(annotate.Annotator):

3343

3344

"""Build up the annotations for a text."""

3344

3345

def __init__(self, knit):

3346

self._knit = knit

3347

3348

# Content objects, which differ from fulltexts because of how final newlines

3349

# are treated by knits. The content objects here will always have a

3350

# final newline

3351

self._fulltext_contents = {}

3352

3353

# Annotated lines of specific revisions

3354

self._annotated_lines = {}

3355

3356

# Track the raw data for nodes that we could not process yet.

3357

# This maps the revision_id of the base to a list of children that will

3358

# be annotated from it.

3359

self._pending_children = {}

3360

3361

# Nodes which cannot be extracted

3362

self._ghosts = set()

3363

3364

# Track how many children this node has, so we know if we need to keep

3365

# it

3366

self._annotate_children = {}

3367

self._compression_children = {}

3346

def __init__(self, vf):

3347

annotate.Annotator.__init__(self, vf)

3348

3349

# TODO: handle Nodes which cannot be extracted

3350

# self._ghosts = set()

3351

3352

# Map from (key, parent_key) => matching_blocks, should be 'use once'

3353

self._matching_blocks = {}

3354

3355

# KnitContent objects

3356

self._content_objects = {}

3357

# The number of children that depend on this fulltext content object

3358

self._num_compression_children = {}

3359

# Delta records that need their compression parent before they can be

3360

# expanded

3361

self._pending_deltas = {}

3362

# Fulltext records that are waiting for their parents' fulltexts before

3363

# they can be yielded for annotation

3364

self._pending_annotation = {}

3368

3365

3369

3366

self._all_build_details = {}

3370

# The children => parent revision_id graph

3371

self._revision_id_graph = {}

3372

3373

self._heads_provider = None

3374

3375

self._nodes_to_keep_annotations = set()

3376

self._generations_until_keep = 100

3377

3378

def set_generations_until_keep(self, value):

3379

"""Set the number of generations before caching a node.

3380

3381

Setting this to -1 will cache every merge node, setting this higher

3382

will cache fewer nodes.

3383

"""

3384

self._generations_until_keep = value

3385

3386

def _add_fulltext_content(self, revision_id, content_obj):

3387

self._fulltext_contents[revision_id] = content_obj

3388

# TODO: jam 20080305 It might be good to check the sha1digest here

3389

return content_obj.text()

3390

3391

def _check_parents(self, child, nodes_to_annotate):

3392

"""Check if all parents have been processed.

3393

3394

:param child: A tuple of (rev_id, parents, raw_content)

3395

:param nodes_to_annotate: If child is ready, add it to

3396

nodes_to_annotate, otherwise put it back in self._pending_children

3397

"""

3398

for parent_id in child[1]:

3399

if (parent_id not in self._annotated_lines):

3400

# This parent is present, but another parent is missing

3401

self._pending_children.setdefault(parent_id,

3402

[]).append(child)

3403

break

3404

else:

3405

# This one is ready to be processed

3406

nodes_to_annotate.append(child)

3407

3408

def _add_annotation(self, revision_id, fulltext, parent_ids,

3409

left_matching_blocks=None):

3410

"""Add an annotation entry.

3411

3412

All parents should already have been annotated.

3413

:return: A list of children that now have their parents satisfied.

3414

"""

3415

a = self._annotated_lines

3416

annotated_parent_lines = [a[p] for p in parent_ids]

3417

annotated_lines = list(annotate.reannotate(annotated_parent_lines,

3418

fulltext, revision_id, left_matching_blocks,

3419

heads_provider=self._get_heads_provider()))

3420

self._annotated_lines[revision_id] = annotated_lines

3421

for p in parent_ids:

3422

ann_children = self._annotate_children[p]

3423

ann_children.remove(revision_id)

3424

if (not ann_children

3425

and p not in self._nodes_to_keep_annotations):

3426

del self._annotated_lines[p]

3427

del self._all_build_details[p]

3428

if p in self._fulltext_contents:

3429

del self._fulltext_contents[p]

3430

# Now that we've added this one, see if there are any pending

3431

# deltas to be done, certainly this parent is finished

3432

nodes_to_annotate = []

3433

for child in self._pending_children.pop(revision_id, []):

3434

self._check_parents(child, nodes_to_annotate)

3435

return nodes_to_annotate

3436

3367

3437

3368

def _get_build_graph(self, key):

3438

3369

"""Get the graphs for building texts and annotations.

3446

3377

passing to read_records_iter to start reading in the raw data from

3447

3378

the pack file.

3448

3379

"""

3449

if key in self._annotated_lines:

3450

# Nothing to do

3451

return []

3452

3380

pending = set([key])

3453

3381

records = []

3454

generation = 0

3455

kept_generation = 0

3382

ann_keys = set()

3383

self._num_needed_children[key] = 1

3456

3384

while pending:

3457

3385

# get all pending nodes

3458

generation += 1

3459

3386

this_iteration = pending

3460

build_details = self._knit._index.get_build_details(this_iteration)

3387

build_details = self._vf._index.get_build_details(this_iteration)

3461

3388

self._all_build_details.update(build_details)

3462

# new_nodes = self._knit._index._get_entries(this_iteration)

3389

# new_nodes = self._vf._index._get_entries(this_iteration)

3463

3390

pending = set()

3464

3391

for key, details in build_details.iteritems():

3465

(index_memo, compression_parent, parents,

3392

(index_memo, compression_parent, parent_keys,

3466

3393

record_details) = details

3467

self._revision_id_graph[key] = parents

3394

self._parent_map[key] = parent_keys

3395

self._heads_provider = None

3468

3396

records.append((key, index_memo))

3469

3397

# Do we actually need to check _annotated_lines?

3470

pending.update(p for p in parents

3471

if p not in self._all_build_details)

3398

pending.update([p for p in parent_keys

3399

if p not in self._all_build_details])

3400

if parent_keys:

3401

for parent_key in parent_keys:

3402

if parent_key in self._num_needed_children:

3403

self._num_needed_children[parent_key] += 1

3404

else:

3405

self._num_needed_children[parent_key] = 1

3472

3406

if compression_parent:

3473

self._compression_children.setdefault(compression_parent,

3474

[]).append(key)

3475

if parents:

3476

for parent in parents:

3477

self._annotate_children.setdefault(parent,

3478

[]).append(key)

3479

num_gens = generation - kept_generation

3480

if ((num_gens >= self._generations_until_keep)

3481

and len(parents) > 1):

3482

kept_generation = generation

3483

self._nodes_to_keep_annotations.add(key)

3407

if compression_parent in self._num_compression_children:

3408

self._num_compression_children[compression_parent] += 1

3409

else:

3410

self._num_compression_children[compression_parent] = 1

3484

3411

3485

3412

missing_versions = this_iteration.difference(build_details.keys())

3486

self._ghosts.update(missing_versions)

3487

for missing_version in missing_versions:

3488

# add a key, no parents

3489

self._revision_id_graph[missing_version] = ()

3490

pending.discard(missing_version) # don't look for it

3491

if self._ghosts.intersection(self._compression_children):

3492

raise KnitCorrupt(

3493

"We cannot have nodes which have a ghost compression parent:\n"

3494

"ghosts: %r\n"

3495

"compression children: %r"

3496

% (self._ghosts, self._compression_children))

3497

# Clean out anything that depends on a ghost so that we don't wait for

3498

# the ghost to show up

3499

for node in self._ghosts:

3500

if node in self._annotate_children:

3501

# We won't be building this node

3502

del self._annotate_children[node]

3413

if missing_versions:

3414

for key in missing_versions:

3415

if key in self._parent_map and key in self._text_cache:

3416

# We already have this text ready, we just need to

3417

# yield it later so we get it annotated

3418

ann_keys.add(key)

3419

parent_keys = self._parent_map[key]

3420

for parent_key in parent_keys:

3421

if parent_key in self._num_needed_children:

3422

self._num_needed_children[parent_key] += 1

3423

else:

3424

self._num_needed_children[parent_key] = 1

3425

pending.update([p for p in parent_keys

3426

if p not in self._all_build_details])

3427

else:

3428

raise errors.RevisionNotPresent(key, self._vf)

3503

3429

# Generally we will want to read the records in reverse order, because

3504

3430

# we find the parent nodes after the children

3505

3431

records.reverse()

3506

return records

3507

3508

def _annotate_records(self, records):

3509

"""Build the annotations for the listed records."""

3432

return records, ann_keys

3433

3434

def _get_needed_texts(self, key, pb=None):

3435

# if True or len(self._vf._fallback_vfs) > 0:

3436

if len(self._vf._fallback_vfs) > 0:

3437

# If we have fallbacks, go to the generic path

3438

for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):

3439

yield v

3440

return

3441

while True:

3442

try:

3443

records, ann_keys = self._get_build_graph(key)

3444

for idx, (sub_key, text, num_lines) in enumerate(

3445

self._extract_texts(records)):

3446

if pb is not None:

3447

pb.update('annotating', idx, len(records))

3448

yield sub_key, text, num_lines

3449

for sub_key in ann_keys:

3450

text = self._text_cache[sub_key]

3451

num_lines = len(text) # bad assumption

3452

yield sub_key, text, num_lines

3453

return

3454

except errors.RetryWithNewPacks, e:

3455

self._vf._access.reload_or_raise(e)

3456

# The cached build_details are no longer valid

3457

self._all_build_details.clear()

3458

3459

def _cache_delta_blocks(self, key, compression_parent, delta, lines):

3460

parent_lines = self._text_cache[compression_parent]

3461

blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines))

3462

self._matching_blocks[(key, compression_parent)] = blocks

3463

3464

def _expand_record(self, key, parent_keys, compression_parent, record,

3465

record_details):

3466

delta = None

3467

if compression_parent:

3468

if compression_parent not in self._content_objects:

3469

# Waiting for the parent

3470

self._pending_deltas.setdefault(compression_parent, []).append(

3471

(key, parent_keys, record, record_details))

3472

return None

3473

# We have the basis parent, so expand the delta

3474

num = self._num_compression_children[compression_parent]

3475

num -= 1

3476

if num == 0:

3477

base_content = self._content_objects.pop(compression_parent)

3478

self._num_compression_children.pop(compression_parent)

3479

else:

3480

self._num_compression_children[compression_parent] = num

3481

base_content = self._content_objects[compression_parent]

3482

# It is tempting to want to copy_base_content=False for the last

3483

# child object. However, whenever noeol=False,

3484

# self._text_cache[parent_key] is content._lines. So mutating it

3485

# gives very bad results.

3486

# The alternative is to copy the lines into text cache, but then we

3487

# are copying anyway, so just do it here.

3488

content, delta = self._vf._factory.parse_record(

3489

key, record, record_details, base_content,

3490

copy_base_content=True)

3491

else:

3492

# Fulltext record

3493

content, _ = self._vf._factory.parse_record(

3494

key, record, record_details, None)

3495

if self._num_compression_children.get(key, 0) > 0:

3496

self._content_objects[key] = content

3497

lines = content.text()

3498

self._text_cache[key] = lines

3499

if delta is not None:

3500

self._cache_delta_blocks(key, compression_parent, delta, lines)

3501

return lines

3502

3503

def _get_parent_annotations_and_matches(self, key, text, parent_key):

3504

"""Get the list of annotations for the parent, and the matching lines.

3505

3506

:param text: The opaque value given by _get_needed_texts

3507

:param parent_key: The key for the parent text

3508

:return: (parent_annotations, matching_blocks)

3509

parent_annotations is a list as long as the number of lines in

3510

parent

3511

matching_blocks is a list of (parent_idx, text_idx, len) tuples

3512

indicating which lines match between the two texts

3513

"""

3514

block_key = (key, parent_key)

3515

if block_key in self._matching_blocks:

3516

blocks = self._matching_blocks.pop(block_key)

3517

parent_annotations = self._annotations_cache[parent_key]

3518

return parent_annotations, blocks

3519

return annotate.Annotator._get_parent_annotations_and_matches(self,

3520

key, text, parent_key)

3521

3522

def _process_pending(self, key):

3523

"""The content for 'key' was just processed.

3524

3525

Determine if there is any more pending work to be processed.

3526

"""

3527

to_return = []

3528

if key in self._pending_deltas:

3529

compression_parent = key

3530

children = self._pending_deltas.pop(key)

3531

for child_key, parent_keys, record, record_details in children:

3532

lines = self._expand_record(child_key, parent_keys,

3533

compression_parent,

3534

record, record_details)

3535

if self._check_ready_for_annotations(child_key, parent_keys):

3536

to_return.append(child_key)

3537

# Also check any children that are waiting for this parent to be

3538

# annotation ready

3539

if key in self._pending_annotation:

3540

children = self._pending_annotation.pop(key)

3541

to_return.extend([c for c, p_keys in children

3542

if self._check_ready_for_annotations(c, p_keys)])

3543

return to_return

3544

3545

def _check_ready_for_annotations(self, key, parent_keys):

3546

"""Return True if this text is ready to be yielded.

3547

3548

Otherwise, this will return False, and queue the text into

3549

self._pending_annotation

3550

"""

3551

for parent_key in parent_keys:

3552

if parent_key not in self._annotations_cache:

3553

# still waiting on at least one parent text, so queue it up

3554

# Note that if there are multiple parents, we need to wait

3555

# for all of them.

3556

self._pending_annotation.setdefault(parent_key,

3557

[]).append((key, parent_keys))

3558

return False

3559

return True

3560

3561

def _extract_texts(self, records):

3562

"""Extract the various texts needed based on records"""

3510

3563

# We iterate in the order read, rather than a strict order requested

3511

3564

# However, process what we can, and put off to the side things that

3512

3565

# still need parents, cleaning them up when those parents are

3513

3566

# processed.

3514

for (rev_id, record,

3515

digest) in self._knit._read_records_iter(records):

3516

if rev_id in self._annotated_lines:

3567

# Basic data flow:

3568

# 1) As 'records' are read, see if we can expand these records into

3569

# Content objects (and thus lines)

3570

# 2) If a given line-delta is waiting on its compression parent, it

3571

# gets queued up into self._pending_deltas, otherwise we expand

3572

# it, and put it into self._text_cache and self._content_objects

3573

# 3) If we expanded the text, we will then check to see if all

3574

# parents have also been processed. If so, this text gets yielded,

3575

# else this record gets set aside into pending_annotation

3576

# 4) Further, if we expanded the text in (2), we will then check to

3577

# see if there are any children in self._pending_deltas waiting to

3578

# also be processed. If so, we go back to (2) for those

3579

# 5) Further again, if we yielded the text, we can then check if that

3580

# 'unlocks' any of the texts in pending_annotations, which should

3581

# then get yielded as well

3582

# Note that both steps 4 and 5 are 'recursive' in that unlocking one

3583

# compression child could unlock yet another, and yielding a fulltext

3584

# will also 'unlock' the children that are waiting on that annotation.

3585

# (Though also, unlocking 1 parent's fulltext does not unlock a child

3586

# if other parents are also waiting.)

3587

# We want to yield content before expanding child content objects, so

3588

# that we know when we can re-use the content lines, and the annotation

3589

# code can know when it can stop caching fulltexts, as well.

3590

3591

# Children that are missing their compression parent

3592

pending_deltas = {}

3593

for (key, record, digest) in self._vf._read_records_iter(records):

3594

# ghosts?

3595

details = self._all_build_details[key]

3596

(_, compression_parent, parent_keys, record_details) = details

3597

lines = self._expand_record(key, parent_keys, compression_parent,

3598

record, record_details)

3599

if lines is None:

3600

# Pending delta should be queued up

3517

3601

continue

3518

parent_ids = self._revision_id_graph[rev_id]

3519

parent_ids = [p for p in parent_ids if p not in self._ghosts]

3520

details = self._all_build_details[rev_id]

3521

(index_memo, compression_parent, parents,

3522

record_details) = details

3523

nodes_to_annotate = []

3524

# TODO: Remove the punning between compression parents, and

3525

# parent_ids, we should be able to do this without assuming

3526

# the build order

3527

if len(parent_ids) == 0:

3528

# There are no parents for this node, so just add it

3529

# TODO: This probably needs to be decoupled

3530

fulltext_content, delta = self._knit._factory.parse_record(

3531

rev_id, record, record_details, None)

3532

fulltext = self._add_fulltext_content(rev_id, fulltext_content)

3533

nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,

3534

parent_ids, left_matching_blocks=None))

3535

else:

3536

child = (rev_id, parent_ids, record)

3537

# Check if all the parents are present

3538

self._check_parents(child, nodes_to_annotate)

3539

while nodes_to_annotate:

3540

# Should we use a queue here instead of a stack?

3541

(rev_id, parent_ids, record) = nodes_to_annotate.pop()

3542

(index_memo, compression_parent, parents,

3543

record_details) = self._all_build_details[rev_id]

3544

blocks = None

3545

if compression_parent is not None:

3546

comp_children = self._compression_children[compression_parent]

3547

if rev_id not in comp_children:

3548

raise AssertionError("%r not in compression children %r"

3549

% (rev_id, comp_children))

3550

# If there is only 1 child, it is safe to reuse this

3551

# content

3552

reuse_content = (len(comp_children) == 1

3553

and compression_parent not in

3554

self._nodes_to_keep_annotations)

3555

if reuse_content:

3556

# Remove it from the cache since it will be changing

3557

parent_fulltext_content = self._fulltext_contents.pop(compression_parent)

3558

# Make sure to copy the fulltext since it might be

3559

# modified

3560

parent_fulltext = list(parent_fulltext_content.text())

3561

else:

3562

parent_fulltext_content = self._fulltext_contents[compression_parent]

3563

parent_fulltext = parent_fulltext_content.text()

3564

comp_children.remove(rev_id)

3565

fulltext_content, delta = self._knit._factory.parse_record(

3566

rev_id, record, record_details,

3567

parent_fulltext_content,

3568

copy_base_content=(not reuse_content))

3569

fulltext = self._add_fulltext_content(rev_id,

3570

fulltext_content)

3571

if compression_parent == parent_ids[0]:

3572

# the compression_parent is the left parent, so we can

3573

# re-use the delta

3574

blocks = KnitContent.get_line_delta_blocks(delta,

3575

parent_fulltext, fulltext)

3576

else:

3577

fulltext_content = self._knit._factory.parse_fulltext(

3578

record, rev_id)

3579

fulltext = self._add_fulltext_content(rev_id,

3580

fulltext_content)

3581

nodes_to_annotate.extend(

3582

self._add_annotation(rev_id, fulltext, parent_ids,

3583

left_matching_blocks=blocks))

3584

3585

def _get_heads_provider(self):

3586

"""Create a heads provider for resolving ancestry issues."""

3587

if self._heads_provider is not None:

3588

return self._heads_provider

3589

self._heads_provider = _mod_graph.KnownGraph(self._revision_id_graph)

3590

return self._heads_provider

3591

3592

def annotate(self, key):

3593

"""Return the annotated fulltext at the given key.

3594

3595

:param key: The key to annotate.

3596

"""

3597

if len(self._knit._fallback_vfs) > 0:

3598

# stacked knits can't use the fast path at present.

3599

return self._simple_annotate(key)

3600

while True:

3601

try:

3602

records = self._get_build_graph(key)

3603

if key in self._ghosts:

3604

raise errors.RevisionNotPresent(key, self._knit)

3605

self._annotate_records(records)

3606

return self._annotated_lines[key]

3607

except errors.RetryWithNewPacks, e:

3608

self._knit._access.reload_or_raise(e)

3609

# The cached build_details are no longer valid

3610

self._all_build_details.clear()

3611

3612

def _simple_annotate(self, key):

3613

"""Return annotated fulltext, rediffing from the full texts.

3614

3615

This is slow but makes no assumptions about the repository

3616

being able to produce line deltas.

3617

"""

3618

# TODO: this code generates a parent map of present ancestors; it

3619

# could be split out into a separate method

3620

# -- mbp and robertc 20080704

3621

graph = _mod_graph.Graph(self._knit)

3622

parent_map = dict((k, v) for k, v in graph.iter_ancestry([key])

3623

if v is not None)

3624

if not parent_map:

3625

raise errors.RevisionNotPresent(key, self)

3626

keys = parent_map.keys()

3627

heads_provider = _mod_graph.KnownGraph(parent_map)

3628

parent_cache = {}

3629

reannotate = annotate.reannotate

3630

for record in self._knit.get_record_stream(keys, 'topological', True):

3631

key = record.key

3632

fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))

3633

parents = parent_map[key]

3634

if parents is not None:

3635

parent_lines = [parent_cache[parent] for parent in parent_map[key]]

3636

else:

3637

parent_lines = []

3638

parent_cache[key] = list(

3639

reannotate(parent_lines, fulltext, key, None, heads_provider))

3640

try:

3641

return parent_cache[key]

3642

except KeyError, e:

3643

raise errors.RevisionNotPresent(key, self._knit)

3644

3602

# At this point, we may be able to yield this content, if all

3603

# parents are also finished

3604

yield_this_text = self._check_ready_for_annotations(key,

3605

parent_keys)

3606

if yield_this_text:

3607

# All parents present

3608

yield key, lines, len(lines)

3609

to_process = self._process_pending(key)

3610

while to_process:

3611

this_process = to_process

3612

to_process = []

3613

for key in this_process:

3614

lines = self._text_cache[key]

3615

yield key, lines, len(lines)

3616

to_process.extend(self._process_pending(key))

3645

3617

3646

3618

try:

3647

3619

from bzrlib._knit_load_data_c import _load_data_c as _load_data

Older »