3427
3266
annotator = _KnitAnnotator(knit)
3428
return iter(annotator.annotate_flat(revision_id))
3431
class _KnitAnnotator(annotate.Annotator):
3267
return iter(annotator.annotate(revision_id))
3270
class _KnitAnnotator(object):
3432
3271
"""Build up the annotations for a text."""
3434
def __init__(self, vf):
3435
annotate.Annotator.__init__(self, vf)
3437
# TODO: handle Nodes which cannot be extracted
3438
# self._ghosts = set()
3440
# Map from (key, parent_key) => matching_blocks, should be 'use once'
3441
self._matching_blocks = {}
3443
# KnitContent objects
3444
self._content_objects = {}
3445
# The number of children that depend on this fulltext content object
3446
self._num_compression_children = {}
3447
# Delta records that need their compression parent before they can be
3449
self._pending_deltas = {}
3450
# Fulltext records that are waiting for their parents fulltexts before
3451
# they can be yielded for annotation
3452
self._pending_annotation = {}
3273
def __init__(self, knit):
3276
# Content objects, differs from fulltexts because of how final newlines
3277
# are treated by knits. the content objects here will always have a
3279
self._fulltext_contents = {}
3281
# Annotated lines of specific revisions
3282
self._annotated_lines = {}
3284
# Track the raw data for nodes that we could not process yet.
3285
# This maps the revision_id of the base to a list of children that will
3286
# annotated from it.
3287
self._pending_children = {}
3289
# Nodes which cannot be extracted
3290
self._ghosts = set()
3292
# Track how many children this node has, so we know if we need to keep
3294
self._annotate_children = {}
3295
self._compression_children = {}
3454
3297
self._all_build_details = {}
3298
# The children => parent revision_id graph
3299
self._revision_id_graph = {}
3301
self._heads_provider = None
3303
self._nodes_to_keep_annotations = set()
3304
self._generations_until_keep = 100
3306
def set_generations_until_keep(self, value):
3307
"""Set the number of generations before caching a node.
3309
Setting this to -1 will cache every merge node, setting this higher
3310
will cache fewer nodes.
3312
self._generations_until_keep = value
3314
def _add_fulltext_content(self, revision_id, content_obj):
3315
self._fulltext_contents[revision_id] = content_obj
3316
# TODO: jam 20080305 It might be good to check the sha1digest here
3317
return content_obj.text()
3319
def _check_parents(self, child, nodes_to_annotate):
3320
"""Check if all parents have been processed.
3322
:param child: A tuple of (rev_id, parents, raw_content)
3323
:param nodes_to_annotate: If child is ready, add it to
3324
nodes_to_annotate, otherwise put it back in self._pending_children
3326
for parent_id in child[1]:
3327
if (parent_id not in self._annotated_lines):
3328
# This parent is present, but another parent is missing
3329
self._pending_children.setdefault(parent_id,
3333
# This one is ready to be processed
3334
nodes_to_annotate.append(child)
3336
def _add_annotation(self, revision_id, fulltext, parent_ids,
3337
left_matching_blocks=None):
3338
"""Add an annotation entry.
3340
All parents should already have been annotated.
3341
:return: A list of children that now have their parents satisfied.
3343
a = self._annotated_lines
3344
annotated_parent_lines = [a[p] for p in parent_ids]
3345
annotated_lines = list(annotate.reannotate(annotated_parent_lines,
3346
fulltext, revision_id, left_matching_blocks,
3347
heads_provider=self._get_heads_provider()))
3348
self._annotated_lines[revision_id] = annotated_lines
3349
for p in parent_ids:
3350
ann_children = self._annotate_children[p]
3351
ann_children.remove(revision_id)
3352
if (not ann_children
3353
and p not in self._nodes_to_keep_annotations):
3354
del self._annotated_lines[p]
3355
del self._all_build_details[p]
3356
if p in self._fulltext_contents:
3357
del self._fulltext_contents[p]
3358
# Now that we've added this one, see if there are any pending
3359
# deltas to be done, certainly this parent is finished
3360
nodes_to_annotate = []
3361
for child in self._pending_children.pop(revision_id, []):
3362
self._check_parents(child, nodes_to_annotate)
3363
return nodes_to_annotate
3456
3365
def _get_build_graph(self, key):
3457
3366
"""Get the graphs for building texts and annotations.
3464
3373
:return: A list of (key, index_memo) records, suitable for
3465
passing to read_records_iter to start reading in the raw data from
3374
passing to read_records_iter to start reading in the raw data fro/
3377
if key in self._annotated_lines:
3468
3380
pending = set([key])
3471
self._num_needed_children[key] = 1
3473
3385
# get all pending nodes
3474
3387
this_iteration = pending
3475
build_details = self._vf._index.get_build_details(this_iteration)
3388
build_details = self._knit._index.get_build_details(this_iteration)
3476
3389
self._all_build_details.update(build_details)
3477
# new_nodes = self._vf._index._get_entries(this_iteration)
3390
# new_nodes = self._knit._index._get_entries(this_iteration)
3478
3391
pending = set()
3479
3392
for key, details in build_details.iteritems():
3480
(index_memo, compression_parent, parent_keys,
3393
(index_memo, compression_parent, parents,
3481
3394
record_details) = details
3482
self._parent_map[key] = parent_keys
3483
self._heads_provider = None
3395
self._revision_id_graph[key] = parents
3484
3396
records.append((key, index_memo))
3485
3397
# Do we actually need to check _annotated_lines?
3486
pending.update([p for p in parent_keys
3487
if p not in self._all_build_details])
3489
for parent_key in parent_keys:
3490
if parent_key in self._num_needed_children:
3491
self._num_needed_children[parent_key] += 1
3493
self._num_needed_children[parent_key] = 1
3398
pending.update(p for p in parents
3399
if p not in self._all_build_details)
3494
3400
if compression_parent:
3495
if compression_parent in self._num_compression_children:
3496
self._num_compression_children[compression_parent] += 1
3498
self._num_compression_children[compression_parent] = 1
3401
self._compression_children.setdefault(compression_parent,
3404
for parent in parents:
3405
self._annotate_children.setdefault(parent,
3407
num_gens = generation - kept_generation
3408
if ((num_gens >= self._generations_until_keep)
3409
and len(parents) > 1):
3410
kept_generation = generation
3411
self._nodes_to_keep_annotations.add(key)
3500
3413
missing_versions = this_iteration.difference(build_details.keys())
3501
if missing_versions:
3502
for key in missing_versions:
3503
if key in self._parent_map and key in self._text_cache:
3504
# We already have this text ready, we just need to
3505
# yield it later so we get it annotated
3507
parent_keys = self._parent_map[key]
3508
for parent_key in parent_keys:
3509
if parent_key in self._num_needed_children:
3510
self._num_needed_children[parent_key] += 1
3512
self._num_needed_children[parent_key] = 1
3513
pending.update([p for p in parent_keys
3514
if p not in self._all_build_details])
3516
raise errors.RevisionNotPresent(key, self._vf)
3414
self._ghosts.update(missing_versions)
3415
for missing_version in missing_versions:
3416
# add a key, no parents
3417
self._revision_id_graph[missing_version] = ()
3418
pending.discard(missing_version) # don't look for it
3419
if self._ghosts.intersection(self._compression_children):
3421
"We cannot have nodes which have a ghost compression parent:\n"
3423
"compression children: %r"
3424
% (self._ghosts, self._compression_children))
3425
# Cleanout anything that depends on a ghost so that we don't wait for
3426
# the ghost to show up
3427
for node in self._ghosts:
3428
if node in self._annotate_children:
3429
# We won't be building this node
3430
del self._annotate_children[node]
3517
3431
# Generally we will want to read the records in reverse order, because
3518
3432
# we find the parent nodes after the children
3519
3433
records.reverse()
3520
return records, ann_keys
3522
def _get_needed_texts(self, key, pb=None):
3523
# if True or len(self._vf._fallback_vfs) > 0:
3524
if len(self._vf._fallback_vfs) > 0:
3525
# If we have fallbacks, go to the generic path
3526
for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):
3531
records, ann_keys = self._get_build_graph(key)
3532
for idx, (sub_key, text, num_lines) in enumerate(
3533
self._extract_texts(records)):
3535
pb.update('annotating', idx, len(records))
3536
yield sub_key, text, num_lines
3537
for sub_key in ann_keys:
3538
text = self._text_cache[sub_key]
3539
num_lines = len(text) # bad assumption
3540
yield sub_key, text, num_lines
3542
except errors.RetryWithNewPacks, e:
3543
self._vf._access.reload_or_raise(e)
3544
# The cached build_details are no longer valid
3545
self._all_build_details.clear()
3547
def _cache_delta_blocks(self, key, compression_parent, delta, lines):
3548
parent_lines = self._text_cache[compression_parent]
3549
blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines))
3550
self._matching_blocks[(key, compression_parent)] = blocks
3552
def _expand_record(self, key, parent_keys, compression_parent, record,
3555
if compression_parent:
3556
if compression_parent not in self._content_objects:
3557
# Waiting for the parent
3558
self._pending_deltas.setdefault(compression_parent, []).append(
3559
(key, parent_keys, record, record_details))
3561
# We have the basis parent, so expand the delta
3562
num = self._num_compression_children[compression_parent]
3565
base_content = self._content_objects.pop(compression_parent)
3566
self._num_compression_children.pop(compression_parent)
3568
self._num_compression_children[compression_parent] = num
3569
base_content = self._content_objects[compression_parent]
3570
# It is tempting to want to copy_base_content=False for the last
3571
# child object. However, whenever noeol=False,
3572
# self._text_cache[parent_key] is content._lines. So mutating it
3573
# gives very bad results.
3574
# The alternative is to copy the lines into text cache, but then we
3575
# are copying anyway, so just do it here.
3576
content, delta = self._vf._factory.parse_record(
3577
key, record, record_details, base_content,
3578
copy_base_content=True)
3581
content, _ = self._vf._factory.parse_record(
3582
key, record, record_details, None)
3583
if self._num_compression_children.get(key, 0) > 0:
3584
self._content_objects[key] = content
3585
lines = content.text()
3586
self._text_cache[key] = lines
3587
if delta is not None:
3588
self._cache_delta_blocks(key, compression_parent, delta, lines)
3591
def _get_parent_annotations_and_matches(self, key, text, parent_key):
3592
"""Get the list of annotations for the parent, and the matching lines.
3594
:param text: The opaque value given by _get_needed_texts
3595
:param parent_key: The key for the parent text
3596
:return: (parent_annotations, matching_blocks)
3597
parent_annotations is a list as long as the number of lines in
3599
matching_blocks is a list of (parent_idx, text_idx, len) tuples
3600
indicating which lines match between the two texts
3602
block_key = (key, parent_key)
3603
if block_key in self._matching_blocks:
3604
blocks = self._matching_blocks.pop(block_key)
3605
parent_annotations = self._annotations_cache[parent_key]
3606
return parent_annotations, blocks
3607
return annotate.Annotator._get_parent_annotations_and_matches(self,
3608
key, text, parent_key)
3610
def _process_pending(self, key):
3611
"""The content for 'key' was just processed.
3613
Determine if there is any more pending work to be processed.
3616
if key in self._pending_deltas:
3617
compression_parent = key
3618
children = self._pending_deltas.pop(key)
3619
for child_key, parent_keys, record, record_details in children:
3620
lines = self._expand_record(child_key, parent_keys,
3622
record, record_details)
3623
if self._check_ready_for_annotations(child_key, parent_keys):
3624
to_return.append(child_key)
3625
# Also check any children that are waiting for this parent to be
3627
if key in self._pending_annotation:
3628
children = self._pending_annotation.pop(key)
3629
to_return.extend([c for c, p_keys in children
3630
if self._check_ready_for_annotations(c, p_keys)])
3633
def _check_ready_for_annotations(self, key, parent_keys):
3634
"""return true if this text is ready to be yielded.
3636
Otherwise, this will return False, and queue the text into
3637
self._pending_annotation
3639
for parent_key in parent_keys:
3640
if parent_key not in self._annotations_cache:
3641
# still waiting on at least one parent text, so queue it up
3642
# Note that if there are multiple parents, we need to wait
3644
self._pending_annotation.setdefault(parent_key,
3645
[]).append((key, parent_keys))
3649
def _extract_texts(self, records):
3650
"""Extract the various texts needed based on records"""
3436
def _annotate_records(self, records):
3437
"""Build the annotations for the listed records."""
3651
3438
# We iterate in the order read, rather than a strict order requested
3652
3439
# However, process what we can, and put off to the side things that
3653
3440
# still need parents, cleaning them up when those parents are
3656
# 1) As 'records' are read, see if we can expand these records into
3657
# Content objects (and thus lines)
3658
# 2) If a given line-delta is waiting on its compression parent, it
3659
# gets queued up into self._pending_deltas, otherwise we expand
3660
# it, and put it into self._text_cache and self._content_objects
3661
# 3) If we expanded the text, we will then check to see if all
3662
# parents have also been processed. If so, this text gets yielded,
3663
# else this record gets set aside into pending_annotation
3664
# 4) Further, if we expanded the text in (2), we will then check to
3665
# see if there are any children in self._pending_deltas waiting to
3666
# also be processed. If so, we go back to (2) for those
3667
# 5) Further again, if we yielded the text, we can then check if that
3668
# 'unlocks' any of the texts in pending_annotations, which should
3669
# then get yielded as well
3670
# Note that both steps 4 and 5 are 'recursive' in that unlocking one
3671
# compression child could unlock yet another, and yielding a fulltext
3672
# will also 'unlock' the children that are waiting on that annotation.
3673
# (Though also, unlocking 1 parent's fulltext, does not unlock a child
3674
# if other parents are also waiting.)
3675
# We want to yield content before expanding child content objects, so
3676
# that we know when we can re-use the content lines, and the annotation
3677
# code can know when it can stop caching fulltexts, as well.
3679
# Children that are missing their compression parent
3681
for (key, record, digest) in self._vf._read_records_iter(records):
3683
details = self._all_build_details[key]
3684
(_, compression_parent, parent_keys, record_details) = details
3685
lines = self._expand_record(key, parent_keys, compression_parent,
3686
record, record_details)
3688
# Pending delta should be queued up
3442
for (rev_id, record,
3443
digest) in self._knit._read_records_iter(records):
3444
if rev_id in self._annotated_lines:
3690
# At this point, we may be able to yield this content, if all
3691
# parents are also finished
3692
yield_this_text = self._check_ready_for_annotations(key,
3695
# All parents present
3696
yield key, lines, len(lines)
3697
to_process = self._process_pending(key)
3699
this_process = to_process
3701
for key in this_process:
3702
lines = self._text_cache[key]
3703
yield key, lines, len(lines)
3704
to_process.extend(self._process_pending(key))
3446
parent_ids = self._revision_id_graph[rev_id]
3447
parent_ids = [p for p in parent_ids if p not in self._ghosts]
3448
details = self._all_build_details[rev_id]
3449
(index_memo, compression_parent, parents,
3450
record_details) = details
3451
nodes_to_annotate = []
3452
# TODO: Remove the punning between compression parents, and
3453
# parent_ids, we should be able to do this without assuming
3455
if len(parent_ids) == 0:
3456
# There are no parents for this node, so just add it
3457
# TODO: This probably needs to be decoupled
3458
fulltext_content, delta = self._knit._factory.parse_record(
3459
rev_id, record, record_details, None)
3460
fulltext = self._add_fulltext_content(rev_id, fulltext_content)
3461
nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
3462
parent_ids, left_matching_blocks=None))
3464
child = (rev_id, parent_ids, record)
3465
# Check if all the parents are present
3466
self._check_parents(child, nodes_to_annotate)
3467
while nodes_to_annotate:
3468
# Should we use a queue here instead of a stack?
3469
(rev_id, parent_ids, record) = nodes_to_annotate.pop()
3470
(index_memo, compression_parent, parents,
3471
record_details) = self._all_build_details[rev_id]
3473
if compression_parent is not None:
3474
comp_children = self._compression_children[compression_parent]
3475
if rev_id not in comp_children:
3476
raise AssertionError("%r not in compression children %r"
3477
% (rev_id, comp_children))
3478
# If there is only 1 child, it is safe to reuse this
3480
reuse_content = (len(comp_children) == 1
3481
and compression_parent not in
3482
self._nodes_to_keep_annotations)
3484
# Remove it from the cache since it will be changing
3485
parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
3486
# Make sure to copy the fulltext since it might be
3488
parent_fulltext = list(parent_fulltext_content.text())
3490
parent_fulltext_content = self._fulltext_contents[compression_parent]
3491
parent_fulltext = parent_fulltext_content.text()
3492
comp_children.remove(rev_id)
3493
fulltext_content, delta = self._knit._factory.parse_record(
3494
rev_id, record, record_details,
3495
parent_fulltext_content,
3496
copy_base_content=(not reuse_content))
3497
fulltext = self._add_fulltext_content(rev_id,
3499
if compression_parent == parent_ids[0]:
3500
# the compression_parent is the left parent, so we can
3502
blocks = KnitContent.get_line_delta_blocks(delta,
3503
parent_fulltext, fulltext)
3505
fulltext_content = self._knit._factory.parse_fulltext(
3507
fulltext = self._add_fulltext_content(rev_id,
3509
nodes_to_annotate.extend(
3510
self._add_annotation(rev_id, fulltext, parent_ids,
3511
left_matching_blocks=blocks))
3513
def _get_heads_provider(self):
3514
"""Create a heads provider for resolving ancestry issues."""
3515
if self._heads_provider is not None:
3516
return self._heads_provider
3517
parent_provider = _mod_graph.DictParentsProvider(
3518
self._revision_id_graph)
3519
graph_obj = _mod_graph.Graph(parent_provider)
3520
head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
3521
self._heads_provider = head_cache
3524
def annotate(self, key):
3525
"""Return the annotated fulltext at the given key.
3527
:param key: The key to annotate.
3529
if len(self._knit._fallback_vfs) > 0:
3530
# stacked knits can't use the fast path at present.
3531
return self._simple_annotate(key)
3534
records = self._get_build_graph(key)
3535
if key in self._ghosts:
3536
raise errors.RevisionNotPresent(key, self._knit)
3537
self._annotate_records(records)
3538
return self._annotated_lines[key]
3539
except errors.RetryWithNewPacks, e:
3540
self._knit._access.reload_or_raise(e)
3541
# The cached build_details are no longer valid
3542
self._all_build_details.clear()
3544
def _simple_annotate(self, key):
3545
"""Return annotated fulltext, rediffing from the full texts.
3547
This is slow but makes no assumptions about the repository
3548
being able to produce line deltas.
3550
# TODO: this code generates a parent maps of present ancestors; it
3551
# could be split out into a separate method, and probably should use
3552
# iter_ancestry instead. -- mbp and robertc 20080704
3553
graph = _mod_graph.Graph(self._knit)
3554
head_cache = _mod_graph.FrozenHeadsCache(graph)
3555
search = graph._make_breadth_first_searcher([key])
3559
present, ghosts = search.next_with_ghosts()
3560
except StopIteration:
3562
keys.update(present)
3563
parent_map = self._knit.get_parent_map(keys)
3565
reannotate = annotate.reannotate
3566
for record in self._knit.get_record_stream(keys, 'topological', True):
3568
fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
3569
parents = parent_map[key]
3570
if parents is not None:
3571
parent_lines = [parent_cache[parent] for parent in parent_map[key]]
3574
parent_cache[key] = list(
3575
reannotate(parent_lines, fulltext, key, None, head_cache))
3577
return parent_cache[key]
3579
raise errors.RevisionNotPresent(key, self._knit)
3707
from bzrlib._knit_load_data_pyx import _load_data_c as _load_data
3708
except ImportError, e:
3709
osutils.failed_to_load_extension(e)
3583
from bzrlib._knit_load_data_c import _load_data_c as _load_data
3710
3585
from bzrlib._knit_load_data_py import _load_data_py as _load_data