536
536
return view_revisions
539
def _filter_revisions_touching_file_id(branch, file_id, mainline_revisions,
541
"""Return the list of revision ids which touch a given file id.
539
def _filter_revisions_touching_file_id(branch, file_id, view_revisions,
541
r"""Return the list of revision ids which touch a given file id.
543
543
The function filters view_revisions and returns a subset.
544
544
This includes the revisions which directly change the file id,
545
545
and the revisions which merge these changes. So if the
546
546
revision graph is::
553
And 'C' changes a file, then both C and D will be returned.
555
This will also can be restricted based on a subset of the mainline.
557
And 'C' changes a file, then both C and D will be returned. F will not be
558
returned even though it brings the changes to C into the branch starting
559
with E. (Note that if we were using F as the tip instead of G, then we
562
This will also be restricted based on a subset of the mainline.
564
:param branch: The branch where we can get text revision information.
565
:param file_id: Filter out revisions that do not touch file_id.
566
:param view_revisions: A list of (revision_id, dotted_revno, merge_depth)
567
tuples. This is the list of revisions which will be filtered. It is
568
assumed that view_revisions is in merge_sort order (either forward or
570
:param direction: The direction of view_revisions. See also
571
reverse_by_depth, and get_view_revisions
557
572
:return: A list of (revision_id, dotted_revno, merge_depth) tuples.
559
# find all the revisions that change the specific file
560
# build the ancestry of each revision in the graph
561
# - only listing the ancestors that change the specific file.
562
graph = branch.repository.get_graph()
563
# This asks for all mainline revisions, which means we only have to spider
564
# sideways, rather than depth history. That said, its still size-of-history
565
# and should be addressed.
566
# mainline_revisions always includes an extra revision at the beginning, so
568
parent_map = dict(((key, value) for key, value in
569
graph.iter_ancestry(mainline_revisions[1:]) if value is not None))
570
sorted_rev_list = tsort.topo_sort(parent_map.items())
571
text_keys = [(file_id, rev_id) for rev_id in sorted_rev_list]
572
modified_text_versions = branch.repository.texts.get_parent_map(text_keys)
574
for rev in sorted_rev_list:
575
text_key = (file_id, rev)
576
parents = parent_map[rev]
577
if text_key not in modified_text_versions and len(parents) == 1:
578
# We will not be adding anything new, so just use a reference to
579
# the parent ancestry.
580
rev_ancestry = ancestry[parents[0]]
574
# Lookup all possible text keys to determine which ones actually modified
576
text_keys = [(file_id, rev_id) for rev_id, revno, depth in view_revisions]
577
# Looking up keys in batches of 1000 can cut the time in half, as well as
578
# memory consumption. GraphIndex *does* like to look for a few keys in
579
# parallel, it just doesn't like looking for *lots* of keys in parallel.
580
# TODO: This code needs to be re-evaluated periodically as we tune the
581
# indexing layer. We might consider passing in hints as to the known
582
# access pattern (sparse/clustered, high success rate/low success
583
# rate). This particular access is clustered with a low success rate.
584
get_parent_map = branch.repository.texts.get_parent_map
585
modified_text_revisions = set()
587
for start in xrange(0, len(text_keys), chunk_size):
588
next_keys = text_keys[start:start + chunk_size]
589
# Only keep the revision_id portion of the key
590
modified_text_revisions.update(
591
[k[1] for k in get_parent_map(next_keys)])
592
del text_keys, next_keys
595
if direction == 'forward':
596
# TODO: The algorithm for finding 'merges' of file changes expects
597
# 'reverse' order (the default from 'merge_sort()'). Instead of
598
# forcing this, we could just use the reverse_by_depth order.
599
view_revisions = reverse_by_depth(view_revisions)
600
# Track what revisions will merge the current revision, replace entries
601
# with 'None' when they have been added to result
602
current_merge_stack = [None]
603
for info in view_revisions:
604
rev_id, revno, depth = info
605
if depth == len(current_merge_stack):
606
current_merge_stack.append(info)
583
if text_key in modified_text_versions:
584
rev_ancestry.add(rev)
585
for parent in parents:
586
if parent not in ancestry:
587
# parent is a Ghost, which won't be present in
588
# sorted_rev_list, but we may access it later, so create an
590
ancestry[parent] = set()
591
rev_ancestry = rev_ancestry.union(ancestry[parent])
592
ancestry[rev] = rev_ancestry
594
def is_merging_rev(r):
595
parents = parent_map[r]
597
leftparent = parents[0]
598
for rightparent in parents[1:]:
599
if not ancestry[leftparent].issuperset(
600
ancestry[rightparent]):
604
# filter from the view the revisions that did not change or merge
606
return [(r, n, d) for r, n, d in view_revs_iter
607
if (file_id, r) in modified_text_versions or is_merging_rev(r)]
608
del current_merge_stack[depth + 1:]
609
current_merge_stack[-1] = info
611
if rev_id in modified_text_revisions:
612
# This needs to be logged, along with the extra revisions
613
for idx in xrange(len(current_merge_stack)):
614
node = current_merge_stack[idx]
617
current_merge_stack[idx] = None
618
if direction == 'forward':
619
result = reverse_by_depth(result)
610
623
def get_view_revisions(mainline_revs, rev_nos, branch, direction,