1130
1238
yield status_a(revision, text)
1131
1239
for revision, text in annotated_b[b_cur:bi]:
1132
1240
yield status_b(revision, text)
1134
1241
# and now the matched section
1137
for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):
1138
assert text_a == text_b
1244
for text_a in plain_a[ai:a_cur]:
1139
1245
yield "unchanged", text_a
1248
class _PlanMergeBase(object):
1250
def __init__(self, a_rev, b_rev, vf, key_prefix):
1253
:param a_rev: Revision-id of one revision to merge
1254
:param b_rev: Revision-id of the other revision to merge
1255
:param vf: A VersionedFiles containing both revisions
1256
:param key_prefix: A prefix for accessing keys in vf, typically
1262
self._last_lines = None
1263
self._last_lines_revision_id = None
1264
self._cached_matching_blocks = {}
1265
self._key_prefix = key_prefix
1266
self._precache_tip_lines()
1268
def _precache_tip_lines(self):
1269
lines = self.get_lines([self.a_rev, self.b_rev])
1270
self.lines_a = lines[self.a_rev]
1271
self.lines_b = lines[self.b_rev]
1273
def get_lines(self, revisions):
1274
"""Get lines for revisions from the backing VersionedFiles.
1276
:raises RevisionNotPresent: on absent texts.
1278
keys = [(self._key_prefix + (rev,)) for rev in revisions]
1280
for record in self.vf.get_record_stream(keys, 'unordered', True):
1281
if record.storage_kind == 'absent':
1282
raise errors.RevisionNotPresent(record.key, self.vf)
1283
result[record.key[-1]] = osutils.split_lines(
1284
record.get_bytes_as('fulltext'))
1287
def plan_merge(self):
1288
"""Generate a 'plan' for merging the two revisions.
1290
This involves comparing their texts and determining the cause of
1291
differences. If text A has a line and text B does not, then either the
1292
line was added to text A, or it was deleted from B. Once the causes
1293
are combined, they are written out in the format described in
1294
VersionedFile.plan_merge
1296
blocks = self._get_matching_blocks(self.a_rev, self.b_rev)
1297
unique_a, unique_b = self._unique_lines(blocks)
1298
new_a, killed_b = self._determine_status(self.a_rev, unique_a)
1299
new_b, killed_a = self._determine_status(self.b_rev, unique_b)
1300
return self._iter_plan(blocks, new_a, killed_b, new_b, killed_a)
1302
def _iter_plan(self, blocks, new_a, killed_b, new_b, killed_a):
1305
for i, j, n in blocks:
1306
for a_index in range(last_i, i):
1307
if a_index in new_a:
1308
if a_index in killed_b:
1309
yield 'conflicted-a', self.lines_a[a_index]
1311
yield 'new-a', self.lines_a[a_index]
1313
yield 'killed-b', self.lines_a[a_index]
1314
for b_index in range(last_j, j):
1315
if b_index in new_b:
1316
if b_index in killed_a:
1317
yield 'conflicted-b', self.lines_b[b_index]
1319
yield 'new-b', self.lines_b[b_index]
1321
yield 'killed-a', self.lines_b[b_index]
1322
# handle common lines
1323
for a_index in range(i, i+n):
1324
yield 'unchanged', self.lines_a[a_index]
1328
def _get_matching_blocks(self, left_revision, right_revision):
1329
"""Return a description of which sections of two revisions match.
1331
See SequenceMatcher.get_matching_blocks
1333
cached = self._cached_matching_blocks.get((left_revision,
1335
if cached is not None:
1337
if self._last_lines_revision_id == left_revision:
1338
left_lines = self._last_lines
1339
right_lines = self.get_lines([right_revision])[right_revision]
1341
lines = self.get_lines([left_revision, right_revision])
1342
left_lines = lines[left_revision]
1343
right_lines = lines[right_revision]
1344
self._last_lines = right_lines
1345
self._last_lines_revision_id = right_revision
1346
matcher = patiencediff.PatienceSequenceMatcher(None, left_lines,
1348
return matcher.get_matching_blocks()
1350
def _unique_lines(self, matching_blocks):
1351
"""Analyse matching_blocks to determine which lines are unique
1353
:return: a tuple of (unique_left, unique_right), where the values are
1354
sets of line numbers of unique lines.
1360
for i, j, n in matching_blocks:
1361
unique_left.extend(range(last_i, i))
1362
unique_right.extend(range(last_j, j))
1365
return unique_left, unique_right
1368
def _subtract_plans(old_plan, new_plan):
1369
"""Remove changes from new_plan that came from old_plan.
1371
It is assumed that the difference between the old_plan and new_plan
1372
is their choice of 'b' text.
1374
All lines from new_plan that differ from old_plan are emitted
1375
verbatim. All lines from new_plan that match old_plan but are
1376
not about the 'b' revision are emitted verbatim.
1378
Lines that match and are about the 'b' revision are the lines we
1379
don't want, so we convert 'killed-b' -> 'unchanged', and 'new-b'
1380
is skipped entirely.
1382
matcher = patiencediff.PatienceSequenceMatcher(None, old_plan,
1385
for i, j, n in matcher.get_matching_blocks():
1386
for jj in range(last_j, j):
1388
for jj in range(j, j+n):
1389
plan_line = new_plan[jj]
1390
if plan_line[0] == 'new-b':
1392
elif plan_line[0] == 'killed-b':
1393
yield 'unchanged', plan_line[1]
1399
class _PlanMerge(_PlanMergeBase):
1400
"""Plan an annotate merge using on-the-fly annotation"""
1402
def __init__(self, a_rev, b_rev, vf, key_prefix):
1403
super(_PlanMerge, self).__init__(a_rev, b_rev, vf, key_prefix)
1404
self.a_key = self._key_prefix + (self.a_rev,)
1405
self.b_key = self._key_prefix + (self.b_rev,)
1406
self.graph = Graph(self.vf)
1407
heads = self.graph.heads((self.a_key, self.b_key))
1409
# one side dominates, so we can just return its values, yay for
1411
# Ideally we would know that before we get this far
1412
self._head_key = heads.pop()
1413
if self._head_key == self.a_key:
1417
mutter('found dominating revision for %s\n%s > %s', self.vf,
1418
self._head_key[-1], other)
1421
self._head_key = None
1424
def _precache_tip_lines(self):
1425
# Turn this into a no-op, because we will do this later
1428
def _find_recursive_lcas(self):
1429
"""Find all the ancestors back to a unique lca"""
1430
cur_ancestors = (self.a_key, self.b_key)
1431
# graph.find_lca(uncommon, keys) now returns plain NULL_REVISION,
1432
# rather than a key tuple. We will just map that directly to no common
1436
next_lcas = self.graph.find_lca(*cur_ancestors)
1437
# Map a plain NULL_REVISION to a simple no-ancestors
1438
if next_lcas == set([NULL_REVISION]):
1440
# Order the lca's based on when they were merged into the tip
1441
# While the actual merge portion of weave merge uses a set() of
1442
# active revisions, the order of insertion *does* effect the
1443
# implicit ordering of the texts.
1444
for rev_key in cur_ancestors:
1445
ordered_parents = tuple(self.graph.find_merge_order(rev_key,
1447
parent_map[rev_key] = ordered_parents
1448
if len(next_lcas) == 0:
1450
elif len(next_lcas) == 1:
1451
parent_map[list(next_lcas)[0]] = ()
1453
elif len(next_lcas) > 2:
1454
# More than 2 lca's, fall back to grabbing all nodes between
1455
# this and the unique lca.
1456
mutter('More than 2 LCAs, falling back to all nodes for:'
1457
' %s, %s\n=> %s', self.a_key, self.b_key, cur_ancestors)
1458
cur_lcas = next_lcas
1459
while len(cur_lcas) > 1:
1460
cur_lcas = self.graph.find_lca(*cur_lcas)
1461
if len(cur_lcas) == 0:
1462
# No common base to find, use the full ancestry
1465
unique_lca = list(cur_lcas)[0]
1466
if unique_lca == NULL_REVISION:
1467
# find_lca will return a plain 'NULL_REVISION' rather
1468
# than a key tuple when there is no common ancestor, we
1469
# prefer to just use None, because it doesn't confuse
1470
# _get_interesting_texts()
1472
parent_map.update(self._find_unique_parents(next_lcas,
1475
cur_ancestors = next_lcas
1478
def _find_unique_parents(self, tip_keys, base_key):
1479
"""Find ancestors of tip that aren't ancestors of base.
1481
:param tip_keys: Nodes that are interesting
1482
:param base_key: Cull all ancestors of this node
1483
:return: The parent map for all revisions between tip_keys and
1484
base_key. base_key will be included. References to nodes outside of
1485
the ancestor set will also be removed.
1487
# TODO: this would be simpler if find_unique_ancestors took a list
1488
# instead of a single tip, internally it supports it, but it
1489
# isn't a "backwards compatible" api change.
1490
if base_key is None:
1491
parent_map = dict(self.graph.iter_ancestry(tip_keys))
1492
# We remove NULL_REVISION because it isn't a proper tuple key, and
1493
# thus confuses things like _get_interesting_texts, and our logic
1494
# to add the texts into the memory weave.
1495
if NULL_REVISION in parent_map:
1496
parent_map.pop(NULL_REVISION)
1499
for tip in tip_keys:
1501
self.graph.find_unique_ancestors(tip, [base_key]))
1502
parent_map = self.graph.get_parent_map(interesting)
1503
parent_map[base_key] = ()
1504
culled_parent_map, child_map, tails = self._remove_external_references(
1506
# Remove all the tails but base_key
1507
if base_key is not None:
1508
tails.remove(base_key)
1509
self._prune_tails(culled_parent_map, child_map, tails)
1510
# Now remove all the uninteresting 'linear' regions
1511
simple_map = _mod_graph.collapse_linear_regions(culled_parent_map)
1515
def _remove_external_references(parent_map):
1516
"""Remove references that go outside of the parent map.
1518
:param parent_map: Something returned from Graph.get_parent_map(keys)
1519
:return: (filtered_parent_map, child_map, tails)
1520
filtered_parent_map is parent_map without external references
1521
child_map is the {parent_key: [child_keys]} mapping
1522
tails is a list of nodes that do not have any parents in the map
1524
# TODO: The basic effect of this function seems more generic than
1525
# _PlanMerge. But the specific details of building a child_map,
1526
# and computing tails seems very specific to _PlanMerge.
1527
# Still, should this be in Graph land?
1528
filtered_parent_map = {}
1531
for key, parent_keys in parent_map.iteritems():
1532
culled_parent_keys = [p for p in parent_keys if p in parent_map]
1533
if not culled_parent_keys:
1535
for parent_key in culled_parent_keys:
1536
child_map.setdefault(parent_key, []).append(key)
1537
# TODO: Do we want to do this, it adds overhead for every node,
1538
# just to say that the node has no children
1539
child_map.setdefault(key, [])
1540
filtered_parent_map[key] = culled_parent_keys
1541
return filtered_parent_map, child_map, tails
1544
def _prune_tails(parent_map, child_map, tails_to_remove):
1545
"""Remove tails from the parent map.
1547
This will remove the supplied revisions until no more children have 0
1550
:param parent_map: A dict of {child: [parents]}, this dictionary will
1551
be modified in place.
1552
:param tails_to_remove: A list of tips that should be removed,
1553
this list will be consumed
1554
:param child_map: The reverse dict of parent_map ({parent: [children]})
1555
this dict will be modified
1556
:return: None, parent_map will be modified in place.
1558
while tails_to_remove:
1559
next = tails_to_remove.pop()
1560
parent_map.pop(next)
1561
children = child_map.pop(next)
1562
for child in children:
1563
child_parents = parent_map[child]
1564
child_parents.remove(next)
1565
if len(child_parents) == 0:
1566
tails_to_remove.append(child)
1568
def _get_interesting_texts(self, parent_map):
1569
"""Return a dict of texts we are interested in.
1571
Note that the input is in key tuples, but the output is in plain
1574
:param parent_map: The output from _find_recursive_lcas
1575
:return: A dict of {'revision_id':lines} as returned by
1576
_PlanMergeBase.get_lines()
1578
all_revision_keys = set(parent_map)
1579
all_revision_keys.add(self.a_key)
1580
all_revision_keys.add(self.b_key)
1582
# Everything else is in 'keys' but get_lines is in 'revision_ids'
1583
all_texts = self.get_lines([k[-1] for k in all_revision_keys])
1586
def _build_weave(self):
1587
from bzrlib import weave
1588
self._weave = weave.Weave(weave_name='in_memory_weave',
1589
allow_reserved=True)
1590
parent_map = self._find_recursive_lcas()
1592
all_texts = self._get_interesting_texts(parent_map)
1594
# Note: Unfortunately, the order given by topo_sort will effect the
1595
# ordering resolution in the output. Specifically, if you add A then B,
1596
# then in the output text A lines will show up before B lines. And, of
1597
# course, topo_sort doesn't guarantee any real ordering.
1598
# So we use merge_sort, and add a fake node on the tip.
1599
# This ensures that left-hand parents will always be inserted into the
1600
# weave before right-hand parents.
1601
tip_key = self._key_prefix + (_mod_revision.CURRENT_REVISION,)
1602
parent_map[tip_key] = (self.a_key, self.b_key)
1604
for seq_num, key, depth, eom in reversed(tsort.merge_sort(parent_map,
1608
# for key in tsort.topo_sort(parent_map):
1609
parent_keys = parent_map[key]
1610
revision_id = key[-1]
1611
parent_ids = [k[-1] for k in parent_keys]
1612
self._weave.add_lines(revision_id, parent_ids,
1613
all_texts[revision_id])
1615
def plan_merge(self):
1616
"""Generate a 'plan' for merging the two revisions.
1618
This involves comparing their texts and determining the cause of
1619
differences. If text A has a line and text B does not, then either the
1620
line was added to text A, or it was deleted from B. Once the causes
1621
are combined, they are written out in the format described in
1622
VersionedFile.plan_merge
1624
if self._head_key is not None: # There was a single head
1625
if self._head_key == self.a_key:
1628
if self._head_key != self.b_key:
1629
raise AssertionError('There was an invalid head: %s != %s'
1630
% (self.b_key, self._head_key))
1632
head_rev = self._head_key[-1]
1633
lines = self.get_lines([head_rev])[head_rev]
1634
return ((plan, line) for line in lines)
1635
return self._weave.plan_merge(self.a_rev, self.b_rev)
1638
class _PlanLCAMerge(_PlanMergeBase):
1640
This merge algorithm differs from _PlanMerge in that:
1641
1. comparisons are done against LCAs only
1642
2. cases where a contested line is new versus one LCA but old versus
1643
another are marked as conflicts, by emitting the line as conflicted-a
1646
This is faster, and hopefully produces more useful output.
1649
def __init__(self, a_rev, b_rev, vf, key_prefix, graph):
1650
_PlanMergeBase.__init__(self, a_rev, b_rev, vf, key_prefix)
1651
lcas = graph.find_lca(key_prefix + (a_rev,), key_prefix + (b_rev,))
1654
if lca == NULL_REVISION:
1657
self.lcas.add(lca[-1])
1658
for lca in self.lcas:
1659
if _mod_revision.is_null(lca):
1662
lca_lines = self.get_lines([lca])[lca]
1663
matcher = patiencediff.PatienceSequenceMatcher(None, self.lines_a,
1665
blocks = list(matcher.get_matching_blocks())
1666
self._cached_matching_blocks[(a_rev, lca)] = blocks
1667
matcher = patiencediff.PatienceSequenceMatcher(None, self.lines_b,
1669
blocks = list(matcher.get_matching_blocks())
1670
self._cached_matching_blocks[(b_rev, lca)] = blocks
1672
def _determine_status(self, revision_id, unique_line_numbers):
1673
"""Determines the status unique lines versus all lcas.
1675
Basically, determines why the line is unique to this revision.
1677
A line may be determined new, killed, or both.
1679
If a line is determined new, that means it was not present in at least
1680
one LCA, and is not present in the other merge revision.
1682
If a line is determined killed, that means the line was present in
1685
If a line is killed and new, this indicates that the two merge
1686
revisions contain differing conflict resolutions.
1687
:param revision_id: The id of the revision in which the lines are
1689
:param unique_line_numbers: The line numbers of unique lines.
1690
:return a tuple of (new_this, killed_other):
1694
unique_line_numbers = set(unique_line_numbers)
1695
for lca in self.lcas:
1696
blocks = self._get_matching_blocks(revision_id, lca)
1697
unique_vs_lca, _ignored = self._unique_lines(blocks)
1698
new.update(unique_line_numbers.intersection(unique_vs_lca))
1699
killed.update(unique_line_numbers.difference(unique_vs_lca))