1130
1235
yield status_a(revision, text)
1131
1236
for revision, text in annotated_b[b_cur:bi]:
1132
1237
yield status_b(revision, text)
1134
1238
# and now the matched section
1137
for text_a, text_b in zip(plain_a[ai:a_cur], plain_b[bi:b_cur]):
1138
assert text_a == text_b
1241
for text_a in plain_a[ai:a_cur]:
1139
1242
yield "unchanged", text_a
1245
class _PlanMergeBase(object):
1247
def __init__(self, a_rev, b_rev, vf, key_prefix):
1250
:param a_rev: Revision-id of one revision to merge
1251
:param b_rev: Revision-id of the other revision to merge
1252
:param vf: A VersionedFiles containing both revisions
1253
:param key_prefix: A prefix for accessing keys in vf, typically
1259
self._last_lines = None
1260
self._last_lines_revision_id = None
1261
self._cached_matching_blocks = {}
1262
self._key_prefix = key_prefix
1263
self._precache_tip_lines()
1265
def _precache_tip_lines(self):
1266
lines = self.get_lines([self.a_rev, self.b_rev])
1267
self.lines_a = lines[self.a_rev]
1268
self.lines_b = lines[self.b_rev]
1270
def get_lines(self, revisions):
1271
"""Get lines for revisions from the backing VersionedFiles.
1273
:raises RevisionNotPresent: on absent texts.
1275
keys = [(self._key_prefix + (rev,)) for rev in revisions]
1277
for record in self.vf.get_record_stream(keys, 'unordered', True):
1278
if record.storage_kind == 'absent':
1279
raise errors.RevisionNotPresent(record.key, self.vf)
1280
result[record.key[-1]] = osutils.split_lines(
1281
record.get_bytes_as('fulltext'))
1284
def plan_merge(self):
1285
"""Generate a 'plan' for merging the two revisions.
1287
This involves comparing their texts and determining the cause of
1288
differences. If text A has a line and text B does not, then either the
1289
line was added to text A, or it was deleted from B. Once the causes
1290
are combined, they are written out in the format described in
1291
VersionedFile.plan_merge
1293
blocks = self._get_matching_blocks(self.a_rev, self.b_rev)
1294
unique_a, unique_b = self._unique_lines(blocks)
1295
new_a, killed_b = self._determine_status(self.a_rev, unique_a)
1296
new_b, killed_a = self._determine_status(self.b_rev, unique_b)
1297
return self._iter_plan(blocks, new_a, killed_b, new_b, killed_a)
1299
def _iter_plan(self, blocks, new_a, killed_b, new_b, killed_a):
1302
for i, j, n in blocks:
1303
for a_index in range(last_i, i):
1304
if a_index in new_a:
1305
if a_index in killed_b:
1306
yield 'conflicted-a', self.lines_a[a_index]
1308
yield 'new-a', self.lines_a[a_index]
1310
yield 'killed-b', self.lines_a[a_index]
1311
for b_index in range(last_j, j):
1312
if b_index in new_b:
1313
if b_index in killed_a:
1314
yield 'conflicted-b', self.lines_b[b_index]
1316
yield 'new-b', self.lines_b[b_index]
1318
yield 'killed-a', self.lines_b[b_index]
1319
# handle common lines
1320
for a_index in range(i, i+n):
1321
yield 'unchanged', self.lines_a[a_index]
1325
def _get_matching_blocks(self, left_revision, right_revision):
1326
"""Return a description of which sections of two revisions match.
1328
See SequenceMatcher.get_matching_blocks
1330
cached = self._cached_matching_blocks.get((left_revision,
1332
if cached is not None:
1334
if self._last_lines_revision_id == left_revision:
1335
left_lines = self._last_lines
1336
right_lines = self.get_lines([right_revision])[right_revision]
1338
lines = self.get_lines([left_revision, right_revision])
1339
left_lines = lines[left_revision]
1340
right_lines = lines[right_revision]
1341
self._last_lines = right_lines
1342
self._last_lines_revision_id = right_revision
1343
matcher = patiencediff.PatienceSequenceMatcher(None, left_lines,
1345
return matcher.get_matching_blocks()
1347
def _unique_lines(self, matching_blocks):
1348
"""Analyse matching_blocks to determine which lines are unique
1350
:return: a tuple of (unique_left, unique_right), where the values are
1351
sets of line numbers of unique lines.
1357
for i, j, n in matching_blocks:
1358
unique_left.extend(range(last_i, i))
1359
unique_right.extend(range(last_j, j))
1362
return unique_left, unique_right
1365
def _subtract_plans(old_plan, new_plan):
1366
"""Remove changes from new_plan that came from old_plan.
1368
It is assumed that the difference between the old_plan and new_plan
1369
is their choice of 'b' text.
1371
All lines from new_plan that differ from old_plan are emitted
1372
verbatim. All lines from new_plan that match old_plan but are
1373
not about the 'b' revision are emitted verbatim.
1375
Lines that match and are about the 'b' revision are the lines we
1376
don't want, so we convert 'killed-b' -> 'unchanged', and 'new-b'
1377
is skipped entirely.
1379
matcher = patiencediff.PatienceSequenceMatcher(None, old_plan,
1382
for i, j, n in matcher.get_matching_blocks():
1383
for jj in range(last_j, j):
1385
for jj in range(j, j+n):
1386
plan_line = new_plan[jj]
1387
if plan_line[0] == 'new-b':
1389
elif plan_line[0] == 'killed-b':
1390
yield 'unchanged', plan_line[1]
1396
class _PlanMerge(_PlanMergeBase):
    # NOTE(review): this block was restored from a corrupted extraction
    # (interleaved line-number artifacts, dropped statements); verify
    # against upstream bzrlib merge.py before relying on it.
    """Plan an annotate merge using on-the-fly annotation"""

    def __init__(self, a_rev, b_rev, vf, key_prefix):
        super(_PlanMerge, self).__init__(a_rev, b_rev, vf, key_prefix)
        self.a_key = self._key_prefix + (self.a_rev,)
        self.b_key = self._key_prefix + (self.b_rev,)
        self.graph = Graph(self.vf)
        heads = self.graph.heads((self.a_key, self.b_key))
        if len(heads) == 1:
            # one side dominates, so we can just return its values, yay for
            # simplicity.
            # Ideally we would know that before we get this far
            self._head_key = heads.pop()
            if self._head_key == self.a_key:
                other = b_rev
            else:
                other = a_rev
            mutter('found dominating revision for %s\n%s > %s', self.vf,
                   self._head_key[-1], other)
            self._weave = None
        else:
            self._head_key = None
            self._build_weave()

    def _precache_tip_lines(self):
        # Turn this into a no-op, because we will do this later
        pass

    def _find_recursive_lcas(self):
        """Find all the ancestors back to a unique lca"""
        cur_ancestors = (self.a_key, self.b_key)
        # graph.find_lca(uncommon, keys) now returns plain NULL_REVISION,
        # rather than a key tuple. We will just map that directly to no common
        # ancestors.
        parent_map = {}
        while True:
            next_lcas = self.graph.find_lca(*cur_ancestors)
            # Map a plain NULL_REVISION to a simple no-ancestors
            if next_lcas == set([NULL_REVISION]):
                next_lcas = ()
            # Order the lca's based on when they were merged into the tip
            # While the actual merge portion of weave merge uses a set() of
            # active revisions, the order of insertion *does* effect the
            # implicit ordering of the texts.
            for rev_key in cur_ancestors:
                ordered_parents = tuple(self.graph.find_merge_order(rev_key,
                                                                    next_lcas))
                parent_map[rev_key] = ordered_parents
            if len(next_lcas) == 0:
                break
            elif len(next_lcas) == 1:
                parent_map[list(next_lcas)[0]] = ()
                break
            elif len(next_lcas) > 2:
                # More than 2 lca's, fall back to grabbing all nodes between
                # this and the unique lca.
                mutter('More than 2 LCAs, falling back to all nodes for:'
                       ' %s, %s\n=> %s', self.a_key, self.b_key, cur_ancestors)
                cur_lcas = next_lcas
                while len(cur_lcas) > 1:
                    cur_lcas = self.graph.find_lca(*cur_lcas)
                if len(cur_lcas) == 0:
                    # No common base to find, use the full ancestry
                    unique_lca = None
                else:
                    unique_lca = list(cur_lcas)[0]
                    if unique_lca == NULL_REVISION:
                        # find_lca will return a plain 'NULL_REVISION' rather
                        # than a key tuple when there is no common ancestor, we
                        # prefer to just use None, because it doesn't confuse
                        # _get_interesting_texts()
                        unique_lca = None
                parent_map.update(self._find_unique_parents(next_lcas,
                                                            unique_lca))
                break
            cur_ancestors = next_lcas
        return parent_map

    def _find_unique_parents(self, tip_keys, base_key):
        """Find ancestors of tip that aren't ancestors of base.

        :param tip_keys: Nodes that are interesting
        :param base_key: Cull all ancestors of this node
        :return: The parent map for all revisions between tip_keys and
            base_key. base_key will be included. References to nodes outside of
            the ancestor set will also be removed.
        """
        # TODO: this would be simpler if find_unique_ancestors took a list
        #       instead of a single tip, internally it supports it, but it
        #       isn't a "backwards compatible" api change.
        if base_key is None:
            parent_map = dict(self.graph.iter_ancestry(tip_keys))
            # We remove NULL_REVISION because it isn't a proper tuple key, and
            # thus confuses things like _get_interesting_texts, and our logic
            # to add the texts into the memory weave.
            if NULL_REVISION in parent_map:
                parent_map.pop(NULL_REVISION)
        else:
            interesting = set()
            for tip in tip_keys:
                interesting.update(
                    self.graph.find_unique_ancestors(tip, [base_key]))
            parent_map = self.graph.get_parent_map(interesting)
            parent_map[base_key] = ()
        culled_parent_map, child_map, tails = self._remove_external_references(
            parent_map)
        # Remove all the tails but base_key
        if base_key is not None:
            tails.remove(base_key)
            self._prune_tails(culled_parent_map, child_map, tails)
        # Now remove all the uninteresting 'linear' regions
        simple_map = _mod_graph.collapse_linear_regions(culled_parent_map)
        return simple_map

    @staticmethod
    def _remove_external_references(parent_map):
        """Remove references that go outside of the parent map.

        :param parent_map: Something returned from Graph.get_parent_map(keys)
        :return: (filtered_parent_map, child_map, tails)
            filtered_parent_map is parent_map without external references
            child_map is the {parent_key: [child_keys]} mapping
            tails is a list of nodes that do not have any parents in the map
        """
        # TODO: The basic effect of this function seems more generic than
        #       _PlanMerge. But the specific details of building a child_map,
        #       and computing tails seems very specific to _PlanMerge.
        #       Still, should this be in Graph land?
        filtered_parent_map = {}
        child_map = {}
        tails = []
        # NOTE: iteritems keeps this consistent with the file's Python 2 API
        for key, parent_keys in parent_map.iteritems():
            culled_parent_keys = [p for p in parent_keys if p in parent_map]
            if not culled_parent_keys:
                tails.append(key)
            for parent_key in culled_parent_keys:
                child_map.setdefault(parent_key, []).append(key)
            # TODO: Do we want to do this, it adds overhead for every node,
            #       just to say that the node has no children
            child_map.setdefault(key, [])
            filtered_parent_map[key] = culled_parent_keys
        return filtered_parent_map, child_map, tails

    @staticmethod
    def _prune_tails(parent_map, child_map, tails_to_remove):
        """Remove tails from the parent map.

        This will remove the supplied revisions until no more children have 0
        parents.

        :param parent_map: A dict of {child: [parents]}, this dictionary will
            be modified in place.
        :param child_map: The reverse dict of parent_map ({parent: [children]})
            this dict will be modified
        :param tails_to_remove: A list of tips that should be removed,
            this list will be consumed
        :return: None, parent_map will be modified in place.
        """
        while tails_to_remove:
            next = tails_to_remove.pop()
            parent_map.pop(next)
            children = child_map.pop(next)
            for child in children:
                child_parents = parent_map[child]
                child_parents.remove(next)
                if len(child_parents) == 0:
                    # removing this tail orphaned the child; prune it too
                    tails_to_remove.append(child)

    def _get_interesting_texts(self, parent_map):
        """Return a dict of texts we are interested in.

        Note that the input is in key tuples, but the output is in plain
        revision ids.

        :param parent_map: The output from _find_recursive_lcas
        :return: A dict of {'revision_id':lines} as returned by
            _PlanMergeBase.get_lines()
        """
        all_revision_keys = set(parent_map)
        all_revision_keys.add(self.a_key)
        all_revision_keys.add(self.b_key)

        # Everything else is in 'keys' but get_lines is in 'revision_ids'
        all_texts = self.get_lines([k[-1] for k in all_revision_keys])
        return all_texts

    def _build_weave(self):
        from bzrlib import weave
        self._weave = weave.Weave(weave_name='in_memory_weave',
                                  allow_reserved=True)
        parent_map = self._find_recursive_lcas()

        all_texts = self._get_interesting_texts(parent_map)

        # Note: Unfortunately, the order given by topo_sort will effect the
        # ordering resolution in the output. Specifically, if you add A then B,
        # then in the output text A lines will show up before B lines. And, of
        # course, topo_sort doesn't guarantee any real ordering.
        # So we use merge_sort, and add a fake node on the tip.
        # This ensures that left-hand parents will always be inserted into the
        # weave before right-hand parents.
        tip_key = self._key_prefix + (_mod_revision.CURRENT_REVISION,)
        parent_map[tip_key] = (self.a_key, self.b_key)

        for seq_num, key, depth, eom in reversed(tsort.merge_sort(parent_map,
                                                                  tip_key)):
            # for key in tsort.topo_sort(parent_map):
            if key == tip_key:
                # the fake tip carries no text of its own
                continue
            parent_keys = parent_map[key]
            revision_id = key[-1]
            parent_ids = [k[-1] for k in parent_keys]
            self._weave.add_lines(revision_id, parent_ids,
                                  all_texts[revision_id])

    def plan_merge(self):
        """Generate a 'plan' for merging the two revisions.

        This involves comparing their texts and determining the cause of
        differences.  If text A has a line and text B does not, then either the
        line was added to text A, or it was deleted from B.  Once the causes
        are combined, they are written out in the format described in
        VersionedFile.plan_merge
        """
        if self._head_key is not None: # There was a single head
            if self._head_key == self.a_key:
                plan = 'new-a'
            else:
                if self._head_key != self.b_key:
                    raise AssertionError('There was an invalid head: %s != %s'
                                         % (self.b_key, self._head_key))
                plan = 'new-b'
            head_rev = self._head_key[-1]
            lines = self.get_lines([head_rev])[head_rev]
            return ((plan, line) for line in lines)
        return self._weave.plan_merge(self.a_rev, self.b_rev)
1635
class _PlanLCAMerge(_PlanMergeBase):
1637
This merge algorithm differs from _PlanMerge in that:
1638
1. comparisons are done against LCAs only
1639
2. cases where a contested line is new versus one LCA but old versus
1640
another are marked as conflicts, by emitting the line as conflicted-a
1643
This is faster, and hopefully produces more useful output.
1646
def __init__(self, a_rev, b_rev, vf, key_prefix, graph):
    # NOTE(review): restored from a corrupted extraction; verify against
    # upstream bzrlib merge.py before relying on it.
    """Constructor.

    :param a_rev: Revision-id of one revision to merge
    :param b_rev: Revision-id of the other revision to merge
    :param vf: A VersionedFiles containing both revisions
    :param key_prefix: A prefix for accessing keys in vf
    :param graph: A Graph used to find the LCAs of a_rev and b_rev
    """
    _PlanMergeBase.__init__(self, a_rev, b_rev, vf, key_prefix)
    lcas = graph.find_lca(key_prefix + (a_rev,), key_prefix + (b_rev,))
    self.lcas = set()
    for lca in lcas:
        if lca == NULL_REVISION:
            # find_lca yields a plain NULL_REVISION, not a key tuple
            self.lcas.add(lca)
        else:
            self.lcas.add(lca[-1])
    # Pre-compute and cache the matching blocks of each tip against every
    # LCA, so _get_matching_blocks never needs to diff them again.
    for lca in self.lcas:
        if _mod_revision.is_null(lca):
            lca_lines = []
        else:
            lca_lines = self.get_lines([lca])[lca]
        matcher = patiencediff.PatienceSequenceMatcher(None, self.lines_a,
                                                       lca_lines)
        blocks = list(matcher.get_matching_blocks())
        self._cached_matching_blocks[(a_rev, lca)] = blocks
        matcher = patiencediff.PatienceSequenceMatcher(None, self.lines_b,
                                                       lca_lines)
        blocks = list(matcher.get_matching_blocks())
        self._cached_matching_blocks[(b_rev, lca)] = blocks
1669
def _determine_status(self, revision_id, unique_line_numbers):
1670
"""Determines the status unique lines versus all lcas.
1672
Basically, determines why the line is unique to this revision.
1674
A line may be determined new, killed, or both.
1676
If a line is determined new, that means it was not present in at least
1677
one LCA, and is not present in the other merge revision.
1679
If a line is determined killed, that means the line was present in
1682
If a line is killed and new, this indicates that the two merge
1683
revisions contain differing conflict resolutions.
1684
:param revision_id: The id of the revision in which the lines are
1686
:param unique_line_numbers: The line numbers of unique lines.
1687
:return a tuple of (new_this, killed_other):
1691
unique_line_numbers = set(unique_line_numbers)
1692
for lca in self.lcas:
1693
blocks = self._get_matching_blocks(revision_id, lca)
1694
unique_vs_lca, _ignored = self._unique_lines(blocks)
1695
new.update(unique_line_numbers.intersection(unique_vs_lca))
1696
killed.update(unique_line_numbers.difference(unique_vs_lca))