208
class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs.

    Collects the keys (and their non-ghost parents), streams the texts in
    topological order, and builds a ``multiparent.MultiParent`` diff per
    requested key, caching parent chunks only as long as a child still
    needs them.
    """

    def __init__(self, vf, keys):
        # The VersionedFiles-like store we extract texts from
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                parent_keys = ()
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        """Diff one streamed record, updating parent refcounts/caches."""
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        """Stream all needed texts and diff each requested key."""
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        """Return the list of mp_diffs, in the originally requested order."""
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
340
209
class VersionedFile(object):
341
210
"""Versioned text file storage.
1183
1027
def make_mpdiffs(self, keys):
    """Create multiparent diffs for specified keys.

    :param keys: the keys to generate mp_diffs for, in the order the
        resulting diffs should be returned.
    :return: a list of multiparent diffs, one per key in ``keys``.
    """
    # Delegate the whole extraction/diffing pipeline to the generator
    generator = _MPDiffGenerator(self, keys)
    return generator.compute_diffs()
1188
def get_annotator(self):
    """Return an Annotator wrapping this versioned file store."""
    return annotate.Annotator(self)
1029
keys_order = tuple(keys)
1030
keys = frozenset(keys)
1031
knit_keys = set(keys)
1032
parent_map = self.get_parent_map(keys)
1033
for parent_keys in parent_map.itervalues():
1035
knit_keys.update(parent_keys)
1036
missing_keys = keys - set(parent_map)
1038
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
1039
# We need to filter out ghosts, because we can't diff against them.
1040
maybe_ghosts = knit_keys - keys
1041
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
1042
knit_keys.difference_update(ghosts)
1044
chunks_to_lines = osutils.chunks_to_lines
1045
for record in self.get_record_stream(knit_keys, 'topological', True):
1046
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1047
# line_block_dict = {}
1048
# for parent, blocks in record.extract_line_blocks():
1049
# line_blocks[parent] = blocks
1050
# line_blocks[record.key] = line_block_dict
1052
for key in keys_order:
1054
parents = parent_map[key] or []
1055
# Note that filtering knit_keys can lead to a parent difference
1056
# between the creation and the application of the mpdiff.
1057
parent_lines = [lines[p] for p in parents if p in knit_keys]
1058
if len(parent_lines) > 0:
1059
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1062
left_parent_blocks = None
1063
diffs.append(multiparent.MultiParent.from_lines(target,
1064
parent_lines, left_parent_blocks))
1191
1067
missing_keys = index._missing_keys_from_parent_map
1589
1468
for struct in outstanding_struct():
1592
def base_from_plan(self):
    """Construct a BASE file from the plan text.

    :return: a list of the lines (from ``self.plan``) that belong to the
        synthesized base text.
    :raises AssertionError: if the plan contains an unrecognized state.
    """
    base_lines = []
    for state, line in self.plan:
        if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
            # If unchanged, then this line is straight from base. If a or b
            # or both killed the line, then it *used* to be in base.
            base_lines.append(line)
        else:
            if state not in ('killed-base', 'irrelevant',
                             'ghost-a', 'ghost-b',
                             'new-a', 'new-b',
                             'conflicted-a', 'conflicted-b'):
                # killed-base, irrelevant means it doesn't apply
                # ghost-a/ghost-b are harder to say for sure, but they
                # aren't in the 'inc_c' which means they aren't in the
                # shared base of a & b. So we don't include them. And
                # obviously if the line is newly inserted, it isn't in base
                # If 'conflicted-a' or b, then it is new vs one base, but
                # old versus another base. However, if we make it present
                # in the base, it will be deleted from the target, and it
                # seems better to get a line doubled in the merge result,
                # rather than have it deleted entirely.
                # Example, each node is the 'text' at that point:
                #           MN
                #          /   \
                #        MaN   MbN
                #         |  X  |
                #        MabN MbaN
                #          \   /
                #           ???
                # There was a criss-cross conflict merge. Both sides
                # include the other, but put themselves first.
                # Weave marks this as a 'clean' merge, picking OTHER over
                # THIS. (Though the details depend on order inserted into
                # weave, etc.)
                # LCA generates a plan:
                # [('unchanged', M),
                #  ('conflicted-b', b),
                #  ('unchanged', a),
                #  ('conflicted-a', b),
                #  ('unchanged', N)]
                # If you mark 'conflicted-*' as part of BASE, then a 3-way
                # merge tool will cleanly generate "MaN" (as BASE vs THIS
                # removes one 'b', and BASE vs OTHER removes the other)
                # If you include neither, 3-way creates a clean "MbabN" as
                # THIS adds one 'b', and OTHER does too.
                # It seems that having the line 2 times is better than
                # having it omitted. (Easier to manually delete than notice
                # it needs to be added.)
                raise AssertionError('Unknown state: %s' % (state,))
    return base_lines
1647
1472
class WeaveMerge(PlanWeaveMerge):
1648
1473
"""Weave merge that takes a VersionedFile and two versions as its input."""
1727
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    being added already exists.
    """

    def __init__(self, store):
        # The wrapped VersionedFiles-like store
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            # Dedupe + nostore_sha interact badly; refuse rather than guess.
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # Content-addressed key: derive it from the text's sha1
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        # Everything else passes straight through to the wrapped store
        return getattr(self._store, name)
1767
1552
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip of a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    # The storage kind is everything before the first newline; the payload
    # starts immediately after it.
    line_end = network_bytes.index('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
1786
1571
record.get_bytes_as(record.storage_kind) call.
1788
1573
self._bytes_iterator = bytes_iterator
1789
self._kind_factory = {
1790
'fulltext': fulltext_network_to_record,
1791
'groupcompress-block': groupcompress.network_block_to_records,
1792
'knit-ft-gz': knit.knit_network_to_record,
1793
'knit-delta-gz': knit.knit_network_to_record,
1794
'knit-annotated-ft-gz': knit.knit_network_to_record,
1795
'knit-annotated-delta-gz': knit.knit_network_to_record,
1796
'knit-delta-closure': knit.knit_delta_closure_to_records,
1574
self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
1575
'knit-delta-gz':knit.knit_network_to_record,
1576
'knit-annotated-ft-gz':knit.knit_network_to_record,
1577
'knit-annotated-delta-gz':knit.knit_network_to_record,
1578
'knit-delta-closure':knit.knit_delta_closure_to_records,
1579
'fulltext':fulltext_network_to_record,
1580
'groupcompress-block':groupcompress.network_block_to_records,
1799
1583
def read(self):