209
class _MPDiffGenerator(object):
210
"""Pull out the functionality for generating mp_diffs."""
212
def __init__(self, vf, keys):
214
# This is the order the keys were requested in
215
self.ordered_keys = tuple(keys)
216
# keys + their parents, what we need to compute the diffs
217
self.needed_keys = ()
218
# Map from key: mp_diff
220
# Map from key: parents_needed (may have ghosts)
222
# Parents that aren't present
223
self.ghost_parents = ()
224
# Map from parent_key => number of children for this text
226
# Content chunks that are cached while we still need them
229
def _find_needed_keys(self):
230
"""Find the set of keys we need to request.
232
This includes all the original keys passed in, and the non-ghost
233
parents of those keys.
235
:return: (needed_keys, refcounts)
236
needed_keys is the set of all texts we need to extract
237
refcounts is a dict of {key: num_children} letting us know when we
238
no longer need to cache a given parent text
240
# All the keys and their parents
241
needed_keys = set(self.ordered_keys)
242
parent_map = self.vf.get_parent_map(needed_keys)
243
self.parent_map = parent_map
244
# TODO: Should we be using a different construct here? I think this
245
# uses difference_update internally, and we expect the result to
247
missing_keys = needed_keys.difference(parent_map)
249
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
250
# Parents that might be missing. They are allowed to be ghosts, but we
251
# should check for them
253
setdefault = refcounts.setdefault
255
for child_key, parent_keys in parent_map.iteritems():
257
# parent_keys may be None if a given VersionedFile claims to
258
# not support graph operations.
260
just_parents.update(parent_keys)
261
needed_keys.update(parent_keys)
262
for p in parent_keys:
263
refcounts[p] = setdefault(p, 0) + 1
264
just_parents.difference_update(parent_map)
265
# Remove any parents that are actually ghosts from the needed set
266
self.present_parents = set(self.vf.get_parent_map(just_parents))
267
self.ghost_parents = just_parents.difference(self.present_parents)
268
needed_keys.difference_update(self.ghost_parents)
269
self.needed_keys = needed_keys
270
self.refcounts = refcounts
271
return needed_keys, refcounts
273
def _compute_diff(self, key, parent_lines, lines):
274
"""Compute a single mp_diff, and store it in self._diffs"""
275
if len(parent_lines) > 0:
276
# XXX: _extract_blocks is not usefully defined anywhere...
277
# It was meant to extract the left-parent diff without
278
# having to recompute it for Knit content (pack-0.92,
279
# etc). That seems to have regressed somewhere
280
left_parent_blocks = self.vf._extract_blocks(key,
281
parent_lines[0], lines)
283
left_parent_blocks = None
284
diff = multiparent.MultiParent.from_lines(lines,
285
parent_lines, left_parent_blocks)
286
self.diffs[key] = diff
288
def _process_one_record(self, key, this_chunks):
290
if key in self.parent_map:
291
# This record should be ready to diff, since we requested
292
# content in 'topological' order
293
parent_keys = self.parent_map.pop(key)
294
# If a VersionedFile claims 'no-graph' support, then it may return
295
# None for any parent request, so we replace it with an empty tuple
296
if parent_keys is None:
299
for p in parent_keys:
300
# Alternatively we could check p not in self.needed_keys, but
301
# ghost_parents should be tiny versus huge
302
if p in self.ghost_parents:
304
refcount = self.refcounts[p]
305
if refcount == 1: # Last child reference
306
self.refcounts.pop(p)
307
parent_chunks = self.chunks.pop(p)
309
self.refcounts[p] = refcount - 1
310
parent_chunks = self.chunks[p]
311
p_lines = osutils.chunks_to_lines(parent_chunks)
312
# TODO: Should we cache the line form? We did the
313
# computation to get it, but storing it this way will
314
# be less memory efficient...
315
parent_lines.append(p_lines)
317
lines = osutils.chunks_to_lines(this_chunks)
318
# Since we needed the lines, we'll go ahead and cache them this way
320
self._compute_diff(key, parent_lines, lines)
322
# Is this content required for any more children?
323
if key in self.refcounts:
324
self.chunks[key] = this_chunks
326
def _extract_diffs(self):
327
needed_keys, refcounts = self._find_needed_keys()
328
for record in self.vf.get_record_stream(needed_keys,
329
'topological', True):
330
if record.storage_kind == 'absent':
331
raise errors.RevisionNotPresent(record.key, self.vf)
332
self._process_one_record(record.key,
333
record.get_bytes_as('chunked'))
335
def compute_diffs(self):
336
self._extract_diffs()
337
dpop = self.diffs.pop
338
return [dpop(k) for k in self.ordered_keys]
209
341
class VersionedFile(object):
210
342
"""Versioned text file storage.
1027
1184
def make_mpdiffs(self, keys):
1028
1185
"""Create multiparent diffs for specified keys."""
1029
keys_order = tuple(keys)
1030
keys = frozenset(keys)
1031
knit_keys = set(keys)
1032
parent_map = self.get_parent_map(keys)
1033
for parent_keys in parent_map.itervalues():
1035
knit_keys.update(parent_keys)
1036
missing_keys = keys - set(parent_map)
1038
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
1039
# We need to filter out ghosts, because we can't diff against them.
1040
maybe_ghosts = knit_keys - keys
1041
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
1042
knit_keys.difference_update(ghosts)
1044
chunks_to_lines = osutils.chunks_to_lines
1045
for record in self.get_record_stream(knit_keys, 'topological', True):
1046
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1047
# line_block_dict = {}
1048
# for parent, blocks in record.extract_line_blocks():
1049
# line_blocks[parent] = blocks
1050
# line_blocks[record.key] = line_block_dict
1052
for key in keys_order:
1054
parents = parent_map[key] or []
1055
# Note that filtering knit_keys can lead to a parent difference
1056
# between the creation and the application of the mpdiff.
1057
parent_lines = [lines[p] for p in parents if p in knit_keys]
1058
if len(parent_lines) > 0:
1059
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1062
left_parent_blocks = None
1063
diffs.append(multiparent.MultiParent.from_lines(target,
1064
parent_lines, left_parent_blocks))
1186
generator = _MPDiffGenerator(self, keys)
1187
return generator.compute_diffs()
1189
def get_annotator(self):
1190
return annotate.Annotator(self)
1067
1192
missing_keys = index._missing_keys_from_parent_map
1468
1590
for struct in outstanding_struct():
1593
def base_from_plan(self):
1594
"""Construct a BASE file from the plan text."""
1596
for state, line in self.plan:
1597
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1598
# If unchanged, then this line is straight from base. If a or b
1599
# or both killed the line, then it *used* to be in base.
1600
base_lines.append(line)
1602
if state not in ('killed-base', 'irrelevant',
1603
'ghost-a', 'ghost-b',
1605
'conflicted-a', 'conflicted-b'):
1606
# killed-base, irrelevant means it doesn't apply
1607
# ghost-a/ghost-b are harder to say for sure, but they
1608
# aren't in the 'inc_c' which means they aren't in the
1609
# shared base of a & b. So we don't include them. And
1610
# obviously if the line is newly inserted, it isn't in base
1612
# If 'conflicted-a' or b, then it is new vs one base, but
1613
# old versus another base. However, if we make it present
1614
# in the base, it will be deleted from the target, and it
1615
# seems better to get a line doubled in the merge result,
1616
# rather than have it deleted entirely.
1617
# Example, each node is the 'text' at that point:
1625
# There was a criss-cross conflict merge. Both sides
1626
# include the other, but put themselves first.
1627
# Weave marks this as a 'clean' merge, picking OTHER over
1628
# THIS. (Though the details depend on order inserted into
1630
# LCA generates a plan:
1631
# [('unchanged', M),
1632
# ('conflicted-b', b),
1634
# ('conflicted-a', b),
1636
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1637
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1638
# removes one 'b', and BASE vs OTHER removes the other)
1639
# If you include neither, 3-way creates a clean "MbabN" as
1640
# THIS adds one 'b', and OTHER does too.
1641
# It seems that having the line 2 times is better than
1642
# having it omitted. (Easier to manually delete than notice
1643
# it needs to be added.)
1644
raise AssertionError('Unknown state: %s' % (state,))
1472
1648
class WeaveMerge(PlanWeaveMerge):
1473
1649
"""Weave merge that takes a VersionedFile and two versions as its input."""
1728
class NoDupeAddLinesDecorator(object):
1729
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1733
def __init__(self, store):
1736
def add_lines(self, key, parents, lines, parent_texts=None,
1737
left_matching_blocks=None, nostore_sha=None, random_id=False,
1738
check_content=True):
1739
"""See VersionedFiles.add_lines.
1741
This implementation may return None as the third element of the return
1742
value when the original store wouldn't.
1745
raise NotImplementedError(
1746
"NoDupeAddLinesDecorator.add_lines does not implement the "
1747
"nostore_sha behaviour.")
1749
sha1 = osutils.sha_strings(lines)
1750
key = ("sha1:" + sha1,)
1753
if key in self._store.get_parent_map([key]):
1754
# This key has already been inserted, so don't do it again.
1756
sha1 = osutils.sha_strings(lines)
1757
return sha1, sum(map(len, lines)), None
1758
return self._store.add_lines(key, parents, lines,
1759
parent_texts=parent_texts,
1760
left_matching_blocks=left_matching_blocks,
1761
nostore_sha=nostore_sha, random_id=random_id,
1762
check_content=check_content)
1764
def __getattr__(self, name):
1765
return getattr(self._store, name)
1552
1768
def network_bytes_to_kind_and_offset(network_bytes):
1553
1769
"""Strip of a record kind from the front of network_bytes.
1571
1787
record.get_bytes_as(record.storage_kind) call.
1573
1789
self._bytes_iterator = bytes_iterator
1574
self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
1575
'knit-delta-gz':knit.knit_network_to_record,
1576
'knit-annotated-ft-gz':knit.knit_network_to_record,
1577
'knit-annotated-delta-gz':knit.knit_network_to_record,
1578
'knit-delta-closure':knit.knit_delta_closure_to_records,
1579
'fulltext':fulltext_network_to_record,
1580
'groupcompress-block':groupcompress.network_block_to_records,
1790
self._kind_factory = {
1791
'fulltext': fulltext_network_to_record,
1792
'groupcompress-block': groupcompress.network_block_to_records,
1793
'knit-ft-gz': knit.knit_network_to_record,
1794
'knit-delta-gz': knit.knit_network_to_record,
1795
'knit-annotated-ft-gz': knit.knit_network_to_record,
1796
'knit-annotated-delta-gz': knit.knit_network_to_record,
1797
'knit-delta-closure': knit.knit_delta_closure_to_records,
1583
1800
def read(self):