209
class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        # The VersionedFiles we extract texts and parent graphs from
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs."""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        """Diff one record against its cached parents and cache it if needed.

        :param key: The key of the text the record carries.
        :param this_chunks: The record content as a list of chunks.
        """
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        """Stream all needed texts in topological order, diffing each one."""
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        """Compute and return the mp_diffs, in the originally requested order."""
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
209
341
class VersionedFile(object):
210
342
"""Versioned text file storage.
913
1049
raise NotImplementedError(self.annotate)
915
1051
def check(self, progress_bar=None):
916
"""Check this object for integrity."""
1052
"""Check this object for integrity.
1054
:param progress_bar: A progress bar to output as the check progresses.
1055
:param keys: Specific keys within the VersionedFiles to check. When
1056
this parameter is not None, check() becomes a generator as per
1057
get_record_stream. The difference to get_record_stream is that
1058
more or deeper checks will be performed.
1059
:return: None, or if keys was supplied a generator as per
917
1062
raise NotImplementedError(self.check)
920
1065
def check_not_reserved_id(version_id):
921
1066
revision.check_not_reserved_id(version_id)
1068
def clear_cache(self):
1069
"""Clear whatever caches this VersionedFile holds.
1071
This is generally called after an operation has been performed, when we
1072
don't expect to be using this versioned file again soon.
923
1075
def _check_lines_not_unicode(self, lines):
924
1076
"""Check that lines being added to a versioned file are not unicode."""
925
1077
for line in lines:
1018
1184
def make_mpdiffs(self, keys):
1019
1185
"""Create multiparent diffs for specified keys."""
1020
keys_order = tuple(keys)
1021
keys = frozenset(keys)
1022
knit_keys = set(keys)
1023
parent_map = self.get_parent_map(keys)
1024
for parent_keys in parent_map.itervalues():
1026
knit_keys.update(parent_keys)
1027
missing_keys = keys - set(parent_map)
1029
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
1030
# We need to filter out ghosts, because we can't diff against them.
1031
maybe_ghosts = knit_keys - keys
1032
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
1033
knit_keys.difference_update(ghosts)
1035
chunks_to_lines = osutils.chunks_to_lines
1036
for record in self.get_record_stream(knit_keys, 'topological', True):
1037
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1038
# line_block_dict = {}
1039
# for parent, blocks in record.extract_line_blocks():
1040
# line_blocks[parent] = blocks
1041
# line_blocks[record.key] = line_block_dict
1043
for key in keys_order:
1045
parents = parent_map[key] or []
1046
# Note that filtering knit_keys can lead to a parent difference
1047
# between the creation and the application of the mpdiff.
1048
parent_lines = [lines[p] for p in parents if p in knit_keys]
1049
if len(parent_lines) > 0:
1050
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1053
left_parent_blocks = None
1054
diffs.append(multiparent.MultiParent.from_lines(target,
1055
parent_lines, left_parent_blocks))
1186
generator = _MPDiffGenerator(self, keys)
1187
return generator.compute_diffs()
1189
def get_annotator(self):
1190
return annotate.Annotator(self)
1058
1192
missing_keys = index._missing_keys_from_parent_map
1454
1590
for struct in outstanding_struct():
1593
def base_from_plan(self):
1594
"""Construct a BASE file from the plan text."""
1596
for state, line in self.plan:
1597
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1598
# If unchanged, then this line is straight from base. If a or b
1599
# or both killed the line, then it *used* to be in base.
1600
base_lines.append(line)
1602
if state not in ('killed-base', 'irrelevant',
1603
'ghost-a', 'ghost-b',
1605
'conflicted-a', 'conflicted-b'):
1606
# killed-base, irrelevant means it doesn't apply
1607
# ghost-a/ghost-b are harder to say for sure, but they
1608
# aren't in the 'inc_c' which means they aren't in the
1609
# shared base of a & b. So we don't include them. And
1610
# obviously if the line is newly inserted, it isn't in base
1612
# If 'conflicted-a' or b, then it is new vs one base, but
1613
# old versus another base. However, if we make it present
1614
# in the base, it will be deleted from the target, and it
1615
# seems better to get a line doubled in the merge result,
1616
# rather than have it deleted entirely.
1617
# Example, each node is the 'text' at that point:
1625
# There was a criss-cross conflict merge. Both sides
1626
# include the other, but put themselves first.
1627
# Weave marks this as a 'clean' merge, picking OTHER over
1628
# THIS. (Though the details depend on order inserted into
1630
# LCA generates a plan:
1631
# [('unchanged', M),
1632
# ('conflicted-b', b),
1634
# ('conflicted-a', b),
1636
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1637
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1638
# removes one 'b', and BASE vs OTHER removes the other)
1639
# If you include neither, 3-way creates a clean "MbabN" as
1640
# THIS adds one 'b', and OTHER does too.
1641
# It seems that having the line 2 times is better than
1642
# having it omitted. (Easier to manually delete than notice
1643
# it needs to be added.)
1644
raise AssertionError('Unknown state: %s' % (state,))
1458
1648
class WeaveMerge(PlanWeaveMerge):
1459
1649
"""Weave merge that takes a VersionedFile and two versions as its input."""
1728
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    is already present.
    """

    def __init__(self, store):
        # The wrapped VersionedFiles; all other attribute access is
        # forwarded to it via __getattr__.
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # Content-addressed key: derive it from the sha1 of the lines.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha, random_id=random_id,
            check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
1538
1768
def network_bytes_to_kind_and_offset(network_bytes):
1539
1769
"""Strip of a record kind from the front of network_bytes.
1557
1787
record.get_bytes_as(record.storage_kind) call.
1559
1789
self._bytes_iterator = bytes_iterator
1560
self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
1561
'knit-delta-gz':knit.knit_network_to_record,
1562
'knit-annotated-ft-gz':knit.knit_network_to_record,
1563
'knit-annotated-delta-gz':knit.knit_network_to_record,
1564
'knit-delta-closure':knit.knit_delta_closure_to_records,
1565
'fulltext':fulltext_network_to_record,
1566
'groupcompress-block':groupcompress.network_block_to_records,
1790
self._kind_factory = {
1791
'fulltext': fulltext_network_to_record,
1792
'groupcompress-block': groupcompress.network_block_to_records,
1793
'knit-ft-gz': knit.knit_network_to_record,
1794
'knit-delta-gz': knit.knit_network_to_record,
1795
'knit-annotated-ft-gz': knit.knit_network_to_record,
1796
'knit-annotated-delta-gz': knit.knit_network_to_record,
1797
'knit-delta-closure': knit.knit_delta_closure_to_records,
1569
1800
def read(self):