class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}
    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if not parent_keys:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts
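
    # A worked example (hypothetical keys): requesting [('r1',), ('r2',)]
    # where ('r1',) is the sole parent of ('r2',) yields
    # needed_keys == set([('r1',), ('r2',)]) and refcounts == {('r1',): 1},
    # so ('r1',)'s cached text can be dropped as soon as ('r2',) is diffed.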

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks
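
    # Note that because get_record_stream is asked for 'topological' order,
    # every non-ghost parent of 'key' has already been processed by the time
    # 'key' arrives, and is still sitting in self.chunks if any child has yet
    # to consume it.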

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
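
# A minimal usage sketch (assuming 'vf' is any VersionedFiles implementation
# and the hypothetical keys are present in it):
#   generator = _MPDiffGenerator(vf, [('file-id', 'rev-1'), ('file-id', 'rev-2')])
#   diffs = generator.compute_diffs()  # one MultiParent diff per key, in order
# VersionedFiles.make_mpdiffs (below) delegates to exactly this.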


class VersionedFile(object):
    """Versioned text file storage.

        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)
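
    # A sketch of the keyed form (assuming a concrete implementation whose
    # check() accepts the 'keys' parameter documented above):
    #   for record in vf.check(keys=[('file-id', 'rev-1')]):
    #       ...  # records stream as per get_record_stream, after deeper checks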

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()
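    # All of the streaming, parent refcounting and ghost filtering lives in
    # _MPDiffGenerator (defined above); this method is just its entry point.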

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks
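
    # For example (hypothetical stack): if self falls back to B, and B falls
    # back to C, this returns [B, C] even though self._fallback_vfs is just [B].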


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'insert', 'delete',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base

                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #     MN
                    #    /   \
                    #  MaN   MbN
                    #   |  X  |
                    #  MabN MbaN
                    #    \   /
                    #     ?????
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
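
    # Worked example for the criss-cross plan above (illustrative only): the
    # 'unchanged' lines M, a and N survive into BASE while both 'conflicted-*'
    # b lines are dropped, so BASE is "MaN"; a 3-way merge of THIS="MabN" and
    # OTHER="MbaN" against it then produces "MbabN" with the doubled 'b'.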


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""


class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    would already be present.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
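
# A usage sketch (assuming 'store' is any VersionedFiles): the decorator turns
# repeated insertion of the same key into a cheap no-op:
#   store = NoDupeAddLinesDecorator(store)
#   store.add_lines(key, parents, lines)  # first call inserts the text
#   store.add_lines(key, parents, lines)  # repeat returns (sha1, size, None)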


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }
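        # read() (below) peels the storage kind off the front of each byte
        # string with network_bytes_to_kind_and_offset(), then dispatches
        # through this map to rebuild the records.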

    def read(self):