203
class _MPDiffGenerator(object):
204
"""Pull out the functionality for generating mp_diffs."""
206
def __init__(self, vf, keys):
208
# This is the order the keys were requested in
209
self.ordered_keys = tuple(keys)
210
# keys + their parents, what we need to compute the diffs
211
self.needed_keys = ()
212
# Map from key: mp_diff
214
# Map from key: parents_needed (may have ghosts)
216
# Parents that aren't present
217
self.ghost_parents = ()
218
# Map from parent_key => number of children for this text
220
# Content chunks that are cached while we still need them
223
def _find_needed_keys(self):
224
"""Find the set of keys we need to request.
226
This includes all the original keys passed in, and the non-ghost
227
parents of those keys.
229
:return: (needed_keys, refcounts)
230
needed_keys is the set of all texts we need to extract
231
refcounts is a dict of {key: num_children} letting us know when we
232
no longer need to cache a given parent text
234
# All the keys and their parents
235
needed_keys = set(self.ordered_keys)
236
parent_map = self.vf.get_parent_map(needed_keys)
237
self.parent_map = parent_map
238
# TODO: Should we be using a different construct here? I think this
239
# uses difference_update internally, and we expect the result to
241
missing_keys = needed_keys.difference(parent_map)
243
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
244
# Parents that might be missing. They are allowed to be ghosts, but we
245
# should check for them
247
setdefault = refcounts.setdefault
249
for child_key, parent_keys in parent_map.iteritems():
251
# parent_keys may be None if a given VersionedFile claims to
252
# not support graph operations.
254
just_parents.update(parent_keys)
255
needed_keys.update(parent_keys)
256
for p in parent_keys:
257
refcounts[p] = setdefault(p, 0) + 1
258
just_parents.difference_update(parent_map)
259
# Remove any parents that are actually ghosts from the needed set
260
self.present_parents = set(self.vf.get_parent_map(just_parents))
261
self.ghost_parents = just_parents.difference(self.present_parents)
262
needed_keys.difference_update(self.ghost_parents)
263
self.needed_keys = needed_keys
264
self.refcounts = refcounts
265
return needed_keys, refcounts
267
def _compute_diff(self, key, parent_lines, lines):
268
"""Compute a single mp_diff, and store it in self._diffs"""
269
if len(parent_lines) > 0:
270
# XXX: _extract_blocks is not usefully defined anywhere...
271
# It was meant to extract the left-parent diff without
272
# having to recompute it for Knit content (pack-0.92,
273
# etc). That seems to have regressed somewhere
274
left_parent_blocks = self.vf._extract_blocks(key,
275
parent_lines[0], lines)
277
left_parent_blocks = None
278
diff = multiparent.MultiParent.from_lines(lines,
279
parent_lines, left_parent_blocks)
280
self.diffs[key] = diff
282
def _process_one_record(self, key, this_chunks):
284
if key in self.parent_map:
285
# This record should be ready to diff, since we requested
286
# content in 'topological' order
287
parent_keys = self.parent_map.pop(key)
288
# If a VersionedFile claims 'no-graph' support, then it may return
289
# None for any parent request, so we replace it with an empty tuple
290
if parent_keys is None:
293
for p in parent_keys:
294
# Alternatively we could check p not in self.needed_keys, but
295
# ghost_parents should be tiny versus huge
296
if p in self.ghost_parents:
298
refcount = self.refcounts[p]
299
if refcount == 1: # Last child reference
300
self.refcounts.pop(p)
301
parent_chunks = self.chunks.pop(p)
303
self.refcounts[p] = refcount - 1
304
parent_chunks = self.chunks[p]
305
p_lines = osutils.chunks_to_lines(parent_chunks)
306
# TODO: Should we cache the line form? We did the
307
# computation to get it, but storing it this way will
308
# be less memory efficient...
309
parent_lines.append(p_lines)
311
lines = osutils.chunks_to_lines(this_chunks)
312
# Since we needed the lines, we'll go ahead and cache them this way
314
self._compute_diff(key, parent_lines, lines)
316
# Is this content required for any more children?
317
if key in self.refcounts:
318
self.chunks[key] = this_chunks
320
def _extract_diffs(self):
321
needed_keys, refcounts = self._find_needed_keys()
322
for record in self.vf.get_record_stream(needed_keys,
323
'topological', True):
324
if record.storage_kind == 'absent':
325
raise errors.RevisionNotPresent(record.key, self.vf)
326
self._process_one_record(record.key,
327
record.get_bytes_as('chunked'))
329
def compute_diffs(self):
330
self._extract_diffs()
331
dpop = self.diffs.pop
332
return [dpop(k) for k in self.ordered_keys]
335
210
class VersionedFile(object):
336
211
"""Versioned text file storage.
1182
1042
def make_mpdiffs(self, keys):
1183
1043
"""Create multiparent diffs for specified keys."""
1184
generator = _MPDiffGenerator(self, keys)
1185
return generator.compute_diffs()
1187
def get_annotator(self):
1188
return annotate.Annotator(self)
1044
keys_order = tuple(keys)
1045
keys = frozenset(keys)
1046
knit_keys = set(keys)
1047
parent_map = self.get_parent_map(keys)
1048
for parent_keys in parent_map.itervalues():
1050
knit_keys.update(parent_keys)
1051
missing_keys = keys - set(parent_map)
1053
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
1054
# We need to filter out ghosts, because we can't diff against them.
1055
maybe_ghosts = knit_keys - keys
1056
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
1057
knit_keys.difference_update(ghosts)
1059
chunks_to_lines = osutils.chunks_to_lines
1060
for record in self.get_record_stream(knit_keys, 'topological', True):
1061
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1062
# line_block_dict = {}
1063
# for parent, blocks in record.extract_line_blocks():
1064
# line_blocks[parent] = blocks
1065
# line_blocks[record.key] = line_block_dict
1067
for key in keys_order:
1069
parents = parent_map[key] or []
1070
# Note that filtering knit_keys can lead to a parent difference
1071
# between the creation and the application of the mpdiff.
1072
parent_lines = [lines[p] for p in parents if p in knit_keys]
1073
if len(parent_lines) > 0:
1074
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1077
left_parent_blocks = None
1078
diffs.append(multiparent.MultiParent.from_lines(target,
1079
parent_lines, left_parent_blocks))
1190
1082
missing_keys = index._missing_keys_from_parent_map
1192
1084
def _extract_blocks(self, version_id, source, target):
1195
def _transitive_fallbacks(self):
1196
"""Return the whole stack of fallback versionedfiles.
1198
This VersionedFiles may have a list of fallbacks, but it doesn't
1199
necessarily know about the whole stack going down, and it can't know
1200
at open time because they may change after the objects are opened.
1203
for a_vfs in self._immediate_fallback_vfs:
1204
all_fallbacks.append(a_vfs)
1205
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1206
return all_fallbacks
1209
1088
class ThunkedVersionedFiles(VersionedFiles):
1210
1089
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1426
class VersionedFilesWithFallbacks(VersionedFiles):
    """A VersionedFiles that can also search a stack of fallback stores."""

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            # Stop as soon as everything has been resolved.
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
1453
1308
class _PlanMergeVersionedFile(VersionedFiles):
1454
1309
"""A VersionedFile for uncommitted and committed texts.
1629
1483
for struct in outstanding_struct():
1632
def base_from_plan(self):
1633
"""Construct a BASE file from the plan text."""
1635
for state, line in self.plan:
1636
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1637
# If unchanged, then this line is straight from base. If a or b
1638
# or both killed the line, then it *used* to be in base.
1639
base_lines.append(line)
1641
if state not in ('killed-base', 'irrelevant',
1642
'ghost-a', 'ghost-b',
1644
'conflicted-a', 'conflicted-b'):
1645
# killed-base, irrelevant means it doesn't apply
1646
# ghost-a/ghost-b are harder to say for sure, but they
1647
# aren't in the 'inc_c' which means they aren't in the
1648
# shared base of a & b. So we don't include them. And
1649
# obviously if the line is newly inserted, it isn't in base
1651
# If 'conflicted-a' or b, then it is new vs one base, but
1652
# old versus another base. However, if we make it present
1653
# in the base, it will be deleted from the target, and it
1654
# seems better to get a line doubled in the merge result,
1655
# rather than have it deleted entirely.
1656
# Example, each node is the 'text' at that point:
1664
# There was a criss-cross conflict merge. Both sides
1665
# include the other, but put themselves first.
1666
# Weave marks this as a 'clean' merge, picking OTHER over
1667
# THIS. (Though the details depend on order inserted into
1669
# LCA generates a plan:
1670
# [('unchanged', M),
1671
# ('conflicted-b', b),
1673
# ('conflicted-a', b),
1675
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1676
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1677
# removes one 'b', and BASE vs OTHER removes the other)
1678
# If you include neither, 3-way creates a clean "MbabN" as
1679
# THIS adds one 'b', and OTHER does too.
1680
# It seems that having the line 2 times is better than
1681
# having it omitted. (Easier to manually delete than notice
1682
# it needs to be added.)
1683
raise AssertionError('Unknown state: %s' % (state,))
1687
1487
class WeaveMerge(PlanWeaveMerge):
1688
1488
"""Weave merge that takes a VersionedFile and two versions as its input."""
1767
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    being added is already present.
    """

    def __init__(self, store):
        # The real VersionedFiles; every operation not overridden here is
        # delegated to it via __getattr__.
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # Content-addressed key: derive it from the text's sha1.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha, random_id=random_id,
            check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
1807
1567
def network_bytes_to_kind_and_offset(network_bytes):
1808
1568
"""Strip of a record kind from the front of network_bytes.
1900
1660
for prefix in sorted(per_prefix_map):
1901
1661
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1902
1662
return present_keys
1905
class _KeyRefs(object):
1907
def __init__(self, track_new_keys=False):
1908
# dict mapping 'key' to 'set of keys referring to that key'
1911
# set remembering all new keys
1912
self.new_keys = set()
1914
self.new_keys = None
1920
self.new_keys.clear()
1922
def add_references(self, key, refs):
1923
# Record the new references
1924
for referenced in refs:
1926
needed_by = self.refs[referenced]
1928
needed_by = self.refs[referenced] = set()
1930
# Discard references satisfied by the new key
1933
def get_new_keys(self):
1934
return self.new_keys
1936
def get_unsatisfied_refs(self):
1937
return self.refs.iterkeys()
1939
def _satisfy_refs_for_key(self, key):
1943
# No keys depended on this key. That's ok.
1946
def add_key(self, key):
1947
# satisfy refs for key, and remember that we've seen this key.
1948
self._satisfy_refs_for_key(key)
1949
if self.new_keys is not None:
1950
self.new_keys.add(key)
1952
def satisfy_refs_for_keys(self, keys):
1954
self._satisfy_refs_for_key(key)
1956
def get_referrers(self):
1958
for referrers in self.refs.itervalues():
1959
result.update(referrers)