class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}
    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if not parent_keys:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts
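
    # Illustration (added commentary, not from the original source): with
    # parent_map {('b',): (('a',),), ('c',): (('a',),)}, the returned
    # refcounts is {('a',): 2}, so the cached chunks for ('a',) are released
    # only once its second child has been diffed in _process_one_record.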

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff
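
    # Added commentary: MultiParent.from_lines matches `lines` against each
    # parent text in turn; left_parent_blocks, when available, only saves
    # recomputing the matching blocks against parent_lines[0].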

    def _process_one_record(self, key, this_chunks):
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks
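
    # Added commentary: this_chunks is rebound to the line form above, so a
    # text that is both requested and a parent of a later key is cached in
    # the form we already paid to compute.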

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
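
# A minimal usage sketch (added; not part of the original module). `vf` is
# assumed to be any VersionedFiles implementation that contains the
# hypothetical keys below:
#
#   generator = _MPDiffGenerator(vf, [('rev-2',), ('rev-1',)])
#   diffs = generator.compute_diffs()
#   # diffs[0] is the mp_diff for ('rev-2',), diffs[1] for ('rev-1',):
#   # results always come back in the requested order.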


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.
    """


class VersionedFiles(object):
    """Storage for many versioned files."""

    def annotate(self, key):
        """Return a list of (version-key, line) tuples for the text of key.

        :raise RevisionNotPresent: If the key is not present.
        """
        raise NotImplementedError(self.annotate)

    def check(self, progress_bar=None, keys=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)

    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)

    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """

    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()
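
    # Added commentary: the inverse operation is add_mpdiffs(), which
    # reinstalls texts from these multi-parent diffs; bundle serialization
    # relies on the two round-tripping.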

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks
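
    # Illustration (added): if A lists B as its immediate fallback and B
    # lists C, then A._transitive_fallbacks() returns [B, C] even though A
    # only knows about B directly.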


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.

    This object allows a single keyspace for accessing the history graph and
    contents of named bytestrings.
    """


class VersionedFilesWithFallbacks(VersionedFiles):

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
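
    # Added commentary: fallbacks are consulted in stack order and only for
    # keys still missing, so earlier sources win when the same key appears
    # in several fallbacks.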


class _PlanMergeVersionedFile(VersionedFiles):
    """A VersionedFile for uncommitted and committed texts."""


class PlanWeaveMerge(TextMerge):
    """Weave merge that takes a plan as its input."""

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base

                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""


class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    would already exist.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
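
# A minimal usage sketch (added): `real_store` is assumed to be any
# VersionedFiles implementation.
#
#   store = NoDupeAddLinesDecorator(real_store)
#   store.add_lines(key, parents, lines)  # stored
#   store.add_lines(key, parents, lines)  # skipped: key already present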


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.index('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological, properly
    # grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
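
# Illustration (added): for parent_map {('f1', 'b'): (('f1', 'a'),),
# ('f2', 'b'): (('f2', 'a'),)} the result groups by prefix and reverses the
# topological order within each group, e.g.
# [('f1', 'b'), ('f1', 'a'), ('f2', 'b'), ('f2', 'a')].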


class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.iterkeys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key. That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        result = set()
        for referrers in self.refs.itervalues():
            result.update(referrers)
        return result
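
# A minimal sketch (added) of the _KeyRefs bookkeeping:
#
#   refs = _KeyRefs()
#   refs.add_references(('b',), [('a',)])   # ('b',) refers to ('a',)
#   list(refs.get_unsatisfied_refs())       # -> [('a',)]
#   refs.add_key(('a',))                    # ('a',) arrives
#   list(refs.get_unsatisfied_refs())       # -> []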