203
class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        # The VersionedFiles we will extract texts from
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        # Build the mp_diff for one record, using (and releasing) any cached
        # parent texts whose refcount drops to zero.
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        # Stream all needed texts in topological order and diff each one as
        # soon as its parents are available.
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        """Compute and return the mp_diffs, in the originally requested order."""
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
202
335
class VersionedFile(object):
203
336
"""Versioned text file storage.
830
971
raise NotImplementedError(self.add_lines)
973
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
974
"""Add a text to the store.
976
This is a private function for use by VersionedFileCommitBuilder.
978
:param key: The key tuple of the text to add. If the last element is
979
None, a CHK string will be generated during the addition.
980
:param parents: The parents key tuples of the text to add.
981
:param text: A string containing the text to be committed.
982
:param nostore_sha: Raise ExistingContent and do not add the lines to
983
the versioned file if the digest of the lines matches this.
984
:param random_id: If True a random id has been selected rather than
985
an id determined by some deterministic process such as a converter
986
from a foreign VCS. When True the backend may choose not to check
987
for uniqueness of the resulting key within the versioned file, so
988
this should only be done when the result is expected to be unique
990
:param check_content: If True, the lines supplied are verified to be
991
bytestrings that are correctly formed lines.
992
:return: The text sha1, the number of bytes in the text, and an opaque
993
representation of the inserted version which can be provided
994
back to future _add_text calls in the parent_texts dictionary.
996
# The default implementation just thunks over to .add_lines(),
997
# inefficient, but it works.
998
return self.add_lines(key, parents, osutils.split_lines(text),
999
nostore_sha=nostore_sha,
1000
random_id=random_id,
832
1003
def add_mpdiffs(self, records):
833
1004
"""Add mpdiffs to this VersionedFile.
876
1047
raise NotImplementedError(self.annotate)
878
1049
def check(self, progress_bar=None):
879
"""Check this object for integrity."""
1050
"""Check this object for integrity.
1052
:param progress_bar: A progress bar to output as the check progresses.
1053
:param keys: Specific keys within the VersionedFiles to check. When
1054
this parameter is not None, check() becomes a generator as per
1055
get_record_stream. The difference to get_record_stream is that
1056
more or deeper checks will be performed.
1057
:return: None, or if keys was supplied a generator as per
880
1060
raise NotImplementedError(self.check)
883
1063
def check_not_reserved_id(version_id):
884
1064
revision.check_not_reserved_id(version_id)
1066
def clear_cache(self):
1067
"""Clear whatever caches this VersionedFile holds.
1069
This is generally called after an operation has been performed, when we
1070
don't expect to be using this versioned file again soon.
886
1073
def _check_lines_not_unicode(self, lines):
887
1074
"""Check that lines being added to a versioned file are not unicode."""
888
1075
for line in lines:
981
1182
def make_mpdiffs(self, keys):
982
1183
"""Create multiparent diffs for specified keys."""
983
keys_order = tuple(keys)
984
keys = frozenset(keys)
985
knit_keys = set(keys)
986
parent_map = self.get_parent_map(keys)
987
for parent_keys in parent_map.itervalues():
989
knit_keys.update(parent_keys)
990
missing_keys = keys - set(parent_map)
992
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
993
# We need to filter out ghosts, because we can't diff against them.
994
maybe_ghosts = knit_keys - keys
995
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
996
knit_keys.difference_update(ghosts)
998
chunks_to_lines = osutils.chunks_to_lines
999
for record in self.get_record_stream(knit_keys, 'topological', True):
1000
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1001
# line_block_dict = {}
1002
# for parent, blocks in record.extract_line_blocks():
1003
# line_blocks[parent] = blocks
1004
# line_blocks[record.key] = line_block_dict
1006
for key in keys_order:
1008
parents = parent_map[key] or []
1009
# Note that filtering knit_keys can lead to a parent difference
1010
# between the creation and the application of the mpdiff.
1011
parent_lines = [lines[p] for p in parents if p in knit_keys]
1012
if len(parent_lines) > 0:
1013
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1016
left_parent_blocks = None
1017
diffs.append(multiparent.MultiParent.from_lines(target,
1018
parent_lines, left_parent_blocks))
1184
generator = _MPDiffGenerator(self, keys)
1185
return generator.compute_diffs()
1187
def get_annotator(self):
1188
return annotate.Annotator(self)
1021
1190
missing_keys = index._missing_keys_from_parent_map
1023
1192
def _extract_blocks(self, version_id, source, target):
1195
def _transitive_fallbacks(self):
1196
"""Return the whole stack of fallback versionedfiles.
1198
This VersionedFiles may have a list of fallbacks, but it doesn't
1199
necessarily know about the whole stack going down, and it can't know
1200
at open time because they may change after the objects are opened.
1203
for a_vfs in self._immediate_fallback_vfs:
1204
all_fallbacks.append(a_vfs)
1205
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1206
return all_fallbacks
1027
1209
class ThunkedVersionedFiles(VersionedFiles):
1028
1210
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1426
class VersionedFilesWithFallbacks(VersionedFiles):
    """A VersionedFiles that can consult fallback stores for missing texts."""

    def without_fallbacks(self):
        """Return a clone of this object without any fallbacks configured."""
        raise NotImplementedError(self.without_fallbacks)

    def add_fallback_versioned_files(self, a_versioned_files):
        """Add a source of texts for texts not present in this knit.

        :param a_versioned_files: A VersionedFiles object.
        """
        raise NotImplementedError(self.add_fallback_versioned_files)

    def get_known_graph_ancestry(self, keys):
        """Get a KnownGraph instance with the ancestry of keys."""
        parent_map, missing_keys = self._index.find_ancestry(keys)
        for fallback in self._transitive_fallbacks():
            if not missing_keys:
                # Everything resolved; no need to consult further fallbacks.
                break
            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
                missing_keys)
            parent_map.update(f_parent_map)
            missing_keys = f_missing_keys
        kg = _mod_graph.KnownGraph(parent_map)
        return kg
1239
1453
class _PlanMergeVersionedFile(VersionedFiles):
1240
1454
"""A VersionedFile for uncommitted and committed texts.
1403
1618
elif state == 'conflicted-b':
1404
1619
ch_b = ch_a = True
1405
1620
lines_b.append(line)
1621
elif state == 'killed-both':
1622
# This counts as a change, even though there is no associated
1407
1626
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1408
'killed-base', 'killed-both'):
1409
1628
raise AssertionError(state)
1410
1629
for struct in outstanding_struct():
1632
def base_from_plan(self):
1633
"""Construct a BASE file from the plan text."""
1635
for state, line in self.plan:
1636
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1637
# If unchanged, then this line is straight from base. If a or b
1638
# or both killed the line, then it *used* to be in base.
1639
base_lines.append(line)
1641
if state not in ('killed-base', 'irrelevant',
1642
'ghost-a', 'ghost-b',
1644
'conflicted-a', 'conflicted-b'):
1645
# killed-base, irrelevant means it doesn't apply
1646
# ghost-a/ghost-b are harder to say for sure, but they
1647
# aren't in the 'inc_c' which means they aren't in the
1648
# shared base of a & b. So we don't include them. And
1649
# obviously if the line is newly inserted, it isn't in base
1651
# If 'conflicted-a' or b, then it is new vs one base, but
1652
# old versus another base. However, if we make it present
1653
# in the base, it will be deleted from the target, and it
1654
# seems better to get a line doubled in the merge result,
1655
# rather than have it deleted entirely.
1656
# Example, each node is the 'text' at that point:
1664
# There was a criss-cross conflict merge. Both sides
1665
# include the other, but put themselves first.
1666
# Weave marks this as a 'clean' merge, picking OTHER over
1667
# THIS. (Though the details depend on order inserted into
1669
# LCA generates a plan:
1670
# [('unchanged', M),
1671
# ('conflicted-b', b),
1673
# ('conflicted-a', b),
1675
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1676
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1677
# removes one 'b', and BASE vs OTHER removes the other)
1678
# If you include neither, 3-way creates a clean "MbabN" as
1679
# THIS adds one 'b', and OTHER does too.
1680
# It seems that having the line 2 times is better than
1681
# having it omitted. (Easier to manually delete than notice
1682
# it needs to be added.)
1683
raise AssertionError('Unknown state: %s' % (state,))
1414
1687
class WeaveMerge(PlanWeaveMerge):
1415
1688
"""Weave merge that takes a VersionedFile and two versions as its input."""
1767
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    is already present.
    """

    def __init__(self, store):
        # The wrapped VersionedFiles; all other attribute access is
        # delegated to it via __getattr__.
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # A CHK-style key: derive the final key from the content sha1.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
1494
1807
def network_bytes_to_kind_and_offset(network_bytes):
1495
1808
"""Strip of a record kind from the front of network_bytes.
1513
1826
record.get_bytes_as(record.storage_kind) call.
1515
1828
self._bytes_iterator = bytes_iterator
1516
self._kind_factory = {'knit-ft-gz':knit.knit_network_to_record,
1517
'knit-delta-gz':knit.knit_network_to_record,
1518
'knit-annotated-ft-gz':knit.knit_network_to_record,
1519
'knit-annotated-delta-gz':knit.knit_network_to_record,
1520
'knit-delta-closure':knit.knit_delta_closure_to_records,
1521
'fulltext':fulltext_network_to_record,
1522
'groupcompress-block':groupcompress.network_block_to_records,
1829
self._kind_factory = {
1830
'fulltext': fulltext_network_to_record,
1831
'groupcompress-block': groupcompress.network_block_to_records,
1832
'knit-ft-gz': knit.knit_network_to_record,
1833
'knit-delta-gz': knit.knit_network_to_record,
1834
'knit-annotated-ft-gz': knit.knit_network_to_record,
1835
'knit-annotated-delta-gz': knit.knit_network_to_record,
1836
'knit-delta-closure': knit.knit_delta_closure_to_records,
1525
1839
def read(self):
1586
1900
for prefix in sorted(per_prefix_map):
1587
1901
present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
1588
1902
return present_keys
1905
class _KeyRefs(object):
1907
def __init__(self, track_new_keys=False):
1908
# dict mapping 'key' to 'set of keys referring to that key'
1911
# set remembering all new keys
1912
self.new_keys = set()
1914
self.new_keys = None
1920
self.new_keys.clear()
1922
def add_references(self, key, refs):
1923
# Record the new references
1924
for referenced in refs:
1926
needed_by = self.refs[referenced]
1928
needed_by = self.refs[referenced] = set()
1930
# Discard references satisfied by the new key
1933
def get_new_keys(self):
1934
return self.new_keys
1936
def get_unsatisfied_refs(self):
1937
return self.refs.iterkeys()
1939
def _satisfy_refs_for_key(self, key):
1943
# No keys depended on this key. That's ok.
1946
def add_key(self, key):
1947
# satisfy refs for key, and remember that we've seen this key.
1948
self._satisfy_refs_for_key(key)
1949
if self.new_keys is not None:
1950
self.new_keys.add(key)
1952
def satisfy_refs_for_keys(self, keys):
1954
self._satisfy_refs_for_key(key)
1956
def get_referrers(self):
1958
for referrers in self.refs.itervalues():
1959
result.update(referrers)