202
class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs.

    Given a VersionedFiles-like object and a set of keys, this computes a
    multiparent diff for each requested key against its (non-ghost) parents,
    streaming content in topological order so parent texts are available
    when each child is diffed, and caching parent chunks only while they
    are still needed.
    """

    def __init__(self, vf, keys):
        # The versioned file store texts are pulled from
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        """Diff one streamed record, caching its chunks if children need it.

        :param key: The key of the text just received.
        :param this_chunks: The content of the text, as a list of chunks.
        """
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        """Stream all needed texts and compute a diff for each ordered key."""
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        """Return the mp_diffs in the order the keys were requested."""
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
198
334
class VersionedFile(object):
199
335
"""Versioned text file storage.
201
337
A versioned file manages versions of line-based text files,
202
338
keeping track of the originating version for each line.
825
970
raise NotImplementedError(self.add_lines)
972
def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
    """Add a text to the store.

    This is a private function for use by VersionedFileCommitBuilder.

    :param key: The key tuple of the text to add. If the last element is
        None, a CHK string will be generated during the addition.
    :param parents: The parents key tuples of the text to add.
    :param text: A string containing the text to be committed.
    :param nostore_sha: Raise ExistingContent and do not add the lines to
        the versioned file if the digest of the lines matches this.
    :param random_id: If True a random id has been selected rather than
        an id determined by some deterministic process such as a converter
        from a foreign VCS. When True the backend may choose not to check
        for uniqueness of the resulting key within the versioned file, so
        this should only be done when the result is expected to be unique
        anyway.
    :return: The text sha1, the number of bytes in the text, and an opaque
        representation of the inserted version which can be provided
        back to future _add_text calls in the parent_texts dictionary.
    """
    # The default implementation just thunks over to .add_lines(),
    # inefficient, but it works.
    return self.add_lines(key, parents, osutils.split_lines(text),
                          nostore_sha=nostore_sha,
                          random_id=random_id,
                          check_content=True)
827
1002
def add_mpdiffs(self, records):
828
1003
"""Add mpdiffs to this VersionedFile.
964
1181
def make_mpdiffs(self, keys):
    """Create multiparent diffs for specified keys.

    :param keys: The keys to create diffs for; diffs are returned in the
        same order the keys were given.
    :return: A list of multiparent.MultiParent objects, one per key.
    """
    # The previous inline implementation (building a full lines-dict for
    # every needed text before diffing) has been superseded by
    # _MPDiffGenerator, which streams content topologically and releases
    # parent texts as soon as their last child has been diffed.
    generator = _MPDiffGenerator(self, keys)
    return generator.compute_diffs()
1186
def get_annotator(self):
    """Return an annotate.Annotator object bound to this versioned file."""
    return annotate.Annotator(self)
1004
1189
missing_keys = index._missing_keys_from_parent_map
1006
1191
def _extract_blocks(self, version_id, source, target):
1194
def _transitive_fallbacks(self):
1195
"""Return the whole stack of fallback versionedfiles.
1197
This VersionedFiles may have a list of fallbacks, but it doesn't
1198
necessarily know about the whole stack going down, and it can't know
1199
at open time because they may change after the objects are opened.
1202
for a_vfs in self._immediate_fallback_vfs:
1203
all_fallbacks.append(a_vfs)
1204
all_fallbacks.extend(a_vfs._transitive_fallbacks())
1205
return all_fallbacks
1010
1208
class ThunkedVersionedFiles(VersionedFiles):
1011
1209
"""Storage for many versioned files thunked onto a 'VersionedFile' class.
1386
1617
elif state == 'conflicted-b':
1387
1618
ch_b = ch_a = True
1388
1619
lines_b.append(line)
1620
elif state == 'killed-both':
1621
# This counts as a change, even though there is no associated
1390
1625
if state not in ('irrelevant', 'ghost-a', 'ghost-b',
1391
'killed-base', 'killed-both'):
1392
1627
raise AssertionError(state)
1393
1628
for struct in outstanding_struct():
1631
def base_from_plan(self):
1632
"""Construct a BASE file from the plan text."""
1634
for state, line in self.plan:
1635
if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
1636
# If unchanged, then this line is straight from base. If a or b
1637
# or both killed the line, then it *used* to be in base.
1638
base_lines.append(line)
1640
if state not in ('killed-base', 'irrelevant',
1641
'ghost-a', 'ghost-b',
1643
'conflicted-a', 'conflicted-b'):
1644
# killed-base, irrelevant means it doesn't apply
1645
# ghost-a/ghost-b are harder to say for sure, but they
1646
# aren't in the 'inc_c' which means they aren't in the
1647
# shared base of a & b. So we don't include them. And
1648
# obviously if the line is newly inserted, it isn't in base
1650
# If 'conflicted-a' or b, then it is new vs one base, but
1651
# old versus another base. However, if we make it present
1652
# in the base, it will be deleted from the target, and it
1653
# seems better to get a line doubled in the merge result,
1654
# rather than have it deleted entirely.
1655
# Example, each node is the 'text' at that point:
1663
# There was a criss-cross conflict merge. Both sides
1664
# include the other, but put themselves first.
1665
# Weave marks this as a 'clean' merge, picking OTHER over
1666
# THIS. (Though the details depend on order inserted into
1668
# LCA generates a plan:
1669
# [('unchanged', M),
1670
# ('conflicted-b', b),
1672
# ('conflicted-a', b),
1674
# If you mark 'conflicted-*' as part of BASE, then a 3-way
1675
# merge tool will cleanly generate "MaN" (as BASE vs THIS
1676
# removes one 'b', and BASE vs OTHER removes the other)
1677
# If you include neither, 3-way creates a clean "MbabN" as
1678
# THIS adds one 'b', and OTHER does too.
1679
# It seems that having the line 2 times is better than
1680
# having it omitted. (Easier to manually delete than notice
1681
# it needs to be added.)
1682
raise AssertionError('Unknown state: %s' % (state,))
1397
1686
class WeaveMerge(PlanWeaveMerge):
1398
1687
"""Weave merge that takes a VersionedFile and two versions as its input."""
1400
def __init__(self, versionedfile, ver_a, ver_b,
1689
def __init__(self, versionedfile, ver_a, ver_b,
1401
1690
a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
1402
1691
plan = versionedfile.plan_merge(ver_a, ver_b)
1403
1692
PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
1406
1695
class VirtualVersionedFiles(VersionedFiles):
1407
"""Dummy implementation for VersionedFiles that uses other functions for
1696
"""Dummy implementation for VersionedFiles that uses other functions for
1408
1697
obtaining fulltexts and parent maps.
1410
This is always on the bottom of the stack and uses string keys
1699
This is always on the bottom of the stack and uses string keys
1411
1700
(rather than tuples) internally.
1469
1758
"""See VersionedFile.iter_lines_added_or_present_in_versions()."""
1470
1759
for i, (key,) in enumerate(keys):
1471
1760
if pb is not None:
1472
pb.update("iterating texts", i, len(keys))
1761
pb.update("Finding changed lines", i, len(keys))
1473
1762
for l in self._get_lines(key):
1766
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    is already present.
    """

    def __init__(self, store):
        # The wrapped VersionedFiles; all attribute access not defined here
        # is delegated to it via __getattr__.
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            # CHK-style key: derive the final key element from the content.
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        # Transparent pass-through for everything else.
        return getattr(self._store, name)
1806
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    The kind is everything up to (and not including) the first newline.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    # +1 skips the newline separator itself.
    return storage_kind, line_end + 1
1817
class NetworkRecordStream(object):
1818
"""A record_stream which reconstitures a serialised stream."""
1820
def __init__(self, bytes_iterator):
1821
"""Create a NetworkRecordStream.
1823
:param bytes_iterator: An iterator of bytes. Each item in this
1824
iterator should have been obtained from a record_streams'
1825
record.get_bytes_as(record.storage_kind) call.
1827
self._bytes_iterator = bytes_iterator
1828
self._kind_factory = {
1829
'fulltext': fulltext_network_to_record,
1830
'groupcompress-block': groupcompress.network_block_to_records,
1831
'knit-ft-gz': knit.knit_network_to_record,
1832
'knit-delta-gz': knit.knit_network_to_record,
1833
'knit-annotated-ft-gz': knit.knit_network_to_record,
1834
'knit-annotated-delta-gz': knit.knit_network_to_record,
1835
'knit-delta-closure': knit.knit_delta_closure_to_records,
1841
:return: An iterator as per VersionedFiles.get_record_stream().
1843
for bytes in self._bytes_iterator:
1844
storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
1845
for record in self._kind_factory[storage_kind](
1846
storage_kind, bytes, line_end):
1850
def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record.

    Wire layout after the kind line: a 4-byte big-endian length, then that
    many bytes of bencoded (key, parents) metadata, then the fulltext.

    :param kind: The storage kind (unused; always 'fulltext' here).
    :param bytes: The complete network bytes of the record.
    :param line_end: Offset just past the kind line, as returned by
        network_bytes_to_kind_and_offset.
    :return: A one-element list holding a FulltextContentFactory.
    """
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        # 'nil' is the wire encoding for "parents unknown" (no-graph store).
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]
1861
def _length_prefix(bytes):
1862
return struct.pack('!L', len(bytes))
1865
def record_to_fulltext_bytes(record):
    """Serialise a content record as network fulltext bytes.

    Inverse of fulltext_network_to_record: emits the 'fulltext' kind line,
    a length-prefixed bencoded (key, parents) header, then the fulltext.
    """
    if record.parents is None:
        # 'nil' is the wire encoding for "parents unknown" (no-graph store).
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
1876
def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            # Single-element keys (or plain strings) have no file-id prefix.
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
1904
class _KeyRefs(object):
1906
def __init__(self, track_new_keys=False):
1907
# dict mapping 'key' to 'set of keys referring to that key'
1910
# set remembering all new keys
1911
self.new_keys = set()
1913
self.new_keys = None
1919
self.new_keys.clear()
1921
def add_references(self, key, refs):
1922
# Record the new references
1923
for referenced in refs:
1925
needed_by = self.refs[referenced]
1927
needed_by = self.refs[referenced] = set()
1929
# Discard references satisfied by the new key
1932
def get_new_keys(self):
1933
return self.new_keys
1935
def get_unsatisfied_refs(self):
1936
return self.refs.iterkeys()
1938
def _satisfy_refs_for_key(self, key):
1942
# No keys depended on this key. That's ok.
1945
def add_key(self, key):
1946
# satisfy refs for key, and remember that we've seen this key.
1947
self._satisfy_refs_for_key(key)
1948
if self.new_keys is not None:
1949
self.new_keys.add(key)
1951
def satisfy_refs_for_keys(self, keys):
1953
self._satisfy_refs_for_key(key)
1955
def get_referrers(self):
1957
for referrers in self.refs.itervalues():
1958
result.update(referrers)