        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
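
# Illustrative usage sketch, not part of the bzrlib API: the key and chunks
# below are made up for the example. 'chunked' hands back the list as given,
# while 'fulltext' joins it into a single string.
def _demo_chunked_content_factory():
    chunks = ['first line\n', 'second line\n']
    factory = ChunkedContentFactory(('rev-1',), None,
        osutils.sha_strings(chunks), chunks)
    assert factory.get_bytes_as('chunked') == chunks
    assert factory.get_bytes_as('fulltext') == 'first line\nsecond line\n'
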
class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """


class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
            parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
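
# Illustrative usage sketch, not part of the bzrlib API: 'vf' stands for any
# VersionedFiles implementation that already contains the requested texts and
# their parent information; the keys are made up for the example.
def _demo_mpdiff_generator(vf):
    keys = [('rev-1',), ('rev-2',)]
    generator = _MPDiffGenerator(vf, keys)
    diffs = generator.compute_diffs()
    # One multiparent.MultiParent per requested key, in request order.
    assert len(diffs) == len(keys)
    return diffs
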

class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.
    """


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """

    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                                'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                            include_delta_closure):
                yield record
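
# Illustrative usage sketch, not part of the bzrlib API: 'backing_vf' is
# assumed to already contain records for the made-up keys below. With
# key_priority {('b',): 1}, the priority-0 keys come first in
# lexicographical order, followed by ('b',).
def _demo_ordering_decorator(backing_vf):
    vf = OrderingVersionedFilesDecorator(backing_vf, {('b',): 1})
    stream = vf.get_record_stream([('b',), ('a',), ('c',)], 'unordered', False)
    assert [record.key for record in stream] == [('a',), ('c',), ('b',)]
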

class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""


    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by VersionedFileCommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id,
                              check_content=True)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()
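
    # Illustrative usage sketch (comment only, not part of the API): the
    # resulting diffs can be fed back through add_mpdiffs on a store that
    # already has the parent texts, e.g.:
    #   diff = source_vf.make_mpdiffs([key])[0]
    #   target_vf.add_mpdiffs([(key, parents, expected_sha1, diff)])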

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class."""

            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                                 'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MbaN  MabN
                    #          \   /
                    #          ?????
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                        sha1=osutils.sha_strings(lines),
                        chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)
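
# Illustrative usage sketch, not part of the bzrlib API, assuming the
# (get_parent_map, get_lines) constructor of VirtualVersionedFiles: the two
# callables are backed by plain dicts with made-up revision ids (parent maps
# use tuple keys, get_lines plain string keys).
def _demo_virtual_versioned_files():
    texts = {'rev-1': ['a line\n']}
    parents = {('rev-1',): ()}
    vvf = VirtualVersionedFiles(
        lambda keys: dict((k, parents[k]) for k in keys if k in parents),
        texts.get)
    assert list(vvf.iter_lines_added_or_present_in_keys([('rev-1',)])) == [
        ('a line\n', 'rev-1')]
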

class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    is already present.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha, random_id=random_id,
            check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
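
# Illustrative usage sketch, not part of the bzrlib API: 'store' stands for
# any VersionedFiles implementation. A key ending in None makes the decorator
# derive a CHK-style key from the content sha1, so adding identical lines
# twice only stores them once.
def _demo_no_dupe_add_lines(store):
    vf = NoDupeAddLinesDecorator(store)
    first = vf.add_lines((None,), (), ['some text\n'])
    again = vf.add_lines((None,), (), ['some text\n'])
    # Both calls report the same sha1; the second insert was skipped.
    assert first[0] == again[0]
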

def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off the record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record stream's
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
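
# Illustrative round-trip sketch, not part of the bzrlib API: the key and
# content are made up. A fulltext record serialised with
# record_to_fulltext_bytes decodes back to an equivalent record.
def _demo_fulltext_network_round_trip():
    record = FulltextContentFactory(('rev-1',), None, None, 'payload\n')
    bytes = record_to_fulltext_bytes(record)
    kind, offset = network_bytes_to_kind_and_offset(bytes)
    assert kind == 'fulltext'
    decoded, = fulltext_network_to_record(kind, bytes, offset)
    assert decoded.key == ('rev-1',)
    assert decoded.get_bytes_as('fulltext') == 'payload\n'
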

def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
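
# Illustrative usage sketch, not part of the bzrlib API: the file ids and
# revision ids are made up. Keys are grouped by prefix (the file id) and each
# group comes out in reverse topological order.
def _demo_sort_groupcompress():
    parent_map = {
        ('f1', 'rev-1'): (),
        ('f1', 'rev-2'): (('f1', 'rev-1'),),
        ('f2', 'rev-1'): (),
        }
    assert sort_groupcompress(parent_map) == [
        ('f1', 'rev-2'), ('f1', 'rev-1'), ('f2', 'rev-1')]
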

class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.iterkeys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key.  That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        result = set()
        for referrers in self.refs.itervalues():
            result.update(referrers)
        return result
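
# Illustrative usage sketch, not part of the bzrlib API: the keys are made up.
# A reference stays "unsatisfied" until the referenced key itself is added.
def _demo_key_refs():
    refs = _KeyRefs()
    refs.add_references(('child',), [('parent',)])
    assert set(refs.get_unsatisfied_refs()) == set([('parent',)])
    refs.add_key(('parent',))
    assert set(refs.get_unsatisfied_refs()) == set()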