        self.parents = None


class ChunkedContentFactory(ContentFactory):
    """Static data content factory.

    This takes a 'chunked' list of strings. The only requirement on 'chunked' is
    that ''.join(lines) becomes a valid fulltext. A tuple of a single string
    satisfies this, as does a list of lines.

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'chunked'.
    :ivar key: The key of this content. Each key is a tuple with a single
        string in it.
    :ivar parents: A tuple of parent keys for self.key. If the object has
        no parent information, None (as opposed to () for an empty list of
        parents).
    """

    def __init__(self, key, parents, sha1, chunks):
        """Create a ContentFactory."""
        self.sha1 = sha1
        self.storage_kind = 'chunked'
        self.key = key
        self.parents = parents
        self._chunks = chunks

    def get_bytes_as(self, storage_kind):
        if storage_kind == 'chunked':
            return self._chunks
        elif storage_kind == 'fulltext':
            return ''.join(self._chunks)
        raise errors.UnavailableRepresentation(self.key, storage_kind,
            self.storage_kind)
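
# Illustrative usage sketch (not part of the original module): any sequence of
# strings whose concatenation is the fulltext is acceptable as 'chunks'.
#
#   factory = ChunkedContentFactory(('rev-1',), (), None,
#                                   ['first line\n', 'second line\n'])
#   factory.get_bytes_as('fulltext')   # -> 'first line\nsecond line\n'
#   factory.get_bytes_as('chunked')    # -> ['first line\n', 'second line\n']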


class FulltextContentFactory(ContentFactory):
    """Static data content factory.

    This takes a fulltext when created and just returns that during
    get_bytes_as('fulltext').

    :ivar sha1: None, or the sha1 of the content fulltext.
    :ivar storage_kind: The native storage kind of this factory. Always
        'fulltext'.
    """


class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be small
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if parent_keys is None:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts
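
    # A worked sketch of the refcounting above (keys are hypothetical):
    # given parent_map {('b',): (('a',),), ('c',): (('a',),)}, and assuming
    # ('a',) is itself present, _find_needed_keys returns
    # needed_keys == set([('a',), ('b',), ('c',)]) and
    # refcounts == {('a',): 2}, so the cached chunks for ('a',) can be
    # released once its second child has been diffed.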

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
            parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
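
# A minimal usage sketch (assuming 'vf' is a VersionedFiles holding the
# requested texts): diffs come back in the order the keys were requested.
#
#   diffs = _MPDiffGenerator(vf, [('rev-2',), ('rev-1',)]).compute_diffs()
#   # diffs[0] is the mp_diff for ('rev-2',), diffs[1] for ('rev-1',)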


class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.
    """

        return self._backing_vf.keys()


class OrderingVersionedFilesDecorator(RecordingVersionedFilesDecorator):
    """A VF that records calls, and returns keys in specific order.

    :ivar calls: A list of the calls made; can be reset at any time by
        assigning [] to it.
    """
    def __init__(self, backing_vf, key_priority):
        """Create a RecordingVersionedFilesDecorator decorating backing_vf.

        :param backing_vf: The versioned file to answer all methods.
        :param key_priority: A dictionary defining what order keys should be
            returned from an 'unordered' get_record_stream request.
            Keys with lower priority are returned first, keys not present in
            the map get an implicit priority of 0, and are returned in
            lexicographical order.
        """
        RecordingVersionedFilesDecorator.__init__(self, backing_vf)
        self._key_priority = key_priority
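
    # A hedged sketch of the resulting order (hypothetical keys): with
    # key_priority {('b',): 1}, an 'unordered' request for
    # [('a',), ('b',), ('c',)] yields ('a',) then ('c',) (both have the
    # implicit priority 0, so they sort lexicographically), then ('b',).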

    def get_record_stream(self, keys, sort_order, include_delta_closure):
        self.calls.append(("get_record_stream", list(keys), sort_order,
            include_delta_closure))
        if sort_order == 'unordered':
            def sort_key(key):
                return (self._key_priority.get(key, 0), key)
            # Use a defined order by asking for the keys one-by-one from the
            # backing_vf
            for key in sorted(keys, key=sort_key):
                for record in self._backing_vf.get_record_stream([key],
                        'unordered', include_delta_closure):
                    yield record
        else:
            for record in self._backing_vf.get_record_stream(keys, sort_order,
                    include_delta_closure):
                yield record


class KeyMapper(object):
    """KeyMappers map between keys and underlying partitioned storage."""
        raise NotImplementedError(self.add_lines)

    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by VersionedFileCommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id,
                              check_content=True)

    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile."""

    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map

    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks


class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class.
    """

            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base', 'killed-both'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct

    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base

                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines


class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)


class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                    sha1=osutils.sha_strings(lines),
                    chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)


class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    was already present.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
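
# A short sketch (assuming 'store' is any VersionedFiles): a key ending in
# None makes the decorator derive a CHK-style key from the content sha1, and
# re-adding identical content short-circuits without touching the store.
#
#   no_dupes = NoDupeAddLinesDecorator(store)
#   sha1, length, _ = no_dupes.add_lines((None,), (), ['some text\n'])
#   # a second, identical call hits the "already been inserted" path and
#   # returns the same sha1/length with None as the third element.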


def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off a record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
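
# For example, a serialised fulltext record starts with its kind and a
# newline, so (illustrative):
#
#   network_bytes_to_kind_and_offset('fulltext\n<rest of record>')
#   # -> ('fulltext', 9)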


class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record


def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]


def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
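
# An illustrative round-trip (hypothetical key): serialising a record and
# parsing it back recovers the key, parents and text.
#
#   rec = FulltextContentFactory(('rev-1',), None, None, 'hello\n')
#   wire = record_to_fulltext_bytes(rec)
#   kind, offset = network_bytes_to_kind_and_offset(wire)   # kind == 'fulltext'
#   rec2, = fulltext_network_to_record(kind, wire, offset)
#   # rec2.key == ('rev-1',); rec2.parents is None ('nil' on the wire);
#   # rec2.get_bytes_as('fulltext') == 'hello\n'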


def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the prefix of the key.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
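
# A small sketch with hypothetical (file_id, revision_id) keys: keys are
# grouped by prefix, and each group comes out child-before-parent.
#
#   parent_map = {('f1', 'r2'): (('f1', 'r1'),),
#                 ('f1', 'r1'): (),
#                 ('f2', 'r1'): ()}
#   sort_groupcompress(parent_map)
#   # -> [('f1', 'r2'), ('f1', 'r1'), ('f2', 'r1')]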


class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.iterkeys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key. That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        result = set()
        for referrers in self.refs.itervalues():
            result.update(referrers)
        return result
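
# A brief sketch of the bookkeeping (hypothetical keys):
#
#   refs = _KeyRefs()
#   refs.add_references(('b',), [('a',)])    # ('b',) refers to ('a',)
#   refs.get_referrers()                     # -> set([('b',)])
#   list(refs.get_unsatisfied_refs())        # -> [('a',)]
#   refs.add_key(('a',))                     # ('a',) arrives; ref satisfied
#   list(refs.get_unsatisfied_refs())        # -> []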