class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}
    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be tiny
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if not parent_keys:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts
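    # Worked example for _find_needed_keys (hypothetical keys): for a linear
    # ancestry A <- B <- C where [B, C] are requested, needed_keys becomes
    # {A, B, C} and refcounts is {A: 1, B: 1}, so each parent's cached text
    # can be dropped from self.chunks as soon as its last child is diffed.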
    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self._diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff
    def _process_one_record(self, key, this_chunks):
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks
    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
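
# Hedged usage sketch (illustrative only; ``vf`` is assumed to be any
# versioned-file store whose keys below are already present). Texts are
# streamed in topological order, but the diffs come back in request order:
def _example_compute_mpdiffs(vf):
    keys = [('file-id', 'rev-2'), ('file-id', 'rev-1')]  # hypothetical keys
    generator = _MPDiffGenerator(vf, keys)
    diffs = generator.compute_diffs()
    # diffs[0] is the MultiParent diff for ('file-id', 'rev-2')
    return diffs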
class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.
    """

    def add_lines(self, version_id, parents, lines, parent_texts=None,
        left_matching_blocks=None, nostore_sha=None, random_id=False,
        check_content=True):
        """Add a single text on top of the versioned file."""
        raise NotImplementedError(self.add_lines)
    def _add_text(self, key, parents, text, nostore_sha=None, random_id=False):
        """Add a text to the store.

        This is a private function for use by CommitBuilder.

        :param key: The key tuple of the text to add. If the last element is
            None, a CHK string will be generated during the addition.
        :param parents: The parents key tuples of the text to add.
        :param text: A string containing the text to be committed.
        :param nostore_sha: Raise ExistingContent and do not add the lines to
            the versioned file if the digest of the lines matches this.
        :param random_id: If True a random id has been selected rather than
            an id determined by some deterministic process such as a converter
            from a foreign VCS. When True the backend may choose not to check
            for uniqueness of the resulting key within the versioned file, so
            this should only be done when the result is expected to be unique
            anyway.
        :param check_content: If True, the lines supplied are verified to be
            bytestrings that are correctly formed lines.
        :return: The text sha1, the number of bytes in the text, and an opaque
            representation of the inserted version which can be provided
            back to future _add_text calls in the parent_texts dictionary.
        """
        # The default implementation just thunks over to .add_lines(),
        # inefficient, but it works.
        return self.add_lines(key, parents, osutils.split_lines(text),
                              nostore_sha=nostore_sha,
                              random_id=random_id,
                              check_content=True)
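    # Hedged sketch (hypothetical key and text): the default implementation
    # simply splits the text and delegates, so
    #
    #   vf._add_text(('file-id', 'rev-1'), (), 'one\ntwo\n')
    #
    # behaves like
    #
    #   vf.add_lines(('file-id', 'rev-1'), (), ['one\n', 'two\n'])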
    def add_mpdiffs(self, records):
        """Add mpdiffs to this VersionedFile.

        Records should be iterables of version, parents, expected_sha1,
        mpdiff. mpdiff should be a MultiParent instance.
        """

    def annotate(self, version_id):
        """Return a list of (version-id, line) tuples for version_id.

        :raise RevisionNotPresent: If the given version is
            not present in file history.
        """
        raise NotImplementedError(self.annotate)
    def check(self, progress_bar=None, keys=None):
        """Check this object for integrity.

        :param progress_bar: A progress bar to output as the check progresses.
        :param keys: Specific keys within the VersionedFiles to check. When
            this parameter is not None, check() becomes a generator as per
            get_record_stream. The difference to get_record_stream is that
            more or deeper checks will be performed.
        :return: None, or if keys was supplied a generator as per
            get_record_stream.
        """
        raise NotImplementedError(self.check)
    @staticmethod
    def check_not_reserved_id(version_id):
        revision.check_not_reserved_id(version_id)
    def clear_cache(self):
        """Clear whatever caches this VersionedFile holds.

        This is generally called after an operation has been performed, when we
        don't expect to be using this versioned file again soon.
        """
    def _check_lines_not_unicode(self, lines):
        """Check that lines being added to a versioned file are not unicode."""
        for line in lines:
            if line.__class__ is not str:
                raise errors.BzrBadParameterUnicode("lines")
    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()
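    # Hedged sketch (hypothetical keys): diffs are returned in request order,
    # even though texts are extracted in topological order:
    #
    #   diffs = vf.make_mpdiffs([('file-id', 'rev-2'), ('file-id', 'rev-1')])
    #   # diffs[0] corresponds to ('file-id', 'rev-2')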
    def get_annotator(self):
        return annotate.Annotator(self)
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct
    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    #
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    #
                    # Example, each node is the 'text' at that point:
                    #           MN
                    #          /   \
                    #        MaN   MbN
                    #         |  X  |
                    #        MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
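    # Worked example: for the criss-cross plan shown in the comment above,
    # base_from_plan() keeps only the 'unchanged' lines, so BASE is the lines
    # M, a, N; both 'conflicted-*' b lines are deliberately excluded.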
class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
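
# Hedged usage sketch (hypothetical version ids; assumes the merge_lines()
# interface inherited from bzrlib's TextMerge, which returns the merged
# lines and a conflict indicator):
def _example_weave_merge(weave):
    merger = WeaveMerge(weave, 'rev-a', 'rev-b')
    lines, conflicts = merger.merge_lines()
    return list(lines), conflicts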
class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """
    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)
class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    for the content is already present.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
                parent_texts=parent_texts,
                left_matching_blocks=left_matching_blocks,
                nostore_sha=nostore_sha, random_id=random_id,
                check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
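
# Hedged usage sketch (illustrative only; ``backing_store`` is assumed to be
# any VersionedFiles implementation). Identical content hashes to the same
# sha1-based key, so the second insert returns early without storing:
def _example_no_dupe_add_lines(backing_store):
    store = NoDupeAddLinesDecorator(backing_store)
    store.add_lines((None,), (), ['same content\n'])
    return store.add_lines((None,), (), ['same content\n'])  # skipped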
def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off the record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
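
# Hedged worked example: the storage kind is everything before the first
# newline, and the returned offset points just past it.
def _example_kind_and_offset():
    kind, offset = network_bytes_to_kind_and_offset('fulltext\nrest')
    # kind == 'fulltext', offset == 9 (the index where 'rest' starts)
    return kind, offset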
class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }
    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record
def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]
def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
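
# Hedged round-trip sketch (illustrative only; ``record`` is assumed to be a
# content factory with a 'fulltext' representation). The wire format is the
# kind line, a 4-byte big-endian length, the bencoded (key, parents)
# metadata, then the fulltext:
def _example_fulltext_round_trip(record):
    wire_bytes = record_to_fulltext_bytes(record)
    kind, offset = network_bytes_to_kind_and_offset(wire_bytes)
    factory, = fulltext_network_to_record(kind, wire_bytes, offset)
    return factory.get_bytes_as('fulltext') == record.get_bytes_as('fulltext')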
def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
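
# Hedged worked example (hypothetical keys): keys are grouped by file-id
# prefix, and each group is emitted in reverse-topological order (children
# before their parents):
def _example_sort_groupcompress():
    parent_map = {
        ('f1', 'B'): (('f1', 'A'),),
        ('f1', 'A'): (),
        ('f2', 'X'): (),
        }
    # Expected: [('f1', 'B'), ('f1', 'A'), ('f2', 'X')]
    return sort_groupcompress(parent_map)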