class _MPDiffGenerator(object):
    """Pull out the functionality for generating mp_diffs."""

    def __init__(self, vf, keys):
        self.vf = vf
        # This is the order the keys were requested in
        self.ordered_keys = tuple(keys)
        # keys + their parents, what we need to compute the diffs
        self.needed_keys = ()
        # Map from key: mp_diff
        self.diffs = {}
        # Map from key: parents_needed (may have ghosts)
        self.parent_map = {}
        # Parents that aren't present
        self.ghost_parents = ()
        # Map from parent_key => number of children for this text
        self.refcounts = {}
        # Content chunks that are cached while we still need them
        self.chunks = {}

    def _find_needed_keys(self):
        """Find the set of keys we need to request.

        This includes all the original keys passed in, and the non-ghost
        parents of those keys.

        :return: (needed_keys, refcounts)
            needed_keys is the set of all texts we need to extract
            refcounts is a dict of {key: num_children} letting us know when we
                no longer need to cache a given parent text
        """
        # All the keys and their parents
        needed_keys = set(self.ordered_keys)
        parent_map = self.vf.get_parent_map(needed_keys)
        self.parent_map = parent_map
        # TODO: Should we be using a different construct here? I think this
        #       uses difference_update internally, and we expect the result to
        #       be small
        missing_keys = needed_keys.difference(parent_map)
        if missing_keys:
            raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
        # Parents that might be missing. They are allowed to be ghosts, but we
        # should check for them
        refcounts = {}
        setdefault = refcounts.setdefault
        just_parents = set()
        for child_key, parent_keys in parent_map.iteritems():
            if not parent_keys:
                # parent_keys may be None if a given VersionedFile claims to
                # not support graph operations.
                continue
            just_parents.update(parent_keys)
            needed_keys.update(parent_keys)
            for p in parent_keys:
                refcounts[p] = setdefault(p, 0) + 1
        just_parents.difference_update(parent_map)
        # Remove any parents that are actually ghosts from the needed set
        self.present_parents = set(self.vf.get_parent_map(just_parents))
        self.ghost_parents = just_parents.difference(self.present_parents)
        needed_keys.difference_update(self.ghost_parents)
        self.needed_keys = needed_keys
        self.refcounts = refcounts
        return needed_keys, refcounts

    def _compute_diff(self, key, parent_lines, lines):
        """Compute a single mp_diff, and store it in self.diffs"""
        if len(parent_lines) > 0:
            # XXX: _extract_blocks is not usefully defined anywhere...
            #      It was meant to extract the left-parent diff without
            #      having to recompute it for Knit content (pack-0.92,
            #      etc). That seems to have regressed somewhere
            left_parent_blocks = self.vf._extract_blocks(key,
                parent_lines[0], lines)
        else:
            left_parent_blocks = None
        diff = multiparent.MultiParent.from_lines(lines,
                    parent_lines, left_parent_blocks)
        self.diffs[key] = diff

    def _process_one_record(self, key, this_chunks):
        parent_keys = None
        if key in self.parent_map:
            # This record should be ready to diff, since we requested
            # content in 'topological' order
            parent_keys = self.parent_map.pop(key)
            # If a VersionedFile claims 'no-graph' support, then it may return
            # None for any parent request, so we replace it with an empty tuple
            if parent_keys is None:
                parent_keys = ()
            parent_lines = []
            for p in parent_keys:
                # Alternatively we could check p not in self.needed_keys, but
                # ghost_parents should be tiny versus huge
                if p in self.ghost_parents:
                    continue
                refcount = self.refcounts[p]
                if refcount == 1: # Last child reference
                    self.refcounts.pop(p)
                    parent_chunks = self.chunks.pop(p)
                else:
                    self.refcounts[p] = refcount - 1
                    parent_chunks = self.chunks[p]
                p_lines = osutils.chunks_to_lines(parent_chunks)
                # TODO: Should we cache the line form? We did the
                #       computation to get it, but storing it this way will
                #       be less memory efficient...
                parent_lines.append(p_lines)
            lines = osutils.chunks_to_lines(this_chunks)
            # Since we needed the lines, we'll go ahead and cache them this way
            this_chunks = lines
            self._compute_diff(key, parent_lines, lines)
            del lines
        # Is this content required for any more children?
        if key in self.refcounts:
            self.chunks[key] = this_chunks

    def _extract_diffs(self):
        needed_keys, refcounts = self._find_needed_keys()
        for record in self.vf.get_record_stream(needed_keys,
                                                'topological', True):
            if record.storage_kind == 'absent':
                raise errors.RevisionNotPresent(record.key, self.vf)
            self._process_one_record(record.key,
                                     record.get_bytes_as('chunked'))

    def compute_diffs(self):
        self._extract_diffs()
        dpop = self.diffs.pop
        return [dpop(k) for k in self.ordered_keys]
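# A minimal usage sketch (illustrative, not from the original source; 'vf'
# stands for any VersionedFiles implementation and the revision keys are made
# up). This mirrors how make_mpdiffs drives the generator below:
#
#   gen = _MPDiffGenerator(vf, [('rev-2',), ('rev-1',)])
#   diffs = gen.compute_diffs()
#   # diffs[0] is the multiparent.MultiParent for ('rev-2',), diffs[1] for
#   # ('rev-1',): results come back in request order even though the texts
#   # are extracted in topological order.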

class VersionedFile(object):
    """Versioned text file storage.

    A versioned file manages versions of line-based text files,
    keeping track of the originating version for each line.
    """
    def make_mpdiffs(self, keys):
        """Create multiparent diffs for specified keys."""
        generator = _MPDiffGenerator(self, keys)
        return generator.compute_diffs()

    def get_annotator(self):
        return annotate.Annotator(self)

    missing_keys = index._missing_keys_from_parent_map
    def _extract_blocks(self, version_id, source, target):
        return None

    def _transitive_fallbacks(self):
        """Return the whole stack of fallback versionedfiles.

        This VersionedFiles may have a list of fallbacks, but it doesn't
        necessarily know about the whole stack going down, and it can't know
        at open time because they may change after the objects are opened.
        """
        all_fallbacks = []
        for a_vfs in self._immediate_fallback_vfs:
            all_fallbacks.append(a_vfs)
            all_fallbacks.extend(a_vfs._transitive_fallbacks())
        return all_fallbacks

class ThunkedVersionedFiles(VersionedFiles):
    """Storage for many versioned files thunked onto a 'VersionedFile' class."""
            elif state == 'conflicted-b':
                ch_b = ch_a = True
                lines_b.append(line)
            elif state == 'killed-both':
                # This counts as a change, even though there is no associated
                # line
                ch_b = ch_a = True
            else:
                if state not in ('irrelevant', 'ghost-a', 'ghost-b',
                        'killed-base'):
                    raise AssertionError(state)
        for struct in outstanding_struct():
            yield struct
    def base_from_plan(self):
        """Construct a BASE file from the plan text."""
        base_lines = []
        for state, line in self.plan:
            if state in ('killed-a', 'killed-b', 'killed-both', 'unchanged'):
                # If unchanged, then this line is straight from base. If a or b
                # or both killed the line, then it *used* to be in base.
                base_lines.append(line)
            else:
                if state not in ('killed-base', 'irrelevant',
                                 'ghost-a', 'ghost-b',
                                 'new-a', 'new-b',
                                 'conflicted-a', 'conflicted-b'):
                    # killed-base, irrelevant means it doesn't apply
                    # ghost-a/ghost-b are harder to say for sure, but they
                    # aren't in the 'inc_c' which means they aren't in the
                    # shared base of a & b. So we don't include them. And
                    # obviously if the line is newly inserted, it isn't in base
                    # If 'conflicted-a' or b, then it is new vs one base, but
                    # old versus another base. However, if we make it present
                    # in the base, it will be deleted from the target, and it
                    # seems better to get a line doubled in the merge result,
                    # rather than have it deleted entirely.
                    # Example, each node is the 'text' at that point:
                    #            MN
                    #           /   \
                    #         MaN   MbN
                    #          |  X  |
                    #         MabN MbaN
                    #          \   /
                    #           ???
                    # There was a criss-cross conflict merge. Both sides
                    # include the other, but put themselves first.
                    # Weave marks this as a 'clean' merge, picking OTHER over
                    # THIS. (Though the details depend on order inserted into
                    # weave, etc.)
                    # LCA generates a plan:
                    # [('unchanged', M),
                    #  ('conflicted-b', b),
                    #  ('unchanged', a),
                    #  ('killed-b', b),
                    #  ('conflicted-a', b),
                    #  ('unchanged', N)]
                    # If you mark 'conflicted-*' as part of BASE, then a 3-way
                    # merge tool will cleanly generate "MaN" (as BASE vs THIS
                    # removes one 'b', and BASE vs OTHER removes the other)
                    # If you include neither, 3-way creates a clean "MbabN" as
                    # THIS adds one 'b', and OTHER does too.
                    # It seems that having the line 2 times is better than
                    # having it omitted. (Easier to manually delete than notice
                    # it needs to be added.)
                    raise AssertionError('Unknown state: %s' % (state,))
        return base_lines
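    # A small worked illustration (not from the original source; plan states
    # as used above). base_from_plan keeps 'unchanged' lines and lines a side
    # killed, since those *used* to be in base, and drops inserted lines:
    #
    #   plan = [('unchanged', 'M\n'), ('killed-a', 'x\n'), ('new-a', 'y\n')]
    #   # -> base_lines == ['M\n', 'x\n']; the newly-inserted 'y\n' is not
    #   #    part of BASE.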

class WeaveMerge(PlanWeaveMerge):
    """Weave merge that takes a VersionedFile and two versions as its input."""

    def __init__(self, versionedfile, ver_a, ver_b,
        a_marker=PlanWeaveMerge.A_MARKER, b_marker=PlanWeaveMerge.B_MARKER):
        plan = versionedfile.plan_merge(ver_a, ver_b)
        PlanWeaveMerge.__init__(self, plan, a_marker, b_marker)
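
# Usage sketch (illustrative names; 'vf' is a weave-backed VersionedFile that
# supports plan_merge, and merge_lines() is inherited from the TextMerge base
# class):
#
#   merger = WeaveMerge(vf, 'rev-a', 'rev-b')
#   lines, conflicts = merger.merge_lines()
#   # 'lines' is the merged text, with a_marker/b_marker wrapped around any
#   # conflicted regions.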

class VirtualVersionedFiles(VersionedFiles):
    """Dummy implementation for VersionedFiles that uses other functions for
    obtaining fulltexts and parent maps.

    This is always on the bottom of the stack and uses string keys
    (rather than tuples) internally.
    """

    def __init__(self, get_parent_map, get_lines):
        """Create a VirtualVersionedFiles.

        :param get_parent_map: Same signature as Repository.get_parent_map.
        :param get_lines: Should return lines for specified key or None if
            not available.
        """
        super(VirtualVersionedFiles, self).__init__()
        self._get_parent_map = get_parent_map
        self._get_lines = get_lines
    def check(self, progressbar=None):
        """See VersionedFiles.check.

        :note: Always returns True for VirtualVersionedFiles.
        """
        return True

    def add_mpdiffs(self, records):
        """See VersionedFiles.add_mpdiffs.

        :note: Not implemented for VirtualVersionedFiles.
        """
        raise NotImplementedError(self.add_mpdiffs)

    def get_parent_map(self, keys):
        """See VersionedFiles.get_parent_map."""
        return dict([((k,), tuple([(p,) for p in v]))
            for k, v in self._get_parent_map([k for (k,) in keys]).iteritems()])
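
    # Illustration (made-up data, not from the original source): the string
    # keys used internally are wrapped into the tuple keys the VersionedFiles
    # API expects, on both sides of the mapping:
    #
    #   vvf = VirtualVersionedFiles(
    #       lambda keys: {'B': ('A',)} if 'B' in keys else {},
    #       lambda key: ['line\n'])
    #   vvf.get_parent_map([('B',)])   # -> {('B',): (('A',),)}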
    def get_sha1s(self, keys):
        """See VersionedFiles.get_sha1s."""
        ret = {}
        for (k,) in keys:
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                ret[(k,)] = osutils.sha_strings(lines)
        return ret

    def get_record_stream(self, keys, ordering, include_delta_closure):
        """See VersionedFiles.get_record_stream."""
        for (k,) in list(keys):
            lines = self._get_lines(k)
            if lines is not None:
                if not isinstance(lines, list):
                    raise AssertionError
                yield ChunkedContentFactory((k,), None,
                    sha1=osutils.sha_strings(lines),
                    chunks=lines)
            else:
                yield AbsentContentFactory((k,))

    def iter_lines_added_or_present_in_keys(self, keys, pb=None):
        """See VersionedFile.iter_lines_added_or_present_in_versions()."""
        for i, (key,) in enumerate(keys):
            if pb is not None:
                pb.update("Finding changed lines", i, len(keys))
            for l in self._get_lines(key):
                yield (l, key)

class NoDupeAddLinesDecorator(object):
    """Decorator for a VersionedFiles that skips doing an add_lines if the key
    was already present.
    """

    def __init__(self, store):
        self._store = store

    def add_lines(self, key, parents, lines, parent_texts=None,
            left_matching_blocks=None, nostore_sha=None, random_id=False,
            check_content=True):
        """See VersionedFiles.add_lines.

        This implementation may return None as the third element of the return
        value when the original store wouldn't.
        """
        if nostore_sha:
            raise NotImplementedError(
                "NoDupeAddLinesDecorator.add_lines does not implement the "
                "nostore_sha behaviour.")
        if key[-1] is None:
            sha1 = osutils.sha_strings(lines)
            key = ("sha1:" + sha1,)
        else:
            sha1 = None
        if key in self._store.get_parent_map([key]):
            # This key has already been inserted, so don't do it again.
            if sha1 is None:
                sha1 = osutils.sha_strings(lines)
            return sha1, sum(map(len, lines)), None
        return self._store.add_lines(key, parents, lines,
            parent_texts=parent_texts,
            left_matching_blocks=left_matching_blocks,
            nostore_sha=nostore_sha, random_id=random_id,
            check_content=check_content)

    def __getattr__(self, name):
        return getattr(self._store, name)
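
# Usage sketch (hypothetical 'real_store'): wrap any VersionedFiles so that
# re-adding content under a key that is already present becomes a no-op that
# still reports the sha1 and length:
#
#   store = NoDupeAddLinesDecorator(real_store)
#   store.add_lines(key, parents, lines)  # stored
#   store.add_lines(key, parents, lines)  # skipped; key already present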

def network_bytes_to_kind_and_offset(network_bytes):
    """Strip off the record kind from the front of network_bytes.

    :param network_bytes: The bytes of a record.
    :return: A tuple (storage_kind, offset_of_remaining_bytes)
    """
    line_end = network_bytes.find('\n')
    storage_kind = network_bytes[:line_end]
    return storage_kind, line_end + 1
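
# For example, a serialised fulltext record begins with its kind name and a
# newline, so (made-up payload):
#
#   network_bytes_to_kind_and_offset('fulltext\n<meta><content>')
#   # -> ('fulltext', 9); the record body starts at offset 9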

class NetworkRecordStream(object):
    """A record_stream which reconstitutes a serialised stream."""

    def __init__(self, bytes_iterator):
        """Create a NetworkRecordStream.

        :param bytes_iterator: An iterator of bytes. Each item in this
            iterator should have been obtained from a record_streams'
            record.get_bytes_as(record.storage_kind) call.
        """
        self._bytes_iterator = bytes_iterator
        self._kind_factory = {
            'fulltext': fulltext_network_to_record,
            'groupcompress-block': groupcompress.network_block_to_records,
            'knit-ft-gz': knit.knit_network_to_record,
            'knit-delta-gz': knit.knit_network_to_record,
            'knit-annotated-ft-gz': knit.knit_network_to_record,
            'knit-annotated-delta-gz': knit.knit_network_to_record,
            'knit-delta-closure': knit.knit_delta_closure_to_records,
            }

    def read(self):
        """Read the stream.

        :return: An iterator as per VersionedFiles.get_record_stream().
        """
        for bytes in self._bytes_iterator:
            storage_kind, line_end = network_bytes_to_kind_and_offset(bytes)
            for record in self._kind_factory[storage_kind](
                    storage_kind, bytes, line_end):
                yield record

def fulltext_network_to_record(kind, bytes, line_end):
    """Convert a network fulltext record to record."""
    meta_len, = struct.unpack('!L', bytes[line_end:line_end+4])
    record_meta = bytes[line_end+4:line_end+4+meta_len]
    key, parents = bencode.bdecode_as_tuple(record_meta)
    if parents == 'nil':
        parents = None
    fulltext = bytes[line_end+4+meta_len:]
    return [FulltextContentFactory(key, parents, None, fulltext)]

def _length_prefix(bytes):
    return struct.pack('!L', len(bytes))


def record_to_fulltext_bytes(record):
    if record.parents is None:
        parents = 'nil'
    else:
        parents = record.parents
    record_meta = bencode.bencode((record.key, parents))
    record_content = record.get_bytes_as('fulltext')
    return "fulltext\n%s%s%s" % (
        _length_prefix(record_meta), record_meta, record_content)
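
# Round-trip sketch (illustrative 'record'): the two helpers above are
# inverses for fulltext records, which is how record streams cross the wire:
#
#   wire = record_to_fulltext_bytes(record)
#   [rebuilt] = list(NetworkRecordStream([wire]).read())
#   # rebuilt.key == record.key, and rebuilt.get_bytes_as('fulltext')
#   # returns the original text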

def sort_groupcompress(parent_map):
    """Sort and group the keys in parent_map into groupcompress order.

    groupcompress is defined (currently) as reverse-topological order, grouped
    by the key prefix.

    :return: A sorted-list of keys
    """
    # gc-optimal ordering is approximately reverse topological,
    # properly grouped by file-id.
    per_prefix_map = {}
    for item in parent_map.iteritems():
        key = item[0]
        if isinstance(key, str) or len(key) == 1:
            prefix = ''
        else:
            prefix = key[0]
        try:
            per_prefix_map[prefix].append(item)
        except KeyError:
            per_prefix_map[prefix] = [item]

    present_keys = []
    for prefix in sorted(per_prefix_map):
        present_keys.extend(reversed(tsort.topo_sort(per_prefix_map[prefix])))
    return present_keys
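
# Example (made-up keys): within one prefix, parents come after their
# children (reverse-topological), and prefixes are kept grouped:
#
#   parent_map = {('f1', 'b'): (('f1', 'a'),),
#                 ('f1', 'a'): (),
#                 ('f2', 'x'): ()}
#   sort_groupcompress(parent_map)
#   # -> [('f1', 'b'), ('f1', 'a'), ('f2', 'x')]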

class _KeyRefs(object):

    def __init__(self, track_new_keys=False):
        # dict mapping 'key' to 'set of keys referring to that key'
        self.refs = {}
        if track_new_keys:
            # set remembering all new keys
            self.new_keys = set()
        else:
            self.new_keys = None

    def clear(self):
        if self.refs:
            self.refs.clear()
        if self.new_keys:
            self.new_keys.clear()

    def add_references(self, key, refs):
        # Record the new references
        for referenced in refs:
            try:
                needed_by = self.refs[referenced]
            except KeyError:
                needed_by = self.refs[referenced] = set()
            needed_by.add(key)
        # Discard references satisfied by the new key
        self.add_key(key)

    def get_new_keys(self):
        return self.new_keys

    def get_unsatisfied_refs(self):
        return self.refs.iterkeys()

    def _satisfy_refs_for_key(self, key):
        try:
            del self.refs[key]
        except KeyError:
            # No keys depended on this key. That's ok.
            pass

    def add_key(self, key):
        # satisfy refs for key, and remember that we've seen this key.
        self._satisfy_refs_for_key(key)
        if self.new_keys is not None:
            self.new_keys.add(key)

    def satisfy_refs_for_keys(self, keys):
        for key in keys:
            self._satisfy_refs_for_key(key)

    def get_referrers(self):
        result = set()
        for referrers in self.refs.itervalues():
            result.update(referrers)
        return result
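
# Usage sketch (made-up keys): track which referenced keys have not been seen
# yet, e.g. while consuming a stream whose parents may arrive later:
#
#   refs = _KeyRefs()
#   refs.add_references(('B',), [('A',)])  # B refers to A
#   list(refs.get_unsatisfied_refs())      # -> [('A',)]; A not seen yet
#   refs.add_key(('A',))                   # A arrives
#   list(refs.get_unsatisfied_refs())      # -> []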