209
class _MPDiffGenerator(object):
210
"""Pull out the functionality for generating mp_diffs."""
212
def __init__(self, vf, keys):
214
# This is the order the keys were requested in
215
self.ordered_keys = tuple(keys)
216
# keys + their parents, what we need to compute the diffs
217
self.needed_keys = ()
218
# Map from key: mp_diff
220
# Map from key: parents_needed (may have ghosts)
222
# Parents that aren't present
223
self.ghost_parents = ()
224
# Map from parent_key => number of children for this text
226
# Content chunks that are cached while we still need them
229
def _find_needed_keys(self):
230
"""Find the set of keys we need to request.
232
This includes all the original keys passed in, and the non-ghost
233
parents of those keys.
235
:return: (needed_keys, refcounts)
236
needed_keys is the set of all texts we need to extract
237
refcounts is a dict of {key: num_children} letting us know when we
238
no longer need to cache a given parent text
240
# All the keys and their parents
241
needed_keys = set(self.ordered_keys)
242
parent_map = self.vf.get_parent_map(needed_keys)
243
self.parent_map = parent_map
244
# TODO: Should we be using a different construct here? I think this
245
# uses difference_update internally, and we expect the result to
247
missing_keys = needed_keys.difference(parent_map)
249
raise errors.RevisionNotPresent(list(missing_keys)[0], self.vf)
250
# Parents that might be missing. They are allowed to be ghosts, but we
251
# should check for them
253
setdefault = refcounts.setdefault
255
for child_key, parent_keys in parent_map.iteritems():
257
# parent_keys may be None if a given VersionedFile claims to
258
# not support graph operations.
260
just_parents.update(parent_keys)
261
needed_keys.update(parent_keys)
262
for p in parent_keys:
263
refcounts[p] = setdefault(p, 0) + 1
264
just_parents.difference_update(parent_map)
265
# Remove any parents that are actually ghosts from the needed set
266
self.present_parents = set(self.vf.get_parent_map(just_parents))
267
self.ghost_parents = just_parents.difference(self.present_parents)
268
needed_keys.difference_update(self.ghost_parents)
269
self.needed_keys = needed_keys
270
self.refcounts = refcounts
271
return needed_keys, refcounts
273
def _compute_diff(self, key, parent_lines, lines):
274
"""Compute a single mp_diff, and store it in self._diffs"""
275
if len(parent_lines) > 0:
276
# XXX: _extract_blocks is not usefully defined anywhere...
277
# It was meant to extract the left-parent diff without
278
# having to recompute it for Knit content (pack-0.92,
279
# etc). That seems to have regressed somewhere
280
left_parent_blocks = self.vf._extract_blocks(key,
281
parent_lines[0], lines)
283
left_parent_blocks = None
284
diff = multiparent.MultiParent.from_lines(lines,
285
parent_lines, left_parent_blocks)
286
self.diffs[key] = diff
288
def _process_one_record(self, key, this_chunks):
290
if key in self.parent_map:
291
# This record should be ready to diff, since we requested
292
# content in 'topological' order
293
parent_keys = self.parent_map.pop(key)
294
# If a VersionedFile claims 'no-graph' support, then it may return
295
# None for any parent request, so we replace it with an empty tuple
296
if parent_keys is None:
299
for p in parent_keys:
300
# Alternatively we could check p not in self.needed_keys, but
301
# ghost_parents should be tiny versus huge
302
if p in self.ghost_parents:
304
refcount = self.refcounts[p]
305
if refcount == 1: # Last child reference
306
self.refcounts.pop(p)
307
parent_chunks = self.chunks.pop(p)
309
self.refcounts[p] = refcount - 1
310
parent_chunks = self.chunks[p]
311
p_lines = osutils.chunks_to_lines(parent_chunks)
312
# TODO: Should we cache the line form? We did the
313
# computation to get it, but storing it this way will
314
# be less memory efficient...
315
parent_lines.append(p_lines)
317
lines = osutils.chunks_to_lines(this_chunks)
318
# Since we needed the lines, we'll go ahead and cache them this way
320
self._compute_diff(key, parent_lines, lines)
322
# Is this content required for any more children?
323
if key in self.refcounts:
324
self.chunks[key] = this_chunks
326
def _extract_diffs(self):
327
needed_keys, refcounts = self._find_needed_keys()
328
for record in self.vf.get_record_stream(needed_keys,
329
'topological', True):
330
if record.storage_kind == 'absent':
331
raise errors.RevisionNotPresent(record.key, self.vf)
332
self._process_one_record(record.key,
333
record.get_bytes_as('chunked'))
335
def compute_diffs(self):
336
self._extract_diffs()
337
dpop = self.diffs.pop
338
return [dpop(k) for k in self.ordered_keys]
209
341
class VersionedFile(object):
210
342
"""Versioned text file storage.
1048
1184
def make_mpdiffs(self, keys):
1049
1185
"""Create multiparent diffs for specified keys."""
1050
keys_order = tuple(keys)
1051
keys = frozenset(keys)
1052
knit_keys = set(keys)
1053
parent_map = self.get_parent_map(keys)
1054
for parent_keys in parent_map.itervalues():
1056
knit_keys.update(parent_keys)
1057
missing_keys = keys - set(parent_map)
1059
raise errors.RevisionNotPresent(list(missing_keys)[0], self)
1060
# We need to filter out ghosts, because we can't diff against them.
1061
maybe_ghosts = knit_keys - keys
1062
ghosts = maybe_ghosts - set(self.get_parent_map(maybe_ghosts))
1063
knit_keys.difference_update(ghosts)
1065
chunks_to_lines = osutils.chunks_to_lines
1066
for record in self.get_record_stream(knit_keys, 'topological', True):
1067
lines[record.key] = chunks_to_lines(record.get_bytes_as('chunked'))
1068
# line_block_dict = {}
1069
# for parent, blocks in record.extract_line_blocks():
1070
# line_blocks[parent] = blocks
1071
# line_blocks[record.key] = line_block_dict
1073
for key in keys_order:
1075
parents = parent_map[key] or []
1076
# Note that filtering knit_keys can lead to a parent difference
1077
# between the creation and the application of the mpdiff.
1078
parent_lines = [lines[p] for p in parents if p in knit_keys]
1079
if len(parent_lines) > 0:
1080
left_parent_blocks = self._extract_blocks(key, parent_lines[0],
1083
left_parent_blocks = None
1084
diffs.append(multiparent.MultiParent.from_lines(target,
1085
parent_lines, left_parent_blocks))
1186
generator = _MPDiffGenerator(self, keys)
1187
return generator.compute_diffs()
1189
def get_annotator(self):
1190
return annotate.Annotator(self)
1088
1192
missing_keys = index._missing_keys_from_parent_map
1728
class NoDupeAddLinesDecorator(object):
1729
"""Decorator for a VersionedFiles that skips doing an add_lines if the key
1733
def __init__(self, store):
1736
def add_lines(self, key, parents, lines, parent_texts=None,
1737
left_matching_blocks=None, nostore_sha=None, random_id=False,
1738
check_content=True):
1739
"""See VersionedFiles.add_lines.
1741
This implementation may return None as the third element of the return
1742
value when the original store wouldn't.
1745
raise NotImplementedError(
1746
"NoDupeAddLinesDecorator.add_lines does not implement the "
1747
"nostore_sha behaviour.")
1749
sha1 = osutils.sha_strings(lines)
1750
key = ("sha1:" + sha1,)
1753
if key in self._store.get_parent_map([key]):
1754
# This key has already been inserted, so don't do it again.
1756
sha1 = osutils.sha_strings(lines)
1757
return sha1, sum(map(len, lines)), None
1758
return self._store.add_lines(key, parents, lines,
1759
parent_texts=parent_texts,
1760
left_matching_blocks=left_matching_blocks,
1761
nostore_sha=nostore_sha, random_id=random_id,
1762
check_content=check_content)
1764
def __getattr__(self, name):
1765
return getattr(self._store, name)
1627
1768
def network_bytes_to_kind_and_offset(network_bytes):
1628
1769
"""Strip of a record kind from the front of network_bytes.