             if not self.get_parent_map([key]):
                 raise RevisionNotPresent(key, self)
             return cached_version
+        generator = _VFContentMapGenerator(self, [key])
+        return generator._get_content(key)
+
+    def get_known_graph_ancestry(self, keys):
+        """Get a KnownGraph instance with the ancestry of keys."""
+        parent_map, missing_keys = self._index.find_ancestry(keys)
+        for fallback in self._fallback_vfs:
-        text_map, contents_map = self._get_content_maps([key])
-        return contents_map[key]
-
-    def _get_content_maps(self, keys, nonlocal_keys=None):
-        """Produce maps of text and KnitContents
-
-        :param keys: The keys to produce content maps for.
-        :param nonlocal_keys: An iterable of keys (possibly intersecting keys)
-            which are known to not be in this knit, but rather in one of the
-            fallback knits.
-        :return: (text_map, content_map) where text_map contains the texts for
-            the requested versions and content_map contains the KnitContents.
-        """
-        # FUTURE: This function could be improved for the 'extract many' case
-        # by tracking each component and only doing the copy when the number of
-        # children that need to apply deltas to it is > 1 or it is part of the
-        # final content.
-        multiple_versions = len(keys) != 1
-        record_map = self._get_record_map(keys, allow_missing=True)
-
-        text_map = {}
-        content_map = {}
-        final_content = {}
-        if nonlocal_keys is None:
-            nonlocal_keys = set()
-        else:
-            nonlocal_keys = frozenset(nonlocal_keys)
-        missing_keys = set(nonlocal_keys)
-        for source in self._fallback_vfs:
             if not missing_keys:
                 break
+            (f_parent_map, f_missing_keys) = fallback._index.find_ancestry(
+                missing_keys)
+            parent_map.update(f_parent_map)
+            missing_keys = f_missing_keys
+        kg = _mod_graph.KnownGraph(parent_map)
+        return kg
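
The loop above is the stacked-storage pattern used throughout this file: ask the local index first, then each fallback in turn, shrinking the missing set as answers arrive. A minimal standalone sketch of that accumulate-and-shrink shape (find_ancestry here is a stand-in for the index API, not a bzrlib call):

    def gather_ancestry(indices, keys):
        parent_map = {}
        missing = set(keys)
        for index in indices:
            if not missing:
                break  # everything found; skip remaining fallbacks
            # stand-in: returns (found parent map, keys still missing)
            found, missing = index.find_ancestry(missing)
            parent_map.update(found)
        return parent_map, missing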
-            for record in source.get_record_stream(missing_keys,
-                'unordered', True):
-                if record.storage_kind == 'absent':
-                    continue
-                missing_keys.remove(record.key)
-                lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
-                text_map[record.key] = lines
-                content_map[record.key] = PlainKnitContent(lines, record.key)
-                if record.key in keys:
-                    final_content[record.key] = content_map[record.key]
-        for key in keys:
-            if key in nonlocal_keys:
-                # already handled
-                continue
-            components = []
-            cursor = key
-            while cursor is not None:
-                try:
-                    record, record_details, digest, next = record_map[cursor]
-                except KeyError:
-                    raise RevisionNotPresent(cursor, self)
-                components.append((cursor, record, record_details, digest))
-                cursor = next
-                if cursor in content_map:
-                    # no need to plan further back
-                    components.append((cursor, None, None, None))
-                    break
-
-            content = None
-            for (component_id, record, record_details,
-                 digest) in reversed(components):
-                if component_id in content_map:
-                    content = content_map[component_id]
-                else:
-                    content, delta = self._factory.parse_record(key[-1],
-                        record, record_details, content,
-                        copy_base_content=multiple_versions)
-                    if multiple_versions:
-                        content_map[component_id] = content
-
-            final_content[key] = content
-
-            # digest here is the digest from the last applied component.
-            text = content.text()
-            actual_sha = sha_strings(text)
-            if actual_sha != digest:
-                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
-            text_map[key] = text
-        return text_map, final_content
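
The loop above is the heart of knit extraction: walk the `next` pointers back from the requested key until a cached fulltext is found, then replay the collected components oldest-first and verify the digest of the result. A toy sketch of the same back-then-forward walk, using an append-only stand-in for real knit line deltas:

    def rebuild_text(record_map, key):
        """Toy version of the walk above: record_map maps key ->
        (kind, payload, next_key); kind is 'fulltext' or 'delta'.
        Real knit deltas are line-range edits; here a delta just
        appends lines, which is enough to show the control flow."""
        components = []
        cursor = key
        while cursor is not None:
            kind, payload, next_key = record_map[cursor]
            components.append((kind, payload))
            if kind == 'fulltext':
                break  # no need to plan further back
            cursor = next_key
        lines = []
        for kind, payload in reversed(components):
            if kind == 'fulltext':
                lines = list(payload)
            else:
                lines = lines + payload  # apply the (toy) delta
        return lines

    record_map = {
        ('v1',): ('fulltext', ['a\n'], None),
        ('v2',): ('delta', ['b\n'], ('v1',)),
    }
    assert rebuild_text(record_map, ('v2',)) == ['a\n', 'b\n']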
     def get_parent_map(self, keys):
         """Get a map of the graph parents of keys.


+class _ContentMapGenerator(object):
+    """Generate texts or expose raw deltas for a set of texts."""
+
+    def __init__(self, ordering='unordered'):
+        self._ordering = ordering
+
+    def _get_content(self, key):
+        """Get the content object for key."""
+        # Note that _get_content is only called when the _ContentMapGenerator
+        # has been constructed with just one key requested for reconstruction.
+        if key in self.nonlocal_keys:
+            record = self.get_record_stream().next()
+            # Create a content object on the fly
+            lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
+            return PlainKnitContent(lines, record.key)
+        else:
+            # local keys we can ask for directly
+            return self._get_one_work(key)
+
+    def get_record_stream(self):
+        """Get a record stream for the keys requested during __init__."""
+        for record in self._work():
+            yield record
+
+    def _work(self):
+        """Produce maps of text and KnitContents as dicts.
+
+        :return: (text_map, content_map) where text_map contains the texts for
+            the requested versions and content_map contains the KnitContents.
+        """
+        # NB: By definition we never need to read remote sources unless texts
+        # are requested from them: we don't delta across stores - and we
+        # explicitly do not want to, to prevent data loss situations.
+        if self.global_map is None:
+            self.global_map = self.vf.get_parent_map(self.keys)
+        nonlocal_keys = self.nonlocal_keys
+
+        missing_keys = set(nonlocal_keys)
+        # Read from remote versioned file instances and provide to our caller.
+        for source in self.vf._fallback_vfs:
+            if not missing_keys:
+                break
+            # Loop over fallback repositories asking them for texts - ignore
+            # any missing from a particular fallback.
+            for record in source.get_record_stream(missing_keys,
+                                                   self._ordering, True):
+                if record.storage_kind == 'absent':
+                    # Not in this particular stream, may be in one of the
+                    # other fallback vfs objects.
+                    continue
+                missing_keys.remove(record.key)
+                yield record
+
+        if self._raw_record_map is None:
+            raise AssertionError('_raw_record_map should have been filled')
+        first = True
+        for key in self.keys:
+            if key in self.nonlocal_keys:
+                continue
+            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
+            first = False
+
+    def _get_one_work(self, requested_key):
+        # Now, if we have calculated everything already, just return the
+        # requested content.
+        if requested_key in self._contents_map:
+            return self._contents_map[requested_key]
+        # To simplify things, parse everything at once - code that wants one
+        # text probably wants them all.
+        # FUTURE: This function could be improved for the 'extract many' case
+        # by tracking each component and only doing the copy when the number of
+        # children that need to apply deltas to it is > 1 or it is part of the
+        # final content.
+        multiple_versions = len(self.keys) != 1
+        if self._record_map is None:
+            self._record_map = self.vf._raw_map_to_record_map(
+                self._raw_record_map)
+        record_map = self._record_map
+        # raw_record_map is key:
+        # Have read and parsed records at this point.
+        for key in self.keys:
+            if key in self.nonlocal_keys:
+                # already handled
+                continue
+            components = []
+            cursor = key
+            while cursor is not None:
+                try:
+                    record, record_details, digest, next = record_map[cursor]
+                except KeyError:
+                    raise RevisionNotPresent(cursor, self)
+                components.append((cursor, record, record_details, digest))
+                cursor = next
+                if cursor in self._contents_map:
+                    # no need to plan further back
+                    components.append((cursor, None, None, None))
+                    break
+
+            content = None
+            for (component_id, record, record_details,
+                 digest) in reversed(components):
+                if component_id in self._contents_map:
+                    content = self._contents_map[component_id]
+                else:
+                    content, delta = self._factory.parse_record(key[-1],
+                        record, record_details, content,
+                        copy_base_content=multiple_versions)
+                    if multiple_versions:
+                        self._contents_map[component_id] = content
+
+            # digest here is the digest from the last applied component.
+            text = content.text()
+            actual_sha = sha_strings(text)
+            if actual_sha != digest:
+                raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
+        if multiple_versions:
+            return self._contents_map[requested_key]
+
+    def _wire_bytes(self):
+        """Get the bytes to put on the wire for 'key'.
+
+        The first collection of bytes asked for returns the serialised
+        raw_record_map and the additional details (key, parent) for key.
+        Subsequent calls return just the additional details (key, parent).
+        The wire storage_kind given for the first key is 'knit-delta-closure',
+        for subsequent keys it is 'knit-delta-closure-ref'.
+
+        :param key: A key from the content generator.
+        :return: Bytes to put on the wire.
+        """
+        lines = []
+        # kind marker for dispatch on the far side,
+        lines.append('knit-delta-closure')
+        # are we annotated?
+        if self.vf._factory.annotated:
+            lines.append('annotated')
+        else:
+            lines.append('')
+        # then the list of keys
+        lines.append('\t'.join(['\x00'.join(key) for key in self.keys
+            if key not in self.nonlocal_keys]))
+        # then the _raw_record_map in serialised form:
+        map_byte_list = []
+        # for each item in the map:
+        # 1 line with key
+        # 1 line with parents if the key is to be yielded (None: for None, '' for ())
+        # one line with method
+        # one line with noeol
+        # one line with next ('' for None)
+        # one line with byte count of the record bytes
+        # the record bytes
+        for key, (record_bytes, (method, noeol), next) in \
+                self._raw_record_map.iteritems():
+            key_bytes = '\x00'.join(key)
+            parents = self.global_map.get(key, None)
+            if parents is None:
+                parent_bytes = 'None:'
+            else:
+                parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
+            method_bytes = method
+            if noeol:
+                noeol_bytes = "T"
+            else:
+                noeol_bytes = "F"
+            if next:
+                next_bytes = '\x00'.join(next)
+            else:
+                next_bytes = ''
+            map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
+                key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
+                len(record_bytes), record_bytes))
+        map_bytes = ''.join(map_byte_list)
+        lines.append(map_bytes)
+        bytes = '\n'.join(lines)
+        return bytes
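
Per the comments above, each serialised record is framed as newline-separated header fields (key, parents, method, noeol, next) followed by a byte count and the raw record bytes. A simplified sketch of that framing; it mirrors the format string above but is not the exact bzrlib wire protocol:

    def frame_record(key, parents, method, noeol, next_key, record_bytes):
        # key tuples are joined with NUL, parent keys with NUL and TAB,
        # as in the serialiser above; 'None:' marks an absent parent list.
        key_bytes = '\x00'.join(key)
        if parents is None:
            parent_bytes = 'None:'
        else:
            parent_bytes = '\t'.join('\x00'.join(p) for p in parents)
        next_bytes = '\x00'.join(next_key) if next_key else ''
        return '%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
            key_bytes, parent_bytes, method, noeol, next_bytes,
            len(record_bytes), record_bytes)

    frame = frame_record(('v2',), [('v1',)], 'line-delta', 'F',
                         ('v1',), 'raw-record-bytes')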
+class _VFContentMapGenerator(_ContentMapGenerator):
+    """Content map generator reading from a VersionedFiles object."""
+
+    def __init__(self, versioned_files, keys, nonlocal_keys=None,
+                 global_map=None, raw_record_map=None, ordering='unordered'):
+        """Create a _ContentMapGenerator.
+
+        :param versioned_files: The versioned files that the texts are being
+            extracted from.
+        :param keys: The keys to produce content maps for.
+        :param nonlocal_keys: An iterable of keys (possibly intersecting keys)
+            which are known to not be in this knit, but rather in one of the
+            fallback knits.
+        :param global_map: The result of get_parent_map(keys) (or a supermap).
+            This is required if get_record_stream() is to be used.
+        :param raw_record_map: An unparsed raw record map to use for answering
+            contents.
+        """
+        _ContentMapGenerator.__init__(self, ordering=ordering)
+        # The vf to source data from
+        self.vf = versioned_files
+        # The keys desired
+        self.keys = list(keys)
+        # Keys known to be in fallback vfs objects
+        if nonlocal_keys is None:
+            self.nonlocal_keys = set()
+        else:
+            self.nonlocal_keys = frozenset(nonlocal_keys)
+        # Parents data for keys to be returned in get_record_stream
+        self.global_map = global_map
+        # The chunked lists for self.keys in text form
+        self._text_map = {}
+        # A cache of KnitContent objects used in extracting texts.
+        self._contents_map = {}
+        # All the knit records needed to assemble the requested keys as full
+        # texts.
+        self._record_map = None
+        if raw_record_map is None:
+            self._raw_record_map = self.vf._get_record_map_unparsed(keys,
+                allow_missing=True)
+        else:
+            self._raw_record_map = raw_record_map
+        # the factory for parsing records
+        self._factory = self.vf._factory
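
As seen in KnitVersionedFiles._get_content near the top of this diff, the single-key extraction path boils down to constructing one of these generators over a one-element key list; a minimal usage sketch:

    def extract_one(vf, key):
        # vf is assumed to be a KnitVersionedFiles instance; this mirrors
        # the _get_content code shown earlier in this diff.
        generator = _VFContentMapGenerator(vf, [key])
        return generator._get_content(key)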
+class _NetworkContentMapGenerator(_ContentMapGenerator):
+    """Content map generator sourced from a network stream."""
+
+    def __init__(self, bytes, line_end):
+        """Construct a _NetworkContentMapGenerator from a bytes block."""
+        self._bytes = bytes
+        self.global_map = {}
+        self._raw_record_map = {}
+        self._contents_map = {}
+        self._record_map = None
+        self.nonlocal_keys = []
+        # Get access to record parsing facilities
+        self.vf = KnitVersionedFiles(None, None)
+        start = line_end
+        # Annotated or not
+        line_end = bytes.find('\n', start)
+        line = bytes[start:line_end]
+        start = line_end + 1
+        if line == 'annotated':
+            self._factory = KnitAnnotateFactory()
+        else:
+            self._factory = KnitPlainFactory()
+        # list of keys to emit in get_record_stream
+        line_end = bytes.find('\n', start)
+        line = bytes[start:line_end]
+        start = line_end + 1
+        self.keys = [
+            tuple(segment.split('\x00')) for segment in line.split('\t')
+            if segment]
+        # now a loop until the end. XXX: It would be nice if this was just a
+        # bunch of the same records as get_record_stream(..., False) gives, but
+        # there is a decent sized gap stopping that at the moment.
+        end = len(bytes)
+        while start < end:
+            # 1 line with key
+            line_end = bytes.find('\n', start)
+            key = tuple(bytes[start:line_end].split('\x00'))
+            start = line_end + 1
+            # 1 line with parents (None: for None, '' for ())
+            line_end = bytes.find('\n', start)
+            line = bytes[start:line_end]
+            if line == 'None:':
+                parents = None
+            else:
+                parents = tuple(
+                    [tuple(segment.split('\x00')) for segment in line.split('\t')
+                     if segment])
+            self.global_map[key] = parents
+            start = line_end + 1
+            # one line with method
+            line_end = bytes.find('\n', start)
+            line = bytes[start:line_end]
+            method = line
+            start = line_end + 1
+            # one line with noeol
+            line_end = bytes.find('\n', start)
+            line = bytes[start:line_end]
+            noeol = (line == 'T')
+            start = line_end + 1
+            # one line with next ('' for None)
+            line_end = bytes.find('\n', start)
+            line = bytes[start:line_end]
+            if not line:
+                next = None
+            else:
+                next = tuple(bytes[start:line_end].split('\x00'))
+            start = line_end + 1
+            # one line with byte count of the record bytes
+            line_end = bytes.find('\n', start)
+            line = bytes[start:line_end]
+            count = int(line)
+            start = line_end + 1
+            # the record bytes
+            record_bytes = bytes[start:start+count]
+            start = start + count
+            # put it in the map
+            self._raw_record_map[key] = (record_bytes, (method, noeol), next)
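
The constructor above walks the byte block with a moving cursor: find the next newline, slice the field, advance past it. The same pattern in miniature (the framing here is invented for the example):

    def read_line(data, start):
        """Return (field, new_start) for the newline-terminated field
        beginning at start."""
        line_end = data.find('\n', start)
        return data[start:line_end], line_end + 1

    data = 'key1\nline-delta\nT\n'
    field, start = read_line(data, 0)
    assert field == 'key1'
    field, start = read_line(data, start)
    assert field == 'line-delta'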
+    def get_record_stream(self):
+        """Get a record stream for the keys requested by the bytestream."""
+        first = True
+        for key in self.keys:
+            yield LazyKnitContentFactory(key, self.global_map[key], self, first)
+            first = False
+
+    def _wire_bytes(self):
+        """Get the bytes to put on the wire for 'key'."""
+        return self._bytes


 class _KndxIndex(object):
     """Manages knit index files

     annotator = _KnitAnnotator(knit)
+    return iter(annotator.annotate_flat(revision_id))
+
+
+class _KnitAnnotator(annotate.Annotator):
-    return iter(annotator.annotate(revision_id))
-
-
-class _KnitAnnotator(object):
     """Build up the annotations for a text."""

+    def __init__(self, vf):
+        annotate.Annotator.__init__(self, vf)
+
+        # TODO: handle Nodes which cannot be extracted
+        # self._ghosts = set()
+
+        # Map from (key, parent_key) => matching_blocks, should be 'use once'
+        self._matching_blocks = {}
+
+        # KnitContent objects
+        self._content_objects = {}
+        # The number of children that depend on this fulltext content object
+        self._num_compression_children = {}
+        # Delta records that need their compression parent before they can be
+        # expanded
+        self._pending_deltas = {}
+        # Fulltext records that are waiting for their parents' fulltexts
+        # before they can be yielded for annotation
+        self._pending_annotation = {}
-    def __init__(self, knit):
-        self._knit = knit
-
-        # Content objects, differs from fulltexts because of how final
-        # newlines are treated by knits. The content objects here will always
-        # have a final newline.
-        self._fulltext_contents = {}
-
-        # Annotated lines of specific revisions
-        self._annotated_lines = {}
-
-        # Track the raw data for nodes that we could not process yet.
-        # This maps the revision_id of the base to a list of children that
-        # will be annotated from it.
-        self._pending_children = {}
-
-        # Nodes which cannot be extracted
-        self._ghosts = set()
-
-        # Track how many children this node has, so we know if we need to keep
-        # it
-        self._annotate_children = {}
-        self._compression_children = {}

         self._all_build_details = {}
-        # The children => parent revision_id graph
-        self._revision_id_graph = {}
-
-        self._heads_provider = None
-
-        self._nodes_to_keep_annotations = set()
-        self._generations_until_keep = 100
-
-    def set_generations_until_keep(self, value):
-        """Set the number of generations before caching a node.
-
-        Setting this to -1 will cache every merge node, setting this higher
-        will cache fewer nodes.
-        """
-        self._generations_until_keep = value
-
-    def _add_fulltext_content(self, revision_id, content_obj):
-        self._fulltext_contents[revision_id] = content_obj
-        # TODO: jam 20080305 It might be good to check the sha1digest here
-        return content_obj.text()
-
-    def _check_parents(self, child, nodes_to_annotate):
-        """Check if all parents have been processed.
-
-        :param child: A tuple of (rev_id, parents, raw_content)
-        :param nodes_to_annotate: If child is ready, add it to
-            nodes_to_annotate, otherwise put it back in self._pending_children
-        """
-        for parent_id in child[1]:
-            if (parent_id not in self._annotated_lines):
-                # This parent is not annotated yet, so the child is not
-                # ready; park it until that parent has been processed.
-                self._pending_children.setdefault(parent_id,
-                                                  []).append(child)
-                break
-        else:
-            # This one is ready to be processed
-            nodes_to_annotate.append(child)
-
-    def _add_annotation(self, revision_id, fulltext, parent_ids,
-                        left_matching_blocks=None):
-        """Add an annotation entry.
-
-        All parents should already have been annotated.
-        :return: A list of children that now have their parents satisfied.
-        """
-        a = self._annotated_lines
-        annotated_parent_lines = [a[p] for p in parent_ids]
-        annotated_lines = list(annotate.reannotate(annotated_parent_lines,
-            fulltext, revision_id, left_matching_blocks,
-            heads_provider=self._get_heads_provider()))
-        self._annotated_lines[revision_id] = annotated_lines
-        for p in parent_ids:
-            ann_children = self._annotate_children[p]
-            ann_children.remove(revision_id)
-            if (not ann_children
-                and p not in self._nodes_to_keep_annotations):
-                del self._annotated_lines[p]
-                del self._all_build_details[p]
-                if p in self._fulltext_contents:
-                    del self._fulltext_contents[p]
-        # Now that we've added this one, see if there are any pending
-        # deltas to be done; certainly this parent is finished.
-        nodes_to_annotate = []
-        for child in self._pending_children.pop(revision_id, []):
-            self._check_parents(child, nodes_to_annotate)
-        return nodes_to_annotate
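
_add_annotation frees a parent's cached annotations once its last interested child has been processed, which is what keeps memory bounded on long delta chains. A small sketch of that reference-counted eviction, with stand-in names:

    def release_parent(children_left, cache, parent_id, child_id):
        """Drop the cached lines for parent_id once no child needs them."""
        children_left[parent_id].remove(child_id)
        if not children_left[parent_id]:
            del cache[parent_id]

    cache = {'p': ['annotated lines']}
    children_left = {'p': set(['c1', 'c2'])}
    release_parent(children_left, cache, 'p', 'c1')
    assert 'p' in cache       # c2 still needs it
    release_parent(children_left, cache, 'p', 'c2')
    assert 'p' not in cache   # last child done; evicted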
     def _get_build_graph(self, key):
         """Get the graphs for building texts and annotations.

         :return: A list of (key, index_memo) records, suitable for
             passing to read_records_iter to start reading in the raw data
             from the pack file.
         """
-        if key in self._annotated_lines:
-            # Nothing to do
-            return []
         pending = set([key])
         records = []
+        ann_keys = set()
+        self._num_needed_children[key] = 1
-        generation = 0
-        kept_generation = 0
         while pending:
             # get all pending nodes
             this_iteration = pending
+            build_details = self._vf._index.get_build_details(this_iteration)
-            build_details = self._knit._index.get_build_details(this_iteration)
             self._all_build_details.update(build_details)
+            # new_nodes = self._vf._index._get_entries(this_iteration)
-            # new_nodes = self._knit._index._get_entries(this_iteration)
             pending = set()
             for key, details in build_details.iteritems():
+                (index_memo, compression_parent, parent_keys,
-                (index_memo, compression_parent, parents,
                  record_details) = details
+                self._parent_map[key] = parent_keys
+                self._heads_provider = None
-                self._revision_id_graph[key] = parents
                 records.append((key, index_memo))
                 # Do we actually need to check _annotated_lines?
+                pending.update([p for p in parent_keys
+                                if p not in self._all_build_details])
+                if parent_keys:
+                    for parent_key in parent_keys:
+                        if parent_key in self._num_needed_children:
+                            self._num_needed_children[parent_key] += 1
+                        else:
+                            self._num_needed_children[parent_key] = 1
-                pending.update(p for p in parents
-                               if p not in self._all_build_details)
                 if compression_parent:
+                    if compression_parent in self._num_compression_children:
+                        self._num_compression_children[compression_parent] += 1
+                    else:
+                        self._num_compression_children[compression_parent] = 1
-                    self._compression_children.setdefault(compression_parent,
-                        []).append(key)
-                if parents:
-                    for parent in parents:
-                        self._annotate_children.setdefault(parent,
-                            []).append(key)
-                    num_gens = generation - kept_generation
-                    if ((num_gens >= self._generations_until_keep)
-                        and len(parents) > 1):
-                        kept_generation = generation
-                        self._nodes_to_keep_annotations.add(key)

             missing_versions = this_iteration.difference(build_details.keys())
+            if missing_versions:
+                for key in missing_versions:
+                    if key in self._parent_map and key in self._text_cache:
+                        # We already have this text ready, we just need to
+                        # yield it later so we get it annotated
+                        ann_keys.add(key)
+                        parent_keys = self._parent_map[key]
+                        for parent_key in parent_keys:
+                            if parent_key in self._num_needed_children:
+                                self._num_needed_children[parent_key] += 1
+                            else:
+                                self._num_needed_children[parent_key] = 1
+                        pending.update([p for p in parent_keys
+                                        if p not in self._all_build_details])
+                    else:
+                        raise errors.RevisionNotPresent(key, self._vf)
-            self._ghosts.update(missing_versions)
-            for missing_version in missing_versions:
-                # add a key, no parents
-                self._revision_id_graph[missing_version] = ()
-                pending.discard(missing_version) # don't look for it
-        if self._ghosts.intersection(self._compression_children):
-            raise AssertionError(
-                "We cannot have nodes which have a ghost compression parent:\n"
-                "ghosts: %r\n"
-                "compression children: %r"
-                % (self._ghosts, self._compression_children))
-        # Clean out anything that depends on a ghost so that we don't wait for
-        # the ghost to show up
-        for node in self._ghosts:
-            if node in self._annotate_children:
-                # We won't be building this node
-                del self._annotate_children[node]
         # Generally we will want to read the records in reverse order, because
         # we find the parent nodes after the children
         records.reverse()
-        return records
+        return records, ann_keys
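
Both versions of _get_build_graph do the same frontier walk: fetch build details for the current pending set, record them, and queue any parents not yet seen, finally reversing so parents are read before children. A compact sketch of that walk (get_details is a stand-in returning a key -> parent-keys map):

    def build_graph(get_details, key):
        records = []
        all_details = {}
        pending = set([key])
        while pending:
            details = get_details(pending)  # stand-in index query
            all_details.update(details)
            pending = set()
            for k, parents in details.items():
                records.append(k)
                pending.update(p for p in parents
                               if p not in all_details)
        records.reverse()  # read parents before children, as above
        return records

    graph = {'c': ['b'], 'b': ['a'], 'a': []}
    get_details = lambda keys: dict((k, graph[k]) for k in keys)
    assert build_graph(get_details, 'c') == ['a', 'b', 'c']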
+    def _get_needed_texts(self, key, pb=None):
+        # if True or len(self._vf._fallback_vfs) > 0:
+        if len(self._vf._fallback_vfs) > 0:
+            # If we have fallbacks, go to the generic path
+            for v in annotate.Annotator._get_needed_texts(self, key, pb=pb):
+                yield v
+            return
+        while True:
+            try:
+                records, ann_keys = self._get_build_graph(key)
+                for idx, (sub_key, text, num_lines) in enumerate(
+                        self._extract_texts(records)):
+                    if pb is not None:
+                        pb.update('annotating', idx, len(records))
+                    yield sub_key, text, num_lines
+                for sub_key in ann_keys:
+                    text = self._text_cache[sub_key]
+                    num_lines = len(text) # bad assumption
+                    yield sub_key, text, num_lines
+                return
+            except errors.RetryWithNewPacks, e:
+                self._vf._access.reload_or_raise(e)
+                # The cached build_details are no longer valid
+                self._all_build_details.clear()
+
+    def _cache_delta_blocks(self, key, compression_parent, delta, lines):
+        parent_lines = self._text_cache[compression_parent]
+        blocks = list(KnitContent.get_line_delta_blocks(delta, parent_lines, lines))
+        self._matching_blocks[(key, compression_parent)] = blocks
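
The cached blocks are (parent_idx, text_idx, length) triples, the same shape difflib produces, which is why they can later be handed straight to the annotator instead of re-diffing parent and child. For illustration only:

    from difflib import SequenceMatcher

    parent_lines = ['a\n', 'b\n', 'c\n']
    child_lines = ['a\n', 'x\n', 'c\n']
    matcher = SequenceMatcher(None, parent_lines, child_lines)
    blocks = matcher.get_matching_blocks()
    # matches (0, 0, 1) and (2, 2, 1), plus a zero-length sentinel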
+    def _expand_record(self, key, parent_keys, compression_parent, record,
+                       record_details):
+        delta = None
+        if compression_parent:
+            if compression_parent not in self._content_objects:
+                # Waiting for the parent
+                self._pending_deltas.setdefault(compression_parent, []).append(
+                    (key, parent_keys, record, record_details))
+                return None
+            # We have the basis parent, so expand the delta
+            num = self._num_compression_children[compression_parent]
+            num -= 1
+            if num == 0:
+                base_content = self._content_objects.pop(compression_parent)
+                self._num_compression_children.pop(compression_parent)
+            else:
+                self._num_compression_children[compression_parent] = num
+                base_content = self._content_objects[compression_parent]
+            # It is tempting to want to copy_base_content=False for the last
+            # child object. However, whenever noeol=False,
+            # self._text_cache[parent_key] is content._lines. So mutating it
+            # gives very bad results.
+            # The alternative is to copy the lines into text cache, but then we
+            # are copying anyway, so just do it here.
+            content, delta = self._vf._factory.parse_record(
+                key, record, record_details, base_content,
+                copy_base_content=True)
+        else:
+            # Fulltext record
+            content, _ = self._vf._factory.parse_record(
+                key, record, record_details, None)
+        if self._num_compression_children.get(key, 0) > 0:
+            self._content_objects[key] = content
+        lines = content.text()
+        self._text_cache[key] = lines
+        if delta is not None:
+            self._cache_delta_blocks(key, compression_parent, delta, lines)
+        return lines
+
+    def _get_parent_annotations_and_matches(self, key, text, parent_key):
+        """Get the list of annotations for the parent, and the matching lines.
+
+        :param text: The opaque value given by _get_needed_texts
+        :param parent_key: The key for the parent text
+        :return: (parent_annotations, matching_blocks)
+            parent_annotations is a list as long as the number of lines in
+            parent
+            matching_blocks is a list of (parent_idx, text_idx, len) tuples
+            indicating which lines match between the two texts
+        """
+        block_key = (key, parent_key)
+        if block_key in self._matching_blocks:
+            blocks = self._matching_blocks.pop(block_key)
+            parent_annotations = self._annotations_cache[parent_key]
+            return parent_annotations, blocks
+        return annotate.Annotator._get_parent_annotations_and_matches(self,
+            key, text, parent_key)
+
+    def _process_pending(self, key):
+        """The content for 'key' was just processed.
+
+        Determine if there is any more pending work to be processed.
+        """
+        to_return = []
+        if key in self._pending_deltas:
+            compression_parent = key
+            children = self._pending_deltas.pop(key)
+            for child_key, parent_keys, record, record_details in children:
+                lines = self._expand_record(child_key, parent_keys,
+                                            compression_parent,
+                                            record, record_details)
+                if self._check_ready_for_annotations(child_key, parent_keys):
+                    to_return.append(child_key)
+        # Also check any children that are waiting for this parent to be
+        # annotated
+        if key in self._pending_annotation:
+            children = self._pending_annotation.pop(key)
+            to_return.extend([c for c, p_keys in children
+                              if self._check_ready_for_annotations(c, p_keys)])
+        return to_return
+
+    def _check_ready_for_annotations(self, key, parent_keys):
+        """Return True if this text is ready to be yielded.
+
+        Otherwise, this will return False, and queue the text into
+        self._pending_annotation
+        """
+        for parent_key in parent_keys:
+            if parent_key not in self._annotations_cache:
+                # still waiting on at least one parent text, so queue it up
+                # Note that if there are multiple parents, we need to wait
+                # for all of them.
+                self._pending_annotation.setdefault(parent_key,
+                    []).append((key, parent_keys))
+                return False
+        return True
+
+    def _extract_texts(self, records):
+        """Extract the various texts needed based on records"""
-    def _annotate_records(self, records):
-        """Build the annotations for the listed records."""
         # We iterate in the order read, rather than a strict order requested
         # However, process what we can, and put off to the side things that
         # still need parents, cleaning them up when those parents are
         # processed.
+        # Basic algorithm:
+        #   1) As 'records' are read, see if we can expand these records into
+        #      Content objects (and thus lines)
+        #   2) If a given line-delta is waiting on its compression parent, it
+        #      gets queued up into self._pending_deltas, otherwise we expand
+        #      it, and put it into self._text_cache and self._content_objects
+        #   3) If we expanded the text, we will then check to see if all
+        #      parents have also been processed. If so, this text gets yielded,
+        #      else this record gets set aside into pending_annotation
+        #   4) Further, if we expanded the text in (2), we will then check to
+        #      see if there are any children in self._pending_deltas waiting to
+        #      also be processed. If so, we go back to (2) for those
+        #   5) Further again, if we yielded the text, we can then check if that
+        #      'unlocks' any of the texts in pending_annotations, which should
+        #      then get yielded as well
+        # Note that both steps 4 and 5 are 'recursive' in that unlocking one
+        # compression child could unlock yet another, and yielding a fulltext
+        # will also 'unlock' the children that are waiting on that annotation.
+        # (Though also, unlocking 1 parent's fulltext does not unlock a child
+        # if other parents are also waiting.)
+        # We want to yield content before expanding child content objects, so
+        # that we know when we can re-use the content lines, and the annotation
+        # code can know when it can stop caching fulltexts, as well.
+
+        # Children that are missing their compression parent
+        for (key, record, digest) in self._vf._read_records_iter(records):
+            details = self._all_build_details[key]
+            (_, compression_parent, parent_keys, record_details) = details
+            lines = self._expand_record(key, parent_keys, compression_parent,
+                                        record, record_details)
+            if lines is None:
+                # Pending delta should be queued up
+                continue
-        for (rev_id, record,
-             digest) in self._knit._read_records_iter(records):
-            if rev_id in self._annotated_lines:
-                continue
+            # At this point, we may be able to yield this content, if all
+            # parents are also finished
+            yield_this_text = self._check_ready_for_annotations(key,
+                                                                parent_keys)
+            if yield_this_text:
+                # All parents present
+                yield key, lines, len(lines)
+                to_process = self._process_pending(key)
+                while to_process:
+                    this_process = to_process
+                    to_process = []
+                    for key in this_process:
+                        lines = self._text_cache[key]
+                        yield key, lines, len(lines)
+                        to_process.extend(self._process_pending(key))
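
A minimal sketch of steps 1-5 above, collapsing the separate compression-parent and annotation-parent bookkeeping into a single waiting queue; records here are (key, basis_or_None) stand-ins, not real knit records:

    def extract_in_arrival_order(records):
        """Yield keys in read order where possible: a key whose basis has
        not arrived yet is parked, and is drained (recursively) once that
        basis has been yielded."""
        done = set()
        waiting = {}  # basis key -> list of parked children
        for key, basis in records:
            if basis is not None and basis not in done:
                waiting.setdefault(basis, []).append(key)
                continue
            ready = [key]
            while ready:
                k = ready.pop()
                done.add(k)
                yield k  # text fully expanded; safe to hand out
                ready.extend(waiting.pop(k, []))

    assert list(extract_in_arrival_order(
        [('b', 'a'), ('c', 'b'), ('a', None)])) == ['a', 'b', 'c']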
-            parent_ids = self._revision_id_graph[rev_id]
-            parent_ids = [p for p in parent_ids if p not in self._ghosts]
-            details = self._all_build_details[rev_id]
-            (index_memo, compression_parent, parents,
-             record_details) = details
-            nodes_to_annotate = []
-            # TODO: Remove the punning between compression parents, and
-            #       parent_ids, we should be able to do this without assuming
-            #       the first parent is the compression parent
-            if len(parent_ids) == 0:
-                # There are no parents for this node, so just add it
-                # TODO: This probably needs to be decoupled
-                fulltext_content, delta = self._knit._factory.parse_record(
-                    rev_id, record, record_details, None)
-                fulltext = self._add_fulltext_content(rev_id, fulltext_content)
-                nodes_to_annotate.extend(self._add_annotation(rev_id, fulltext,
-                    parent_ids, left_matching_blocks=None))
-            else:
-                child = (rev_id, parent_ids, record)
-                # Check if all the parents are present
-                self._check_parents(child, nodes_to_annotate)
-            while nodes_to_annotate:
-                # Should we use a queue here instead of a stack?
-                (rev_id, parent_ids, record) = nodes_to_annotate.pop()
-                (index_memo, compression_parent, parents,
-                 record_details) = self._all_build_details[rev_id]
-                blocks = None
-                if compression_parent is not None:
-                    comp_children = self._compression_children[compression_parent]
-                    if rev_id not in comp_children:
-                        raise AssertionError("%r not in compression children %r"
-                            % (rev_id, comp_children))
-                    # If there is only 1 child, it is safe to reuse this
-                    # content
-                    reuse_content = (len(comp_children) == 1
-                        and compression_parent not in
-                            self._nodes_to_keep_annotations)
-                    if reuse_content:
-                        # Remove it from the cache since it will be changing
-                        parent_fulltext_content = self._fulltext_contents.pop(compression_parent)
-                        # Make sure to copy the fulltext since it might be
-                        # modified
-                        parent_fulltext = list(parent_fulltext_content.text())
-                    else:
-                        parent_fulltext_content = self._fulltext_contents[compression_parent]
-                        parent_fulltext = parent_fulltext_content.text()
-                    comp_children.remove(rev_id)
-                    fulltext_content, delta = self._knit._factory.parse_record(
-                        rev_id, record, record_details,
-                        parent_fulltext_content,
-                        copy_base_content=(not reuse_content))
-                    fulltext = self._add_fulltext_content(rev_id,
-                                                          fulltext_content)
-                    if compression_parent == parent_ids[0]:
-                        # the compression_parent is the left parent, so we can
-                        # re-use the delta
-                        blocks = KnitContent.get_line_delta_blocks(delta,
-                                parent_fulltext, fulltext)
-                else:
-                    fulltext_content = self._knit._factory.parse_fulltext(
-                        record, rev_id)
-                    fulltext = self._add_fulltext_content(rev_id,
-                        fulltext_content)
-                nodes_to_annotate.extend(
-                    self._add_annotation(rev_id, fulltext, parent_ids,
-                        left_matching_blocks=blocks))
-
-    def _get_heads_provider(self):
-        """Create a heads provider for resolving ancestry issues."""
-        if self._heads_provider is not None:
-            return self._heads_provider
-        parent_provider = _mod_graph.DictParentsProvider(
-            self._revision_id_graph)
-        graph_obj = _mod_graph.Graph(parent_provider)
-        head_cache = _mod_graph.FrozenHeadsCache(graph_obj)
-        self._heads_provider = head_cache
-        return head_cache
-
-    def annotate(self, key):
-        """Return the annotated fulltext at the given key.
-
-        :param key: The key to annotate.
-        """
-        if len(self._knit._fallback_vfs) > 0:
-            # stacked knits can't use the fast path at present.
-            return self._simple_annotate(key)
-        while True:
-            try:
-                records = self._get_build_graph(key)
-                if key in self._ghosts:
-                    raise errors.RevisionNotPresent(key, self._knit)
-                self._annotate_records(records)
-                return self._annotated_lines[key]
-            except errors.RetryWithNewPacks, e:
-                self._knit._access.reload_or_raise(e)
-                # The cached build_details are no longer valid
-                self._all_build_details.clear()
-
-    def _simple_annotate(self, key):
-        """Return annotated fulltext, rediffing from the full texts.
-
-        This is slow but makes no assumptions about the repository
-        being able to produce line deltas.
-        """
-        # TODO: this code generates a parent map of present ancestors; it
-        #       could be split out into a separate method, and probably should
-        #       use iter_ancestry instead. -- mbp and robertc 20080704
-        graph = _mod_graph.Graph(self._knit)
-        head_cache = _mod_graph.FrozenHeadsCache(graph)
-        search = graph._make_breadth_first_searcher([key])
-        keys = set()
-        while True:
-            try:
-                present, ghosts = search.next_with_ghosts()
-            except StopIteration:
-                break
-            keys.update(present)
-        parent_map = self._knit.get_parent_map(keys)
-        parent_cache = {}
-        reannotate = annotate.reannotate
-        for record in self._knit.get_record_stream(keys, 'topological', True):
-            key = record.key
-            fulltext = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
-            parents = parent_map[key]
-            if parents is not None:
-                parent_lines = [parent_cache[parent] for parent in parent_map[key]]
-            else:
-                parent_lines = []
-            parent_cache[key] = list(
-                reannotate(parent_lines, fulltext, key, None, head_cache))
-        try:
-            return parent_cache[key]
-        except KeyError, e:
-            raise errors.RevisionNotPresent(key, self._knit)
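
_simple_annotate works because the 'topological' stream order guarantees every parent has been annotated before any child asks for it. A sketch of that invariant in isolation (annotate_lines stands in for annotate.reannotate):

    def annotate_all(topo_keys, get_lines, parent_map, annotate_lines):
        """topo_keys must list parents before children, as the
        'topological' record stream above does."""
        cache = {}
        for key in topo_keys:
            parent_lines = [cache[p] for p in parent_map[key]]
            cache[key] = annotate_lines(parent_lines, get_lines(key), key)
        return cache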
 try:
+    from bzrlib._knit_load_data_pyx import _load_data_c as _load_data
+except ImportError, e:
+    osutils.failed_to_load_extension(e)
-    from bzrlib._knit_load_data_c import _load_data_c as _load_data
-except ImportError:
     from bzrlib._knit_load_data_py import _load_data_py as _load_data
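
This tail is the usual optional-accelerator dance: try the compiled extension first and fall back to the pure-Python implementation on ImportError. The same pattern in miniature, with a standard-library pair standing in for the knit loaders:

    try:
        import cPickle as pickle  # C accelerator on Python 2
    except ImportError:
        import pickle  # pure-Python / Python 3 fallback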