805
805
factory = KnitAnnotateFactory()
807
807
raise errors.KnitDataStreamUnknown(format)
808
access = _StreamAccess(reader_callable)
809
808
index = _StreamIndex(data_list)
809
access = _StreamAccess(reader_callable, index, self, factory)
810
810
return KnitVersionedFile(self.filename, self.transport,
811
811
factory=factory, index=index, access_method=access)
2048
2048
class _StreamAccess(object):
2049
"""A Knit Access object that provides data from a datastream."""
2051
def __init__(self, reader_callable):
2049
"""A Knit Access object that provides data from a datastream.
2051
It also provides a fallback to present as unannotated data, annotated data
2052
from a *backing* access object.
2054
This is triggered by an index_memo which is pointing to a different index
2055
than this was constructed with, and is used to allow extracting full
2056
unannotated texts for insertion into annotated knits.
2059
def __init__(self, reader_callable, stream_index, backing_knit,
2052
2061
"""Create a _StreamAccess object.
2054
2063
:param reader_callable: The reader_callable from the datastream.
2055
2064
This is called to buffer all the data immediately, for
2066
:param stream_index: The index of the data stream this provides access to,
2067
which will be present in native index_memos.
2068
:param backing_knit: The knit object that will provide access to
2069
annotated texts which are not available in the stream, so as to
2070
create unannotated texts.
2071
:param orig_factory: The original content factory used to generate the
2072
stream. This is used for checking whether the thunk code for
2073
supporting _copy_texts will generate the correct form of data.
2058
2075
self.data = reader_callable(None)
2076
self.stream_index = stream_index
2077
self.backing_knit = backing_knit
2078
self.orig_factory = orig_factory
2060
2080
def get_raw_records(self, memos_for_retrieval):
2061
2081
"""Get the raw bytes for records.
2066
2086
:return: An iterator over the bytes of the records.
2068
2088
# use a generator for memory friendliness
2069
for _, start, end in memos_for_retrieval:
2070
yield self.data[start:end]
2089
for index, start, end in memos_for_retrieval:
2090
if index is self.stream_index:
2091
yield self.data[start:end]
2093
# we have been asked to thunk. This thunking only occurs when
2094
# we are obtaining plain texts from an annotated backing knit
2095
# so that _copy_texts will work.
2096
# We could improve performance here by scanning for where we need
2097
# to do this and using get_line_list, then interleaving the output
2098
# as desired. However, for now, this is sufficient.
2099
if (index[:6] != 'thunk:' or
2100
self.orig_factory.__class__ != KnitPlainFactory):
2101
raise errors.KnitCorrupt(self, 'Bad thunk request %r' % index)
2102
version_id = index[6:]
2103
lines = self.backing_knit.get_lines(version_id)
2104
line_bytes = ''.join(lines)
2105
digest = sha_string(line_bytes)
2107
if lines[-1][-1] != '\n':
2108
lines[-1] = lines[-1] + '\n'
2110
orig_options = list(self.backing_knit._index.get_options(version_id))
2111
if 'fulltext' not in orig_options:
2112
if 'line-delta' not in orig_options:
2113
raise errors.KnitCorrupt(self,
2114
'Unknown compression method %r' % orig_options)
2115
orig_options.remove('line-delta')
2116
orig_options.append('fulltext')
2117
# We want plain data, because we expect to thunk only to allow text
2119
size, bytes = self.backing_knit._data._record_to_data(version_id,
2120
digest, lines, line_bytes)
2073
2124
class _StreamIndex(object):
2108
2159
graph[version] = parents
2109
2160
return graph.keys()
2162
def get_method(self, version_id):
2163
"""Return compression method of specified version."""
2165
options = self._by_version[version_id][0]
2167
# Strictly speaking this should check in the backing knit, but
2168
# until we have a test to discriminate, this will do.
2170
if 'fulltext' in options:
2172
elif 'line-delta' in options:
2175
raise errors.KnitIndexUnknownMethod(self, options)
2111
2177
def get_options(self, version_id):
2112
2178
"""Return a string representing options.
2129
2195
:return: a tuple (None, start, end).
2131
start, end = self._by_version[version_id][1]
2132
return None, start, end
2198
start, end = self._by_version[version_id][1]
2199
return self, start, end
2201
# Signal to the access object to handle this from the backing knit.
2202
return ('thunk:%s' % version_id, None, None)
2134
2204
def get_versions(self):
2135
2205
"""Get all the versions in the stream."""
2144
2214
The order is undefined, allowing for different optimisations in
2145
2215
the underlying implementation.
2147
return [(version, self._by_version[version][2]) for
2148
version in version_ids]
2218
for version in version_ids:
2220
result.append((version, self._by_version[version][2]))
2151
2226
class _KnitData(object):
2408
2483
for index, version in enumerate(to_process):
2409
2484
pb.update('Converting versioned data', index, total)
2410
2485
sha1, num_bytes, parent_text = self.target.add_lines(version,
2411
self.source.get_parents(version),
2486
self.source.get_parents_with_ghosts(version),
2412
2487
self.source.get_lines(version),
2413
2488
parent_texts=parent_cache)
2414
2489
parent_cache[version] = parent_text