287
306
self.storage_kind)
309
class LazyKnitContentFactory(ContentFactory):
310
"""A ContentFactory which can either generate full text or a wire form.
312
:seealso ContentFactory:
315
def __init__(self, key, parents, generator, first):
316
"""Create a LazyKnitContentFactory.
318
:param key: The key of the record.
319
:param parents: The parents of the record.
320
:param generator: A _ContentMapGenerator containing the record for this
322
:param first: Is this the first content object returned from generator?
323
if it is, its storage kind is knit-delta-closure, otherwise it is
324
knit-delta-closure-ref
327
self.parents = parents
329
self._generator = generator
330
self.storage_kind = "knit-delta-closure"
332
self.storage_kind = self.storage_kind + "-ref"
335
def get_bytes_as(self, storage_kind):
336
if storage_kind == self.storage_kind:
338
return self._generator._wire_bytes()
340
# all the keys etc are contained in the bytes returned in the
343
if storage_kind in ('chunked', 'fulltext'):
344
chunks = self._generator._get_one_work(self.key).text()
345
if storage_kind == 'chunked':
348
return ''.join(chunks)
349
raise errors.UnavailableRepresentation(self.key, storage_kind,
353
def knit_delta_closure_to_records(storage_kind, bytes, line_end):
    """Convert a network record to an iterator over stream records.

    :param storage_kind: The storage kind of the record.
        Must be 'knit-delta-closure'.
    :param bytes: The bytes of the record on the network.
    :param line_end: Handed, together with bytes, to
        _NetworkContentMapGenerator.
    :return: Whatever the generator's get_record_stream() returns.
    """
    # storage_kind is part of the call signature but is not consulted by
    # the visible body; the generator is built from bytes and line_end.
    return _NetworkContentMapGenerator(bytes, line_end).get_record_stream()
364
def knit_network_to_record(storage_kind, bytes, line_end):
365
"""Convert a network record to a record object.
367
:param storage_kind: The storage kind of the record.
368
:param bytes: The bytes of the record on the network.
371
line_end = bytes.find('\n', start)
372
key = tuple(bytes[start:line_end].split('\x00'))
374
line_end = bytes.find('\n', start)
375
parent_line = bytes[start:line_end]
376
if parent_line == 'None:':
380
[tuple(segment.split('\x00')) for segment in parent_line.split('\t')
383
noeol = bytes[start] == 'N'
384
if 'ft' in storage_kind:
387
method = 'line-delta'
388
build_details = (method, noeol)
390
raw_record = bytes[start:]
391
annotated = 'annotated' in storage_kind
392
return [KnitContentFactory(key, parents, build_details, None, raw_record,
393
annotated, network_bytes=bytes)]
290
396
class KnitContent(object):
291
397
"""Content of a knit version to which deltas can be applied.
293
399
This is always stored in memory as a list of lines with \n at the end,
294
plus a flag saying if the final ending is really there or not, because that
400
plus a flag saying if the final ending is really there or not, because that
295
401
corresponds to the on-disk knit representation.
986
1092
if not self.get_parent_map([key]):
987
1093
raise RevisionNotPresent(key, self)
988
1094
return cached_version
989
text_map, contents_map = self._get_content_maps([key])
990
return contents_map[key]
992
def _get_content_maps(self, keys, nonlocal_keys=None):
993
"""Produce maps of text and KnitContents
995
:param keys: The keys to produce content maps for.
996
:param nonlocal_keys: An iterable of keys (possibly intersecting keys)
997
which are known to not be in this knit, but rather in one of the
999
:return: (text_map, content_map) where text_map contains the texts for
1000
the requested versions and content_map contains the KnitContents.
1002
# FUTURE: This function could be improved for the 'extract many' case
1003
# by tracking each component and only doing the copy when the number of
1004
# children that need to apply deltas to it is > 1 or it is part of the
1007
multiple_versions = len(keys) != 1
1008
record_map = self._get_record_map(keys, allow_missing=True)
1013
if nonlocal_keys is None:
1014
nonlocal_keys = set()
1016
nonlocal_keys = frozenset(nonlocal_keys)
1017
missing_keys = set(nonlocal_keys)
1018
for source in self._fallback_vfs:
1019
if not missing_keys:
1021
for record in source.get_record_stream(missing_keys,
1023
if record.storage_kind == 'absent':
1025
missing_keys.remove(record.key)
1026
lines = osutils.chunks_to_lines(record.get_bytes_as('chunked'))
1027
text_map[record.key] = lines
1028
content_map[record.key] = PlainKnitContent(lines, record.key)
1029
if record.key in keys:
1030
final_content[record.key] = content_map[record.key]
1032
if key in nonlocal_keys:
1037
while cursor is not None:
1039
record, record_details, digest, next = record_map[cursor]
1041
raise RevisionNotPresent(cursor, self)
1042
components.append((cursor, record, record_details, digest))
1044
if cursor in content_map:
1045
# no need to plan further back
1046
components.append((cursor, None, None, None))
1050
for (component_id, record, record_details,
1051
digest) in reversed(components):
1052
if component_id in content_map:
1053
content = content_map[component_id]
1055
content, delta = self._factory.parse_record(key[-1],
1056
record, record_details, content,
1057
copy_base_content=multiple_versions)
1058
if multiple_versions:
1059
content_map[component_id] = content
1061
final_content[key] = content
1063
# digest here is the digest from the last applied component.
1064
text = content.text()
1065
actual_sha = sha_strings(text)
1066
if actual_sha != digest:
1067
raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
1068
text_map[key] = text
1069
return text_map, final_content
1095
generator = _VFContentMapGenerator(self, [key])
1096
return generator._get_content(key)
1071
1098
def get_parent_map(self, keys):
1072
1099
"""Get a map of the graph parents of keys.
1846
class _ContentMapGenerator(object):
1847
"""Generate texts or expose raw deltas for a set of texts."""
1849
def _get_content(self, key):
    """Return the content object for key.

    Only called when this _ContentMapGenerator was constructed with a
    single key requested for reconstruction.
    """
    if key not in self.nonlocal_keys:
        # Local keys can be asked for directly.
        return self._get_one_work(key)
    # Non-local key: take the first record from the stream and build a
    # content object from its chunked text on the fly.
    stream_record = self.get_record_stream().next()
    chunked = stream_record.get_bytes_as('chunked')
    return PlainKnitContent(osutils.chunks_to_lines(chunked),
        stream_record.key)
1862
def get_record_stream(self):
1863
"""Get a record stream for the keys requested during __init__."""
1864
for record in self._work():
1868
"""Produce maps of text and KnitContents as dicts.
1870
:return: (text_map, content_map) where text_map contains the texts for
1871
the requested versions and content_map contains the KnitContents.
1873
# NB: By definition we never need to read remote sources unless texts
1874
# are requested from them: we don't delta across stores - and we
1875
# explicitly do not want to, to prevent data loss situations.
1876
if self.global_map is None:
1877
self.global_map = self.vf.get_parent_map(self.keys)
1878
nonlocal_keys = self.nonlocal_keys
1880
missing_keys = set(nonlocal_keys)
1881
# Read from remote versioned file instances and provide to our caller.
1882
for source in self.vf._fallback_vfs:
1883
if not missing_keys:
1885
# Loop over fallback repositories asking them for texts - ignore
1886
# any missing from a particular fallback.
1887
for record in source.get_record_stream(missing_keys,
1889
if record.storage_kind == 'absent':
1890
# Not in this particular stream, may be in one of the
1891
# other fallback vfs objects.
1893
missing_keys.remove(record.key)
1896
self._raw_record_map = self.vf._get_record_map_unparsed(self.keys,
1899
for key in self.keys:
1900
if key in self.nonlocal_keys:
1902
yield LazyKnitContentFactory(key, self.global_map[key], self, first)
1905
def _get_one_work(self, requested_key):
1906
# Now, if we have calculated everything already, just return the
1908
if requested_key in self._contents_map:
1909
return self._contents_map[requested_key]
1910
# To simplify things, parse everything at once - code that wants one text
1911
# probably wants them all.
1912
# FUTURE: This function could be improved for the 'extract many' case
1913
# by tracking each component and only doing the copy when the number of
1914
# children that need to apply deltas to it is > 1 or it is part of the
1916
multiple_versions = len(self.keys) != 1
1917
if self._record_map is None:
1918
self._record_map = self.vf._raw_map_to_record_map(
1919
self._raw_record_map)
1920
record_map = self._record_map
1921
# raw_record_map is key:
1922
# Have read and parsed records at this point.
1923
for key in self.keys:
1924
if key in self.nonlocal_keys:
1929
while cursor is not None:
1931
record, record_details, digest, next = record_map[cursor]
1933
raise RevisionNotPresent(cursor, self)
1934
components.append((cursor, record, record_details, digest))
1936
if cursor in self._contents_map:
1937
# no need to plan further back
1938
components.append((cursor, None, None, None))
1942
for (component_id, record, record_details,
1943
digest) in reversed(components):
1944
if component_id in self._contents_map:
1945
content = self._contents_map[component_id]
1947
content, delta = self._factory.parse_record(key[-1],
1948
record, record_details, content,
1949
copy_base_content=multiple_versions)
1950
if multiple_versions:
1951
self._contents_map[component_id] = content
1953
# digest here is the digest from the last applied component.
1954
text = content.text()
1955
actual_sha = sha_strings(text)
1956
if actual_sha != digest:
1957
raise SHA1KnitCorrupt(self, actual_sha, digest, key, text)
1958
if multiple_versions:
1959
return self._contents_map[requested_key]
1963
def _wire_bytes(self):
1964
"""Get the bytes to put on the wire for 'key'.
1966
The first collection of bytes asked for returns the serialised
1967
raw_record_map and the additional details (key, parent) for key.
1968
Subsequent calls return just the additional details (key, parent).
1969
The wire storage_kind given for the first key is 'knit-delta-closure',
1970
For subsequent keys it is 'knit-delta-closure-ref'.
1972
:param key: A key from the content generator.
1973
:return: Bytes to put on the wire.
1976
# kind marker for dispatch on the far side,
1977
lines.append('knit-delta-closure')
1979
if self.vf._factory.annotated:
1980
lines.append('annotated')
1983
# then the list of keys
1984
lines.append('\t'.join(['\x00'.join(key) for key in self.keys
1985
if key not in self.nonlocal_keys]))
1986
# then the _raw_record_map in serialised form:
1988
# for each item in the map:
1990
# 1 line with parents if the key is to be yielded (None: for None, '' for ())
1991
# one line with method
1992
# one line with noeol
1993
# one line with next ('' for None)
1994
# one line with byte count of the record bytes
1996
for key, (record_bytes, (method, noeol), next) in \
1997
self._raw_record_map.iteritems():
1998
key_bytes = '\x00'.join(key)
1999
parents = self.global_map.get(key, None)
2001
parent_bytes = 'None:'
2003
parent_bytes = '\t'.join('\x00'.join(key) for key in parents)
2004
method_bytes = method
2010
next_bytes = '\x00'.join(next)
2013
map_byte_list.append('%s\n%s\n%s\n%s\n%s\n%d\n%s' % (
2014
key_bytes, parent_bytes, method_bytes, noeol_bytes, next_bytes,
2015
len(record_bytes), record_bytes))
2016
map_bytes = ''.join(map_byte_list)
2017
lines.append(map_bytes)
2018
bytes = '\n'.join(lines)
2022
class _VFContentMapGenerator(_ContentMapGenerator):
2023
"""Content map generator reading from a VersionedFiles object."""
2025
def __init__(self, versioned_files, keys, nonlocal_keys=None,
2026
global_map=None, raw_record_map=None):
2027
"""Create a _ContentMapGenerator.
2029
:param versioned_files: The versioned files that the texts are being
2031
:param keys: The keys to produce content maps for.
2032
:param nonlocal_keys: An iterable of keys (possibly intersecting keys)
2033
which are known to not be in this knit, but rather in one of the
2035
:param global_map: The result of get_parent_map(keys) (or a supermap).
2036
This is required if get_record_stream() is to be used.
2037
:param raw_record_map: An unparsed raw record map to use for answering
2040
# The vf to source data from
2041
self.vf = versioned_files
2043
self.keys = list(keys)
2044
# Keys known to be in fallback vfs objects
2045
if nonlocal_keys is None:
2046
self.nonlocal_keys = set()
2048
self.nonlocal_keys = frozenset(nonlocal_keys)
2049
# Parents data for keys to be returned in get_record_stream
2050
self.global_map = global_map
2051
# The chunked lists for self.keys in text form
2053
# A cache of KnitContent objects used in extracting texts.
2054
self._contents_map = {}
2055
# All the knit records needed to assemble the requested keys as full
2057
self._record_map = None
2058
if raw_record_map is None:
2059
self._raw_record_map = self.vf._get_record_map_unparsed(keys,
2062
self._raw_record_map = raw_record_map
2063
# the factory for parsing records
2064
self._factory = self.vf._factory
2067
class _NetworkContentMapGenerator(_ContentMapGenerator):
2068
"""Content map generator sourced from a network stream."""
2070
def __init__(self, bytes, line_end):
2071
"""Construct a _NetworkContentMapGenerator from a bytes block."""
2073
self.global_map = {}
2074
self._raw_record_map = {}
2075
self._contents_map = {}
2076
self._record_map = None
2077
self.nonlocal_keys = []
2078
# Get access to record parsing facilities
2079
self.vf = KnitVersionedFiles(None, None)
2082
line_end = bytes.find('\n', start)
2083
line = bytes[start:line_end]
2084
start = line_end + 1
2085
if line == 'annotated':
2086
self._factory = KnitAnnotateFactory()
2088
self._factory = KnitPlainFactory()
2089
# list of keys to emit in get_record_stream
2090
line_end = bytes.find('\n', start)
2091
line = bytes[start:line_end]
2092
start = line_end + 1
2094
tuple(segment.split('\x00')) for segment in line.split('\t')
2096
# now a loop until the end. XXX: It would be nice if this was just a
2097
# bunch of the same records as get_record_stream(..., False) gives, but
2098
# there is a decent sized gap stopping that at the moment.
2102
line_end = bytes.find('\n', start)
2103
key = tuple(bytes[start:line_end].split('\x00'))
2104
start = line_end + 1
2105
# 1 line with parents (None: for None, '' for ())
2106
line_end = bytes.find('\n', start)
2107
line = bytes[start:line_end]
2112
[tuple(segment.split('\x00')) for segment in line.split('\t')
2114
self.global_map[key] = parents
2115
start = line_end + 1
2116
# one line with method
2117
line_end = bytes.find('\n', start)
2118
line = bytes[start:line_end]
2120
start = line_end + 1
2121
# one line with noeol
2122
line_end = bytes.find('\n', start)
2123
line = bytes[start:line_end]
2125
start = line_end + 1
2126
# one line with next ('' for None)
2127
line_end = bytes.find('\n', start)
2128
line = bytes[start:line_end]
2132
next = tuple(bytes[start:line_end].split('\x00'))
2133
start = line_end + 1
2134
# one line with byte count of the record bytes
2135
line_end = bytes.find('\n', start)
2136
line = bytes[start:line_end]
2138
start = line_end + 1
2140
record_bytes = bytes[start:start+count]
2141
start = start + count
2143
self._raw_record_map[key] = (record_bytes, (method, noeol), next)
2145
def get_record_stream(self):
2146
"""Get a record stream for the keys requested by the bytestream."""
2148
for key in self.keys:
2149
yield LazyKnitContentFactory(key, self.global_map[key], self, first)
2152
def _wire_bytes(self):
1769
2156
class _KndxIndex(object):
1770
2157
"""Manages knit index files