~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2007-11-30 03:07:01 UTC
mto: (3053.3.2 integrate-1.0)
mto: This revision was merged to the branch mainline in revision 3059.
Revision ID: robertc@robertcollins.net-20071130030701-r0wm01t0a8qx29gk

Handle insert_data_stream of an unannotated stream into an annotated knit.

files modified:
NEWS

bzrlib/knit.py

bzrlib/tests/test_knit.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

805

factory = KnitAnnotateFactory()

806

else:

807

raise errors.KnitDataStreamUnknown(format)

808

access = _StreamAccess(reader_callable)

809

808

index = _StreamIndex(data_list)

809

access = _StreamAccess(reader_callable, index, self, factory)

810

return KnitVersionedFile(self.filename, self.transport,

811

factory=factory, index=index, access_method=access)

812

2046

2047

2048

class _StreamAccess(object):

2049

"""A Knit Access object that provides data from a datastream."""

2050

2051

def __init__(self, reader_callable):

2049

"""A Knit Access object that provides data from a datastream.

2050

2051

It also provides a fallback to present as unannotated data, annotated data

2052

from a *backing* access object.

2053

2054

This is triggered by an index_memo which is pointing to a different index

2055

than this was constructed with, and is used to allow extracting full

2056

unannotated texts for insertion into annotated knits.

2057

"""

2058

2059

def __init__(self, reader_callable, stream_index, backing_knit,

2060

orig_factory):

2052

2061

"""Create a _StreamAccess object.

2053

2062

2054

2063

:param reader_callable: The reader_callable from the datastream.

2055

2064

This is called to buffer all the data immediately, for

2056

2065

random access.

2066

:param stream_index: The index for the data stream this provides access to,

2067

which will be present in native index_memo's.

2068

:param backing_knit: The knit object that will provide access to

2069

annotated texts which are not available in the stream, so as to

2070

create unannotated texts.

2071

:param orig_factory: The original content factory used to generate the

2072

stream. This is used for checking whether the thunk code for

2073

supporting _copy_texts will generate the correct form of data.

2057

2074

"""

2058

2075

self.data = reader_callable(None)

2076

self.stream_index = stream_index

2077

self.backing_knit = backing_knit

2078

self.orig_factory = orig_factory

2059

2079

2060

2080

def get_raw_records(self, memos_for_retrieval):

2061

2081

"""Get the raw bytes for the records.

2066

2086

:return: An iterator over the bytes of the records.

2067

2087

"""

2068

2088

# use a generator for memory friendliness

2069

for _, start, end in memos_for_retrieval:

2070

yield self.data[start:end]

2089

for index, start, end in memos_for_retrieval:

2090

if index is self.stream_index:

2091

yield self.data[start:end]

2092

continue

2093

# we have been asked to thunk. This thunking only occurs when

2094

# we are obtaining plain texts from an annotated backing knit

2095

# so that _copy_texts will work.

2096

# We could improve performance here by scanning for where we need

2097

# to do this and using get_line_list, then interleaving the output

2098

# as desired. However, for now, this is sufficient.

2099

if (index[:6] != 'thunk:' or

2100

self.orig_factory.__class__ != KnitPlainFactory):

2101

raise errors.KnitCorrupt(self, 'Bad thunk request %r' % index)

2102

version_id = index[6:]

2103

lines = self.backing_knit.get_lines(version_id)

2104

line_bytes = ''.join(lines)

2105

digest = sha_string(line_bytes)

2106

if lines:

2107

if lines[-1][-1] != '\n':

2108

lines[-1] = lines[-1] + '\n'

2109

line_bytes += '\n'

2110

orig_options = list(self.backing_knit._index.get_options(version_id))

2111

if 'fulltext' not in orig_options:

2112

if 'line-delta' not in orig_options:

2113

raise errors.KnitCorrupt(self,

2114

'Unknown compression method %r' % orig_options)

2115

orig_options.remove('line-delta')

2116

orig_options.append('fulltext')

2117

# We want plain data, because we expect to thunk only to allow text

2118

# extraction.

2119

size, bytes = self.backing_knit._data._record_to_data(version_id,

2120

digest, lines, line_bytes)

2121

yield bytes

2071

2122

2072

2123

2073

2124

class _StreamIndex(object):

2108

2159

graph[version] = parents

2109

2160

return graph.keys()

2110

2161

2162

def get_method(self, version_id):

2163

"""Return compression method of specified version."""

2164

try:

2165

options = self._by_version[version_id][0]

2166

except KeyError:

2167

# Strictly speaking this should check in the backing knit, but

2168

# until we have a test to discriminate, this will do.

2169

return 'fulltext'

2170

if 'fulltext' in options:

2171

return 'fulltext'

2172

elif 'line-delta' in options:

2173

return 'line-delta'

2174

else:

2175

raise errors.KnitIndexUnknownMethod(self, options)

2176

2111

2177

def get_options(self, version_id):

2112

2178

"""Return a string representing options.

2113

2179

2128

2194

2129

2195

:return: a tuple (None, start, end).

2130

2196

"""

2131

start, end = self._by_version[version_id][1]

2132

return None, start, end

2197

try:

2198

start, end = self._by_version[version_id][1]

2199

return self, start, end

2200

except KeyError:

2201

# Signal to the access object to handle this from the backing knit.

2202

return ('thunk:%s' % version_id, None, None)

2133

2203

2134

2204

def get_versions(self):

2135

2205

"""Get all the versions in the stream."""

2144

2214

The order is undefined, allowing for different optimisations in

2145

2215

the underlying implementation.

2146

2216

"""

2147

return [(version, self._by_version[version][2]) for

2148

version in version_ids]

2217

result = []

2218

for version in version_ids:

2219

try:

2220

result.append((version, self._by_version[version][2]))

2221

except KeyError:

2222

pass

2223

return result

2149

2224

2150

2225

2151

2226

class _KnitData(object):

2408

2483

for index, version in enumerate(to_process):

2409

2484

pb.update('Converting versioned data', index, total)

2410

2485

sha1, num_bytes, parent_text = self.target.add_lines(version,

2411

self.source.get_parents(version),

2486

self.source.get_parents_with_ghosts(version),

2412

2487

self.source.get_lines(version),

2413

2488

parent_texts=parent_cache)

2414

2489

parent_cache[version] = parent_text

Older »