2506.2.1
by Andrew Bennetts
Start implementing container format reading and writing. |
1 |
# Copyright (C) 2007 Canonical Ltd
|
2 |
#
|
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
16 |
||
17 |
"""Container format for Bazaar data.
|
|
18 |
||
19 |
"Containers" and "records" are described in doc/developers/container-format.txt.
|
|
20 |
"""
|
|
21 |
||
2661.2.2
by Robert Collins
* ``bzrlib.pack.make_readv_reader`` allows readv based access to pack |
22 |
from cStringIO import StringIO |
2506.5.2
by Andrew Bennetts
Raise InvalidRecordError on invalid names. |
23 |
import re |
24 |
||
2506.2.1
by Andrew Bennetts
Start implementing container format reading and writing. |
25 |
from bzrlib import errors |
26 |
||
27 |
||
2506.2.10
by Andrew Bennetts
Add '(introduced in 0.18)' to pack format string. |
28 |
FORMAT_ONE = "Bazaar pack format 1 (introduced in 0.18)" |
2506.2.1
by Andrew Bennetts
Start implementing container format reading and writing. |
29 |
|
30 |
||
2506.5.2
by Andrew Bennetts
Raise InvalidRecordError on invalid names. |
31 |
_whitespace_re = re.compile('[\t\n\x0b\x0c\r ]') |
32 |
||
33 |
||
34 |
def _check_name(name):
    """Perform basic validation of a record name.

    Currently the only rule enforced is that the name contains none of the
    whitespace characters matched by ``_whitespace_re``.

    :param name: the record name to check.
    :raises InvalidRecordError: if name is not valid.
    :seealso: _check_name_encoding
    """
    if _whitespace_re.search(name) is None:
        # No whitespace found: the name is acceptable.
        return
    raise errors.InvalidRecordError("%r is not a valid name." % (name,))
|
45 |
||
46 |
||
2506.6.1
by Andrew Bennetts
Return a callable instead of a str from read, and add more validation. |
47 |
def _check_name_encoding(name):
    """Verify that 'name' decodes cleanly as UTF-8.

    This lives apart from _check_name because UTF-8 decoding is relatively
    expensive, and most callers want to avoid paying for it.

    :param name: the (encoded) record name to check.
    :raises InvalidRecordError: if name is not valid UTF-8.
    """
    try:
        # Only the success or failure of the decode matters; the decoded
        # value itself is discarded.
        name.decode('utf-8')
    except UnicodeDecodeError as decode_error:
        raise errors.InvalidRecordError(str(decode_error))
|
59 |
||
60 |
||
2506.3.1
by Andrew Bennetts
More progress: |
61 |
class ContainerWriter(object):
    """A class for writing containers.

    :ivar current_offset: the total number of bytes passed to the wrapped
        write callable so far, i.e. the offset at which the next byte
        written through this object will land.
    """

    def __init__(self, write_func):
        """Constructor.

        :param write_func: a callable that will be called when this
            ContainerWriter needs to write some bytes.
        """
        self._write_func = write_func
        self.current_offset = 0

    def begin(self):
        """Begin writing a container."""
        self.write_func(FORMAT_ONE + "\n")

    def write_func(self, bytes):
        """Write bytes via the wrapped callable and advance current_offset.

        :param bytes: the string to write.
        """
        self._write_func(bytes)
        self.current_offset += len(bytes)

    def end(self):
        """Finish writing a container."""
        self.write_func("E")

    def add_bytes_record(self, bytes, names):
        """Add a Bytes record with the given names.

        All names are validated before anything is written, so an invalid
        name never leaves a half-written record in the output and never
        advances ``current_offset``.

        :param bytes: The bytes to insert.
        :param names: The names to give the inserted bytes.
        :return: An offset, length tuple. The offset is the offset
            of the record within the container, and the length is the
            length of data that will need to be read to reconstitute the
            record. These offset and length can only be used with the pack
            interface - they might be offset by headers or other such details
            and thus are only suitable for use by a ContainerReader.
        :raises InvalidRecordError: if any of the names is not valid.
        """
        # Materialise the names first: they are walked twice (once to
        # validate, once to write), which a one-shot iterable would not
        # survive.
        names = list(names)
        for name in names:
            _check_name(name)
        current_offset = self.current_offset
        # Kind marker
        self.write_func("B")
        # Length
        self.write_func(str(len(bytes)) + "\n")
        # Names
        for name in names:
            self.write_func(name + "\n")
        # End of headers
        self.write_func("\n")
        # Finally, the contents.
        self.write_func(bytes)
        # return a memo of where we wrote data to allow random access.
        return current_offset, self.current_offset - current_offset
|
2506.3.1
by Andrew Bennetts
More progress: |
114 |
|
115 |
||
2661.2.2
by Robert Collins
* ``bzrlib.pack.make_readv_reader`` allows readv based access to pack |
116 |
class ReadVFile(object):
    """Adapt a readv result iterator to a file like protocol.

    Reads never span two readv hunks: each ``read`` or ``readline`` call is
    served entirely from the current hunk.
    """

    def __init__(self, readv_result):
        """Constructor.

        :param readv_result: an iterator (with a ``next`` method) yielding
            2-tuples whose second element is the data of one readv hunk.
        """
        self.readv_result = readv_result
        # the most recent readv result block
        self._string = None
        # length of the data held by self._string
        self._string_length = 0

    def _next(self):
        """Move on to the next readv hunk if the current one is used up.

        Whatever ``readv_result.next()`` raises when it has no more hunks
        propagates to the caller.
        """
        if (self._string is None or
            self._string.tell() == self._string_length):
            # Only the data half of each pair is needed here.
            _unused, data = self.readv_result.next()
            self._string_length = len(data)
            self._string = StringIO(data)

    def read(self, length):
        """Read exactly ``length`` bytes from the current hunk.

        :param length: the number of bytes wanted.
        :raises BzrError: if the current hunk holds fewer than ``length``
            unread bytes.
        """
        self._next()
        result = self._string.read(length)
        if len(result) < length:
            raise errors.BzrError('request for too much data from a readv hunk.')
        return result

    def readline(self):
        """Note that readline will not cross readv segments.

        :raises BzrError: if the current hunk ends before a newline is
            found (including when the hunk is empty).
        """
        self._next()
        result = self._string.readline()
        # endswith, not result[-1]: an empty hunk yields '' here, and
        # indexing it would raise IndexError instead of reporting the short
        # read.
        if (self._string.tell() == self._string_length
            and not result.endswith('\n')):
            raise errors.BzrError('short readline in the readvfile hunk.')
        return result
|
145 |
||
146 |
||
147 |
def make_readv_reader(transport, filename, requested_records):
    """Build a ContainerReader that reads only the selected records.

    :param transport: The transport the pack file is located on.
    :param filename: The filename of the pack file.
    :param requested_records: The record offset, length tuples as returned
        by add_bytes_record for the desired records.
    :return: a ContainerReader wrapping a ReadVFile over the readv result.
    """
    # The format line (and its trailing newline) is always requested first,
    # ahead of the caller's records.
    header_block = (0, len(FORMAT_ONE) + 1)
    blocks = [header_block] + list(requested_records)
    hunks = transport.readv(filename, blocks)
    return ContainerReader(ReadVFile(hunks))
|
160 |
||
161 |
||
2506.3.1
by Andrew Bennetts
More progress: |
162 |
class BaseReader(object):
    """Shared plumbing for readers that pull from a file-like source."""

    def __init__(self, source_file):
        """Constructor.

        :param source_file: a file-like object with `read` and `readline`
            methods.
        """
        self._source = source_file

    def reader_func(self, length=None):
        """Return the result of calling ``read(length)`` on the source.

        :param length: passed straight through to the source's ``read``.
        """
        source = self._source
        return source.read(length)

    def _read_line(self):
        """Read one line from the source and return it without its newline.

        :raises UnexpectedEndOfContainerError: if the line read does not end
            with a newline.
        """
        text = self._source.readline()
        if text.endswith('\n'):
            return text.rstrip('\n')
        raise errors.UnexpectedEndOfContainerError()
|
2506.3.1
by Andrew Bennetts
More progress: |
180 |
|
181 |
||
182 |
class ContainerReader(BaseReader):
    """A class for reading Bazaar's container format."""

    def iter_records(self):
        """Iterate over the container, yielding each record as it is read.

        The format line is read and checked immediately, before the iterator
        is returned.

        Each yielded record will be a 2-tuple of (names, callable), where
        names is a ``list`` and callable is a function that takes one
        argument, ``max_length``.

        You **must not** call the callable after advancing the iterator to
        the next record.  That is, this code is invalid::

            record_iter = container.iter_records()
            names1, callable1 = record_iter.next()
            names2, callable2 = record_iter.next()
            bytes1 = callable1(None)

        As it will give incorrect results and invalidate the state of the
        ContainerReader.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: BytesRecordReader.read
        """
        self._read_format()
        return self._iter_records()

    def iter_record_objects(self):
        """Iterate over the container, yielding each record as it is read.

        The format line is read and checked immediately, before the iterator
        is returned.

        Each yielded record will be an object with ``read`` and ``validate``
        methods.  Like with iter_records, it is not safe to use a record
        object after advancing the iterator to yield the next record.

        :raises ContainerError: if any sort of container corruption is
            detected, e.g. UnknownContainerFormatError if the format of the
            container is unrecognised.
        :seealso: iter_records
        """
        self._read_format()
        return self._iter_record_objects()

    def _iter_records(self):
        """Yield the result of ``read()`` for each record object in turn."""
        for record_object in self._iter_record_objects():
            yield record_object.read()

    def _iter_record_objects(self):
        """Yield a record reader for each record until the end marker.

        :raises UnexpectedEndOfContainerError: if the stream ends before an
            End Marker record is seen.
        :raises UnknownRecordTypeError: if a record kind other than 'B' or
            'E' is found.
        """
        while True:
            kind = self.reader_func(1)
            if kind == 'E':
                # End marker.  There are no more records.
                return
            if kind == 'B':
                # Bytes record.
                yield BytesRecordReader(self._source)
                continue
            if kind == '':
                # End of stream encountered, but no End Marker record seen,
                # so this container is incomplete.
                raise errors.UnexpectedEndOfContainerError()
            # Unknown record type.
            raise errors.UnknownRecordTypeError(kind)

    def _read_format(self):
        """Read the format line and check that it is FORMAT_ONE.

        :raises UnknownContainerFormatError: if the line is anything else.
        """
        format_line = self._read_line()
        if format_line == FORMAT_ONE:
            return
        raise errors.UnknownContainerFormatError(format_line)

    def validate(self):
        """Validate this container and its records.

        Validating consumes the data stream just like iter_records and
        iter_record_objects, so you cannot call it after
        iter_records/iter_record_objects.

        :raises ContainerError: if something is invalid.
        """
        seen_names = set()
        for names, read_content in self.iter_records():
            # Consume the whole body of the record.
            read_content(None)
            for name in names:
                _check_name_encoding(name)
                # Check that the name is unique.  Note that Python will
                # refuse to decode non-shortest forms of UTF-8 encoding, so
                # there is no risk that the same unicode string has been
                # encoded two different ways.
                if name in seen_names:
                    raise errors.DuplicateRecordNameError(name)
                seen_names.add(name)
        # Nothing may follow the end marker.
        trailing = self.reader_func(1)
        if trailing != '':
            raise errors.ContainerHasExcessDataError(trailing)
|
277 |
||
2506.3.1
by Andrew Bennetts
More progress: |
278 |
|
279 |
class BytesRecordReader(BaseReader):
    """Reader for one Bytes record.

    Reading starts at the record's length line, i.e. ContainerReader has
    already consumed the one-character kind marker before constructing this
    object.
    """

    def read(self):
        """Read this record.

        You can either validate or read a record, you can't do both.

        :returns: A tuple of (names, callable).  The callable can be called
            repeatedly to obtain the bytes for the record, with a max_length
            argument.  If max_length is None, returns all the bytes.  Because
            records can be arbitrarily large, using None is not recommended
            unless you have reason to believe the content will fit in memory.
        :raises InvalidRecordError: if the length line is not a non-negative
            integer, or if a name is not valid.
        :raises UnexpectedEndOfContainerError: if a header line is not
            terminated by a newline.
        """
        # Read the content length.
        length_line = self._read_line()
        try:
            length = int(length_line)
        except ValueError:
            length = -1
        # A negative length must be rejected here: passed on to the source's
        # read() it would mean "read everything up to EOF" rather than
        # failing cleanly.
        if length < 0:
            raise errors.InvalidRecordError(
                "%r is not a valid length." % (length_line,))

        # Read the list of names.
        names = []
        while True:
            name = self._read_line()
            if name == '':
                # An empty line terminates the headers.
                break
            _check_name(name)
            names.append(name)

        self._remaining_length = length
        return names, self._content_reader

    def _content_reader(self, max_length):
        """Return the next chunk of this record's content.

        :param max_length: the most bytes to return, or None for all of the
            bytes not yet returned.
        :raises UnexpectedEndOfContainerError: if the source supplies fewer
            bytes than were asked for.
        """
        if max_length is None:
            length_to_read = self._remaining_length
        else:
            length_to_read = min(max_length, self._remaining_length)
        self._remaining_length -= length_to_read
        bytes = self.reader_func(length_to_read)
        if len(bytes) != length_to_read:
            raise errors.UnexpectedEndOfContainerError()
        return bytes

    def validate(self):
        """Validate this record.

        You can either validate or read, you can't do both.

        :raises ContainerError: if this record is invalid.
        """
        names, read_bytes = self.read()
        for name in names:
            _check_name_encoding(name)
        # Consume the body so that truncated content is detected.
        read_bytes(None)
|
334 |