1
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Serializer object for CHK based inventory storage."""
19
from __future__ import absolute_import
21
from cStringIO import StringIO
23
from bzrlib import lazy_import
24
lazy_import.lazy_import(globals(),
34
revision as _mod_revision,
39
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode every property value to Unicode, in place.

    :param props: dict of revision properties whose values are utf8 strings;
        each value is replaced with its decoded form.
    :param _decode: decoder called on each value; only item ``[0]`` of its
        result is kept.
    :return: ``props`` itself. Schema validators have their return value
        stored as the parsed value, so the dict must be handed back.
    """
    # TODO: we really want an 'isascii' check for key
    # Only existing keys are rebound, so the dict does not change size while
    # it is being iterated.
    for key, value in props.iteritems():
        props[key] = _decode(value)[0]
    return props
47
def _is_format_10(value):
    """Validate the 'format' entry of a serialized revision.

    :param value: the decoded format number (an int).
    :return: 10, the only format number this serializer accepts.
    :raises ValueError: if ``value`` is anything other than 10.
    """
    if value != 10:
        raise ValueError('Format number was not recognized, expected 10 got %d'
                         % (value,))
    return 10
54
class BEncodeRevisionSerializer1(object):
    """Simple revision serializer based around bencode.

    A revision is written as a bencoded list of ``(key, value)`` pairs and is
    read back by checking every pair against ``_schema``.
    """

    squashes_xml_invalid_characters = False

    # Maps {key:(Revision attribute, bencode_type, validator)}
    # This tells us what kind we expect bdecode to create, what variable on
    # Revision we should be using, and a function to call to validate/transform
    # the decoded value (None when the value is used as-is).
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {'format': (None, int, _is_format_10),
               'committer': ('committer', str, cache_utf8.decode),
               'timezone': ('timezone', int, None),
               'timestamp': ('timestamp', str, float),
               'revision-id': ('revision_id', str, None),
               'parent-ids': ('parent_ids', list, None),
               'inventory-sha1': ('inventory_sha1', str, None),
               'message': ('message', str, cache_utf8.decode),
               'properties': ('properties', dict, _validate_properties),
               }

    def write_revision_to_string(self, rev):
        """Serialize ``rev`` and return the bencoded text.

        :param rev: object providing committer, timezone, properties,
            timestamp, revision_id, parent_ids, inventory_sha1 and message.
        :return: whatever ``bencode.bencode`` produces for the pair list.
        """
        encode_utf8 = cache_utf8._utf8_encode
        # Use a list of tuples rather than a dict
        # This lets us control the ordering of the entries in the output.
        # 'format' has to be written: read_revision_from_string() rejects any
        # text in which a schema key other than 'timezone' is absent.
        ret = [
            ("format", 10),
            ("committer", encode_utf8(rev.committer)[0]),
        ]
        if rev.timezone is not None:
            ret.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        revprops = {}
        for key, value in rev.properties.iteritems():
            revprops[key] = encode_utf8(value)[0]
        ret.append(('properties', revprops))
        ret.extend([
            # The timestamp is stored as text with millisecond precision; the
            # schema converts it back with float().
            ("timestamp", "%.3f" % rev.timestamp),
            ("revision-id", rev.revision_id),
            ("parent-ids", rev.parent_ids),
            ("inventory-sha1", rev.inventory_sha1),
            ("message", encode_utf8(rev.message)[0]),
        ])
        return bencode.bencode(ret)

    def write_revision(self, rev, f):
        """Serialize ``rev`` and write the resulting text to ``f``."""
        f.write(self.write_revision_to_string(rev))

    def read_revision_from_string(self, text):
        """Parse bencoded revision ``text`` into a Revision.

        :param text: serialized revision, as produced by
            write_revision_to_string().
        :return: a ``_mod_revision.Revision`` built from the decoded entries.
        :raises ValueError: if the decoded text is not a list, a value is not
            exactly of the type the schema expects, or expected keys are
            missing.
        :raises KeyError: if an entry's key is not part of ``_schema``.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about 1.2s.
        #       Of that time, probably 0.6s is spent in bencode.bdecode().
        #       Regardless 'time bzr log' of everything is 7+s, so 1.3s to
        #       extract revision texts isn't a majority of time.
        ret = bencode.bdecode(text)
        if not isinstance(ret, list):
            raise ValueError("invalid revision text")
        schema = self._schema
        # timezone is allowed to be missing, but should be set
        bits = {'timezone': None}
        for key, value in ret:
            # Will raise KeyError if not a valid part of the schema. A key
            # given twice is not detected here; the later value wins.
            var_name, expected_type, validator = schema[key]
            if value.__class__ is not expected_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, expected_type, type(value)))
            if validator is not None:
                value = validator(value)
            bits[var_name] = value
        # 'timezone' is pre-seeded, so equal sizes mean every other schema key
        # (including 'format', stored under None) was present.
        if len(bits) != len(schema):
            missing = [key for key, (var_name, _, _) in schema.iteritems()
                       if var_name not in bits]
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (missing, text))
        del bits[None]  # Get rid of 'format' since it doesn't get mapped
        rev = _mod_revision.Revision(**bits)
        return rev

    def read_revision(self, f):
        """Read all of ``f`` and parse it as a serialized revision."""
        return self.read_revision_from_string(f.read())
144
class CHKSerializer(serializer.Serializer):
145
"""A CHKInventory based serializer with 'plain' behaviour."""
148
revision_format_num = None
149
support_altered_by_hack = False
150
supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])
152
def __init__(self, node_size, search_key_name):
    """Record the CHK page parameters on the serializer.

    :param node_size: stored as ``self.maximum_size``.
    :param search_key_name: stored as ``self.search_key_name``.
    """
    self.maximum_size, self.search_key_name = node_size, search_key_name
156
def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
157
return_from_cache=False):
158
"""Construct from XML Element"""
159
inv = xml_serializer.unpack_inventory_flat(elt, self.format_num,
160
xml_serializer.unpack_inventory_entry, entry_cache,
164
def read_inventory_from_string(self, xml_string, revision_id=None,
                               entry_cache=None, return_from_cache=False):
    """Read xml_string into an inventory object.

    :param xml_string: The xml to read.
    :param revision_id: If not-None, the expected revision id; it is passed
        through to ``_unpack_inventory``.
    :param entry_cache: An optional cache of InventoryEntry objects. If
        supplied we will look up entries via (file_id, revision_id) which
        should map to a valid InventoryEntry (File/Directory/etc) object.
    :param return_from_cache: Return entries directly from the cache,
        rather than copying them first. This is only safe if the caller
        promises not to mutate the returned inventory entries, but it can
        make some operations significantly faster.
    :return: whatever ``_unpack_inventory`` builds from the parsed element.
    :raises errors.UnexpectedInventoryFormat: if parsing or unpacking raises
        ``xml_serializer.ParseError``.
    """
    try:
        return self._unpack_inventory(
            xml_serializer.fromstring(xml_string), revision_id,
            entry_cache=entry_cache,
            return_from_cache=return_from_cache)
    except xml_serializer.ParseError as e:
        # Report parse failures with the inventory-level error type.
        raise errors.UnexpectedInventoryFormat(e)
187
def read_inventory(self, f, revision_id=None):
188
"""Read an inventory from a file-like object."""
191
return self._unpack_inventory(self._read_element(f),
195
except xml_serializer.ParseError, e:
196
raise errors.UnexpectedInventoryFormat(e)
198
def write_inventory_to_lines(self, inv):
    """Encode ``inv`` and hand back the resulting list of lines.

    Delegates to write_inventory() with no output file, so only the return
    value of that call is used.
    """
    no_file = None
    lines = self.write_inventory(inv, no_file)
    return lines
202
def write_inventory_to_string(self, inv, working=False):
    """Just call write_inventory with a StringIO and return the value.

    :param inv: the inventory to write.
    :param working: If True skip history data - text_sha1, text_size,
        reference_revision, symlink_target.
    :return: everything write_inventory() wrote, as a single string.
    """
    # The buffer must exist before write_inventory() is asked to fill it.
    sio = StringIO()
    self.write_inventory(inv, sio, working)
    return sio.getvalue()
212
def write_inventory(self, inv, f, working=False):
213
"""Write inventory to a file.
215
:param inv: the inventory to write.
216
:param f: the file to write. (May be None if the lines are the desired
218
:param working: If True skip history data - text_sha1, text_size,
219
reference_revision, symlink_target.
220
:return: The inventory as a list of lines.
223
append = output.append
224
if inv.revision_id is not None:
225
revid1 = ' revision_id="'
226
revid2 = xml_serializer.encode_and_escape(inv.revision_id)
230
append('<inventory format="%s"%s%s>\n' % (
231
self.format_num, revid1, revid2))
232
append('<directory file_id="%s name="%s revision="%s />\n' % (
233
xml_serializer.encode_and_escape(inv.root.file_id),
234
xml_serializer.encode_and_escape(inv.root.name),
235
xml_serializer.encode_and_escape(inv.root.revision)))
236
xml_serializer.serialize_inventory_flat(inv,
238
root_id=None, supported_kinds=self.supported_kinds,
245
chk_serializer_255_bigpage = CHKSerializer(65536, 'hash-255-way')
248
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
249
"""A CHKInventory and BEncode based serializer with 'plain' behaviour."""
254
chk_bencode_serializer = CHKBEncodeSerializer(65536, 'hash-255-way')