# Copyright (C) 2008, 2009, 2010 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
"""Serializer object for CHK based inventory storage."""
22
revision as _mod_revision,
27
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Decode every value of ``props`` in place and return the dict.

    :param props: dict whose values are utf8 encoded strings. It is modified
        in place.
    :param _decode: decoder returning a sequence whose first item is the
        decoded value. It is bound as a default argument so the attribute
        lookup happens once, at definition time.
    :return: ``props`` itself. Schema validators have their return value
        stored in place of the decoded value, so returning nothing would
        turn the properties into None.
    """
    # TODO: we really want an 'isascii' check for key
    # Cast the utf8 properties into Unicode 'in place'
    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for key, value in props.iteritems():
        props[key] = _decode(value)[0]
    return props
35
def _is_format_10(value):
37
raise ValueError('Format number was not recognized, expected 10 got %d'
42
class BEncodeRevisionSerializer1(object):
    """Simple revision serializer based around bencode.

    A revision is written as a bencoded list of (key, value) pairs rather
    than as a dict, so that the order of the fields is under our control.
    Reading checks every pair against ``_schema`` before building the
    Revision object.
    """

    squashes_xml_invalid_characters = False

    # Maps {key: (Revision attribute, bencode_type, validator)}
    # For every key of the serialized form this gives the Revision
    # constructor argument it feeds, the exact type bdecode is expected to
    # create for it, and an optional function to call to validate/transform
    # the decoded value (None means the value is used as-is).
    # 'format' maps to the attribute None: it is checked but never handed to
    # Revision.
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {'format': (None, int, _is_format_10),
               'committer': ('committer', str, cache_utf8.decode),
               'timezone': ('timezone', int, None),
               'timestamp': ('timestamp', str, float),
               'revision-id': ('revision_id', str, None),
               'parent-ids': ('parent_ids', list, None),
               'inventory-sha1': ('inventory_sha1', str, None),
               'message': ('message', str, cache_utf8.decode),
               'properties': ('properties', dict, _validate_properties),
              }

    def write_revision_to_string(self, rev):
        """Return the bencoded serialization of ``rev``.

        :param rev: The revision to serialize. Its committer, message and
            property values go through the utf8 encoder; the other
            attributes are written as they are.
        :return: The text produced by ``bencode.bencode``.
        """
        encode_utf8 = cache_utf8._utf8_encode
        # Use a list of tuples rather than a dict
        # This lets us control the ordering of the serialized fields.
        # 'format' has to be written, and has to be 10: the reader rejects
        # texts that lack a schema key or carry another format number.
        ret = [
            ("format", 10),
            ("committer", encode_utf8(rev.committer)[0]),
        ]
        # timezone is the only optional field; it is left out when unset.
        if rev.timezone is not None:
            ret.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        revprops = {}
        for key, value in rev.properties.iteritems():
            revprops[key] = encode_utf8(value)[0]
        ret.append(('properties', revprops))
        ret.extend([
            # The timestamp is stored as text with millisecond precision.
            ("timestamp", "%.3f" % rev.timestamp),
            ("revision-id", rev.revision_id),
            ("parent-ids", rev.parent_ids),
            ("inventory-sha1", rev.inventory_sha1),
            ("message", encode_utf8(rev.message)[0]),
        ])
        return bencode.bencode(ret)

    def write_revision(self, rev, f):
        """Serialize ``rev`` and write the result to the file object ``f``."""
        f.write(self.write_revision_to_string(rev))

    def read_revision_from_string(self, text):
        """Build a Revision from its bencoded serialization.

        :param text: The serialized revision.
        :return: The ``Revision`` constructed from the decoded fields.
        :raises ValueError: If the decoded text is not a list, a value does
            not have the type the schema expects, a validator rejects a
            value, or an expected key is missing.
        :raises KeyError: If the text contains a key that is not part of the
            schema.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about 1.2s.
        #       Of that time, probably 0.6s is spent in bencode.bdecode().
        #       Regardless 'time bzr log' of everything is 7+s, so 1.3s to
        #       extract revision texts isn't a majority of time.
        ret = bencode.bdecode(text)
        if not isinstance(ret, list):
            raise ValueError("invalid revision text")
        schema = self._schema
        # timezone is allowed to be missing, but should be set
        bits = {'timezone': None}
        for key, value in ret:
            # Will raise KeyError if not a valid part of the schema.
            # NOTE: an entry given 2 times is not detected here; the later
            # value simply replaces the earlier one.
            var_name, expected_type, validator = schema[key]
            # Exact type match on purpose: subclasses are not accepted.
            if value.__class__ is not expected_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, expected_type, type(value)))
            if validator is not None:
                value = validator(value)
            bits[var_name] = value
        # Every schema key contributes exactly one entry to bits ('format'
        # under None), so a size mismatch means something was left out.
        if len(bits) != len(schema):
            missing = [key for key, (var_name, _, _) in schema.iteritems()
                       if var_name not in bits]
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (missing, text))
        del bits[None]  # Get rid of 'format' since it doesn't get mapped
        rev = _mod_revision.Revision(**bits)
        return rev

    def read_revision(self, f):
        """Read the whole of file object ``f`` and decode it as a revision."""
        return self.read_revision_from_string(f.read())
132
class CHKSerializer(xml8.Serializer_v8):
    """A CHKInventory based serializer with 'plain' behaviour.

    :ivar maximum_size: The ``node_size`` given to the constructor.
    :ivar search_key_name: The ``search_key_name`` given to the constructor.
    """

    # NOTE(review): only the attributes visible in the reviewed text are
    # listed here - confirm against the upstream file that none is missing.
    revision_format_num = None
    support_altered_by_hack = False

    def __init__(self, node_size, search_key_name):
        """Remember the node size and search key name for this serializer.

        :param node_size: Stored as ``maximum_size``.
        :param search_key_name: Stored unchanged as ``search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name
144
# Module-level CHKSerializer instance: maximum_size is 65536 and the search
# key name is 'hash-255-way' (see CHKSerializer.__init__).
chk_serializer_255_bigpage = CHKSerializer(65536, 'hash-255-way')
147
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
    """A CHKInventory and BEncode based serializer with 'plain' behaviour.

    BEncodeRevisionSerializer1 is listed first, so its revision reading and
    writing methods are found before any of the same name reachable through
    CHKSerializer. The constructor comes from CHKSerializer.
    """
    # NOTE(review): no class attributes are visible for this class in the
    # reviewed text - confirm against the upstream file that none is missing.
153
# Module-level CHKBEncodeSerializer instance, built with the same arguments
# as chk_serializer_255_bigpage: node size 65536, search key 'hash-255-way'.
chk_bencode_serializer = CHKBEncodeSerializer(65536, 'hash-255-way')