4763.2.4
by John Arbash Meinel
merge bzr.2.1 in preparation for NEWS entry. |
1 |
# Copyright (C) 2008, 2009, 2010 Canonical Ltd
|
4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
2 |
#
|
3 |
# This program is free software; you can redistribute it and/or modify
|
|
4 |
# it under the terms of the GNU General Public License as published by
|
|
5 |
# the Free Software Foundation; either version 2 of the License, or
|
|
6 |
# (at your option) any later version.
|
|
7 |
#
|
|
8 |
# This program is distributed in the hope that it will be useful,
|
|
9 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
10 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
11 |
# GNU General Public License for more details.
|
|
12 |
#
|
|
13 |
# You should have received a copy of the GNU General Public License
|
|
14 |
# along with this program; if not, write to the Free Software
|
|
15 |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
16 |
||
6379.6.7
by Jelmer Vernooij
Move importing from future until after doc string, otherwise the doc string will disappear. |
17 |
"""Serializer object for CHK based inventory storage."""
|
18 |
||
6379.6.1
by Jelmer Vernooij
Import absolute_import in a few places. |
19 |
from __future__ import absolute_import |
20 |
||
6355.1.5
by Jelmer Vernooij
Use lazy imports. |
21 |
from cStringIO import StringIO |
22 |
||
23 |
from bzrlib import lazy_import |
|
24 |
lazy_import.lazy_import(globals(), |
|
25 |
"""
|
|
26 |
from bzrlib import (
|
|
27 |
xml_serializer,
|
|
28 |
)
|
|
29 |
""") |
|
4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
30 |
from bzrlib import ( |
4398.5.2
by John Arbash Meinel
Merge the chk serializer, and update it for the new bencode locations. |
31 |
bencode, |
4290.1.1
by Jelmer Vernooij
Add simple revision serializer based on RIO. |
32 |
cache_utf8, |
6355.1.3
by Jelmer Vernooij
Split out more stuff. |
33 |
errors, |
4290.1.1
by Jelmer Vernooij
Add simple revision serializer based on RIO. |
34 |
revision as _mod_revision, |
6355.1.3
by Jelmer Vernooij
Split out more stuff. |
35 |
serializer, |
4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
36 |
)
|
37 |
||
4398.5.9
by John Arbash Meinel
it seems that codecs.utf_8_decode is quite a bit faster than codecs.get_decoder('utf-8') |
38 |
|
39 |
def _validate_properties(props, _decode=cache_utf8._utf8_decode):
    """Convert the utf8 property values of ``props`` to Unicode, in place.

    :param props: A dict of revision properties. Every value is replaced by
        the first element of ``_decode(value)``.
    :param _decode: The decoder to apply; bound as a default argument so it
        is looked up once, when the function is defined.
    :return: The same ``props`` dict that was passed in.
    """
    # TODO: we really want an 'isascii' check for key
    # Only existing keys are re-assigned, so the dict never changes size
    # while it is being walked.
    for name in props:
        props[name] = _decode(props[name])[0]
    return props
|
4398.5.5
by John Arbash Meinel
Update the CHK Serializer to do lots more validation. |
45 |
|
46 |
||
47 |
def _is_format_10(value): |
|
48 |
if value != 10: |
|
49 |
raise ValueError('Format number was not recognized, expected 10 got %d' |
|
50 |
% (value,)) |
|
51 |
return 10 |
|
52 |
||
53 |
||
4290.1.12
by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer. |
54 |
class BEncodeRevisionSerializer1(object):
    """Revision serializer that stores revisions as bencoded text.

    A revision is written as a bencoded list of (key, value) pairs and is
    read back by checking every pair against ``_schema``.
    """

    squashes_xml_invalid_characters = False

    # Decoding schema: {key: (Revision attribute, bencode_type, validator)}
    # For every key this records which keyword is handed to Revision (None
    # for 'format', which is dropped before Revision is built), the exact
    # type bdecode is expected to produce, and an optional callable used to
    # validate/transform the decoded value.
    # TODO: add a 'validate_utf8' for things like revision_id and file_id
    #       and a validator for parent-ids
    _schema = {
        'format': (None, int, _is_format_10),
        'committer': ('committer', str, cache_utf8.decode),
        'timezone': ('timezone', int, None),
        'timestamp': ('timestamp', str, float),
        'revision-id': ('revision_id', str, None),
        'parent-ids': ('parent_ids', list, None),
        'inventory-sha1': ('inventory_sha1', str, None),
        'message': ('message', str, cache_utf8.decode),
        'properties': ('properties', dict, _validate_properties),
    }

    def write_revision_to_string(self, rev):
        """Return ``rev`` serialised as a bencoded string.

        The committer, the message and every property value are passed
        through ``cache_utf8._utf8_encode``; the timestamp is formatted with
        three decimal places. The timezone is only written when it is not
        None.

        :param rev: The revision object to serialise.
        :return: The result of ``bencode.bencode`` on the list of pairs.
        """
        to_utf8 = cache_utf8._utf8_encode
        # A list of pairs rather than a dict keeps the ordering under our
        # control, so that we are able to create smaller deltas.
        pairs = [("format", 10)]
        pairs.append(("committer", to_utf8(rev.committer)[0]))
        if rev.timezone is not None:
            pairs.append(("timezone", rev.timezone))
        # For bzr revisions, the most common property is just 'branch-nick'
        # which changes infrequently.
        encoded_props = dict(
            (name, to_utf8(text)[0])
            for name, text in rev.properties.iteritems())
        pairs.append(('properties', encoded_props))
        pairs.append(("timestamp", "%.3f" % rev.timestamp))
        pairs.append(("revision-id", rev.revision_id))
        pairs.append(("parent-ids", rev.parent_ids))
        pairs.append(("inventory-sha1", rev.inventory_sha1))
        pairs.append(("message", to_utf8(rev.message)[0]))
        return bencode.bencode(pairs)

    def write_revision(self, rev, f):
        """Serialise ``rev`` and write the resulting string to ``f``."""
        serialised = self.write_revision_to_string(rev)
        f.write(serialised)

    def read_revision_from_string(self, text):
        """Decode bencoded ``text`` into a Revision object.

        :param text: The bencoded revision text.
        :return: A ``_mod_revision.Revision`` built from the decoded fields.
        :raises ValueError: If the decoded text is not a list, a value does
            not have exactly the type the schema expects, or expected keys
            are missing.
        :raises KeyError: If a key is not part of the schema.
        """
        # TODO: consider writing a Revision decoder, rather than using the
        #       generic bencode decoder
        #       However, to decode all 25k revisions of bzr takes approx 1.3s
        #       If we remove all extra validation that goes down to about
        #       1.2s. Of that time, probably 0.6s is spent in
        #       bencode.bdecode(). Regardless 'time bzr log' of everything is
        #       7+s, so 1.3s to extract revision texts isn't a majority of
        #       time.
        decoded = bencode.bdecode(text)
        if not isinstance(decoded, list):
            raise ValueError("invalid revision text")
        schema = self._schema
        # timezone is allowed to be missing from the text, but it must still
        # be supplied to Revision, so it is pre-seeded here.
        bits = {'timezone': None}
        for key, value in decoded:
            # The lookup raises KeyError for a key outside the schema.
            var_name, expected_type, validator = schema[key]
            # Exact class identity is required; subclasses are rejected.
            if value.__class__ is not expected_type:
                raise ValueError('key %s did not conform to the expected type'
                                 ' %s, but was %s'
                                 % (key, expected_type, type(value)))
            if validator is None:
                bits[var_name] = value
            else:
                bits[var_name] = validator(value)
        if len(bits) != len(schema):
            missing = []
            for key, (var_name, _, _) in schema.iteritems():
                if var_name not in bits:
                    missing.append(key)
            raise ValueError('Revision text was missing expected keys %s.'
                             ' text %r' % (missing, text))
        # Get rid of 'format' since it doesn't get mapped onto Revision.
        bits.pop(None)
        return _mod_revision.Revision(**bits)

    def read_revision(self, f):
        """Read all of ``f`` and decode it as a revision."""
        text = f.read()
        return self.read_revision_from_string(text)
142 |
||
143 |
||
6355.1.3
by Jelmer Vernooij
Split out more stuff. |
144 |
class CHKSerializer(serializer.Serializer):
    """A CHKInventory based serializer with 'plain' behaviour."""

    format_num = '9'
    revision_format_num = None
    support_altered_by_hack = False
    supported_kinds = set(['file', 'directory', 'symlink', 'tree-reference'])

    def __init__(self, node_size, search_key_name):
        """Create a serializer.

        :param node_size: Stored as ``maximum_size``.
        :param search_key_name: Stored as ``search_key_name``.
        """
        self.maximum_size = node_size
        self.search_key_name = search_key_name

    def _unpack_inventory(self, elt, revision_id=None, entry_cache=None,
                          return_from_cache=False):
        """Construct an inventory from an XML Element.

        :param elt: The parsed XML element to unpack.
        :param revision_id: Not used by this implementation.
        :param entry_cache: Passed through to
            ``xml_serializer.unpack_inventory_flat``.
        :param return_from_cache: Passed through to
            ``xml_serializer.unpack_inventory_flat``.
        :return: Whatever ``xml_serializer.unpack_inventory_flat`` returns.
        """
        return xml_serializer.unpack_inventory_flat(
            elt, self.format_num, xml_serializer.unpack_inventory_entry,
            entry_cache, return_from_cache)

    def read_inventory_from_string(self, xml_string, revision_id=None,
                                   entry_cache=None, return_from_cache=False):
        """Read xml_string into an inventory object.

        :param xml_string: The xml to read.
        :param revision_id: If not-None, the expected revision id of the
            inventory.
        :param entry_cache: An optional cache of InventoryEntry objects. If
            supplied we will look up entries via (file_id, revision_id) which
            should map to a valid InventoryEntry (File/Directory/etc) object.
        :param return_from_cache: Return entries directly from the cache,
            rather than copying them first. This is only safe if the caller
            promises not to mutate the returned inventory entries, but it can
            make some operations significantly faster.
        :raises errors.UnexpectedInventoryFormat: If the XML cannot be
            parsed.
        """
        try:
            return self._unpack_inventory(
                xml_serializer.fromstring(xml_string), revision_id,
                entry_cache=entry_cache,
                return_from_cache=return_from_cache)
        except xml_serializer.ParseError as e:
            raise errors.UnexpectedInventoryFormat(e)

    def read_inventory(self, f, revision_id=None):
        """Read an inventory from a file-like object.

        ``f`` is always closed, whether or not reading succeeds.

        :param f: The file-like object to read from.
        :param revision_id: Forwarded to ``_unpack_inventory``, matching
            what ``read_inventory_from_string`` does.
        :raises errors.UnexpectedInventoryFormat: If the XML cannot be
            parsed.
        """
        try:
            try:
                # NOTE(review): _read_element is not defined in this class;
                # presumably it is provided by a base class -- confirm.
                return self._unpack_inventory(self._read_element(f),
                                              revision_id=revision_id)
            finally:
                f.close()
        except xml_serializer.ParseError as e:
            raise errors.UnexpectedInventoryFormat(e)

    def write_inventory_to_lines(self, inv):
        """Return a list of lines with the encoded inventory."""
        return self.write_inventory(inv, None)

    def write_inventory_to_string(self, inv, working=False):
        """Just call write_inventory with a StringIO and return the value.

        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        """
        sio = StringIO()
        self.write_inventory(inv, sio, working)
        return sio.getvalue()

    def write_inventory(self, inv, f, working=False):
        """Write inventory to a file.

        :param inv: the inventory to write.
        :param f: the file to write. (May be None if the lines are the desired
            output).
        :param working: If True skip history data - text_sha1, text_size,
            reference_revision, symlink_target.
        :return: The inventory as a list of lines.
        """
        output = []
        append = output.append
        # NOTE(review): the templates below never write a closing '"' after
        # a %s; presumably encode_and_escape() returns text that already
        # ends with the closing quote -- confirm against xml_serializer.
        if inv.revision_id is not None:
            revid1 = ' revision_id="'
            revid2 = xml_serializer.encode_and_escape(inv.revision_id)
        else:
            revid1 = ""
            revid2 = ""
        append('<inventory format="%s"%s%s>\n' % (
            self.format_num, revid1, revid2))
        append('<directory file_id="%s name="%s revision="%s />\n' % (
            xml_serializer.encode_and_escape(inv.root.file_id),
            xml_serializer.encode_and_escape(inv.root.name),
            xml_serializer.encode_and_escape(inv.root.revision)))
        # The root entry has been written above, so the flat serializer is
        # called with root_id=None.
        xml_serializer.serialize_inventory_flat(
            inv, append,
            root_id=None, supported_kinds=self.supported_kinds,
            working=working)
        if f is not None:
            f.writelines(output)
        return output
|
6355.1.6
by Jelmer Vernooij
Move core inventory code to xml_serializer. |
243 |
|
4241.6.1
by Ian Clatworthy
chk_map code from brisbane-core |
244 |
|
245 |
# Shared CHKSerializer instance: 65536 is stored as maximum_size and
# 'hash-255-way' as search_key_name.
chk_serializer_255_bigpage = CHKSerializer(
    node_size=65536, search_key_name='hash-255-way')
|
4290.1.7
by Jelmer Vernooij
Add development7-rich-root format that uses the RIO Serializer. |
246 |
|
247 |
||
4290.1.12
by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer. |
248 |
class CHKBEncodeSerializer(BEncodeRevisionSerializer1, CHKSerializer):
    """A CHKInventory and BEncode based serializer with 'plain' behaviour.

    Revision reading/writing comes from BEncodeRevisionSerializer1 and
    inventory reading/writing from CHKSerializer.
    """

    # Overrides the '9' declared on CHKSerializer.
    format_num = '10'
|
252 |
||
253 |
||
4290.1.12
by Jelmer Vernooij
Use bencode rather than rio in the new revision serialiszer. |
254 |
# Shared CHKBEncodeSerializer instance; the arguments are handled by the
# __init__ inherited from CHKSerializer.
chk_bencode_serializer = CHKBEncodeSerializer(
    node_size=65536, search_key_name='hash-255-way')