1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# Copyright (C) 2009 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
"""Python implementation of _search_key functions, etc."""
import zlib
import struct
_LeafNode = None
_InternalNode = None
_unknown = None
def _crc32(bit):
# Depending on python version and platform, zlib.crc32 will return either a
# signed (<= 2.5 >= 3.0) or an unsigned (2.5, 2.6).
# http://docs.python.org/library/zlib.html recommends using a mask to force
# an unsigned value to ensure the same numeric value (unsigned) is obtained
# across all python versions and platforms.
# Note: However, on 32-bit platforms this causes an upcast to PyLong, which
# are generally slower than PyInts. However, if performance becomes
# critical, we should probably write the whole thing as an extension
# anyway.
# Though we really don't need that 32nd bit of accuracy. (even 2**24
# is probably enough node fan out for realistic trees.)
return zlib.crc32(bit)&0xFFFFFFFF
def _search_key_16(key):
"""Map the key tuple into a search key string which has 16-way fan out."""
return '\x00'.join(['%08X' % _crc32(bit) for bit in key])
def _search_key_255(key):
"""Map the key tuple into a search key string which has 255-way fan out.
We use 255-way because '\n' is used as a delimiter, and causes problems
while parsing.
"""
bytes = '\x00'.join([struct.pack('>L', _crc32(bit)) for bit in key])
return bytes.replace('\n', '_')
def _deserialise_leaf_node(bytes, key, search_key_func=None):
"""Deserialise bytes, with key key, into a LeafNode.
:param bytes: The bytes of the node.
:param key: The key that the serialised node has.
"""
global _unknown, _LeafNode, _InternalNode
if _LeafNode is None:
from bzrlib import chk_map
_unknown = chk_map._unknown
_LeafNode = chk_map.LeafNode
_InternalNode = chk_map.InternalNode
result = _LeafNode(search_key_func=search_key_func)
# Splitlines can split on '\r' so don't use it, split('\n') adds an
# extra '' if the bytes ends in a final newline.
lines = bytes.split('\n')
trailing = lines.pop()
if trailing != '':
raise AssertionError('We did not have a final newline for %s'
% (key,))
items = {}
if lines[0] != 'chkleaf:':
raise ValueError("not a serialised leaf node: %r" % bytes)
maximum_size = int(lines[1])
width = int(lines[2])
length = int(lines[3])
prefix = lines[4]
pos = 5
while pos < len(lines):
line = prefix + lines[pos]
elements = line.split('\x00')
pos += 1
if len(elements) != width + 1:
raise AssertionError(
'Incorrect number of elements (%d vs %d) for: %r'
% (len(elements), width + 1, line))
num_value_lines = int(elements[-1])
value_lines = lines[pos:pos+num_value_lines]
pos += num_value_lines
value = '\n'.join(value_lines)
items[tuple(elements[:-1])] = value
if len(items) != length:
raise AssertionError("item count (%d) mismatch for key %s,"
" bytes %r" % (length, key, bytes))
result._items = items
result._len = length
result._maximum_size = maximum_size
result._key = key
result._key_width = width
result._raw_size = (sum(map(len, lines[5:])) # the length of the suffix
+ (length)*(len(prefix))
+ (len(lines)-5))
if not items:
result._search_prefix = None
result._common_serialised_prefix = None
else:
result._search_prefix = _unknown
result._common_serialised_prefix = prefix
if len(bytes) != result._current_size():
raise AssertionError('_current_size computed incorrectly')
return result
def _deserialise_internal_node(bytes, key, search_key_func=None):
global _unknown, _LeafNode, _InternalNode
if _InternalNode is None:
from bzrlib import chk_map
_unknown = chk_map._unknown
_LeafNode = chk_map.LeafNode
_InternalNode = chk_map.InternalNode
result = _InternalNode(search_key_func=search_key_func)
# Splitlines can split on '\r' so don't use it, remove the extra ''
# from the result of split('\n') because we should have a trailing
# newline
lines = bytes.split('\n')
if lines[-1] != '':
raise ValueError("last line must be ''")
lines.pop(-1)
items = {}
if lines[0] != 'chknode:':
raise ValueError("not a serialised internal node: %r" % bytes)
maximum_size = int(lines[1])
width = int(lines[2])
length = int(lines[3])
common_prefix = lines[4]
for line in lines[5:]:
line = common_prefix + line
prefix, flat_key = line.rsplit('\x00', 1)
items[prefix] = (flat_key,)
if len(items) == 0:
raise AssertionError("We didn't find any item for %s" % key)
result._items = items
result._len = length
result._maximum_size = maximum_size
result._key = key
result._key_width = width
# XXX: InternalNodes don't really care about their size, and this will
# change if we add prefix compression
result._raw_size = None # len(bytes)
result._node_width = len(prefix)
result._search_prefix = common_prefix
return result
|