1
# Copyright (C) 2008 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Pyrex extensions to btree node parsing."""
21
# Declarations taken from the C header <stdlib.h> so that Pyrex can refer to them.
cdef extern from "stdlib.h":
22
# size_t is declared as plain `unsigned` here; for an extern ctypedef Pyrex
# only needs a type of the right general kind, the real definition comes from
# the C header at compile time.
ctypedef unsigned size_t
23
# NOTE(review): no line visible in this excerpt calls strtol -- confirm it is
# still needed before removing.
long int strtol(char *nptr, char **endptr, int base)
26
# CPython C-API functions used directly, bypassing Pyrex's generic object
# protocol. Where a declaration carries a quoted string, the identifier before
# the string is the name used in this file and the string is the C name that
# is actually emitted.
cdef extern from "Python.h":
27
int PyDict_CheckExact(object)
28
# Alias of PyDict_GetItem declared to return a raw void* instead of `object`.
# NOTE(review): presumably so that the borrowed reference (or NULL) can be
# inspected without Pyrex reference counting -- confirm against the callers.
void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
29
# `except -1`: a return value of -1 means a Python exception is set and must
# be propagated.
int PyDict_SetItem(object p, object key, object val) except -1
31
int PyList_Append(object lst, object item) except -1
32
# NOTE(review): the C macro PyList_GET_ITEM yields a borrowed reference while
# this declaration returns `object`; check that any caller accounts for that.
object PyList_GET_ITEM(object lst, int index)
33
int PyList_CheckExact(object)
35
# Alias of the PyTuple_GET_ITEM macro taking and returning raw void* pointers.
void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
37
char *PyString_AsString(object p)
38
object PyString_FromStringAndSize(char *, int)
39
object PyString_FromString(char *)
40
int PyString_Size(object p)
42
void Py_INCREF(object)
45
# Declarations taken from the C header <string.h>.
cdef extern from "string.h":
46
# Forward search for byte c in the first n bytes of s.
void *memchr(void *s, int c, size_t n)
47
# memrchr is deliberately left undeclared; this file declares its own
# _my_memrchr with the same parameter list instead.
# void *memrchr(void *s, int c, size_t n)
48
# Compares at most n characters of s1 and s2.
int strncmp(char *s1, char *s2, size_t n)
51
# Local stand-in for memrchr, taking the same (s, c, n) parameters.
# NOTE(review): the body is not visible in this excerpt. Presumably it returns
# a pointer to the last occurrence of byte c within the first n bytes of s, or
# NULL when there is none -- confirm against the implementation.
cdef void* _my_memrchr(void *s, int c, size_t n):
52
# memrchr seems to be a GNU extension, so we have to implement it ourselves
53
# It is not present in any win32 standard library
66
# Extension type holding the state needed to parse the text of a btree leaf
# node; the header line it looks for is "type=leaf".
# NOTE(review): only one attribute declaration is visible in this excerpt; the
# methods also use self.bytes, self.key_length, self.keys, self.start,
# self.cur_str, self.end_str and self.header_found, which are presumably
# declared in lines not shown.
cdef class BTreeLeafParser:
70
# Number of reference lists expected on each line; process_line loops while
# its counter is below this value.
cdef int ref_list_length
75
# The current start point for parsing
80
# Store the parsing parameters on the instance.
#
# bytes: the string to parse.
#     NOTE(review): the line that stores it is not visible in this excerpt;
#     other code reads it back as self.bytes.
# key_length: number of segments extract_key collects for one key.
# ref_list_length: number of reference lists process_line reads per line.
def __init__(self, bytes, key_length, ref_list_length):
82
self.key_length = key_length
83
self.ref_list_length = ref_list_length
89
# Read up to self.key_length segments starting at self.start and return them
# as a tuple of strings. self.start is moved to the byte after each segment
# that is consumed. An AssertionError whose message starts with
# "invalid key, wanted segment from " is raised for malformed input.
# NOTE(review): several lines (docstring delimiters, local declarations and
# the conditions guarding the raise) are not visible in this excerpt.
cdef extract_key(self, char * last):
92
:param last: points at the byte after the last byte permitted for the key.
99
while loop_counter < self.key_length:
100
# Incremented before the search, so the counter is 1-based when it is compared
# with key_length below.
loop_counter = loop_counter + 1
102
# Look for a NUL byte between self.start and last (exclusive).
temp_ptr = <char*>memchr(self.start, c'\0', last - self.start)
104
# True only while handling the final expected segment.
if loop_counter == self.key_length:
109
failure_string = ("invalid key, wanted segment from " +
110
repr(PyString_FromStringAndSize(self.start, last-self.start)))
111
raise AssertionError(failure_string)
112
# capture the key string
113
key_element = PyString_FromStringAndSize(self.start, temp_ptr - self.start)
114
# advance our pointer
115
self.start = temp_ptr + 1
116
PyList_Append(key_segments, key_element)
117
return tuple(key_segments)
119
# Parse one line beginning at self.cur_str and advance self.cur_str past it.
#
# From the visible code: a line ends at '\n' (or at self.end_str); until
# self.header_found is set the line is compared with "type=leaf"; otherwise a
# key is read with extract_key, the value is the text after the last NUL byte
# of the line, reference lists are delimited by '\t' and the keys inside one
# list by '\r'. The result is appended to self.keys as
# (key, (value, ref_lists)).
#
# Declared `except -1` so that Python exceptions raised inside propagate to
# the caller. Raises AssertionError("invalid key") for a malformed reference
# area.
# NOTE(review): many branch lines (else/return/NULL checks and local
# declarations) are not visible in this excerpt; the notes below describe only
# the statements that are shown.
cdef int process_line(self) except -1:
120
"""Process a line in the bytes."""
124
cdef char *next_start
125
cdef int loop_counter
127
self.start = self.cur_str
128
# Find the next newline
129
last = <char*>memchr(self.start, c'\n', self.end_str - self.start)
131
# Process until the end of the file
133
self.cur_str = self.end_str
135
# And the next string is right after it
136
self.cur_str = last + 1
137
# The last character is right before the '\n'
140
# Equal pointers mean the line holds no bytes before its terminator.
if last == self.start:
143
if last < self.start:
144
# Unexpected error condition - fail
146
if 0 == self.header_found:
147
# NOTE(review): strncmp compares only (last - self.start) characters, so a
# line that is merely a prefix of "type=leaf" also compares equal here --
# confirm that this is acceptable.
if strncmp("type=leaf", self.start, last-self.start) == 0:
148
self.header_found = 1
151
# Python 2 print statement: writes the offending line to standard output.
print "failed strncmp", repr(PyString_FromStringAndSize(self.start, last-self.start))
154
# extract_key moves self.start past the key segments it consumes.
key = self.extract_key(last)
155
# find the value area
156
temp_ptr = <char*>_my_memrchr(self.start, c'\0', last - self.start)
161
# capture the value string
162
# Runs from the byte after temp_ptr up to, but not including, last.
value = PyString_FromStringAndSize(temp_ptr + 1, last - temp_ptr - 1)
163
# shrink the references end point
165
if self.ref_list_length:
168
while loop_counter < self.ref_list_length:
170
# extract a reference list
171
loop_counter = loop_counter + 1
172
if last < self.start:
174
# find the next reference list end point:
175
temp_ptr = <char*>memchr(self.start, c'\t', last - self.start)
177
# Only valid for the last list
178
if loop_counter != self.ref_list_length:
181
raise AssertionError("invalid key")
183
# scan to the end of the ref list area
187
# scan to the end of this ref list
189
next_start = temp_ptr + 1
190
# Now, there may be multiple keys in the ref list.
191
while self.start < ref_ptr:
192
# loop finding keys and extracting them
193
temp_ptr = <char*>memchr(self.start, c'\r', ref_ptr - self.start)
195
# key runs to the end
197
PyList_Append(ref_list, self.extract_key(temp_ptr))
198
# Each finished list is stored as an immutable tuple.
PyList_Append(ref_lists, tuple(ref_list))
199
# prepare for the next reference list
200
self.start = next_start
201
ref_lists = tuple(ref_lists)
202
node_value = (value, ref_lists)
204
if last != self.start:
205
# unexpected reference data present
207
# With no reference lists, the second element is an empty tuple.
node_value = (value, ())
208
PyList_Append(self.keys, (key, node_value))
213
# Set up the cursor over self.bytes and loop until the whole buffer has been
# consumed.
# NOTE(review): the enclosing def line and the loop body are not visible in
# this excerpt; _parse_leaf_lines calls parser.parse(), so this is presumably
# the body of parse() with process_line() called inside the loop -- confirm.
byte_count = PyString_Size(self.bytes)
214
self.cur_str = PyString_AsString(self.bytes)
215
# This points one past the last character in the string
216
self.end_str = self.cur_str + byte_count
217
while self.cur_str < self.end_str:
222
def _parse_leaf_lines(bytes, key_length, ref_list_length):
    """Parse the text of a leaf node and return the parser's result.

    Builds a BTreeLeafParser from the three arguments, which are passed
    through unchanged, and returns whatever its parse() method returns.

    :param bytes: the string to parse.
    :param key_length: number of segments in each key.
    :param ref_list_length: number of reference lists per line.
    """
    return BTreeLeafParser(bytes, key_length, ref_list_length).parse()