1
# Copyright (C) 2007 Canonical Ltd
3
# This program is free software; you can redistribute it and/or modify
4
# it under the terms of the GNU General Public License as published by
5
# the Free Software Foundation; either version 2 of the License, or
6
# (at your option) any later version.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17
"""Pyrex extensions to knit parsing."""
21
from bzrlib import errors
24
# C standard library: strtol() does the base-10 parsing in
# string_to_int_safe(); size_t is needed for the memchr() prototype.
cdef extern from "stdlib.h":
25
ctypedef unsigned size_t
26
long int strtol(char *nptr, char **endptr, int base)
29
# CPython C API entry points used by the parser. Several are declared with
# non-default types on purpose (see the notes on individual lines).
cdef extern from "Python.h":
30
int PyDict_CheckExact(object)
31
# C name is PyDict_GetItem; declared as returning void* so the result can be
# compared against NULL (see the cache lookup in process_one_record).
void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
32
int PyDict_SetItem(object p, object key, object val) except -1
34
int PyList_Append(object lst, object item) except -1
35
# NOTE(review): the CPython docs describe PyList_GET_ITEM as returning a
# borrowed reference and doing no bounds checking -- confirm callers account
# for both.
object PyList_GET_ITEM(object lst, int index)
36
int PyList_CheckExact(object)
38
# C name is PyTuple_GET_ITEM; declared on void* so that no reference counting
# happens until the caller casts the result to <object>.
void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
40
char *PyString_AsString(object p)
41
object PyString_FromStringAndSize(char *, int)
42
int PyString_Size(object p)
44
void Py_INCREF(object)
47
# memchr() locates the ',' / ' ' / '\n' separators inside the raw buffer.
cdef extern from "string.h":
48
void *memchr(void *s, int c, size_t n)
51
cdef int string_to_int_safe(char *s, char *end, int *out) except -1:
    """Convert a base10 string to an integer.

    This makes sure the whole string is consumed, or it raises ValueError.
    This is similar to how int(s) works, except you don't need a Python
    string object.

    :param s: The string to convert
    :param end: The character after the integer. So if the string is '12\0',
        this should be pointing at the '\0'. If the string was '12 ' then this
        should point at the ' '.
    :param out: This is the integer that will be returned
    :return: -1 if an exception is raised. 0 otherwise
    :raises ValueError: if no digits could be converted, or if the
        conversion did not stop exactly at ``end``.
    """
    cdef char *integer_end

    # We can't just return the integer because of how pyrex determines when
    # there is an exception.
    # The long returned by strtol() is narrowed to int by the cast; values
    # outside the int range are not detected here.
    out[0] = <int>strtol(s, &integer_end, 10)
    # strtol() leaves integer_end == s when it converted nothing. Without the
    # second test an empty field (s == end) would be accepted as 0, whereas
    # int('') raises ValueError.
    if integer_end != end or integer_end == s:
        py_s = PyString_FromStringAndSize(s, end-s)
        raise ValueError('%r is not a valid integer' % (py_s,))
    return 0
76
cdef class KnitIndexReader:
89
# Capture the index object's _cache and _history so the parsing methods can
# update them directly; validate() later requires an exact dict and an exact
# list respectively.
# NOTE(review): the listing numbers jump from 89 to 93 and stop at 94. Other
# attributes read by the methods below (self.kndx, self.fp, self.cur_str,
# self.end_str, self.history_len) are not assigned in the lines shown here --
# confirm against the original file.
def __new__(self, kndx, fp):
93
self.cache = kndx._cache
94
self.history = kndx._history
100
cdef int validate(self) except -1:
    """Check that the index containers are the exact builtin types.

    The parsing methods use PyDict_* / PyList_* C API calls directly on
    self.cache and self.history, so both must be a real dict and a real list.

    Declared ``int ... except -1`` rather than ``void``: a cdef function
    returning void with no except clause cannot report an exception to its
    caller, so the TypeErrors below would be printed and then ignored.

    :return: 0 on success, -1 if an exception is raised.
    :raises TypeError: if self.cache is not exactly a dict or self.history
        is not exactly a list.
    """
    if not PyDict_CheckExact(self.cache):
        raise TypeError('kndx._cache must be a python dict')
    if not PyList_CheckExact(self.history):
        raise TypeError('kndx._history must be a python list')
    return 0
106
# Split the comma-separated options found in the byte range
# [option_str, end) into Python strings, without first building a Python
# string for the whole field.
# NOTE(review): the bare integers are listing line numbers, and several are
# skipped (108-109, 113, 115, 117-119, 122-123, 125, 127+). In the lines shown
# here `next` and `final_options` are used but never declared/initialised, the
# second argument of PyString_FromStringAndSize is absent, and nothing
# advances option_str or returns the list -- confirm against the original file.
cdef object process_options(self, char *option_str, char *end):
107
"""Process the options string into a list."""
110
# This is alternative code which creates a python string and splits it.
111
# It is "correct" and more obvious, but slower than the following code.
112
# It can be uncommented to switch in case the other code is seen as
114
# options = PyString_FromStringAndSize(option_str,
116
# return options.split(',')
120
# Walk the buffer one comma-delimited option at a time.
while option_str < end:
121
# Look for the next ',' within the bytes that remain before `end`.
next = <char*>memchr(option_str, c',', end - option_str)
124
next_option = PyString_FromStringAndSize(option_str,
126
PyList_Append(final_options, next_option)
133
# Convert the space-separated parent field in [parent_str, end) into a list
# of parent revision ids. A token starting with '.' is an explicit revision
# id (the '.' is dropped); any other token is parsed as a decimal index into
# self.history.
# NOTE(review): the bare integers are listing line numbers and several are
# skipped (134-135, 149-150, 154-155, 160-161, 164, 171, 174+). `next`,
# `int_parent` and `parents` are used but not declared/initialised in the
# lines shown, and the body of the first `if`, the `else:` and the return are
# not visible -- confirm against the original file.
cdef object process_parents(self, char *parent_str, char *end):
136
# NOTE(review): parent_end is not referenced in any of the lines shown.
cdef char *parent_end
138
# Alternative, correct but slower code.
140
# parents = PyString_FromStringAndSize(parent_str,
143
# for parent in parents.split():
144
# if parent[0].startswith('.'):
145
# real_parents.append(parent[1:])
147
# real_parents.append(self.history[int(parent)])
148
# return real_parents
151
while parent_str <= end:
152
# Each parent token is terminated by a space.
next = <char*>memchr(parent_str, c' ', end - parent_str)
153
# No terminator, a terminator at/after `end`, or an empty token.
if next == NULL or next >= end or next == parent_str:
156
if parent_str[0] == c'.':
157
# This is an explicit revision id; skip the leading '.'
158
parent_str = parent_str + 1
159
parent = PyString_FromStringAndSize(parent_str,
162
# This is an integer index into self.history
163
string_to_int_safe(parent_str, next, &int_parent)
165
# NOTE(review): only the upper bound is checked. strtol() accepts a leading
# '-', so a negative int_parent would reach PyList_GET_ITEM, which is
# documented as doing no bounds checking -- confirm, and consider rejecting
# int_parent < 0. The message below also prints '>' although the test is '>='.
if int_parent >= self.history_len:
166
raise IndexError('Parent index refers to a revision which'
167
' does not exist yet.'
168
' %d > %d' % (int_parent, self.history_len))
169
parent = PyList_GET_ITEM(self.history, int_parent)
170
# NOTE(review): per the CPython docs PyList_GET_ITEM returns a *borrowed*
# reference (it does not "steal" one). Because it is declared as returning
# `object`, an extra reference presumably has to be taken here (Py_INCREF is
# declared in the Python.h extern block); listing line 171 is not shown.
172
PyList_Append(parents, parent)
173
# Continue with the token after the separating space.
parent_str = next + 1
176
# Parse one index line held in [start, end) and store it in self.cache.
# The visible code locates four single-space separators in turn, giving the
# fields version id, options, pos, size and parents. ValueError/IndexError
# from the numeric and parent parsing are re-raised as errors.KnitCorrupt
# carrying the file name and the offending line.
# NOTE(review): the bare integers are listing line numbers and many are
# skipped (182-185, 187, 193-194, 201-203, 208-209, 214-215, 219, 221-222,
# 236, 240, 242+). Declarations for pos_str/pos/size_str/size, the bodies of
# the four "field missing" checks, the assignment of option_end, the `try:`
# and `else:` lines, the value passed to PyDict_SetItem and the return value
# are not visible here -- confirm against the original file.
cdef int process_one_record(self, char *start, char *end) except -1:
177
"""Take a simple string and split it into an index record."""
178
cdef char *version_id_str
179
cdef int version_id_size
180
cdef char *option_str
181
cdef char *option_end
186
cdef char *parent_str
188
# void* so that a missing key can be detected as NULL.
cdef void *cache_entry
190
version_id_str = start
191
# The version id runs up to the first space.
option_str = <char*>memchr(version_id_str, c' ', end - version_id_str)
192
if option_str == NULL or option_str >= end:
195
version_id_size = <int>(option_str - version_id_str)
196
# Move past the space character
197
option_str = option_str + 1
199
# The options field runs up to the next space.
pos_str = <char*>memchr(option_str, c' ', end - option_str)
200
if pos_str == NULL or pos_str >= end:
204
pos_str = pos_str + 1
206
# The pos field runs up to the next space.
size_str = <char*>memchr(pos_str, c' ', end - pos_str)
207
if size_str == NULL or size_str >= end:
210
size_str = size_str + 1
212
# The size field runs up to the next space; parents follow it.
parent_str = <char*>memchr(size_str, c' ', end - size_str)
213
if parent_str == NULL or parent_str >= end:
216
parent_str = parent_str + 1
218
version_id = PyString_FromStringAndSize(version_id_str,
220
options = self.process_options(option_str, option_end)
223
# size_str and parent_str were each advanced one past their separator, so
# "- 1" points back at the space that terminates the preceding number.
string_to_int_safe(pos_str, size_str - 1, &pos)
224
string_to_int_safe(size_str, parent_str - 1, &size)
225
parents = self.process_parents(parent_str, end)
226
except (ValueError, IndexError), e:
227
py_line = PyString_FromStringAndSize(start, end - start)
228
raise errors.KnitCorrupt(self.kndx._filename,
229
"line %r: %s" % (py_line, e))
231
cache_entry = PyDict_GetItem_void(self.cache, version_id)
232
if cache_entry == NULL:
233
# First time this version id is seen: append it to the history list and
# give it the next sequential index.
PyList_Append(self.history, version_id)
234
index = self.history_len
235
self.history_len = self.history_len + 1
237
# PyTuple_GetItem_void_void does *not* increment the reference
238
# counter, but casting to <object> does.
239
# Reuse the index stored in slot 5 of the existing cache entry.
index = <object>PyTuple_GetItem_void_void(cache_entry, 5)
241
PyDict_SetItem(self.cache, version_id,
251
# Locate the next newline-delimited line in the buffer, advance self.cur_str
# past it, and pass the line to process_one_record with `last` as its end
# pointer.
# NOTE(review): the bare integers are listing line numbers and several are
# skipped (253-256, 259, 263, 267-268, 270-272). `start` and `last` are used
# but not declared/assigned in the lines shown, the `if`/`else` headers around
# the two cur_str updates are missing, and so is the body of the final check
# -- confirm against the original file.
cdef int process_next_record(self) except -1:
252
"""Process the next record in the file."""
257
# Find the next newline
258
last = <char*>memchr(start, c'\n', self.end_str - start)
260
# Process until the end of the file
261
last = self.end_str - 1
262
self.cur_str = self.end_str
264
# The last character is right before the '\n'
265
# And the next string is right after it
266
self.cur_str = last + 1
269
# The check looks at whether the line is empty or `last` is not at a ':';
# what happens in that case is not visible in the lines shown.
if last <= start or last[0] != c':':
273
return self.process_one_record(start, last)
280
# NOTE(review): the `def` line for this method body is not part of the
# listing (numbers jump from 273 to 280), nor are listing lines 281 and 295 --
# confirm the method name and any preceding statements against the original
# file.
# Let the index object check the file header before the records are parsed.
self.kndx.check_header(self.fp)
282
# We read the whole thing at once
283
# TODO: jam 2007-05-09 Consider reading incrementally rather than
284
# having to have the whole thing read up front.
285
# we already know that calling f.readlines() versus lots of
286
# f.readline() calls is faster.
287
# The other possibility is to avoid a Python String here
288
# completely. However self.fp may be a 'file-like' object
289
# it is not guaranteed to be a real file.
290
text = self.fp.read()
291
text_size = PyString_Size(text)
292
# cur_str/end_str are raw pointers obtained from `text`, so `text` has to
# stay referenced for as long as records are being processed.
self.cur_str = PyString_AsString(text)
293
# This points one past the last character in the string
294
self.end_str = self.cur_str + text_size
296
# Each call consumes one line and advances self.cur_str.
while self.cur_str < self.end_str:
297
self.process_next_record()
300
# Module-level entry point: builds a KnitIndexReader for the given index
# object and file-like object.
# NOTE(review): the listing ends immediately after the reader is constructed;
# any call that actually starts the parse is not visible here -- confirm
# against the original file.
def _load_data_c(kndx, fp):
301
"""Load the knit index file into memory."""
302
reader = KnitIndexReader(kndx, fp)