1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
|
# Copyright (C) 2007 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""Pyrex extensions to knit parsing."""
import sys
from bzrlib import errors
# C standard library declarations needed by the index parser.
cdef extern from "stdlib.h":
    # size_t is the length type taken by the memchr() declaration in this
    # file.
    ctypedef unsigned size_t
    # strtol() performs the base-10 conversion inside string_to_int_safe().
    long int strtol(char *nptr, char **endptr, int base)
# CPython C-API functions used by the parser.
cdef extern from "Python.h":
    int PyDict_CheckExact(object)
    # Declared as returning void* so that a missing key can be detected by
    # comparing the result against NULL.
    void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)
    int PyDict_SetItem(object p, object key, object val) except -1

    int PyList_Append(object lst, object item) except -1
    # PyList_GET_ITEM hands back a borrowed reference, which is why
    # process_parents() calls Py_INCREF on the result.
    object PyList_GET_ITEM(object lst, int index)
    int PyList_CheckExact(object)

    void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)

    # PyString_AsString and PyString_Size signal failure (for example when
    # handed an object that is not a string) by returning NULL / -1 with an
    # exception set.  Declaring the error values makes that exception
    # propagate instead of leaving the caller with a NULL pointer and a
    # negative length.
    char *PyString_AsString(object p) except NULL
    object PyString_FromStringAndSize(char *, int)
    int PyString_Size(object p) except -1

    void Py_INCREF(object)
cdef extern from "string.h":
    # memchr() is used to locate the field separators (' ', ',' and '\n')
    # directly in the raw character buffer.
    void *memchr(void *s, int c, size_t n)
cdef int string_to_int_safe(char *s, char *end, int *out) except -1:
    """Convert a base10 string to an integer.

    This makes sure the whole string is consumed, or it raises ValueError.
    This is similar to how int(s) works, except you don't need a Python
    String object.

    :param s: The string to convert
    :param end: The character after the integer. So if the string is '12'
        followed by a NUL terminator, this should be pointing at the NUL.
        If the string was '12 ' then this should point at the ' '.
    :param out: This is the integer that will be returned
    :return: -1 if an exception is raised. 0 otherwise
    :raises ValueError: if the text between s and end is empty, or is not
        entirely consumed as a base-10 integer.
    """
    cdef char *integer_end

    # We can't just return the integer because of how pyrex determines when
    # there is an exception.
    # NOTE(review): strtol() returns a long which is narrowed to int here;
    # values outside the range of int are not detected.
    out[0] = <int>strtol(s, &integer_end, 10)
    # An empty field has to be rejected explicitly: strtol() consumes nothing
    # and leaves integer_end == s == end, which would otherwise look like a
    # successful parse of 0 (whereas int('') raises ValueError).
    if s == end or integer_end != end:
        py_s = PyString_FromStringAndSize(s, end-s)
        raise ValueError('%r is not a valid integer' % (py_s,))
    return 0
cdef class KnitIndexReader:
    """Parse the text of a knit index file into a knit index object.

    Each complete record read from ``fp`` is stored in ``kndx._cache`` (a
    dict keyed by version id) as the tuple
    ``(version_id, options, pos, size, parents, index)``.  Version ids that
    were not already in the cache are appended to ``kndx._history`` (a list),
    and ``index`` is their position in that list.
    """

    cdef object kndx
    cdef object fp

    cdef object cache
    cdef object history

    # Current parse position and one-past-the-end of the text being parsed.
    cdef char * cur_str
    cdef char * end_str

    # Number of entries in self.history, tracked as a C int so that parent
    # references can be bounds-checked without a Python call.
    cdef int history_len

    def __new__(self, kndx, fp):
        self.kndx = kndx
        self.fp = fp

        self.cache = kndx._cache
        self.history = kndx._history

        self.cur_str = NULL
        self.end_str = NULL
        self.history_len = 0

    cdef int validate(self) except -1:
        """Check that the index containers are exactly a dict and a list.

        The parser uses the unchecked C-API (PyList_GET_ITEM, PyDict_GetItem)
        on these objects, so anything else must be rejected up front.

        This is declared with an error return value rather than as ``void``
        so that the TypeError actually propagates to the caller.

        :return: 0 on success, -1 (with an exception set) on failure.
        :raises TypeError: if kndx._cache is not a dict or kndx._history is
            not a list.
        """
        if not PyDict_CheckExact(self.cache):
            raise TypeError('kndx._cache must be a python dict')
        if not PyList_CheckExact(self.history):
            raise TypeError('kndx._history must be a python list')
        return 0

    cdef object process_options(self, char *option_str, char *end):
        """Process the options string into a list.

        :param option_str: Start of the comma separated options field.
        :param end: One past the last character of the options field.
        :return: A list of Python strings, one per option.
        """
        cdef char *next

        # This is alternative code which creates a python string and splits it.
        # It is "correct" and more obvious, but slower than the following code.
        # It can be uncommented to switch in case the other code is seen as
        # suspect.
        # options = PyString_FromStringAndSize(option_str,
        #                                      end - option_str)
        # return options.split(',')

        final_options = []

        while option_str < end:
            next = <char*>memchr(option_str, c',', end - option_str)
            if next == NULL:
                # No more separators: the last option runs to the end.
                next = end
            next_option = PyString_FromStringAndSize(option_str,
                                                     next - option_str)
            PyList_Append(final_options, next_option)

            # Move past the ','
            option_str = next+1

        return final_options

    cdef object process_parents(self, char *parent_str, char *end):
        """Convert the parents field of a record into a list of version ids.

        Every parent is terminated by a space.  A parent starting with '.'
        is an explicit version id (the '.' is dropped); anything else is a
        decimal index into self.history.

        :param parent_str: Start of the parents field.
        :param end: One past the last character of the parents field.
        :return: A list of parent version ids.
        :raises ValueError: if an index parent is not a valid integer.
        :raises IndexError: if an index parent is negative or refers to a
            history entry that has not been read yet.
        """
        cdef char *next
        cdef int int_parent
        cdef char *parent_end

        # Alternative, correct but slower code.
        #
        # parents = PyString_FromStringAndSize(parent_str,
        #                                      end - parent_str)
        # real_parents = []
        # for parent in parents.split():
        #     if parent[0].startswith('.'):
        #         real_parents.append(parent[1:])
        #     else:
        #         real_parents.append(self.history[int(parent)])
        # return real_parents

        parents = []
        while parent_str <= end:
            next = <char*>memchr(parent_str, c' ', end - parent_str)
            if next == NULL or next >= end or next == parent_str:
                break

            if parent_str[0] == c'.':
                # This is an explicit revision id
                parent_str = parent_str + 1
                parent = PyString_FromStringAndSize(parent_str,
                                                    next - parent_str)
            else:
                # This is an integer mapping to original
                string_to_int_safe(parent_str, next, &int_parent)

                # PyList_GET_ITEM does no bounds checking, so both ends of
                # the range have to be verified here; strtol() happily
                # parses a leading '-'.
                if int_parent < 0:
                    raise IndexError('Parent index cannot be negative: %d'
                                     % (int_parent,))
                if int_parent >= self.history_len:
                    raise IndexError('Parent index refers to a revision which'
                        ' does not exist yet.'
                        ' %d > %d' % (int_parent, self.history_len))
                parent = PyList_GET_ITEM(self.history, int_parent)
                # PyList_GET_ITEM returns a borrowed reference, but it is
                # declared as returning 'object', so take our own reference.
                Py_INCREF(parent)
            PyList_Append(parents, parent)
            parent_str = next + 1
        return parents

    cdef int process_one_record(self, char *start, char *end) except -1:
        """Take a simple string and split it into an index record.

        The record has the space separated fields
        ``version_id options pos size parents...``.

        :param start: First character of the record.
        :param end: One past the last character of the record fields (the
            caller passes the position of the trailing ':').
        :return: 1 if the record was stored in the cache, 0 if it was too
            short to be a record and was skipped, -1 on error.
        :raises errors.KnitCorrupt: if pos, size or a parent reference
            cannot be decoded.
        """
        cdef char *version_id_str
        cdef int version_id_size
        cdef char *option_str
        cdef char *option_end
        cdef char *pos_str
        cdef int pos
        cdef char *size_str
        cdef int size
        cdef char *parent_str
        cdef void *cache_entry

        version_id_str = start
        option_str = <char*>memchr(version_id_str, c' ', end - version_id_str)
        if option_str == NULL or option_str >= end:
            # Short entry
            return 0
        version_id_size = <int>(option_str - version_id_str)
        # Move past the space character
        option_str = option_str + 1

        pos_str = <char*>memchr(option_str, c' ', end - option_str)
        if pos_str == NULL or pos_str >= end:
            # Short entry
            return 0
        option_end = pos_str
        pos_str = pos_str + 1

        size_str = <char*>memchr(pos_str, c' ', end - pos_str)
        if size_str == NULL or size_str >= end:
            # Short entry
            return 0
        size_str = size_str + 1

        parent_str = <char*>memchr(size_str, c' ', end - size_str)
        if parent_str == NULL or parent_str >= end:
            # Missing parents
            return 0
        parent_str = parent_str + 1

        version_id = PyString_FromStringAndSize(version_id_str,
                                                version_id_size)
        options = self.process_options(option_str, option_end)

        try:
            # The 'end' of each number is the space that precedes the next
            # field, i.e. one before the start of that field.
            string_to_int_safe(pos_str, size_str - 1, &pos)
            string_to_int_safe(size_str, parent_str - 1, &size)
            parents = self.process_parents(parent_str, end)
        except (ValueError, IndexError), e:
            py_line = PyString_FromStringAndSize(start, end - start)
            raise errors.KnitCorrupt(self.kndx._filename,
                                     "line %r: %s" % (py_line, e))

        cache_entry = PyDict_GetItem_void(self.cache, version_id)
        if cache_entry == NULL:
            # First time this version id is seen: it gets the next slot in
            # the history list.
            PyList_Append(self.history, version_id)
            index = self.history_len
            self.history_len = self.history_len + 1
        else:
            # PyTuple_GetItem_void_void does *not* increment the reference
            # counter, but casting to <object> does.
            index = <object>PyTuple_GetItem_void_void(cache_entry, 5)

        PyDict_SetItem(self.cache, version_id,
                       (version_id,
                        options,
                        pos,
                        size,
                        parents,
                        index,
                       ))
        return 1

    cdef int process_next_record(self) except -1:
        """Process the next record in the file.

        Advances self.cur_str past the line that was examined.  A line is
        only treated as a record if its last character is ':'.

        :return: 1 if a record was stored, 0 if the line was skipped, -1
            (with an exception set) on error.
        """
        cdef char *last
        cdef char *start

        start = self.cur_str
        # Find the next newline
        last = <char*>memchr(start, c'\n', self.end_str - start)
        if last == NULL:
            # Process until the end of the file
            last = self.end_str - 1
            self.cur_str = self.end_str
        else:
            # The last character is right before the '\n'
            # And the next string is right after it
            self.cur_str = last + 1
            last = last - 1

        if last <= start or last[0] != c':':
            # Incomplete record
            return 0

        return self.process_one_record(start, last)

    def read(self):
        """Read the whole of self.fp and load every record into the index.

        :raises TypeError: if kndx._cache / kndx._history are not exactly a
            dict / a list.
        :raises errors.KnitCorrupt: if a record cannot be decoded.
        """
        cdef int text_size

        self.validate()

        # Indexes handed out to new versions, and the bound used to check
        # parent references, must match the real length of the history list
        # even if it already holds entries.
        self.history_len = len(self.history)

        self.kndx.check_header(self.fp)

        # We read the whole thing at once
        # TODO: jam 2007-05-09 Consider reading incrementally rather than
        #       having to have the whole thing read up front.
        #       we already know that calling f.readlines() versus lots of
        #       f.readline() calls is faster.
        #       The other possibility is to avoid a Python String here
        #       completely. However self.fp may be a 'file-like' object
        #       it is not guaranteed to be a real file.
        # 'text' must stay referenced for as long as cur_str/end_str point
        # into its buffer.
        text = self.fp.read()
        text_size = PyString_Size(text)
        self.cur_str = PyString_AsString(text)
        # This points one past the last character in the string
        self.end_str = self.cur_str + text_size

        while self.cur_str < self.end_str:
            self.process_next_record()
def _load_data_c(kndx, fp):
    """Read the knit index file ``fp`` and load its records into ``kndx``."""
    KnitIndexReader(kndx, fp).read()
|