17
17
"""Pyrex extensions to knit parsing."""
20
cdef extern from "stdlib.h":
21
long int strtol(char *nptr, char **endptr, int base)
22
unsigned long int strtoul(char *nptr, char **endptr, int base)
25
cdef extern from "Python.h":
26
int PyDict_CheckExact(object)
27
void *PyDict_GetItem(object p, object key)
28
int PyDict_SetItem(object p, object key, object val) except -1
30
int PyList_Append(object lst, object item) except -1
31
void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)
32
object PyList_GET_ITEM (object lst, int index)
33
int PyList_CheckExact(object)
35
int PyTuple_CheckExact(object)
36
void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)
37
object PyTuple_New(int)
38
int PyTuple_SetItem(object tpl, int offset, object val)
39
void PyTuple_SET_ITEM(object tpl, int offset, object val)
40
object PyTuple_Pack(int n, ...)
42
char *PyString_AsString(object p)
43
char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)
44
object PyString_FromString(char *)
45
object PyString_FromStringAndSize(char *, int)
46
int PyString_Size(object p)
47
int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)
48
int PyString_CheckExact(object p)
50
void Py_INCREF(object)
51
void Py_DECREF(object)
54
cdef extern from "string.h":
55
char *strchr(char *s1, char c)
56
int strncmp(char *s1, char *s2, int len)
57
int strcmp(char *s1, char *s2)
60
cdef class KnitIndexReader:
77
def __new__(self, kndx, fp):
81
# Keep direct references to the index object's private containers so the
# parsing methods can update them in place.
# NOTE(review): `fp` is not used in the lines visible here; presumably it
# is stored alongside `kndx` for the later read -- confirm.
self.cache = kndx._cache
82
self.history = kndx._history
91
cdef int validate(self) except -1:
    """Check that the cache is exactly a dict and the history exactly a list.

    :return: 0 on success.
    :raises TypeError: if self.cache is not an exact dict, or self.history
        is not an exact list.
    """
    # Declared `int ... except -1` rather than `void`: a cdef function
    # returning void has no error return value, so an exception raised in
    # it would be reported and discarded instead of reaching the caller.
    if not PyDict_CheckExact(self.cache):
        raise TypeError('kndx._cache must be a python dict')
    if not PyList_CheckExact(self.history):
        raise TypeError('kndx._history must be a python list')
    return 0
97
cdef void process_one_record(self, char *start, char *end):
98
"""Take a simple string and split it into an index record."""
99
# The record is the byte range from `start` up to `end`.  Fields are
# separated by single spaces and are located in order with strchr():
# version id, options, pos, size, then the parent references.
# NOTE(review): this is declared `cdef void` with no except clause, so an
# exception raised below (e.g. by int(parent) or the history lookup) has
# no error return value to travel through -- confirm that is intended.
cdef char *version_id_str
100
cdef int version_id_size
101
cdef char *option_str
107
cdef char *parent_str
110
version_id_str = start
111
option_str = strchr(version_id_str, c' ')
112
# A separator that is missing, or that lies at or beyond `end`, means this
# record does not contain the next field.
if option_str == NULL or option_str >= end:
115
version_id_size = <int>(option_str - version_id_str)
116
# Move past the space character
117
option_str = option_str + 1
119
pos_str = strchr(option_str, c' ')
120
if pos_str == NULL or pos_str >= end:
123
option_size = <int>(pos_str - option_str)
124
pos_str = pos_str + 1
126
size_str = strchr(pos_str, c' ')
127
if size_str == NULL or size_str >= end:
130
size_str = size_str + 1
132
# TODO: Make sure this works when there are no parents
133
parent_str = strchr(size_str, c' ')
134
if parent_str == NULL or parent_str >= end:
137
parent_str = parent_str + 1
139
version_id = PyString_FromStringAndSize(version_id_str,
141
options = PyString_FromStringAndSize(option_str, option_size)
142
# The options field is a comma-separated list
options = options.split(',')
144
# Parsed as base 10; endptr is NULL, so how many characters were consumed
# is never checked (see the TODO below).
pos = strtol(pos_str, NULL, 10)
145
size = strtol(size_str, NULL, 10)
147
# TODO: Check that we are actually reading integers
148
parents = PyString_FromStringAndSize(parent_str,
149
<int>(end - parent_str))
150
# Everything from parent_str up to `end`, split on whitespace
parents = parents.split()
152
# A parent whose text starts with '.' is stored with that first character
# removed; any other parent is converted with int() and used as an index
# into self.history.
# NOTE(review): the `else:` separating the two appends is not visible in
# the reviewed lines -- confirm.
for parent in parents:
153
if parent[0].startswith('.'):
154
real_parents.append(parent[1:])
156
real_parents.append(self.history[int(parent)])
158
# A version id not yet in the cache is appended to the history and takes
# the next history position as its index.
# NOTE(review): the later `index = self.cache[version_id][5]` presumably
# sits under an `else:` that is not visible here, reusing the index kept
# at position 5 of the existing cache tuple -- confirm.
if version_id not in self.cache:
159
self.history.append(version_id)
160
index = self.history_len
161
self.history_len = self.history_len + 1
163
index = self.cache[version_id][5]
165
self.cache[version_id] = (version_id,
173
cdef void process_next_record(self):
174
"""Process the next record in the file."""
179
# NOTE(review): `start` is not assigned in the lines visible here;
# presumably it is taken from self.cur_str -- confirm.
# Find the next newline
180
last = strchr(start, c'\n')
182
# Process until the end of the file
183
last = self.end_str-1
184
self.cur_str = self.end_str
185
line = PyString_FromStringAndSize(start, <int>(last - start))
186
ending = PyString_FromStringAndSize(last, 1)
188
# The last character is right before the '\n'
189
# And the next string is right after it
190
line = PyString_FromStringAndSize(start, <int>(last - start))
191
self.cur_str = last + 1
193
# NOTE(review): this copies 3 bytes starting at `last`; close to the end
# of the buffer that could read past self.end_str.  Neither `line` nor
# `ending` is used in the visible lines -- confirm whether they are
# debugging leftovers.
ending = PyString_FromStringAndSize(last, 3)
195
# NOTE(review): when `last` comes from strchr(start, '\n') it points at
# the newline itself, so last[0] would be '\n' rather than ':' unless a
# line not visible here moves it -- confirm this tests the intended
# character.
if last <= start or last[0] != c':':
199
# Hand the byte range from `start` up to `last` to the field parser
self.process_one_record(start, last)
207
history = self.history
209
kndx.check_header(fp)
211
# We read the whole thing at once
212
# TODO: jam 2007-05-09 Consider reading incrementally rather than
213
# having to have the whole thing read up front.
214
# we already know that calling f.readlines() versus lots of
215
# f.readline() calls is faster.
216
self.text = fp.read()
217
self.text_str = PyString_AsString(self.text)
218
self.text_size = PyString_Size(self.text)
219
self.cur_str = self.text_str
220
# This points one past the last character in the string
221
self.end_str = self.text_str + self.text_size
223
while self.cur_str < self.end_str:
224
self.process_next_record()
20
227
def _load_data_c(kndx, fp):
21
228
"""Load the knit index file into memory."""
23
# NOTE(review): both a line-by-line fp.readlines() loop and a
# KnitIndexReader construction appear in this function -- confirm which
# of the two paths is meant to be live.
history = kndx._history
26
# readlines reads the whole file at once:
27
# bad for transports like http, good for local disk
28
# we save 60 ms doing this one change (
29
# from calling readline each time to calling
31
# probably what we want for nice behaviour on
32
# http is an incremental readlines that yields, or
33
# a check for local vs non local indexes,
34
# Index of the most recent history entry; advanced below each time a new
# version id is appended.
history_top = len(history) - 1
35
for line in fp.readlines():
37
# NOTE(review): `rec` is not assigned in the visible lines; presumably it
# is the split fields of `line` -- confirm.  A usable record has at least
# five fields and ends with a ':' field.
if len(rec) < 5 or rec[-1] != ':':
39
# FIXME: in the future we should determine if it's a
40
# short write - and ignore it
41
# or a different failure, and raise. RBC 20060407
45
# Fields after the first four, excluding the trailing ':', are the parent
# references.
for value in rec[4:-1]:
47
# uncompressed reference
50
# Here the value is converted with int() and used as an index into history
parent_id = history[int(value)]
51
parents.append(parent_id)
53
version_id, options, pos, size = rec[:4]
54
# NOTE(review): self-assignment, has no effect as written
version_id = version_id
56
# See kndx._cache_version
57
# only want the _history index to reference the 1st
58
# index entry for version_id
59
if version_id not in cache:
60
history_top = history_top + 1
62
history.append(version_id)
64
# NOTE(review): presumably under an `else:` not visible here, reusing the
# index stored at position 5 of the existing cache tuple -- confirm.
index = cache[version_id][5]
65
cache[version_id] = (version_id,
71
# end kndx._cache_version
229
reader = KnitIndexReader(kndx, fp)