~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_knit_load_data_c.pyx

Committer: Aaron Bentley
Date: 2007-07-17 13:27:14 UTC
mfrom: (2624 +trunk)
mto: This revision was merged to the branch mainline in revision 2631.
Revision ID: abentley@panoramicfeedback.com-20070717132714-tmzx9khmg9501k51

Merge from bzr.dev

files added:
bzrlib/_knit_load_data_c.pyx

bzrlib/_knit_load_data_py.py

bzrlib/benchmarks/bench_knit.py

bzrlib/index.py

bzrlib/tests/blackbox/test_pack.py

bzrlib/tests/repository_implementations/test_pack.py

bzrlib/tests/test_index.py

bzrlib/tests/workingtree_implementations/test_uncommit.py

bzrlib/util/tests

bzrlib/util/tests/__init__.py

bzrlib/util/tests/test_bencode.py

doc/bug_trackers.txt

doc/developers/diff.txt

doc/developers/indices.txt

doc/developers/repository.txt

files modified:
.bzrignore

NEWS

bzrlib/__init__.py

bzrlib/annotate.py

bzrlib/benchmarks/__init__.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/cmd_version_info.py

bzrlib/commands.py

bzrlib/commit.py

bzrlib/config.py

bzrlib/conflicts.py

bzrlib/debug.py

bzrlib/errors.py

bzrlib/help_topics.py

bzrlib/info.py

bzrlib/knit.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/option.py

bzrlib/plugin.py

bzrlib/plugins/launchpad/__init__.py

bzrlib/plugins/launchpad/test_register.py

bzrlib/progress.py

bzrlib/remote.py

bzrlib/repofmt/knitrepo.py

bzrlib/repository.py

bzrlib/revision.py

bzrlib/sign_my_commits.py

bzrlib/smart/protocol.py

bzrlib/smart/vfs.py

bzrlib/store/revision/knit.py

bzrlib/store/revision/text.py

bzrlib/symbol_versioning.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_add.py

bzrlib/tests/blackbox/test_added.py

bzrlib/tests/blackbox/test_aliases.py

bzrlib/tests/blackbox/test_ancestry.py

bzrlib/tests/blackbox/test_annotate.py

bzrlib/tests/blackbox/test_bound_branches.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_break_lock.py

bzrlib/tests/blackbox/test_cat.py

bzrlib/tests/blackbox/test_cat_revision.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_command_encoding.py

bzrlib/tests/blackbox/test_commit.py

bzrlib/tests/blackbox/test_conflicts.py

bzrlib/tests/blackbox/test_debug.py

bzrlib/tests/blackbox/test_diff.py

bzrlib/tests/blackbox/test_exceptions.py

bzrlib/tests/blackbox/test_export.py

bzrlib/tests/blackbox/test_find_merge_base.py

bzrlib/tests/blackbox/test_help.py

bzrlib/tests/blackbox/test_ignore.py

bzrlib/tests/blackbox/test_info.py

bzrlib/tests/blackbox/test_init.py

bzrlib/tests/blackbox/test_inventory.py

bzrlib/tests/blackbox/test_join.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_logformats.py

bzrlib/tests/blackbox/test_ls.py

bzrlib/tests/blackbox/test_lsprof.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_missing.py

bzrlib/tests/blackbox/test_mv.py

bzrlib/tests/blackbox/test_nick.py

bzrlib/tests/blackbox/test_non_ascii.py

bzrlib/tests/blackbox/test_outside_wt.py

bzrlib/tests/blackbox/test_pull.py

bzrlib/tests/blackbox/test_push.py

bzrlib/tests/blackbox/test_re_sign.py

bzrlib/tests/blackbox/test_reconcile.py

bzrlib/tests/blackbox/test_remerge.py

bzrlib/tests/blackbox/test_remove.py

bzrlib/tests/blackbox/test_remove_tree.py

bzrlib/tests/blackbox/test_revert.py

bzrlib/tests/blackbox/test_revision_history.py

bzrlib/tests/blackbox/test_revision_info.py

bzrlib/tests/blackbox/test_revno.py

bzrlib/tests/blackbox/test_selftest.py

bzrlib/tests/blackbox/test_serve.py

bzrlib/tests/blackbox/test_shared_repository.py

bzrlib/tests/blackbox/test_sign_my_commits.py

bzrlib/tests/blackbox/test_split.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/blackbox/test_submit.py

bzrlib/tests/blackbox/test_tags.py

bzrlib/tests/blackbox/test_testament.py

bzrlib/tests/blackbox/test_too_much.py

bzrlib/tests/blackbox/test_uncommit.py

bzrlib/tests/blackbox/test_update.py

bzrlib/tests/blackbox/test_upgrade.py

bzrlib/tests/blackbox/test_version.py

bzrlib/tests/blackbox/test_version_info.py

bzrlib/tests/blackbox/test_versioning.py

bzrlib/tests/blackbox/test_whoami.py

bzrlib/tests/branch_implementations/test_revision_id_to_revno.py

bzrlib/tests/branch_implementations/test_sprout.py

bzrlib/tests/branch_implementations/test_uncommit.py

bzrlib/tests/branch_implementations/test_update.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/repository_implementations/__init__.py

bzrlib/tests/revisionstore_implementations/test_all.py

bzrlib/tests/test_ancestry.py

bzrlib/tests/test_annotate.py

bzrlib/tests/test_branch.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_config.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_help.py

bzrlib/tests/test_info.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_log.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_options.py

bzrlib/tests/test_plugins.py

bzrlib/tests/test_progress.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_selftest.py

bzrlib/tests/test_source.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/workingtree_implementations/__init__.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_parents.py

bzrlib/tests/workingtree_implementations/test_remove.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/ftp.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/uncommit.py

bzrlib/util/bencode.py

bzrlib/version.py

bzrlib/win32utils.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

doc/README.1st

doc/configuration.txt

doc/developers/HACKING

doc/developers/index.txt

doc/developers/performance-roadmap.txt

doc/developers/performance.dot

doc/developers/scratch.txt

doc/http_smart_server.txt

doc/plugins.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/_knit_load_data_c.pyx

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Pyrex extensions to knit parsing."""

import sys

from bzrlib import errors

# C library: strtol() is used by string_to_int_safe() to parse decimal fields.
cdef extern from "stdlib.h":

ctypedef unsigned size_t

long int strtol(char *nptr, char **endptr, int base)

# CPython C API entry points used by the index reader below.
cdef extern from "Python.h":

int PyDict_CheckExact(object)

# PyDict_GetItem is re-declared here as returning a plain 'void *' so the
# result can be compared against NULL in process_one_record().
void *PyDict_GetItem_void "PyDict_GetItem" (object p, object key)

int PyDict_SetItem(object p, object key, object val) except -1

int PyList_Append(object lst, object item) except -1

# Declared as returning 'object'; process_parents() calls Py_INCREF() on the
# result straight after fetching it.
object PyList_GET_ITEM(object lst, int index)

int PyList_CheckExact(object)

# PyTuple_GET_ITEM re-declared on 'void *' values; it is applied to the
# 'void *' that PyDict_GetItem_void() returned.
void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)

char *PyString_AsString(object p)

object PyString_FromStringAndSize(char *, int)

int PyString_Size(object p)

void Py_INCREF(object)

# memchr() locates the field separators (' ', ',' and '\n') in the raw text.
cdef extern from "string.h":

void *memchr(void *s, int c, size_t n)

cdef int string_to_int_safe(char *s, char *end, int *out) except -1:
    """Convert a base10 string to an integer.

    This makes sure the whole string is consumed, or it raises ValueError.
    This is similar to how int(s) works, except you don't need a Python
    String object.

    :param s: The string to convert
    :param end: The character after the integer. So if the string is '12\0',
        this should be pointing at the '\0'. If the string was '12 ' then this
        should point at the ' '.
    :param out: This is the integer that will be returned. It is only
        written when the conversion succeeds.
    :return: -1 if an exception is raised. 0 otherwise
    :raises ValueError: if the field is empty, is not entirely a base10
        integer, or does not fit in a C int.
    """
    cdef char *integer_end
    cdef long value

    # strtol() signals "no digits found" by leaving the end pointer equal to
    # the start pointer. For an empty field (s == end) that is
    # indistinguishable from success, so the field would be read as 0.
    # int('') raises ValueError, and so do we.
    if s == end:
        raise ValueError('%r is not a valid integer' % ('',))

    # We can't just return the integer because of how pyrex determines when
    # there is an exception.
    value = strtol(s, &integer_end, 10)
    if integer_end != end:
        py_s = PyString_FromStringAndSize(s, end-s)
        raise ValueError('%r is not a valid integer' % (py_s,))

    # strtol() returns a long; refuse values that would be silently truncated
    # when narrowed to the int the caller asked for.
    if <long>(<int>value) != value:
        py_s = PyString_FromStringAndSize(s, end-s)
        raise ValueError('%r does not fit in a C int' % (py_s,))

    out[0] = <int>value
    return 0

cdef class KnitIndexReader:
    # Parses the text read from fp and records every complete record in
    # kndx._cache (a dict keyed by version id) and kndx._history (a list of
    # version ids in the order they were first seen).
    #
    # Each cache value is the tuple
    #   (version_id, options, pos, size, parents, index)
    # where index is the position of version_id in the history list.

    cdef object kndx
    cdef object fp

    cdef object cache
    cdef object history

    # [cur_str, end_str) is the part of the text that is still unparsed.
    cdef char * cur_str
    cdef char * end_str

    # C-level mirror of len(self.history); used to bounds check parent
    # references without calling back into Python.
    cdef int history_len

    def __new__(self, kndx, fp):
        self.kndx = kndx
        self.fp = fp

        self.cache = kndx._cache
        self.history = kndx._history

        self.cur_str = NULL
        self.end_str = NULL
        self.history_len = 0

    cdef int validate(self) except -1:
        """Check that the cache and history are exactly a dict and a list.

        The parsing code uses unchecked C API calls on both objects, so any
        other type must be refused up front.

        This is declared 'int ... except -1' rather than 'void' so that the
        TypeError actually propagates to the caller of read(); a cdef
        function without an except clause cannot report an exception.

        :return: 0 on success, -1 if an exception is raised.
        :raises TypeError: if either object has the wrong type.
        """
        if not PyDict_CheckExact(self.cache):
            raise TypeError('kndx._cache must be a python dict')
        if not PyList_CheckExact(self.history):
            raise TypeError('kndx._history must be a python list')
        return 0

    cdef object process_options(self, char *option_str, char *end):
        """Process the options string into a list.

        :param option_str: First character of the comma separated options.
        :param end: The character just past the last option.
        :return: A list of the option strings.
        """
        cdef char *next

        # This is alternative code which creates a python string and splits it.
        # It is "correct" and more obvious, but slower than the following code.
        # It can be uncommented to switch in case the other code is seen as
        # suspect.
        # options = PyString_FromStringAndSize(option_str,
        #                                      end - option_str)
        # return options.split(',')

        final_options = []

        while option_str < end:
            next = <char*>memchr(option_str, c',', end - option_str)
            if next == NULL:
                # No more separators, the last option runs up to end
                next = end
            next_option = PyString_FromStringAndSize(option_str,
                                                     next - option_str)
            PyList_Append(final_options, next_option)

            # Move past the ','
            option_str = next+1

        return final_options

    cdef object process_parents(self, char *parent_str, char *end):
        """Process the parents section of a record into a list of ids.

        Every parent is terminated by a space. A parent starting with '.' is
        an explicit revision id; anything else is a base10 index into the
        history list built so far.

        :param parent_str: First character of the parents section.
        :param end: The character just past the parents section.
        :return: A list of parent revision ids.
        :raises ValueError: if a parent reference is not a valid integer.
        :raises IndexError: if a parent reference is negative, or refers to
            a history entry which has not been read yet.
        """
        cdef char *next
        cdef int int_parent
        cdef char *parent_end

        # Alternative, correct but slower code.
        #
        # parents = PyString_FromStringAndSize(parent_str,
        #                                      end - parent_str)
        # real_parents = []
        # for parent in parents.split():
        #     if parent[0].startswith('.'):
        #         real_parents.append(parent[1:])
        #     else:
        #         real_parents.append(self.history[int(parent)])
        # return real_parents

        parents = []
        while parent_str <= end:
            next = <char*>memchr(parent_str, c' ', end - parent_str)
            if next == NULL or next >= end or next == parent_str:
                # Either there are no more terminated entries, or the entry
                # is empty
                break

            if parent_str[0] == c'.':
                # This is an explicit revision id
                parent_str = parent_str + 1
                parent = PyString_FromStringAndSize(parent_str,
                                                    next - parent_str)
            else:
                # This in an integer mapping to original
                string_to_int_safe(parent_str, next, &int_parent)

                # PyList_GET_ITEM does no bounds checking at all, so a
                # negative index has to be refused here as well, otherwise
                # we would read memory in front of the list.
                if int_parent < 0:
                    raise IndexError('Parent index must not be negative:'
                        ' %d' % (int_parent,))
                if int_parent >= self.history_len:
                    raise IndexError('Parent index refers to a revision which'
                        ' does not exist yet.'
                        ' %d > %d' % (int_parent, self.history_len))
                parent = PyList_GET_ITEM(self.history, int_parent)
                # PyList_GET_ITEM returns a borrowed reference, but it is
                # declared as returning 'object', so Pyrex will release the
                # value as if we owned it. Take the reference it expects.
                Py_INCREF(parent)
            PyList_Append(parents, parent)
            parent_str = next + 1
        return parents

    cdef int process_one_record(self, char *start, char *end) except -1:
        """Take a simple string and split it into an index record.

        The record is 'version_id options pos size parents' with the fields
        separated by single spaces.

        :param start: First character of the record.
        :param end: The character just past the record.
        :return: 1 if the record was added to the cache, 0 if it was too
            short to be a record and was skipped, -1 if an exception is
            raised.
        :raises KnitCorrupt: if pos, size or a parent reference is invalid.
        """
        cdef char *version_id_str
        cdef int version_id_size
        cdef char *option_str
        cdef char *option_end
        cdef char *pos_str
        cdef int pos
        cdef char *size_str
        cdef int size
        cdef char *parent_str
        cdef int parent_size
        cdef void *cache_entry

        version_id_str = start
        option_str = <char*>memchr(version_id_str, c' ', end - version_id_str)
        if option_str == NULL or option_str >= end:
            # Short entry
            return 0
        version_id_size = <int>(option_str - version_id_str)
        # Move past the space character
        option_str = option_str + 1

        pos_str = <char*>memchr(option_str, c' ', end - option_str)
        if pos_str == NULL or pos_str >= end:
            # Short entry
            return 0
        option_end = pos_str
        pos_str = pos_str + 1

        size_str = <char*>memchr(pos_str, c' ', end - pos_str)
        if size_str == NULL or size_str >= end:
            # Short entry
            return 0
        size_str = size_str + 1

        parent_str = <char*>memchr(size_str, c' ', end - size_str)
        if parent_str == NULL or parent_str >= end:
            # Missing parents
            return 0
        parent_str = parent_str + 1

        version_id = PyString_FromStringAndSize(version_id_str,
                                                version_id_size)
        options = self.process_options(option_str, option_end)

        try:
            # Each field ends at the space right before the next field
            string_to_int_safe(pos_str, size_str - 1, &pos)
            string_to_int_safe(size_str, parent_str - 1, &size)
            parents = self.process_parents(parent_str, end)
        except (ValueError, IndexError), e:
            py_line = PyString_FromStringAndSize(start, end - start)
            raise errors.KnitCorrupt(self.kndx._filename,
                                     "line %r: %s" % (py_line, e))

        cache_entry = PyDict_GetItem_void(self.cache, version_id)
        if cache_entry == NULL:
            # First time this version id is seen, give it a new index
            PyList_Append(self.history, version_id)
            index = self.history_len
            self.history_len = self.history_len + 1
        else:
            # PyTuple_GetItem_void_void does *not* increment the reference
            # counter, but casting to <object> does.
            index = <object>PyTuple_GetItem_void_void(cache_entry, 5)

        PyDict_SetItem(self.cache, version_id,
                       (version_id,
                        options,
                        pos,
                        size,
                        parents,
                        index,
                       ))
        return 1

    cdef int process_next_record(self) except -1:
        """Process the next record in the file.

        Consumes one line starting at cur_str and advances cur_str past it.
        A line which does not end in ':' is incomplete and is skipped.

        :return: What process_one_record() returned, or 0 if the line was
            skipped. -1 if an exception is raised.
        """
        cdef char *last
        cdef char *start

        start = self.cur_str
        # Find the next newline
        last = <char*>memchr(start, c'\n', self.end_str - start)
        if last == NULL:
            # Process until the end of the file
            last = self.end_str - 1
            self.cur_str = self.end_str
        else:
            # The last character is right before the '\n'
            # And the next string is right after it
            self.cur_str = last + 1
            last = last - 1

        if last <= start or last[0] != c':':
            # Incomplete record
            return 0

        return self.process_one_record(start, last)

    def read(self):
        """Read all of fp and load every complete record into the index.

        :raises TypeError: if kndx._cache is not exactly a dict or
            kndx._history is not exactly a list.
        :raises KnitCorrupt: if a record holds an invalid field.
        """
        cdef int text_size

        self.validate()
        # Index assignment and the parent bounds check both rely on
        # history_len matching the real list, so do not assume that the
        # history starts out empty.
        self.history_len = len(self.history)

        self.kndx.check_header(self.fp)

        # We read the whole thing at once
        # TODO: jam 2007-05-09 Consider reading incrementally rather than
        #       having to have the whole thing read up front.
        #       we already know that calling f.readlines() versus lots of
        #       f.readline() calls is faster.
        #       The other possibility is to avoid a Python String here
        #       completely. However self.fp may be a 'file-like' object
        #       it is not guaranteed to be a real file.
        text = self.fp.read()
        text_size = PyString_Size(text)
        self.cur_str = PyString_AsString(text)
        # This points one past the last character in the string
        self.end_str = self.cur_str + text_size

        while self.cur_str < self.end_str:
            self.process_next_record()

298

299

300

def _load_data_c(kndx, fp):
    """Parse the knit index text in fp and store the records on kndx.

    :param kndx: The knit index object handed to KnitIndexReader.
    :param fp: The file-like object handed to KnitIndexReader.
    """
    KnitIndexReader(kndx, fp).read()

Older »