~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/_dirstate_helpers_c.pyx

Committer: Robert Collins
Date: 2007-07-25 00:52:21 UTC
mfrom: (2650 +trunk)
mto: This revision was merged to the branch mainline in revision 2651.
Revision ID: robertc@robertcollins.net-20070725005221-0ysm6il5mqnme3wz

Merge bzr.dev.

files added:
bzrlib/_dirstate_helpers_c.pyx

bzrlib/_dirstate_helpers_py.py

bzrlib/benchmarks/bench_dirstate.py

bzrlib/bundle/serializer/v4.py

bzrlib/email_message.py

bzrlib/file_names.py

bzrlib/multiparent.py

bzrlib/plugins/multiparent.py

bzrlib/tests/commands

bzrlib/tests/commands/__init__.py

bzrlib/tests/commands/test_branch.py

bzrlib/tests/commands/test_cat.py

bzrlib/tests/commands/test_checkout.py

bzrlib/tests/commands/test_init.py

bzrlib/tests/commands/test_init_repository.py

bzrlib/tests/commands/test_merge.py

bzrlib/tests/commands/test_missing.py

bzrlib/tests/commands/test_pull.py

bzrlib/tests/commands/test_push.py

bzrlib/tests/test__dirstate_helpers.py

bzrlib/tests/test_email_message.py

bzrlib/tests/test_file_names.py

bzrlib/tests/test_multiparent.py

bzrlib/tests/transport_util.py

bzrlib/transport/unlistable.py

doc/developers/bundle-format4.txt

files removed:
bzrlib/bundle/common.py

bzrlib/bundle/old

bzrlib/bundle/old/send_changeset.py

files renamed:
bzrlib/tests/blackbox/test_bundle.py => bzrlib/tests/blackbox/test_submit.py

files modified:
.bzrignore

NEWS

bzrlib/benchmarks/__init__.py

bzrlib/benchmarks/bench_bundle.py

bzrlib/benchmarks/bench_checkout.py

bzrlib/benchmarks/bench_commit.py

bzrlib/benchmarks/bench_log.py

bzrlib/benchmarks/bench_status.py

bzrlib/branch.py

bzrlib/builtins.py

bzrlib/bundle/__init__.py

bzrlib/bundle/apply_bundle.py

bzrlib/bundle/bundle_data.py

bzrlib/bundle/commands.py

bzrlib/bundle/serializer/__init__.py

bzrlib/bundle/serializer/v08.py

bzrlib/bundle/serializer/v09.py

bzrlib/bzrdir.py

bzrlib/commit.py

bzrlib/conflicts.py

bzrlib/dirstate.py

bzrlib/errors.py

bzrlib/fetch.py

bzrlib/graph.py

bzrlib/inventory.py

bzrlib/knit.py

bzrlib/log.py

bzrlib/memorytree.py

bzrlib/merge.py

bzrlib/merge_directive.py

bzrlib/remote.py

bzrlib/repository.py

bzrlib/revisiontree.py

bzrlib/smart/client.py

bzrlib/smart/server.py

bzrlib/smtp_connection.py

bzrlib/status.py

bzrlib/tests/__init__.py

bzrlib/tests/blackbox/__init__.py

bzrlib/tests/blackbox/test_branch.py

bzrlib/tests/blackbox/test_checkout.py

bzrlib/tests/blackbox/test_log.py

bzrlib/tests/blackbox/test_merge.py

bzrlib/tests/blackbox/test_merge_directive.py

bzrlib/tests/blackbox/test_status.py

bzrlib/tests/bzrdir_implementations/test_bzrdir.py

bzrlib/tests/repository_implementations/test_repository.py

bzrlib/tests/test_bundle.py

bzrlib/tests/test_bzrdir.py

bzrlib/tests/test_conflicts.py

bzrlib/tests/test_dirstate.py

bzrlib/tests/test_errors.py

bzrlib/tests/test_graph.py

bzrlib/tests/test_http.py

bzrlib/tests/test_knit.py

bzrlib/tests/test_merge.py

bzrlib/tests/test_merge_core.py

bzrlib/tests/test_merge_directive.py

bzrlib/tests/test_permissions.py

bzrlib/tests/test_read_bundle.py

bzrlib/tests/test_sftp_transport.py

bzrlib/tests/test_smart_transport.py

bzrlib/tests/test_smtp_connection.py

bzrlib/tests/test_transform.py

bzrlib/tests/test_transport.py

bzrlib/tests/test_transport_implementations.py

bzrlib/tests/test_versionedfile.py

bzrlib/tests/test_xml.py

bzrlib/tests/tree_implementations/test_tree.py

bzrlib/tests/workingtree_implementations/test_commit.py

bzrlib/tests/workingtree_implementations/test_workingtree.py

bzrlib/transform.py

bzrlib/transport/__init__.py

bzrlib/transport/chroot.py

bzrlib/transport/decorator.py

bzrlib/transport/ftp.py

bzrlib/transport/http/__init__.py

bzrlib/transport/http/_pycurl.py

bzrlib/transport/http/_urllib.py

bzrlib/transport/local.py

bzrlib/transport/memory.py

bzrlib/transport/remote.py

bzrlib/transport/sftp.py

bzrlib/tree.py

bzrlib/versionedfile.py

bzrlib/weave.py

bzrlib/workingtree.py

bzrlib/workingtree_4.py

bzrlib/xml5.py

bzrlib/xml_serializer.py

doc/developers/HACKING

doc/developers/bundles.txt

setup.py

Show diffs side-by-side

added added

removed removed

bzrlib/_dirstate_helpers_c.pyx

# This program is free software; you can redistribute it and/or modify

# it under the terms of the GNU General Public License as published by

# the Free Software Foundation; either version 2 of the License, or

# (at your option) any later version.

# This program is distributed in the hope that it will be useful,

# but WITHOUT ANY WARRANTY; without even the implied warranty of

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License

# along with this program; if not, write to the Free Software

# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Helper functions for DirState.

This is the python implementation for DirState functions.

"""

from bzrlib.dirstate import DirState

# Give Pyrex some function definitions for it to understand.

# All of these are just hints to Pyrex, so that it can try to convert python

# objects into similar C objects. (such as PyInt => int).

# In anything defined 'cdef extern from XXX' the real C header will be

# imported, and the real definition will be used from there. So these are just

# hints, and do not need to match exactly to the C definitions.

cdef extern from *:

ctypedef unsigned long size_t

cdef extern from "stdint.h":

ctypedef int intptr_t

cdef extern from "stdlib.h":

unsigned long int strtoul(char *nptr, char **endptr, int base)

# These functions allow us access to a bit of the 'bare metal' of python

# objects, rather than going through the object abstraction. (For example,

# PyList_Append, rather than getting the 'append' attribute of the object, and

# creating a tuple, and then using PyCallObject).

# Functions that return (or take) a void* are meant to grab a C PyObject*. This

# differs from the Pyrex 'object'. If you declare a variable as 'object' Pyrex

# will automatically Py_INCREF and Py_DECREF when appropriate. But for some

# inner loops, we don't need to do that at all, as the reference only lasts for

# a very short time.

cdef extern from "Python.h":

int PyList_Append(object lst, object item) except -1

void *PyList_GetItem_object_void "PyList_GET_ITEM" (object lst, int index)

int PyList_CheckExact(object)

void *PyTuple_GetItem_void_void "PyTuple_GET_ITEM" (void* tpl, int index)

char *PyString_AsString(object p)

char *PyString_AS_STRING_void "PyString_AS_STRING" (void *p)

object PyString_FromString(char *)

object PyString_FromStringAndSize(char *, int)

int PyString_Size(object p)

int PyString_GET_SIZE_void "PyString_GET_SIZE" (void *p)

int PyString_CheckExact(object p)

cdef extern from "string.h":

int strncmp(char *s1, char *s2, int len)

void *memchr(void *s, int c, size_t len)

int memcmp(void *b1, void *b2, size_t len)

# ??? memrchr is a GNU extension :(

# void *memrchr(void *s, int c, size_t len)

cdef void* _my_memrchr(void *s, int c, size_t n):

# memrchr seems to be a GNU extension, so we have to implement it ourselves

cdef char *pos

cdef char *start

start = <char*>s

pos = start + n - 1

while pos >= start:

if pos[0] == c:

return <void*>pos

pos = pos - 1

return NULL

def _py_memrchr(s, c):

"""Just to expose _my_memrchr for testing.

:param s: The Python string to search

:param c: The character to search for

:return: The offset to the last instance of 'c' in s

"""

cdef void *_s

cdef void *found

cdef int length

100

cdef char *_c

101

102

_s = PyString_AsString(s)

103

length = PyString_Size(s)

104

105

_c = PyString_AsString(c)

106

assert PyString_Size(c) == 1,\

107

'Must be a single character string, not %s' % (c,)

108

found = _my_memrchr(_s, _c[0], length)

109

if found == NULL:

110

return None

111

return found - _s

112

113

114

cdef int _is_aligned(void *ptr):

115

"""Is this pointer aligned to an integer size offset?

116

117

:return: 1 if this pointer is aligned, 0 otherwise.

118

"""

119

return ((<intptr_t>ptr) & ((sizeof(int))-1)) == 0

120

121

122

cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):

123

cdef unsigned char *cur1

124

cdef unsigned char *cur2

125

cdef unsigned char *end1

126

cdef unsigned char *end2

127

cdef int *cur_int1

128

cdef int *cur_int2

129

cdef int *end_int1

130

cdef int *end_int2

131

132

if path1 == path2 and size1 == size2:

133

return 0

134

135

end1 = <unsigned char*>path1+size1

136

end2 = <unsigned char*>path2+size2

137

138

# Use 32-bit comparisons for the matching portion of the string.

139

# Almost all CPU's are faster at loading and comparing 32-bit integers,

140

# than they are at 8-bit integers.

141

# 99% of the time, these will be aligned, but in case they aren't just skip

142

# this loop

143

if _is_aligned(path1) and _is_aligned(path2):

144

cur_int1 = <int*>path1

145

cur_int2 = <int*>path2

146

end_int1 = <int*>(path1 + size1 - (size1 % sizeof(int)))

147

end_int2 = <int*>(path2 + size2 - (size2 % sizeof(int)))

148

149

while cur_int1 < end_int1 and cur_int2 < end_int2:

150

if cur_int1[0] != cur_int2[0]:

151

break

152

cur_int1 = cur_int1 + 1

153

cur_int2 = cur_int2 + 1

154

155

cur1 = <unsigned char*>cur_int1

156

cur2 = <unsigned char*>cur_int2

157

else:

158

cur1 = <unsigned char*>path1

159

cur2 = <unsigned char*>path2

160

161

while cur1 < end1 and cur2 < end2:

162

if cur1[0] == cur2[0]:

163

# This character matches, just go to the next one

164

cur1 = cur1 + 1

165

cur2 = cur2 + 1

166

continue

167

# The current characters do not match

168

if cur1[0] == c'/':

169

return -1 # Reached the end of path1 segment first

170

elif cur2[0] == c'/':

171

return 1 # Reached the end of path2 segment first

172

elif cur1[0] < cur2[0]:

173

return -1

174

else:

175

return 1

176

177

# We reached the end of at least one of the strings

178

if cur1 < end1:

179

return 1 # Not at the end of cur1, must be at the end of cur2

180

if cur2 < end2:

181

return -1 # At the end of cur1, but not at cur2

182

# We reached the end of both strings

183

return 0

184

185

186

def cmp_by_dirs_c(path1, path2):

187

"""Compare two paths directory by directory.

188

189

This is equivalent to doing::

190

191

cmp(path1.split('/'), path2.split('/'))

192

193

The idea is that you should compare path components separately. This

194

differs from plain ``cmp(path1, path2)`` for paths like ``'a-b'`` and

195

``a/b``. "a-b" comes after "a" but would come before "a/b" lexically.

196

197

:param path1: first path

198

:param path2: second path

199

:return: positive number if ``path1`` comes first,

200

0 if paths are equal,

201

and negative number if ``path2`` sorts first

202

"""

203

if not PyString_CheckExact(path1):

204

raise TypeError("'path1' must be a plain string, not %s: %r"

205

% (type(path1), path1))

206

if not PyString_CheckExact(path2):

207

raise TypeError("'path2' must be a plain string, not %s: %r"

208

% (type(path2), path2))

209

return _cmp_by_dirs(PyString_AsString(path1),

210

PyString_Size(path1),

211

PyString_AsString(path2),

212

PyString_Size(path2))

213

214

215

def _cmp_path_by_dirblock_c(path1, path2):

216

"""Compare two paths based on what directory they are in.

217

218

This generates a sort order, such that all children of a directory are

219

sorted together, and grandchildren are in the same order as the

220

children appear. But all grandchildren come after all children.

221

222

In other words, all entries in a directory are sorted together, and

223

directorys are sorted in cmp_by_dirs order.

224

225

:param path1: first path

226

:param path2: the second path

227

:return: positive number if ``path1`` comes first,

228

0 if paths are equal

229

and a negative number if ``path2`` sorts first

230

"""

231

if not PyString_CheckExact(path1):

232

raise TypeError("'path1' must be a plain string, not %s: %r"

233

% (type(path1), path1))

234

if not PyString_CheckExact(path2):

235

raise TypeError("'path2' must be a plain string, not %s: %r"

236

% (type(path2), path2))

237

return _cmp_path_by_dirblock(PyString_AsString(path1),

238

PyString_Size(path1),

239

PyString_AsString(path2),

240

PyString_Size(path2))

241

242

243

cdef int _cmp_path_by_dirblock(char *path1, int path1_len,

244

char *path2, int path2_len):

245

"""Compare two paths by what directory they are in.

246

247

see ``_cmp_path_by_dirblock_c`` for details.

248

"""

249

cdef char *dirname1

250

cdef int dirname1_len

251

cdef char *dirname2

252

cdef int dirname2_len

253

cdef char *basename1

254

cdef int basename1_len

255

cdef char *basename2

256

cdef int basename2_len

257

cdef int cur_len

258

cdef int cmp_val

259

260

if path1_len == 0 and path2_len == 0:

261

return 0

262

263

if path1 == path2 and path1_len == path2_len:

264

return 0

265

266

if path1_len == 0:

267

return -1

268

269

if path2_len == 0:

270

return 1

271

272

basename1 = <char*>_my_memrchr(path1, c'/', path1_len)

273

274

if basename1 == NULL:

275

basename1 = path1

276

basename1_len = path1_len

277

dirname1 = ''

278

dirname1_len = 0

279

else:

280

dirname1 = path1

281

dirname1_len = basename1 - path1

282

basename1 = basename1 + 1

283

basename1_len = path1_len - dirname1_len - 1

284

285

basename2 = <char*>_my_memrchr(path2, c'/', path2_len)

286

287

if basename2 == NULL:

288

basename2 = path2

289

basename2_len = path2_len

290

dirname2 = ''

291

dirname2_len = 0

292

else:

293

dirname2 = path2

294

dirname2_len = basename2 - path2

295

basename2 = basename2 + 1

296

basename2_len = path2_len - dirname2_len - 1

297

298

cmp_val = _cmp_by_dirs(dirname1, dirname1_len,

299

dirname2, dirname2_len)

300

if cmp_val != 0:

301

return cmp_val

302

303

cur_len = basename1_len

304

if basename2_len < basename1_len:

305

cur_len = basename2_len

306

307

cmp_val = memcmp(basename1, basename2, cur_len)

308

if cmp_val != 0:

309

return cmp_val

310

if basename1_len == basename2_len:

311

return 0

312

if basename1_len < basename2_len:

313

return -1

314

return 1

315

316

317

def _bisect_path_left_c(paths, path):

318

"""Return the index where to insert path into paths.

319

320

This uses a path-wise comparison so we get::

321

322

a-b

323

a=b

324

a/b

325

Rather than::

326

327

a-b

328

a/b

329

a=b

330

:param paths: A list of paths to search through

331

:param path: A single path to insert

332

:return: An offset where 'path' can be inserted.

333

:seealso: bisect.bisect_left

334

"""

335

cdef int _lo

336

cdef int _hi

337

cdef int _mid

338

cdef char *path_cstr

339

cdef int path_size

340

cdef char *cur_cstr

341

cdef int cur_size

342

cdef void *cur

343

344

if not PyList_CheckExact(paths):

345

raise TypeError("you must pass a python list for 'paths' not: %s %r"

346

% (type(paths), paths))

347

if not PyString_CheckExact(path):

348

raise TypeError("you must pass a string for 'path' not: %s %r"

349

% (type(path), path))

350

351

_hi = len(paths)

352

_lo = 0

353

354

path_cstr = PyString_AsString(path)

355

path_size = PyString_Size(path)

356

357

while _lo < _hi:

358

_mid = (_lo + _hi) / 2

359

cur = PyList_GetItem_object_void(paths, _mid)

360

cur_cstr = PyString_AS_STRING_void(cur)

361

cur_size = PyString_GET_SIZE_void(cur)

362

if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:

363

_lo = _mid + 1

364

else:

365

_hi = _mid

366

return _lo

367

368

369

def _bisect_path_right_c(paths, path):

370

"""Return the index where to insert path into paths.

371

372

This uses a path-wise comparison so we get::

373

374

a-b

375

a=b

376

a/b

377

Rather than::

378

379

a-b

380

a/b

381

a=b

382

:param paths: A list of paths to search through

383

:param path: A single path to insert

384

:return: An offset where 'path' can be inserted.

385

:seealso: bisect.bisect_right

386

"""

387

cdef int _lo

388

cdef int _hi

389

cdef int _mid

390

cdef char *path_cstr

391

cdef int path_size

392

cdef char *cur_cstr

393

cdef int cur_size

394

cdef void *cur

395

396

if not PyList_CheckExact(paths):

397

raise TypeError("you must pass a python list for 'paths' not: %s %r"

398

% (type(paths), paths))

399

if not PyString_CheckExact(path):

400

raise TypeError("you must pass a string for 'path' not: %s %r"

401

% (type(path), path))

402

403

_hi = len(paths)

404

_lo = 0

405

406

path_cstr = PyString_AsString(path)

407

path_size = PyString_Size(path)

408

409

while _lo < _hi:

410

_mid = (_lo + _hi) / 2

411

cur = PyList_GetItem_object_void(paths, _mid)

412

cur_cstr = PyString_AS_STRING_void(cur)

413

cur_size = PyString_GET_SIZE_void(cur)

414

if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:

415

_hi = _mid

416

else:

417

_lo = _mid + 1

418

return _lo

419

420

421

def bisect_dirblock_c(dirblocks, dirname, lo=0, hi=None, cache=None):

422

"""Return the index where to insert dirname into the dirblocks.

423

424

The return value idx is such that all directories blocks in dirblock[:idx]

425

have names < dirname, and all blocks in dirblock[idx:] have names >=

426

dirname.

427

428

Optional args lo (default 0) and hi (default len(dirblocks)) bound the

429

slice of a to be searched.

430

"""

431

cdef int _lo

432

cdef int _hi

433

cdef int _mid

434

cdef char *dirname_cstr

435

cdef int dirname_size

436

cdef char *cur_cstr

437

cdef int cur_size

438

cdef void *cur

439

440

if not PyList_CheckExact(dirblocks):

441

raise TypeError("you must pass a python list for 'dirblocks' not: %s %r"

442

% (type(dirblocks), dirblocks))

443

if not PyString_CheckExact(dirname):

444

raise TypeError("you must pass a string for dirname not: %s %r"

445

% (type(dirname), dirname))

446

if hi is None:

447

_hi = len(dirblocks)

448

else:

449

_hi = hi

450

451

_lo = lo

452

dirname_cstr = PyString_AsString(dirname)

453

dirname_size = PyString_Size(dirname)

454

455

while _lo < _hi:

456

_mid = (_lo + _hi) / 2

457

# Grab the dirname for the current dirblock

458

# cur = dirblocks[_mid][0]

459

cur = PyTuple_GetItem_void_void(

460

PyList_GetItem_object_void(dirblocks, _mid), 0)

461

cur_cstr = PyString_AS_STRING_void(cur)

462

cur_size = PyString_GET_SIZE_void(cur)

463

if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0:

464

_lo = _mid + 1

465

else:

466

_hi = _mid

467

return _lo

468

469

470

cdef class Reader:

471

"""Maintain the current location, and return fields as you parse them."""

472

473

cdef object text # The overall string object

474

cdef char *text_cstr # Pointer to the beginning of text

475

cdef int text_size # Length of text

476

477

cdef char *end_cstr # End of text

478

cdef char *cur_cstr # Pointer to the current record

479

cdef char *next # Pointer to the end of this record

480

481

def __new__(self, text):

482

self.text = text

483

self.text_cstr = PyString_AsString(text)

484

self.text_size = PyString_Size(text)

485

self.end_cstr = self.text_cstr + self.text_size

486

self.cur_cstr = self.text_cstr

487

488

cdef char *get_next(self, int *size):

489

"""Return a pointer to the start of the next field."""

490

cdef char *next

491

next = self.cur_cstr

492

self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr-next)

493

size[0] = self.cur_cstr - next

494

self.cur_cstr = self.cur_cstr + 1

495

return next

496

497

cdef object get_next_str(self):

498

"""Get the next field as a Python string."""

499

cdef int size

500

cdef char *next

501

next = self.get_next(&size)

502

return PyString_FromStringAndSize(next, size)

503

504

cdef int _init(self) except -1:

505

"""Get the pointer ready.

506

507

This assumes that the dirstate header has already been read, and we

508

already have the dirblock string loaded into memory.

509

This just initializes our memory pointers, etc for parsing of the

510

dirblock string.

511

"""

512

cdef char *first

513

cdef int size

514

# The first field should be an empty string left over from the Header

515

first = self.get_next(&size)

516

if first[0] != c'\0' and size == 0:

517

raise AssertionError('First character should be null not: %s'

518

% (first,))

519

return 0

520

521

cdef object _get_entry(self, int num_trees, void **p_current_dirname,

522

int *new_block):

523

"""Extract the next entry.

524

525

This parses the next entry based on the current location in

526

``self.cur_cstr``.

527

Each entry can be considered a "row" in the total table. And each row

528

has a fixed number of columns. It is generally broken up into "key"

529

columns, then "current" columns, and then "parent" columns.

530

531

:param num_trees: How many parent trees need to be parsed

532

:param p_current_dirname: A pointer to the current PyString

533

representing the directory name.

534

We pass this in as a void * so that pyrex doesn't have to

535

increment/decrement the PyObject reference counter for each

536

_get_entry call.

537

We use a pointer so that _get_entry can update it with the new

538

value.

539

:param new_block: This is to let the caller know that it needs to

540

create a new directory block to store the next entry.

541

"""

542

cdef object path_name_file_id_key

543

cdef char *entry_size_cstr

544

cdef unsigned long int entry_size

545

cdef char* executable_cstr

546

cdef int is_executable

547

cdef char* dirname_cstr

548

cdef char* trailing

549

cdef int cur_size

550

cdef int i

551

cdef object minikind

552

cdef object fingerprint

553

cdef object info

554

555

# Read the 'key' information (dirname, name, file_id)

556

dirname_cstr = self.get_next(&cur_size)

557

# Check to see if we have started a new directory block.

558

# If so, then we need to create a new dirname PyString, so that it can

559

# be used in all of the tuples. This saves time and memory, by re-using

560

# the same object repeatedly.

561

562

# Do the cheap 'length of string' check first. If the string is a

563

# different length, then we *have* to be a different directory.

564

if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0])

565

or strncmp(dirname_cstr,

566

# Extract the char* from our current dirname string. We

567

# know it is a PyString, so we can use

568

# PyString_AS_STRING, we use the _void version because

569

# we are tricking Pyrex by using a void* rather than an

570

# <object>

571

PyString_AS_STRING_void(p_current_dirname[0]),

572

cur_size+1) != 0):

573

dirname = PyString_FromStringAndSize(dirname_cstr, cur_size)

574

p_current_dirname[0] = <void*>dirname

575

new_block[0] = 1

576

else:

577

new_block[0] = 0

578

579

# Build up the key that will be used.

580

# By using <object>(void *) Pyrex will automatically handle the

581

# Py_INCREF that we need.

582

path_name_file_id_key = (<object>p_current_dirname[0],

583

self.get_next_str(),

584

self.get_next_str(),

585

)

586

587

# Parse all of the per-tree information. current has the information in

588

# the same location as parent trees. The only difference is that 'info'

589

# is a 'packed_stat' for current, while it is a 'revision_id' for

590

# parent trees.

591

# minikind, fingerprint, and info will be returned as regular python

592

# strings

593

# entry_size and is_executable will be parsed into a python Long and

594

# python Boolean, respectively.

595

# TODO: jam 20070718 Consider changin the entry_size conversion to

596

# prefer python Int when possible. They are generally faster to

597

# work with, and it will be rare that we have a file >2GB.

598

# Especially since this code is pretty much fixed at a max of

599

# 4GB.

600

trees = []

601

for i from 0 <= i < num_trees:

602

minikind = self.get_next_str()

603

fingerprint = self.get_next_str()

604

entry_size_cstr = self.get_next(&cur_size)

605

entry_size = strtoul(entry_size_cstr, NULL, 10)

606

executable_cstr = self.get_next(&cur_size)

607

is_executable = (executable_cstr[0] == c'y')

608

info = self.get_next_str()

609

PyList_Append(trees, (

610

minikind, # minikind

611

fingerprint, # fingerprint

612

entry_size, # size

613

is_executable,# executable

614

info, # packed_stat or revision_id

615

))

616

617

# The returned tuple is (key, [trees])

618

ret = (path_name_file_id_key, trees)

619

# Ignore the trailing newline, but assert that it does exist, this

620

# ensures that we always finish parsing a line on an end-of-entry

621

# marker.

622

trailing = self.get_next(&cur_size)

623

if cur_size != 1 or trailing[0] != c'\n':

624

raise AssertionError(

625

'Bad parse, we expected to end on \\n, not: %d %s: %s'

626

% (cur_size, PyString_FromStringAndSize(trailing, cur_size),

627

ret))

628

return ret

629

630

def _parse_dirblocks(self, state):

631

"""Parse all dirblocks in the state file."""

632

cdef int num_trees

633

cdef object current_block

634

cdef object entry

635

cdef void * current_dirname

636

cdef int new_block

637

cdef int expected_entry_count

638

cdef int entry_count

639

640

num_trees = state._num_present_parents() + 1

641

expected_entry_count = state._num_entries

642

643

# Ignore the first record

644

self._init()

645

646

current_block = []

647

state._dirblocks = [('', current_block), ('', [])]

648

obj = ''

649

current_dirname = <void*>obj

650

new_block = 0

651

entry_count = 0

652

653

# TODO: jam 2007-05-07 Consider pre-allocating some space for the

654

# members, and then growing and shrinking from there. If most

655

# directories have close to 10 entries in them, it would save a

656

# few mallocs if we default our list size to something

657

# reasonable. Or we could malloc it to something large (100 or

658

# so), and then truncate. That would give us a malloc + realloc,

659

# rather than lots of reallocs.

660

while self.cur_cstr < self.end_cstr:

661

entry = self._get_entry(num_trees, &current_dirname, &new_block)

662

if new_block:

663

# new block - different dirname

664

current_block = []

665

PyList_Append(state._dirblocks,

666

(<object>current_dirname, current_block))

667

PyList_Append(current_block, entry)

668

entry_count = entry_count + 1

669

if entry_count != expected_entry_count:

670

raise AssertionError('We read the wrong number of entries.'

671

' We expected to read %s, but read %s'

672

% (expected_entry_count, entry_count))

673

state._split_root_dirblock_into_contents()

674

675

676

def _read_dirblocks_c(state):

677

"""Read in the dirblocks for the given DirState object.

678

679

This is tightly bound to the DirState internal representation. It should be

680

thought of as a member function, which is only separated out so that we can

681

re-write it in pyrex.

682

683

:param state: A DirState object.

684

:return: None

685

:postcondition: The dirblocks will be loaded into the appropriate fields in

686

the DirState object.

687

"""

688

state._state_file.seek(state._end_of_header)

689

text = state._state_file.read()

690

# TODO: check the crc checksums. crc_measured = zlib.crc32(text)

691

692

reader = Reader(text)

693

694

reader._parse_dirblocks(state)

695

state._dirblock_state = DirState.IN_MEMORY_UNMODIFIED

Older »