~bzr-pqm/bzr/bzr.dev : revision 2474.1.70

104

105

106

cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):

107

cdef char *cur1

108

cdef char *cur2

109

cdef char *end1

110

cdef char *end2

107

cdef unsigned char *cur1

108

cdef unsigned char *cur2

109

cdef unsigned char *end1

110

cdef unsigned char *end2

111

cdef int *cur_int1

112

cdef int *cur_int2

113

cdef int *end_int1

114

cdef int *end_int2

115

116

if path1 == path2:

116

if path1 == path2 and size1 == size2:

117

return 0

118

119

end1 = path1+size1

120

end2 = path2+size2

119

end1 = <unsigned char*>path1+size1

120

end2 = <unsigned char*>path2+size2

121

122

# Use 32-bit comparisons for the matching portion of the string.

123

# Almost all CPU's are faster at loading and comparing 32-bit integers,

127

if _is_aligned(path1) and _is_aligned(path2):

128

cur_int1 = <int*>path1

129

cur_int2 = <int*>path2

130

end_int1 = <int*>(path1 + size1 - (size1%4))

131

end_int2 = <int*>(path2 + size2 - (size2%4))

130

end_int1 = <int*>(path1 + size1 - (size1 % sizeof(int)))

131

end_int2 = <int*>(path2 + size2 - (size2 % sizeof(int)))

132

133

while cur_int1 < end_int1 and cur_int2 < end_int2:

134

if cur_int1[0] != cur_int2[0]:

136

cur_int1 = cur_int1 + 1

137

cur_int2 = cur_int2 + 1

138

139

cur1 = <char*>cur_int1

140

cur2 = <char*>cur_int2

139

cur1 = <unsigned char*>cur_int1

140

cur2 = <unsigned char*>cur_int2

141

else:

142

cur1 = path1

143

cur2 = path2

142

cur1 = <unsigned char*>path1

143

cur2 = <unsigned char*>path2

144

145

while cur1 < end1 and cur2 < end2:

146

if cur1[0] == cur2[0]:

184

0 if paths are equal,

185

and negative number if ``path2`` sorts first

186

"""

187

if not PyString_CheckExact(path1):

188

raise TypeError("'path1' must be a plain string, not %s: %r"

189

% (type(path1), path1))

190

if not PyString_CheckExact(path2):

191

raise TypeError("'path2' must be a plain string, not %s: %r"

192

% (type(path2), path2))

187

193

return _cmp_by_dirs(PyString_AsString(path1),

188

194

PyString_Size(path1),

189

195

PyString_AsString(path2),

206

212

0 if paths are equal

207

213

and a negative number if ``path2`` sorts first

208

214

"""

215

if not PyString_CheckExact(path1):

216

raise TypeError("'path1' must be a plain string, not %s: %r"

217

% (type(path1), path1))

218

if not PyString_CheckExact(path2):

219

raise TypeError("'path2' must be a plain string, not %s: %r"

220

% (type(path2), path2))

209

221

return _cmp_path_by_dirblock(PyString_AsString(path1),

210

222

PyString_Size(path1),

211

223

PyString_AsString(path2),

214

226

215

227

cdef int _cmp_path_by_dirblock(char *path1, int path1_len,

216

228

char *path2, int path2_len):

229

"""Compare two paths by what directory they are in.

230

231

see ``_cmp_path_by_dirblock_c`` for details.

232

"""

217

233

cdef char *dirname1

218

234

cdef int dirname1_len

219

235

cdef char *dirname2

228

244

if path1_len == 0 and path2_len == 0:

229

245

return 0

230

246

247

if path1 == path2 and path1_len == path2_len:

248

return 0

249

231

250

if path1_len == 0:

232

251

return -1

233

252

279

298

return 1

280

299

281

300

282

cdef object _pystr(char *s, int length):

283

if s == NULL:

284

if length == 0:

285

return ''

286

else:

287

return None

288

else:

289

return PyString_FromStringAndSize(s, length)

290

291

292

301

def _bisect_path_left_c(paths, path):

293

302

"""Return the index where to insert path into paths.

294

303

310

319

cdef int _lo

311

320

cdef int _hi

312

321

cdef int _mid

313

cdef char *path_str

322

cdef char *path_cstr

314

323

cdef int path_size

315

cdef char *cur_str

324

cdef char *cur_cstr

316

325

cdef int cur_size

317

326

cdef void *cur

318

327

319

328

if not PyList_CheckExact(paths):

320

raise TypeError('you must pass a python list for paths')

329

raise TypeError("you must pass a python list for 'paths' not: %s %r"

330

% (type(paths), paths))

321

331

if not PyString_CheckExact(path):

322

raise TypeError('you must pass a string for path')

332

raise TypeError("you must pass a string for 'path' not: %s %r"

333

% (type(path), path))

323

334

324

335

_hi = len(paths)

325

336

_lo = 0

326

337

327

path_str = PyString_AsString(path)

338

path_cstr = PyString_AsString(path)

328

339

path_size = PyString_Size(path)

329

340

330

341

while _lo < _hi:

331

342

_mid = (_lo + _hi) / 2

332

343

cur = PyList_GetItem_object_void(paths, _mid)

333

cur_str = PyString_AS_STRING_void(cur)

344

cur_cstr = PyString_AS_STRING_void(cur)

334

345

cur_size = PyString_GET_SIZE_void(cur)

335

if _cmp_path_by_dirblock(cur_str, cur_size, path_str, path_size) < 0:

346

if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:

336

347

_lo = _mid + 1

337

348

else:

338

349

_hi = _mid

360

371

cdef int _lo

361

372

cdef int _hi

362

373

cdef int _mid

363

cdef char *path_str

374

cdef char *path_cstr

364

375

cdef int path_size

365

cdef char *cur_str

376

cdef char *cur_cstr

366

377

cdef int cur_size

367

378

cdef void *cur

368

379

369

380

if not PyList_CheckExact(paths):

370

raise TypeError('you must pass a python list for paths')

381

raise TypeError("you must pass a python list for 'paths' not: %s %r"

382

% (type(paths), paths))

371

383

if not PyString_CheckExact(path):

372

raise TypeError('you must pass a string for path')

384

raise TypeError("you must pass a string for 'path' not: %s %r"

385

% (type(path), path))

373

386

374

387

_hi = len(paths)

375

388

_lo = 0

376

389

377

path_str = PyString_AsString(path)

390

path_cstr = PyString_AsString(path)

378

391

path_size = PyString_Size(path)

379

392

380

393

while _lo < _hi:

381

394

_mid = (_lo + _hi) / 2

382

395

cur = PyList_GetItem_object_void(paths, _mid)

383

cur_str = PyString_AS_STRING_void(cur)

396

cur_cstr = PyString_AS_STRING_void(cur)

384

397

cur_size = PyString_GET_SIZE_void(cur)

385

if _cmp_path_by_dirblock(path_str, path_size, cur_str, cur_size) < 0:

398

if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:

386

399

_hi = _mid

387

400

else:

388

401

_lo = _mid + 1

402

415

cdef int _lo

403

416

cdef int _hi

404

417

cdef int _mid

405

cdef char *dirname_str

418

cdef char *dirname_cstr

406

419

cdef int dirname_size

407

cdef char *cur_str

420

cdef char *cur_cstr

408

421

cdef int cur_size

409

422

cdef void *cur

410

423

424

if not PyList_CheckExact(dirblocks):

425

raise TypeError("you must pass a python list for 'dirblocks' not: %s %r"

426

% (type(dirblocks), dirblocks))

427

if not PyString_CheckExact(dirname):

428

raise TypeError("you must pass a string for dirname not: %s %r"

429

% (type(dirname), dirname))

411

430

if hi is None:

412

431

_hi = len(dirblocks)

413

432

else:

414

433

_hi = hi

415

434

416

if not PyList_CheckExact(dirblocks):

417

raise TypeError('you must pass a python list for dirblocks')

418

435

_lo = lo

419

if not PyString_CheckExact(dirname):

420

raise TypeError('you must pass a string for dirname')

421

dirname_str = PyString_AsString(dirname)

436

dirname_cstr = PyString_AsString(dirname)

422

437

dirname_size = PyString_Size(dirname)

423

438

424

439

while _lo < _hi:

427

442

# cur = dirblocks[_mid][0]

428

443

cur = PyTuple_GetItem_void_void(

429

444

PyList_GetItem_object_void(dirblocks, _mid), 0)

430

cur_str = PyString_AS_STRING_void(cur)

445

cur_cstr = PyString_AS_STRING_void(cur)

431

446

cur_size = PyString_GET_SIZE_void(cur)

432

if _cmp_by_dirs(cur_str, cur_size, dirname_str, dirname_size) < 0:

447

if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0:

433

448

_lo = _mid + 1

434

449

else:

435

450

_hi = _mid

440

455

"""Maintain the current location, and return fields as you parse them."""

441

456

442

457

cdef object text # The overall string object

443

cdef char *text_str # Pointer to the beginning of text

458

cdef char *text_cstr # Pointer to the beginning of text

444

459

cdef int text_size # Length of text

445

460

446

cdef char *end_str # End of text

447

cdef char *cur # Pointer to the current record

461

cdef char *end_cstr # End of text

462

cdef char *cur_cstr # Pointer to the current record

448

463

cdef char *next # Pointer to the end of this record

449

464

450

465

def __new__(self, text):

451

466

self.text = text

452

self.text_str = PyString_AsString(text)

467

self.text_cstr = PyString_AsString(text)

453

468

self.text_size = PyString_Size(text)

454

self.end_str = self.text_str + self.text_size

455

self.cur = self.text_str

469

self.end_cstr = self.text_cstr + self.text_size

470

self.cur_cstr = self.text_cstr

456

471

457

472

cdef char *get_next(self, int *size):

458

473

"""Return a pointer to the start of the next field."""

459

474

cdef char *next

460

next = self.cur

461

self.cur = <char*>memchr(next, c'\0', self.end_str-next)

462

size[0] = self.cur - next

463

self.cur = self.cur + 1

475

next = self.cur_cstr

476

self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr-next)

477

size[0] = self.cur_cstr - next

478

self.cur_cstr = self.cur_cstr + 1

464

479

return next

465

480

466

481

cdef object get_next_str(self):

470

485

next = self.get_next(&size)

471

486

return PyString_FromStringAndSize(next, size)

472

487

473

def init(self):

474

"""Get the pointer ready"""

488

cdef int _init(self) except -1:

489

"""Get the pointer ready.

490

491

This assumes that the dirstate header has already been read, and we

492

already have the dirblock string loaded into memory.

493

This just initializes our memory pointers, etc for parsing of the

494

dirblock string.

495

"""

475

496

cdef char *first

476

497

cdef int size

477

498

# The first field should be an empty string left over from the Header

479

500

if first[0] != c'\0' and size == 0:

480

501

raise AssertionError('First character should be null not: %s'

481

502

% (first,))

482

483

def get_all_fields(self):

484

"""Get a list of all fields"""

485

self.init()

486

fields = []

487

while self.cur < self.end_str:

488

PyList_Append(fields, self.get_next_str())

489

return fields

503

return 0

490

504

491

505

cdef object _get_entry(self, int num_trees, void **p_current_dirname,

492

506

int *new_block):

507

"""Extract the next entry.

508

509

This parses the next entry based on the current location in

510

``self.cur_cstr``.

511

Each entry can be considered a "row" in the total table. And each row

512

has a fixed number of columns. It is generally broken up into "key"

513

columns, then "current" columns, and then "parent" columns.

514

515

:param num_trees: How many parent trees need to be parsed

516

:param p_current_dirname: A pointer to the current PyString

517

representing the directory name.

518

We pass this in as a void * so that pyrex doesn't have to

519

increment/decrement the PyObject reference counter for each

520

_get_entry call.

521

We use a pointer so that _get_entry can update it with the new

522

value.

523

:param new_block: This is to let the caller know that it needs to

524

create a new directory block to store the next entry.

525

"""

493

526

cdef object path_name_file_id_key

494

cdef char *entry_size_str

527

cdef char *entry_size_cstr

495

528

cdef unsigned long int entry_size

496

cdef char* executable_str

529

cdef char* executable_cstr

497

530

cdef int is_executable

498

cdef char* dirname_str

531

cdef char* dirname_cstr

499

532

cdef char* trailing

500

533

cdef int cur_size

501

534

cdef int i

503

536

cdef object fingerprint

504

537

cdef object info

505

538

506

dirname_str = self.get_next(&cur_size)

507

if strncmp(dirname_str,

508

PyString_AS_STRING_void(p_current_dirname[0]),

509

cur_size+1) != 0:

510

dirname = PyString_FromStringAndSize(dirname_str, cur_size)

539

# Read the 'key' information (dirname, name, file_id)

540

dirname_cstr = self.get_next(&cur_size)

541

# Check to see if we have started a new directory block.

542

# If so, then we need to create a new dirname PyString, so that it can

543

# be used in all of the tuples. This saves time and memory, by re-using

544

# the same object repeatedly.

545

546

# Do the cheap 'length of string' check first. If the string is a

547

# different length, then we *have* to be a different directory.

548

if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0])

549

or strncmp(dirname_cstr,

550

# Extract the char* from our current dirname string. We

551

# know it is a PyString, so we can use

552

# PyString_AS_STRING, we use the _void version because

553

# we are tricking Pyrex by using a void* rather than an

554

# <object>

555

PyString_AS_STRING_void(p_current_dirname[0]),

556

cur_size+1) != 0):

557

dirname = PyString_FromStringAndSize(dirname_cstr, cur_size)

511

558

p_current_dirname[0] = <void*>dirname

512

559

new_block[0] = 1

513

560

else:

514

561

new_block[0] = 0

562

563

# Build up the key that will be used.

564

# By using <object>(void *) Pyrex will automatically handle the

565

# Py_INCREF that we need.

515

566

path_name_file_id_key = (<object>p_current_dirname[0],

516

567

self.get_next_str(),

517

568

self.get_next_str(),

518

569

)

519

570

571

# Parse all of the per-tree information. current has the information in

572

# the same location as parent trees. The only difference is that 'info'

573

# is a 'packed_stat' for current, while it is a 'revision_id' for

574

# parent trees.

575

# minikind, fingerprint, and info will be returned as regular python

576

# strings

577

# entry_size and is_executable will be parsed into a python Long and

578

# python Boolean, respectively.

579

# TODO: jam 20070718 Consider changin the entry_size conversion to

580

# prefer python Int when possible. They are generally faster to

581

# work with, and it will be rare that we have a file >2GB.

582

# Especially since this code is pretty much fixed at a max of

583

# 4GB.

520

584

trees = []

521

585

for i from 0 <= i < num_trees:

522

586

minikind = self.get_next_str()

523

587

fingerprint = self.get_next_str()

524

entry_size_str = self.get_next(&cur_size)

525

entry_size = strtoul(entry_size_str, NULL, 10)

526

executable_str = self.get_next(&cur_size)

527

is_executable = (executable_str[0] == c'y')

588

entry_size_cstr = self.get_next(&cur_size)

589

entry_size = strtoul(entry_size_cstr, NULL, 10)

590

executable_cstr = self.get_next(&cur_size)

591

is_executable = (executable_cstr[0] == c'y')

528

592

info = self.get_next_str()

529

593

PyList_Append(trees, (

530

594

minikind, # minikind

534

598

info, # packed_stat or revision_id

535

599

))

536

600

601

# The returned tuple is (key, [trees])

537

602

ret = (path_name_file_id_key, trees)

538

# Ignore the trailing newline

603

# Ignore the trailing newline, but assert that it does exist, this

604

# ensures that we always finish parsing a line on an end-of-entry

605

# marker.

539

606

trailing = self.get_next(&cur_size)

540

607

if cur_size != 1 or trailing[0] != c'\n':

541

608

raise AssertionError(

542

609

'Bad parse, we expected to end on \\n, not: %d %s: %s'

543

% (cur_size, PyString_FromString(trailing), ret))

610

% (cur_size, PyString_FromStringAndSize(trailing, cur_size),

611

ret))

544

612

return ret

545

613

546

614

def _parse_dirblocks(self, state):

557

625

expected_entry_count = state._num_entries

558

626

559

627

# Ignore the first record

560

self.init()

628

self._init()

561

629

562

630

current_block = []

563

631

state._dirblocks = [('', current_block), ('', [])]

573

641

# reasonable. Or we could malloc it to something large (100 or

574

642

# so), and then truncate. That would give us a malloc + realloc,

575

643

# rather than lots of reallocs.

576

while self.cur < self.end_str:

644

while self.cur_cstr < self.end_cstr:

577

645

entry = self._get_entry(num_trees, &current_dirname, &new_block)

578

646

if new_block:

579

647

# new block - different dirname

598

666

599

667

:param state: A DirState object.

600

668

:return: None

669

:postcondition: The dirblocks will be loaded into the appropriate fields in

670

the DirState object.

601

671

"""

602

672

state._state_file.seek(state._end_of_header)

603

673

text = state._state_file.read()