106
106
cdef int _cmp_by_dirs(char *path1, int size1, char *path2, int size2):
107
cdef unsigned char *cur1
108
cdef unsigned char *cur2
109
cdef unsigned char *end1
110
cdef unsigned char *end2
111
111
cdef int *cur_int1
112
112
cdef int *cur_int2
113
113
cdef int *end_int1
114
114
cdef int *end_int2
116
if path1 == path2 and size1 == size2:
119
end1 = <unsigned char*>path1+size1
120
end2 = <unsigned char*>path2+size2
122
122
# Use 32-bit comparisons for the matching portion of the string.
123
123
# Almost all CPUs are faster at loading and comparing 32-bit integers,
184
184
0 if paths are equal,
185
185
and negative number if ``path2`` sorts first
187
if not PyString_CheckExact(path1):
188
raise TypeError("'path1' must be a plain string, not %s: %r"
189
% (type(path1), path1))
190
if not PyString_CheckExact(path2):
191
raise TypeError("'path2' must be a plain string, not %s: %r"
192
% (type(path2), path2))
187
193
return _cmp_by_dirs(PyString_AsString(path1),
188
194
PyString_Size(path1),
189
195
PyString_AsString(path2),
206
212
0 if paths are equal
207
213
and a negative number if ``path2`` sorts first
215
if not PyString_CheckExact(path1):
216
raise TypeError("'path1' must be a plain string, not %s: %r"
217
% (type(path1), path1))
218
if not PyString_CheckExact(path2):
219
raise TypeError("'path2' must be a plain string, not %s: %r"
220
% (type(path2), path2))
209
221
return _cmp_path_by_dirblock(PyString_AsString(path1),
210
222
PyString_Size(path1),
211
223
PyString_AsString(path2),
314
323
cdef int path_size
316
325
cdef int cur_size
319
328
if not PyList_CheckExact(paths):
320
raise TypeError('you must pass a python list for paths')
329
raise TypeError("you must pass a python list for 'paths' not: %s %r"
330
% (type(paths), paths))
321
331
if not PyString_CheckExact(path):
322
raise TypeError('you must pass a string for path')
332
raise TypeError("you must pass a string for 'path' not: %s %r"
333
% (type(path), path))
327
path_str = PyString_AsString(path)
338
path_cstr = PyString_AsString(path)
328
339
path_size = PyString_Size(path)
331
342
_mid = (_lo + _hi) / 2
332
343
cur = PyList_GetItem_object_void(paths, _mid)
333
cur_str = PyString_AS_STRING_void(cur)
344
cur_cstr = PyString_AS_STRING_void(cur)
334
345
cur_size = PyString_GET_SIZE_void(cur)
335
if _cmp_path_by_dirblock(cur_str, cur_size, path_str, path_size) < 0:
346
if _cmp_path_by_dirblock(cur_cstr, cur_size, path_cstr, path_size) < 0:
364
375
cdef int path_size
366
377
cdef int cur_size
369
380
if not PyList_CheckExact(paths):
370
raise TypeError('you must pass a python list for paths')
381
raise TypeError("you must pass a python list for 'paths' not: %s %r"
382
% (type(paths), paths))
371
383
if not PyString_CheckExact(path):
372
raise TypeError('you must pass a string for path')
384
raise TypeError("you must pass a string for 'path' not: %s %r"
385
% (type(path), path))
377
path_str = PyString_AsString(path)
390
path_cstr = PyString_AsString(path)
378
391
path_size = PyString_Size(path)
381
394
_mid = (_lo + _hi) / 2
382
395
cur = PyList_GetItem_object_void(paths, _mid)
383
cur_str = PyString_AS_STRING_void(cur)
396
cur_cstr = PyString_AS_STRING_void(cur)
384
397
cur_size = PyString_GET_SIZE_void(cur)
385
if _cmp_path_by_dirblock(path_str, path_size, cur_str, cur_size) < 0:
398
if _cmp_path_by_dirblock(path_cstr, path_size, cur_cstr, cur_size) < 0:
405
cdef char *dirname_str
418
cdef char *dirname_cstr
406
419
cdef int dirname_size
408
421
cdef int cur_size
424
if not PyList_CheckExact(dirblocks):
425
raise TypeError("you must pass a python list for 'dirblocks' not: %s %r"
426
% (type(dirblocks), dirblocks))
427
if not PyString_CheckExact(dirname):
428
raise TypeError("you must pass a string for dirname not: %s %r"
429
% (type(dirname), dirname))
412
431
_hi = len(dirblocks)
416
if not PyList_CheckExact(dirblocks):
417
raise TypeError('you must pass a python list for dirblocks')
419
if not PyString_CheckExact(dirname):
420
raise TypeError('you must pass a string for dirname')
421
dirname_str = PyString_AsString(dirname)
436
dirname_cstr = PyString_AsString(dirname)
422
437
dirname_size = PyString_Size(dirname)
427
442
# cur = dirblocks[_mid][0]
428
443
cur = PyTuple_GetItem_void_void(
429
444
PyList_GetItem_object_void(dirblocks, _mid), 0)
430
cur_str = PyString_AS_STRING_void(cur)
445
cur_cstr = PyString_AS_STRING_void(cur)
431
446
cur_size = PyString_GET_SIZE_void(cur)
432
if _cmp_by_dirs(cur_str, cur_size, dirname_str, dirname_size) < 0:
447
if _cmp_by_dirs(cur_cstr, cur_size, dirname_cstr, dirname_size) < 0:
440
455
"""Maintain the current location, and return fields as you parse them."""
442
457
cdef object text # The overall string object
443
cdef char *text_str # Pointer to the beginning of text
458
cdef char *text_cstr # Pointer to the beginning of text
444
459
cdef int text_size # Length of text
446
cdef char *end_str # End of text
447
cdef char *cur # Pointer to the current record
461
cdef char *end_cstr # End of text
462
cdef char *cur_cstr # Pointer to the current record
448
463
cdef char *next # Pointer to the end of this record
450
465
def __new__(self, text):
452
self.text_str = PyString_AsString(text)
467
self.text_cstr = PyString_AsString(text)
453
468
self.text_size = PyString_Size(text)
454
self.end_str = self.text_str + self.text_size
455
self.cur = self.text_str
469
self.end_cstr = self.text_cstr + self.text_size
470
self.cur_cstr = self.text_cstr
457
472
cdef char *get_next(self, int *size):
458
473
"""Return a pointer to the start of the next field."""
461
self.cur = <char*>memchr(next, c'\0', self.end_str-next)
462
size[0] = self.cur - next
463
self.cur = self.cur + 1
476
self.cur_cstr = <char*>memchr(next, c'\0', self.end_cstr-next)
477
size[0] = self.cur_cstr - next
478
self.cur_cstr = self.cur_cstr + 1
466
481
cdef object get_next_str(self):
470
485
next = self.get_next(&size)
471
486
return PyString_FromStringAndSize(next, size)
474
"""Get the pointer ready"""
488
cdef int _init(self) except -1:
489
"""Get the pointer ready.
491
This assumes that the dirstate header has already been read, and we
492
already have the dirblock string loaded into memory.
493
This just initializes our memory pointers, etc for parsing of the
477
498
# The first field should be an empty string left over from the Header
479
500
if first[0] != c'\0' and size == 0:
480
501
raise AssertionError('First character should be null not: %s'
483
def get_all_fields(self):
484
"""Get a list of all fields"""
487
while self.cur < self.end_str:
488
PyList_Append(fields, self.get_next_str())
491
505
cdef object _get_entry(self, int num_trees, void **p_current_dirname,
507
"""Extract the next entry.
509
This parses the next entry based on the current location in
511
Each entry can be considered a "row" in the total table. And each row
512
has a fixed number of columns. It is generally broken up into "key"
513
columns, then "current" columns, and then "parent" columns.
515
:param num_trees: How many parent trees need to be parsed
516
:param p_current_dirname: A pointer to the current PyString
517
representing the directory name.
518
We pass this in as a void * so that pyrex doesn't have to
519
increment/decrement the PyObject reference counter for each
521
We use a pointer so that _get_entry can update it with the new
523
:param new_block: This is to let the caller know that it needs to
524
create a new directory block to store the next entry.
493
526
cdef object path_name_file_id_key
494
cdef char *entry_size_str
527
cdef char *entry_size_cstr
495
528
cdef unsigned long int entry_size
496
cdef char* executable_str
529
cdef char* executable_cstr
497
530
cdef int is_executable
498
cdef char* dirname_str
531
cdef char* dirname_cstr
499
532
cdef char* trailing
500
533
cdef int cur_size
503
536
cdef object fingerprint
506
dirname_str = self.get_next(&cur_size)
507
if strncmp(dirname_str,
508
PyString_AS_STRING_void(p_current_dirname[0]),
510
dirname = PyString_FromStringAndSize(dirname_str, cur_size)
539
# Read the 'key' information (dirname, name, file_id)
540
dirname_cstr = self.get_next(&cur_size)
541
# Check to see if we have started a new directory block.
542
# If so, then we need to create a new dirname PyString, so that it can
543
# be used in all of the tuples. This saves time and memory, by re-using
544
# the same object repeatedly.
546
# Do the cheap 'length of string' check first. If the string is a
547
# different length, then we *have* to be a different directory.
548
if (cur_size != PyString_GET_SIZE_void(p_current_dirname[0])
549
or strncmp(dirname_cstr,
550
# Extract the char* from our current dirname string. We
551
# know it is a PyString, so we can use
552
# PyString_AS_STRING, we use the _void version because
553
# we are tricking Pyrex by using a void* rather than an
555
PyString_AS_STRING_void(p_current_dirname[0]),
557
dirname = PyString_FromStringAndSize(dirname_cstr, cur_size)
511
558
p_current_dirname[0] = <void*>dirname
563
# Build up the key that will be used.
564
# By using <object>(void *) Pyrex will automatically handle the
565
# Py_INCREF that we need.
515
566
path_name_file_id_key = (<object>p_current_dirname[0],
516
567
self.get_next_str(),
517
568
self.get_next_str(),
571
# Parse all of the per-tree information. current has the information in
572
# the same location as parent trees. The only difference is that 'info'
573
# is a 'packed_stat' for current, while it is a 'revision_id' for
575
# minikind, fingerprint, and info will be returned as regular python
577
# entry_size and is_executable will be parsed into a python Long and
578
# python Boolean, respectively.
579
# TODO: jam 20070718 Consider changing the entry_size conversion to
580
# prefer python Int when possible. They are generally faster to
581
# work with, and it will be rare that we have a file >2GB.
582
# Especially since this code is pretty much fixed at a max of
521
585
for i from 0 <= i < num_trees:
522
586
minikind = self.get_next_str()
523
587
fingerprint = self.get_next_str()
524
entry_size_str = self.get_next(&cur_size)
525
entry_size = strtoul(entry_size_str, NULL, 10)
526
executable_str = self.get_next(&cur_size)
527
is_executable = (executable_str[0] == c'y')
588
entry_size_cstr = self.get_next(&cur_size)
589
entry_size = strtoul(entry_size_cstr, NULL, 10)
590
executable_cstr = self.get_next(&cur_size)
591
is_executable = (executable_cstr[0] == c'y')
528
592
info = self.get_next_str()
529
593
PyList_Append(trees, (
530
594
minikind, # minikind
534
598
info, # packed_stat or revision_id
601
# The returned tuple is (key, [trees])
537
602
ret = (path_name_file_id_key, trees)
538
# Ignore the trailing newline
603
# Ignore the trailing newline, but assert that it does exist, this
604
# ensures that we always finish parsing a line on an end-of-entry
539
606
trailing = self.get_next(&cur_size)
540
607
if cur_size != 1 or trailing[0] != c'\n':
541
608
raise AssertionError(
542
609
'Bad parse, we expected to end on \\n, not: %d %s: %s'
543
% (cur_size, PyString_FromString(trailing), ret))
610
% (cur_size, PyString_FromStringAndSize(trailing, cur_size),
546
614
def _parse_dirblocks(self, state):
573
641
# reasonable. Or we could malloc it to something large (100 or
574
642
# so), and then truncate. That would give us a malloc + realloc,
575
643
# rather than lots of reallocs.
576
while self.cur < self.end_str:
644
while self.cur_cstr < self.end_cstr:
577
645
entry = self._get_entry(num_trees, ¤t_dirname, &new_block)
579
647
# new block - different dirname