64
64
# void *memrchr(void *s, int c, size_t n)
65
65
int strncmp(char *s1, char *s2, size_t n)
66
66
unsigned long strtoul(char *s1, char **out, int base)
67
unsigned long long strtoull(char *s1, char **out, int base)
67
long long strtoll(char *s1, char **out, int base)
69
69
# It seems we need to import the definitions so that the pyrex compiler has
70
70
# local names to access them.
503
503
# We then strip those bits, and use the next 8 bits as 'interesting bits'.
504
504
# offsets is then the bisect() information for a key with those bits (where
505
505
# would you insert a key starting with these bits)
506
cdef public long bisect_steps
507
cdef public unsigned int common_mask
508
cdef public unsigned int common_bits
509
506
# The first N bits of all entries in this node have the same content
510
507
cdef public unsigned char common_shift
511
508
cdef unsigned char offsets[257]
606
603
mid = (lo + hi) / 2
607
604
the_cmp = memcmp(self.entries[mid].sha1, sha1, 20)
608
self.bisect_steps += 1
610
606
# print mid, offset, self._offsets[offset], self._offsets[offset+1]
611
607
return &self.entries[mid]
690
685
cur_record = self.entries
692
687
while c_cur != NULL and c_cur < c_end and entry < num_entries:
693
if strncmp(c_cur, 'sha1:', 5):
694
raise ValueError('At byte %d, line did not start with sha1: %r'
695
% (c_cur - c_bytes, safe_string_from_size(c_cur, 10)))
697
c_next = <char *>memchr(c_cur, c'\0', c_end - c_cur)
698
if c_next == NULL or (c_next - c_cur != 40):
699
raise ValueError('Line did not contain 40 hex bytes')
700
if not _unhexlify_sha1(c_cur, cur_record.sha1):
701
raise ValueError('We failed to unhexlify')
703
if c_cur[0] != c'\0':
704
raise ValueError('only 1 null, not 2 as expected')
706
cur_record.block_offset = strtoull(c_cur, &c_next, 10)
707
if c_cur == c_next or c_next[0] != c' ':
708
raise ValueError('Failed to parse block offset')
710
cur_record.block_length = strtoul(c_cur, &c_next, 10)
711
if c_cur == c_next or c_next[0] != c' ':
712
raise ValueError('Failed to parse block length')
714
cur_record.record_start = strtoul(c_cur, &c_next, 10)
715
if c_cur == c_next or c_next[0] != c' ':
716
raise ValueError('Failed to parse block length')
718
cur_record.record_end = strtoul(c_cur, &c_next, 10)
719
if c_cur == c_next or c_next[0] != c'\n':
720
raise ValueError('Failed to parse record end')
688
c_cur = self._parse_one_entry(c_cur, c_end, cur_record)
724
691
if (entry != self.num_entries
728
695
# Pass 3: build the offset map
729
696
self._compute_common()
698
cdef char *_parse_one_entry(self, char *c_cur, char *c_end,
699
gc_chk_sha1_record *cur_record) except NULL:
700
"""Read a single sha record from the bytes.
701
:param c_cur: The pointer to the start of bytes
705
if strncmp(c_cur, 'sha1:', 5):
706
raise ValueError('line did not start with sha1: %r'
707
% (safe_string_from_size(c_cur, 10),))
709
c_next = <char *>memchr(c_cur, c'\0', c_end - c_cur)
710
if c_next == NULL or (c_next - c_cur != 40):
711
raise ValueError('Line did not contain 40 hex bytes')
712
if not _unhexlify_sha1(c_cur, cur_record.sha1):
713
raise ValueError('We failed to unhexlify')
715
if c_cur[0] != c'\0':
716
raise ValueError('only 1 null, not 2 as expected')
718
cur_record.block_offset = strtoll(c_cur, &c_next, 10)
719
if c_cur == c_next or c_next[0] != c' ':
720
raise ValueError('Failed to parse block offset')
722
cur_record.block_length = strtoul(c_cur, &c_next, 10)
723
if c_cur == c_next or c_next[0] != c' ':
724
raise ValueError('Failed to parse block length')
726
cur_record.record_start = strtoul(c_cur, &c_next, 10)
727
if c_cur == c_next or c_next[0] != c' ':
728
raise ValueError('Failed to parse block length')
730
cur_record.record_end = strtoul(c_cur, &c_next, 10)
731
if c_cur == c_next or c_next[0] != c'\n':
732
raise ValueError('Failed to parse record end')
731
736
cdef int _offset_for_sha1(self, char *sha1) except -1:
732
737
"""Find the first interesting 8-bits of this sha1."""
733
738
cdef int this_offset
734
739
cdef unsigned int as_uint
735
740
as_uint = _sha1_to_uint(sha1)
736
741
this_offset = (as_uint >> self.common_shift) & 0xFF
737
# assert 0 < this_offset < 256
738
742
return this_offset
740
744
def test_offset_for_sha1(self, sha1):
766
769
this = _sha1_to_uint(self.entries[i].sha1)
767
770
common_mask = (~(first ^ this)) & common_mask
768
771
common_shift = 24
769
self.common_mask = 0 # common_mask
770
772
while common_mask & 0x80000000 and common_shift > 0:
771
773
common_mask = common_mask << 1
772
774
common_shift -= 1
773
self.common_mask = (self.common_mask >> 1) | 0x80000000
774
775
self.common_shift = common_shift
775
self.common_bits = first & self.common_mask
777
777
max_offset = self.num_entries
778
778
# We cap this loop at 254 entries. All the other offsets just get