~bzr-pqm/bzr/bzr.dev

Viewing changes to bzrlib/_btree_serializer_pyx.pyx

Committer: John Arbash Meinel
Date: 2010-08-04 16:08:12 UTC
mto: This revision was merged to the branch mainline in revision 5390.
Revision ID: john@arbash-meinel.com-20100804160812-zvqj8pn99i3d6ks3

Trim the class a little bit.

All we need is the common_shift, common_bits and common_mask aren't used.
This puts as at about 42.? bytes/record.
We have 40 actual, and then <3 bytes/record of other overhead.

files modified:
bzrlib/_btree_serializer_pyx.pyx

bzrlib/python-compat.h

bzrlib/tests/test__btree_serializer.py

Show diffs side-by-side

added added

removed removed

bzrlib/_btree_serializer_pyx.pyx

# void *memrchr(void *s, int c, size_t n)

int strncmp(char *s1, char *s2, size_t n)

unsigned long strtoul(char *s1, char **out, int base)

unsigned long long strtoull(char *s1, char **out, int base)

long long strtoll(char *s1, char **out, int base)

# It seems we need to import the definitions so that the pyrex compiler has

# local names to access them.

503

# We then strip those bits, and use the next 8 bits as 'interesting bits'.

504

# offsets is then the bisect() information for a key with those bits (where

505

# would you insert a key starting with these bits)

506

cdef public long bisect_steps

507

cdef public unsigned int common_mask

508

cdef public unsigned int common_bits

509

506

# The first N bits of all entries in this node have the same content

510

507

cdef public unsigned char common_shift

511

508

cdef unsigned char offsets[257]

605

602

while lo < hi:

606

603

mid = (lo + hi) / 2

607

604

the_cmp = memcmp(self.entries[mid].sha1, sha1, 20)

608

self.bisect_steps += 1

609

605

if the_cmp == 0:

610

606

# print mid, offset, self._offsets[offset], self._offsets[offset+1]

611

607

return &self.entries[mid]

656

652

cdef char *c_content

657

653

cdef char *c_cur

658

654

cdef char *c_end

659

cdef char *c_next

660

655

cdef Py_ssize_t n_bytes

661

656

cdef int num_entries

662

657

cdef int entry

690

685

cur_record = self.entries

691

686

entry = 0

692

687

while c_cur != NULL and c_cur < c_end and entry < num_entries:

693

if strncmp(c_cur, 'sha1:', 5):

694

raise ValueError('At byte %d, line did not start with sha1: %r'

695

% (c_cur - c_bytes, safe_string_from_size(c_cur, 10)))

696

c_cur += 5

697

c_next = <char *>memchr(c_cur, c'\0', c_end - c_cur)

698

if c_next == NULL or (c_next - c_cur != 40):

699

raise ValueError('Line did not contain 40 hex bytes')

700

if not _unhexlify_sha1(c_cur, cur_record.sha1):

701

raise ValueError('We failed to unhexlify')

702

c_cur = c_next + 1

703

if c_cur[0] != c'\0':

704

raise ValueError('only 1 null, not 2 as expected')

705

c_cur += 1

706

cur_record.block_offset = strtoull(c_cur, &c_next, 10)

707

if c_cur == c_next or c_next[0] != c' ':

708

raise ValueError('Failed to parse block offset')

709

c_cur = c_next + 1

710

cur_record.block_length = strtoul(c_cur, &c_next, 10)

711

if c_cur == c_next or c_next[0] != c' ':

712

raise ValueError('Failed to parse block length')

713

c_cur = c_next + 1

714

cur_record.record_start = strtoul(c_cur, &c_next, 10)

715

if c_cur == c_next or c_next[0] != c' ':

716

raise ValueError('Failed to parse block length')

717

c_cur = c_next + 1

718

cur_record.record_end = strtoul(c_cur, &c_next, 10)

719

if c_cur == c_next or c_next[0] != c'\n':

720

raise ValueError('Failed to parse record end')

721

c_cur = c_next + 1

688

c_cur = self._parse_one_entry(c_cur, c_end, cur_record)

722

689

cur_record += 1

723

690

entry += 1

724

691

if (entry != self.num_entries

728

695

# Pass 3: build the offset map

729

696

self._compute_common()

730

697

698

cdef char *_parse_one_entry(self, char *c_cur, char *c_end,

699

gc_chk_sha1_record *cur_record) except NULL:

700

"""Read a single sha record from the bytes.

701

:param c_cur: The pointer to the start of bytes

702

:param cur_record:

703

"""

704

cdef char *c_next

705

if strncmp(c_cur, 'sha1:', 5):

706

raise ValueError('line did not start with sha1: %r'

707

% (safe_string_from_size(c_cur, 10),))

708

c_cur += 5

709

c_next = <char *>memchr(c_cur, c'\0', c_end - c_cur)

710

if c_next == NULL or (c_next - c_cur != 40):

711

raise ValueError('Line did not contain 40 hex bytes')

712

if not _unhexlify_sha1(c_cur, cur_record.sha1):

713

raise ValueError('We failed to unhexlify')

714

c_cur = c_next + 1

715

if c_cur[0] != c'\0':

716

raise ValueError('only 1 null, not 2 as expected')

717

c_cur += 1

718

cur_record.block_offset = strtoll(c_cur, &c_next, 10)

719

if c_cur == c_next or c_next[0] != c' ':

720

raise ValueError('Failed to parse block offset')

721

c_cur = c_next + 1

722

cur_record.block_length = strtoul(c_cur, &c_next, 10)

723

if c_cur == c_next or c_next[0] != c' ':

724

raise ValueError('Failed to parse block length')

725

c_cur = c_next + 1

726

cur_record.record_start = strtoul(c_cur, &c_next, 10)

727

if c_cur == c_next or c_next[0] != c' ':

728

raise ValueError('Failed to parse block length')

729

c_cur = c_next + 1

730

cur_record.record_end = strtoul(c_cur, &c_next, 10)

731

if c_cur == c_next or c_next[0] != c'\n':

732

raise ValueError('Failed to parse record end')

733

c_cur = c_next + 1

734

return c_cur

735

731

736

cdef int _offset_for_sha1(self, char *sha1) except -1:

732

737

"""Find the first interesting 8-bits of this sha1."""

733

738

cdef int this_offset

734

739

cdef unsigned int as_uint

735

740

as_uint = _sha1_to_uint(sha1)

736

741

this_offset = (as_uint >> self.common_shift) & 0xFF

737

# assert 0 < this_offset < 256

738

742

return this_offset

739

743

740

744

def test_offset_for_sha1(self, sha1):

757

761

if self.num_entries < 2:

758

762

# Everything is in common if you have 0 or 1 leaves

759

763

# So we'll always just shift to the first byte

760

self.common_mask = 0x0

761

764

self.common_shift = 24

762

765

else:

763

766

common_mask = 0xFFFFFFFF

766

769

this = _sha1_to_uint(self.entries[i].sha1)

767

770

common_mask = (~(first ^ this)) & common_mask

768

771

common_shift = 24

769

self.common_mask = 0 # common_mask

770

772

while common_mask & 0x80000000 and common_shift > 0:

771

773

common_mask = common_mask << 1

772

774

common_shift -= 1

773

self.common_mask = (self.common_mask >> 1) | 0x80000000

774

775

self.common_shift = common_shift

775

self.common_bits = first & self.common_mask

776

offset = 0

777

max_offset = self.num_entries

778

# We cap this loop at 254 entries. All the other offsets just get

Older »