~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/index.py

Committer: Canonical.com Patch Queue Manager
Date: 2008-09-02 18:44:47 UTC
mfrom: (3665.3.5 graph_index_autobuffer)
Revision ID: pqm@pqm.ubuntu.com-20080902184447-n1nsxw1wcaumxwkb

(jam) Tweaks to index code. If we readv a whole index,
treat it as a GET, and if we read >50% of an index, read it all.

files modified:
NEWS

bzrlib/index.py

bzrlib/tests/test_index.py

Show diffs side-by-side

added added

removed removed

bzrlib/index.py

272

self._keys_by_offset = None

273

self._nodes_by_key = None

274

self._size = size

275

# The number of bytes we've read so far in trying to process this file

276

self._bytes_read = 0

275

277

276

278

def __eq__(self, other):

277

279

"""Equal when self and other were created with the same parameters."""

288

290

return "%s(%r)" % (self.__class__.__name__,

289

291

self._transport.abspath(self._name))

290

292

291

def _buffer_all(self):

293

def _buffer_all(self, stream=None):

292

294

"""Buffer all the index data.

293

295

294

296

Mutates self._nodes and self._keys_by_offset.

295

297

"""

298

if self._nodes is not None:

299

# We already did this

300

return

296

301

if 'index' in debug.debug_flags:

297

302

mutter('Reading entire index %s', self._transport.abspath(self._name))

298

stream = self._transport.get(self._name)

303

if stream is None:

304

stream = self._transport.get(self._name)

299

305

self._read_prefix(stream)

300

306

self._expected_elements = 3 + self._key_length

301

307

line_count = 0

473

479

return []

474

480

if self._size is None and self._nodes is None:

475

481

self._buffer_all()

482

476

483

# We fit about 20 keys per minimum-read (4K), so if we are looking for

477

484

# more than 1/20th of the index it's likely (assuming homogeneous key

478

485

# spread) that we'll read the entire index. If we're going to do that,

629

636

if self._bisect_nodes is None:

630

637

readv_ranges.append(_HEADER_READV)

631

638

self._read_and_parse(readv_ranges)

639

result = []

640

if self._nodes is not None:

641

# _read_and_parse triggered a _buffer_all because we requested the

642

# whole data range

643

for location, key in location_keys:

644

if key not in self._nodes: # not present

645

result.append(((location, key), False))

646

elif self.node_ref_lists:

647

value, refs = self._nodes[key]

648

result.append(((location, key),

649

(self, key, value, refs)))

650

else:

651

result.append(((location, key),

652

(self, key, self._nodes[key])))

653

return result

632

654

# generate results:

633

655

# - figure out <, >, missing, present

634

656

# - resolve present references so we can return them.

635

result = []

636

657

# keys that we cannot answer until we resolve references

637

658

pending_references = []

638

659

pending_locations = set()

688

709

if length > 0:

689

710

readv_ranges.append((location, length))

690

711

self._read_and_parse(readv_ranges)

712

if self._nodes is not None:

713

# The _read_and_parse triggered a _buffer_all, grab the data and

714

# return it

715

for location, key in pending_references:

716

value, refs = self._nodes[key]

717

result.append(((location, key), (self, key, value, refs)))

718

return result

691

719

for location, key in pending_references:

692

720

# answer key references we had to look-up-late.

693

index = self._parsed_key_index(key)

694

721

value, refs = self._bisect_nodes[key]

695

722

result.append(((location, key), (self, key,

696

723

value, self._resolve_references(refs))))

966

993

967

994

:param readv_ranges: A prepared readv range list.

968

995

"""

969

if readv_ranges:

970

readv_data = self._transport.readv(self._name, readv_ranges, True,

971

self._size)

972

# parse

973

for offset, data in readv_data:

974

if self._bisect_nodes is None:

975

# this must be the start

976

if not (offset == 0):

977

raise AssertionError()

978

offset, data = self._parse_header_from_bytes(data)

979

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

980

self._parse_region(offset, data)

996

if not readv_ranges:

997

return

998

if self._nodes is None and self._bytes_read * 2 >= self._size:

999

# We've already read at least 50% of the file and we are about to

1000

# request more data, just _buffer_all() and be done

1001

self._buffer_all()

1002

return

1003

1004

readv_data = self._transport.readv(self._name, readv_ranges, True,

1005

self._size)

1006

# parse

1007

for offset, data in readv_data:

1008

self._bytes_read += len(data)

1009

if offset == 0 and len(data) == self._size:

1010

# We read the whole range, most likely because the

1011

# Transport upcast our readv ranges into one long request

1012

# for enough total data to grab the whole index.

1013

self._buffer_all(StringIO(data))

1014

return

1015

if self._bisect_nodes is None:

1016

# this must be the start

1017

if not (offset == 0):

1018

raise AssertionError()

1019

offset, data = self._parse_header_from_bytes(data)

1020

# print readv_ranges, "[%d:%d]" % (offset, offset + len(data))

1021

self._parse_region(offset, data)

981

1022

982

1023

def _signature(self):

983

1024

"""The file signature for this index type."""

Older »