~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/knit.py

Committer: Robert Collins
Date: 2006-03-10 22:19:08 UTC
mto: (1615.1.2 bzr.mbp.integration)
mto: This revision was merged to the branch mainline in revision 1616.
Revision ID: robertc@robertcollins.net-20060310221908-7c6d446b9aa88eb1

More micro-optimisations on index reading, now down to 16000 records/second.

files modified:
bzrlib/knit.py

Show diffs side-by-side

added added

removed removed

bzrlib/knit.py

719

720

HEADER = "# bzr knit index 7\n"

721

722

# speed of knit parsing went from 280 ms to 280 ms with slots addition.

723

# __slots__ = ['_cache', '_history', '_transport', '_filename']

724

722

725

def _cache_version(self, version_id, options, pos, size, parents):

723

val = (version_id, options, pos, size, parents)

726

"""Cache a version record in the history array and index cache.

727

728

This is inlined into __init__ for performance. KEEP IN SYNC.

729

(It saves 60ms, 25% of the __init__ overhead on local 4000 record

730

indexes).

731

"""

724

732

# only want the _history index to reference the 1st index entry

725

733

# for version_id

726

if not version_id in self._cache:

734

if version_id not in self._cache:

727

735

self._history.append(version_id)

728

self._cache[version_id] = val

729

730

def _iter_index(self, fp):

731

l = fp.readline()

732

while l != '':

733

yield l.split()

734

l = fp.readline()

735

#lines = fp.read()

736

#for l in lines.splitlines(False):

737

# yield l.split()

736

self._cache[version_id] = (version_id, options, pos, size, parents)

738

737

739

738

def __init__(self, transport, filename, mode, create=False):

740

739

_KnitComponentFile.__init__(self, transport, filename, mode)

752

751

pb.update('read knit index', count, total)

753

752

fp = self._transport.get(self._filename)

754

753

self.check_header(fp)

755

for rec in self._iter_index(fp):

754

# readlines reads the whole file at once:

755

# bad for transports like http, good for local disk

756

# we save 60 ms doing this one change (

757

# from calling readline each time to calling

758

# readlines once.

759

# probably what we want for nice behaviour on

760

# http is an incremental readlines that yields, or

761

# a check for local vs non local indexes,

762

for l in fp.readlines():

763

rec = l.split()

756

764

count += 1

757

765

total += 1

758

pb.update('read knit index', count, total)

759

parents = self._parse_parents(rec[4:])

760

self._cache_version(rec[0], rec[1].split(','), int(rec[2]), int(rec[3]),

761

parents)

766

#pb.update('read knit index', count, total)

767

# See self._parse_parents

768

parents = []

769

for value in rec[4:]:

770

if '.' == value[-1]:

771

# uncompressed reference

772

parents.append(value[1:])

773

else:

774

# this is 15/4000ms faster than isinstance,

775

# (in lsprof)

776

# this function is called thousands of times a

777

# second so small variations add up.

778

assert value.__class__ is str

779

parents.append(self._history[int(value)])

780

# end self._parse_parents

781

# self._cache_version(rec[0],

782

# rec[1].split(','),

783

# int(rec[2]),

784

# int(rec[3]),

785

# parents)

786

# --- self._cache_version

787

# only want the _history index to reference the 1st

788

# index entry for version_id

789

version_id = rec[0]

790

if version_id not in self._cache:

791

self._history.append(version_id)

792

self._cache[version_id] = (version_id,

793

rec[1].split(','),

794

int(rec[2]),

795

int(rec[3]),

796

parents)

797

# --- self._cache_version

762

798

except NoSuchFile, e:

763

799

if mode != 'w' or not create:

764

800

raise

772

808

773

809

ints are looked up in the index.

774

810

.FOO values are ghosts and converted into FOO.

811

812

NOTE: the function is retained here for clarity, and for possible

813

use in partial index reads. However bulk processing now has

814

it inlined in __init__ for inner-loop optimisation.

775

815

"""

776

816

result = []

777

817

for value in compressed_parents:

778

818

if value[-1] == '.':

819

# uncompressed reference

779

820

result.append(value[1:])

780

821

else:

781

822

# this is 15/4000ms faster than isinstance,

Older »