~bzr-pqm/bzr/bzr.dev

« back to all changes in this revision

Viewing changes to bzrlib/dirstate.py

  • Committer: Canonical.com Patch Queue Manager
  • Date: 2007-07-20 19:48:22 UTC
  • mfrom: (2474.1.74 dirstate_pyrex)
  • Revision ID: pqm@pqm.ubuntu.com-20070720194822-smqttk05w6efypf0
(John Arbash Meinel) Implement DirState._read_dirblocks() in pyrex

Show diffs side-by-side

added added

removed removed

Lines of Context:
199
199
 
200
200
"""
201
201
 
202
 
 
 
202
import bisect
203
203
import binascii
204
 
import bisect
205
204
import errno
206
205
import os
207
206
from stat import S_IEXEC
220
219
    )
221
220
 
222
221
 
223
 
class _Bisector(object):
224
 
    """This just keeps track of information as we are bisecting."""
225
 
 
226
 
 
227
222
def pack_stat(st, _encode=binascii.b2a_base64, _pack=struct.pack):
228
223
    """Convert stat values into a packed representation."""
229
224
    # jam 20060614 it isn't really worth removing more entries if we
462
457
        if self._id_index:
463
458
            self._id_index.setdefault(entry_key[2], set()).add(entry_key)
464
459
 
465
 
    def _bisect(self, dir_name_list):
 
460
    def _bisect(self, paths):
466
461
        """Bisect through the disk structure for specific rows.
467
462
 
468
 
        :param dir_name_list: A list of (dir, name) pairs.
469
 
        :return: A dict mapping (dir, name) => entry for found entries. Missing
 
463
        :param paths: A list of paths to find
 
464
        :return: A dict mapping path => entries for found entries. Missing
470
465
                 entries will not be in the map.
 
466
        The returned dict is not sorted; entries will be populated
 
467
                 based on when they were read.
471
468
        """
472
469
        self._requires_lock()
473
470
        # We need the file pointer to be right after the initial header block
493
490
        found = {}
494
491
 
495
492
        # Avoid infinite seeking
496
 
        max_count = 30*len(dir_name_list)
 
493
        max_count = 30*len(paths)
497
494
        count = 0
498
495
        # pending is a list of places to look.
499
496
        # each entry is a tuple of low, high, dir_names
501
498
        #   high -> the last byte offset (inclusive)
502
499
        #   dir_names -> The list of (dir, name) pairs that should be found in
503
500
        #                the [low, high] range
504
 
        pending = [(low, high, dir_name_list)]
 
501
        pending = [(low, high, paths)]
505
502
 
506
503
        page_size = self._bisect_page_size
507
504
 
560
557
                # Find what entries we are looking for, which occur before and
561
558
                # after this first record.
562
559
                after = start
563
 
                first_dir_name = (first_fields[1], first_fields[2])
564
 
                first_loc = bisect.bisect_left(cur_files, first_dir_name)
 
560
                if first_fields[1]:
 
561
                    first_path = first_fields[1] + '/' + first_fields[2]
 
562
                else:
 
563
                    first_path = first_fields[2]
 
564
                first_loc = _bisect_path_left(cur_files, first_path)
565
565
 
566
566
                # These exist before the current location
567
567
                pre = cur_files[:first_loc]
584
584
                else:
585
585
                    after = mid + len(block)
586
586
 
587
 
                last_dir_name = (last_fields[1], last_fields[2])
588
 
                last_loc = bisect.bisect_right(post, last_dir_name)
 
587
                if last_fields[1]:
 
588
                    last_path = last_fields[1] + '/' + last_fields[2]
 
589
                else:
 
590
                    last_path = last_fields[2]
 
591
                last_loc = _bisect_path_right(post, last_path)
589
592
 
590
593
                middle_files = post[:last_loc]
591
594
                post = post[last_loc:]
596
599
                    # Either we will find them here, or we can mark them as
597
600
                    # missing.
598
601
 
599
 
                    if middle_files[0] == first_dir_name:
 
602
                    if middle_files[0] == first_path:
600
603
                        # We might need to go before this location
601
 
                        pre.append(first_dir_name)
602
 
                    if middle_files[-1] == last_dir_name:
603
 
                        post.insert(0, last_dir_name)
 
604
                        pre.append(first_path)
 
605
                    if middle_files[-1] == last_path:
 
606
                        post.insert(0, last_path)
604
607
 
605
608
                    # Find out what paths we have
606
 
                    paths = {first_dir_name:[first_fields]}
607
 
                    # last_dir_name might == first_dir_name so we need to be
 
609
                    paths = {first_path:[first_fields]}
 
610
                    # last_path might == first_path so we need to be
608
611
                    # careful if we should append rather than overwrite
609
612
                    if last_entry_num != first_entry_num:
610
 
                        paths.setdefault(last_dir_name, []).append(last_fields)
 
613
                        paths.setdefault(last_path, []).append(last_fields)
611
614
                    for num in xrange(first_entry_num+1, last_entry_num):
612
615
                        # TODO: jam 20070223 We are already splitting here, so
613
616
                        #       shouldn't we just split the whole thing rather
614
617
                        #       than doing the split again in add_one_record?
615
618
                        fields = entries[num].split('\0')
616
 
                        dir_name = (fields[1], fields[2])
617
 
                        paths.setdefault(dir_name, []).append(fields)
 
619
                        if fields[1]:
 
620
                            path = fields[1] + '/' + fields[2]
 
621
                        else:
 
622
                            path = fields[2]
 
623
                        paths.setdefault(path, []).append(fields)
618
624
 
619
 
                    for dir_name in middle_files:
620
 
                        for fields in paths.get(dir_name, []):
 
625
                    for path in middle_files:
 
626
                        for fields in paths.get(path, []):
621
627
                            # offset by 1 because of the opening '\0'
622
628
                            # consider changing fields_to_entry to avoid the
623
629
                            # extra list slice
624
630
                            entry = fields_to_entry(fields[1:])
625
 
                            found.setdefault(dir_name, []).append(entry)
 
631
                            found.setdefault(path, []).append(entry)
626
632
 
627
633
            # Now we have split up everything into pre, middle, and post, and
628
634
            # we have handled everything that fell in 'middle'.
818
824
 
819
825
        return found
820
826
 
821
 
    def _bisect_recursive(self, dir_name_list):
 
827
    def _bisect_recursive(self, paths):
822
828
        """Bisect for entries for all paths and their children.
823
829
 
824
830
        This will use bisect to find all records for the supplied paths. It
837
843
        # Directories that have been read
838
844
        processed_dirs = set()
839
845
        # Get the ball rolling with the first bisect for all entries.
840
 
        newly_found = self._bisect(dir_name_list)
 
846
        newly_found = self._bisect(paths)
841
847
 
842
848
        while newly_found:
843
849
            # Directories that need to be read
867
873
                            if dir_name[0] in pending_dirs:
868
874
                                # This entry will be found in the dir search
869
875
                                continue
870
 
                            # TODO: We need to check if this entry has
871
 
                            #       already been found. Otherwise we might be
872
 
                            #       hitting infinite recursion.
873
876
                            if dir_name not in found_dir_names:
874
 
                                paths_to_search.add(dir_name)
 
877
                                paths_to_search.add(tree_info[1])
875
878
            # Now we have a list of paths to look for directly, and
876
879
            # directory blocks that need to be read.
877
880
            # newly_found is mixing the keys between (dir, name) and path
1550
1553
        """
1551
1554
        self._read_header_if_needed()
1552
1555
        if self._dirblock_state == DirState.NOT_IN_MEMORY:
1553
 
            # move the _state_file pointer to after the header (in case bisect
1554
 
            # has been called in the mean time)
1555
 
            self._state_file.seek(self._end_of_header)
1556
 
            text = self._state_file.read()
1557
 
            # TODO: check the crc checksums. crc_measured = zlib.crc32(text)
1558
 
 
1559
 
            fields = text.split('\0')
1560
 
            # Remove the last blank entry
1561
 
            trailing = fields.pop()
1562
 
            assert trailing == ''
1563
 
            # consider turning fields into a tuple.
1564
 
 
1565
 
            # skip the first field which is the trailing null from the header.
1566
 
            cur = 1
1567
 
            # Each line now has an extra '\n' field which is not used
1568
 
            # so we just skip over it
1569
 
            # entry size:
1570
 
            #  3 fields for the key
1571
 
            #  + number of fields per tree_data (5) * tree count
1572
 
            #  + newline
1573
 
            num_present_parents = self._num_present_parents()
1574
 
            tree_count = 1 + num_present_parents
1575
 
            entry_size = self._fields_per_entry()
1576
 
            expected_field_count = entry_size * self._num_entries
1577
 
            field_count = len(fields)
1578
 
            # this checks our adjustment, and also catches file too short.
1579
 
            assert field_count - cur == expected_field_count, \
1580
 
                'field count incorrect %s != %s, entry_size=%s, '\
1581
 
                'num_entries=%s fields=%r' % (
1582
 
                    field_count - cur, expected_field_count, entry_size,
1583
 
                    self._num_entries, fields)
1584
 
 
1585
 
            if num_present_parents == 1:
1586
 
                # Bind external functions to local names
1587
 
                _int = int
1588
 
                # We access all fields in order, so we can just iterate over
1589
 
                # them. Grab an straight iterator over the fields. (We use an
1590
 
                # iterator because we don't want to do a lot of additions, nor
1591
 
                # do we want to do a lot of slicing)
1592
 
                next = iter(fields).next
1593
 
                # Move the iterator to the current position
1594
 
                for x in xrange(cur):
1595
 
                    next()
1596
 
                # The two blocks here are deliberate: the root block and the
1597
 
                # contents-of-root block.
1598
 
                self._dirblocks = [('', []), ('', [])]
1599
 
                current_block = self._dirblocks[0][1]
1600
 
                current_dirname = ''
1601
 
                append_entry = current_block.append
1602
 
                for count in xrange(self._num_entries):
1603
 
                    dirname = next()
1604
 
                    name = next()
1605
 
                    file_id = next()
1606
 
                    if dirname != current_dirname:
1607
 
                        # new block - different dirname
1608
 
                        current_block = []
1609
 
                        current_dirname = dirname
1610
 
                        self._dirblocks.append((current_dirname, current_block))
1611
 
                        append_entry = current_block.append
1612
 
                    # we know current_dirname == dirname, so re-use it to avoid
1613
 
                    # creating new strings
1614
 
                    entry = ((current_dirname, name, file_id),
1615
 
                             [(# Current Tree
1616
 
                                 next(),                # minikind
1617
 
                                 next(),                # fingerprint
1618
 
                                 _int(next()),          # size
1619
 
                                 next() == 'y',         # executable
1620
 
                                 next(),                # packed_stat or revision_id
1621
 
                             ),
1622
 
                             ( # Parent 1
1623
 
                                 next(),                # minikind
1624
 
                                 next(),                # fingerprint
1625
 
                                 _int(next()),          # size
1626
 
                                 next() == 'y',         # executable
1627
 
                                 next(),                # packed_stat or revision_id
1628
 
                             ),
1629
 
                             ])
1630
 
                    trailing = next()
1631
 
                    assert trailing == '\n'
1632
 
                    # append the entry to the current block
1633
 
                    append_entry(entry)
1634
 
                self._split_root_dirblock_into_contents()
1635
 
            else:
1636
 
                fields_to_entry = self._get_fields_to_entry()
1637
 
                entries = [fields_to_entry(fields[pos:pos+entry_size])
1638
 
                           for pos in xrange(cur, field_count, entry_size)]
1639
 
                self._entries_to_current_state(entries)
1640
 
            # To convert from format 2  => format 3
1641
 
            # self._dirblocks = sorted(self._dirblocks,
1642
 
            #                          key=lambda blk:blk[0].split('/'))
1643
 
            # To convert from format 3 => format 2
1644
 
            # self._dirblocks = sorted(self._dirblocks)
1645
 
            self._dirblock_state = DirState.IN_MEMORY_UNMODIFIED
 
1556
            _read_dirblocks(self)
1646
1557
 
1647
1558
    def _read_header(self):
1648
1559
        """This reads in the metadata header, and the parent ids.
2372
2283
            raise errors.ObjectNotLocked(self)
2373
2284
 
2374
2285
 
2375
 
def bisect_dirblock(dirblocks, dirname, lo=0, hi=None, cache={}):
    """Return the index where to insert dirname into the dirblocks.

    The return value idx is such that all directory blocks in dirblocks[:idx]
    have names < dirname, and all blocks in dirblocks[idx:] have names >=
    dirname.

    Comparison is done on the '/'-split form of each directory name, so that
    e.g. 'a/b' sorts before 'a-b' (path-component order, not plain string
    order).

    :param dirblocks: A sorted list of (dirname, entries) tuples.
    :param dirname: The directory name to locate.
    :param lo: Lower bound of the slice of dirblocks to search (default 0).
    :param hi: Upper bound of the slice to search (default len(dirblocks)).
    :param cache: Maps dirname -> dirname.split('/').  NOTE: the mutable
        default is deliberate — it acts as a module-lifetime memoization
        cache shared across calls, avoiding re-splitting hot names.
    :return: The insertion index (bisect_left semantics).
    """
    if hi is None:
        hi = len(dirblocks)
    try:
        dirname_split = cache[dirname]
    except KeyError:
        dirname_split = dirname.split('/')
        cache[dirname] = dirname_split
    while lo < hi:
        mid = (lo + hi) // 2
        # Grab the dirname for the current dirblock
        cur = dirblocks[mid][0]
        try:
            cur_split = cache[cur]
        except KeyError:
            cur_split = cur.split('/')
            cache[cur] = cur_split
        # Compare split forms so '/' acts as a component separator rather
        # than an ordinary character in a lexicographic string compare.
        if cur_split < dirname_split:
            lo = mid + 1
        else:
            hi = mid
    return lo
 
2286
# Try to load the compiled form if possible
 
2287
try:
 
2288
    from bzrlib._dirstate_helpers_c import (
 
2289
        _read_dirblocks_c as _read_dirblocks,
 
2290
        bisect_dirblock_c as bisect_dirblock,
 
2291
        _bisect_path_left_c as _bisect_path_left,
 
2292
        _bisect_path_right_c as _bisect_path_right,
 
2293
        cmp_by_dirs_c as cmp_by_dirs,
 
2294
        )
 
2295
except ImportError:
 
2296
    from bzrlib._dirstate_helpers_py import (
 
2297
        _read_dirblocks_py as _read_dirblocks,
 
2298
        bisect_dirblock_py as bisect_dirblock,
 
2299
        _bisect_path_left_py as _bisect_path_left,
 
2300
        _bisect_path_right_py as _bisect_path_right,
 
2301
        cmp_by_dirs_py as cmp_by_dirs,
 
2302
        )